Merged in release/async (pull request #39)

Fixed listening import

Approved-by: Tiago Ribeiro
This commit is contained in:
carlos.mesquita
2024-11-15 11:21:29 +00:00
committed by Tiago Ribeiro
4 changed files with 148 additions and 56 deletions

View File

@@ -44,17 +44,26 @@ class MultipleChoiceExercise(ExerciseBase):
questions: List[MCQuestion] questions: List[MCQuestion]
class WriteBlankQuestion(BaseModel):
    """A single write-blanks question: its id, its prompt text and its accepted answers."""

    # Identifier of the question; it is the value used inside the ``{{id}}``
    # blank placeholder when the exercise text is assembled.
    id: str
    # Text of the question.
    prompt: str
    # Acceptable answer(s) for this question's blank.
    solution: List[str]
class WriteBlanksVariant(str, Enum): class WriteBlanksVariant(str, Enum):
QUESTIONS = "questions" QUESTIONS = "questions"
FILL = "fill" FILL = "fill"
FORM = "form" FORM = "form"
class WriteBlanksQuestionExercise(ExerciseBase):
    """Write-blanks exercise expressed as a list of per-question prompts.

    Uses the same ``type`` tag ("writeBlanks") as ``WriteBlanksExercise``, but
    holds ``questions`` rather than a single text plus a solutions list.

    NOTE(review): any further fields (e.g. an exercise-level prompt) presumably
    come from ``ExerciseBase``, which is not visible here - confirm.
    """

    # Discriminating tag; only the literal "writeBlanks" is accepted.
    type: Literal["writeBlanks"]
    # Word limit for an answer.
    maxWords: int
    # One entry per question/blank.
    questions: List[WriteBlankQuestion]
    # Layout of the exercise: questions, fill or form.
    variant: WriteBlanksVariant
class WriteBlankSolution(BaseModel): class WriteBlankSolution(BaseModel):
id: str id: str
solution: List[str] solution: List[str]
class WriteBlanksExercise(ExerciseBase): class WriteBlanksExercise(ExerciseBase):
type: Literal["writeBlanks"] type: Literal["writeBlanks"]
maxWords: int maxWords: int
@@ -77,4 +86,4 @@ class ListeningSection(BaseModel):
class ListeningExam(BaseModel): class ListeningExam(BaseModel):
module: str = "listening" module: str = "listening"
minTimer: Optional[int] minTimer: Optional[int]
sections: List[ListeningSection] parts: List[ListeningSection]

View File

@@ -1,14 +1,75 @@
from typing import Dict, Any from typing import Dict, Any, List, Union, Optional
from app.dtos.exams.listening import TrueFalseExercise, MultipleChoiceExercise, WriteBlanksExercise, ListeningExam, \ from pydantic import BaseModel
ListeningSection
from app.dtos.exams.listening import (
TrueFalseExercise,
MultipleChoiceExercise,
WriteBlanksExercise,
ListeningExam,
ListeningSection,
WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion
)
class ListeningQuestionSection(BaseModel):
    """One part of a listening exam whose write-blanks exercises are in per-question form."""

    # The write-blanks member of this union is WriteBlanksQuestionExercise
    # (the per-question shape), not WriteBlanksExercise.
    exercises: List[Union[TrueFalseExercise, MultipleChoiceExercise, WriteBlanksQuestionExercise]]
class ListeningQuestionExam(BaseModel):
    """Listening exam made of ``ListeningQuestionSection`` parts."""

    # The parts (sections) of the exam, in order.
    parts: List[ListeningQuestionSection]
    # Exam timer; may be None. NOTE(review): presumably minutes - confirm.
    minTimer: Optional[int]
    # Module tag; defaults to "listening".
    module: str = "listening"
class WriteBlankProcessor:
    """Converts write-blanks exercises between per-question and text form.

    ``to_question_model`` builds a ``WriteBlanksQuestionExercise`` from a raw
    dict; ``to_text_model`` flattens that model into a ``WriteBlanksExercise``
    holding one ``text`` string plus a list of solutions.
    """

    @staticmethod
    def to_question_model(exercise_data: Dict[str, Any]) -> WriteBlanksQuestionExercise:
        """Build the per-question model from a raw exercise dict.

        Args:
            exercise_data: Raw exercise mapping. Each entry of its
                ``questions`` list must provide ``id``, ``prompt`` and
                ``solution``; a missing ``questions`` key yields no questions
                and a missing ``variant`` defaults to ``"questions"``.

        Returns:
            The validated ``WriteBlanksQuestionExercise``.

        Raises:
            KeyError: If a question entry lacks ``id``, ``prompt`` or
                ``solution``.
        """
        questions = [
            WriteBlankQuestion(
                id=q["id"],
                prompt=q["prompt"],
                solution=q["solution"],
            )
            for q in exercise_data.get("questions", [])
        ]

        return WriteBlanksQuestionExercise(
            type="writeBlanks",
            prompt=exercise_data.get("prompt"),
            maxWords=exercise_data.get("maxWords"),
            questions=questions,
            variant=exercise_data.get("variant", "questions"),
        )

    @staticmethod
    def _question_line(prompt: str, question_id: str) -> str:
        """Return ``prompt`` followed by its ``{{id}}`` blank, added only if absent."""
        placeholder = f"{{{{{question_id}}}}}"
        # A prompt that already carries its blank must not get a second one,
        # otherwise the same blank would appear twice in the final text.
        if placeholder in prompt:
            return prompt
        return f"{prompt} {placeholder}"

    @staticmethod
    def to_text_model(question_model: WriteBlanksQuestionExercise) -> WriteBlanksExercise:
        """Flatten a per-question exercise into the text-based exercise model.

        The text is assembled per variant:
          * questions: one line per question, each carrying its ``{{id}}``
            blank (appended when the prompt does not already contain it);
          * fill: prompts joined with single spaces;
          * form: prompts joined line by line.

        Args:
            question_model: The per-question exercise to convert.

        Returns:
            A ``WriteBlanksExercise`` with the assembled ``text`` and one
            ``WriteBlankSolution`` per question.

        Raises:
            ValueError: If the variant is not questions, fill or form.
        """
        # NOTE(review): the line separator is the two-character sequence
        # backslash + "n", not a real newline; presumably the consumer of
        # ``text`` expects the escaped form - confirm before changing it.
        if question_model.variant == WriteBlanksVariant.QUESTIONS:
            text = '\\n'.join(
                WriteBlankProcessor._question_line(q.prompt, q.id)
                for q in question_model.questions
            )
        elif question_model.variant == WriteBlanksVariant.FILL:
            text = ' '.join(q.prompt for q in question_model.questions)
        elif question_model.variant == WriteBlanksVariant.FORM:
            text = '\\n'.join(q.prompt for q in question_model.questions)
        else:
            raise ValueError(f"Unknown variant: {question_model.variant}")

        solutions = [
            WriteBlankSolution(id=q.id, solution=q.solution)
            for q in question_model.questions
        ]

        return WriteBlanksExercise(
            type="writeBlanks",
            prompt=question_model.prompt,
            maxWords=question_model.maxWords,
            text=text,
            solutions=solutions,
            variant=question_model.variant,
        )
class ListeningMapper: class ListeningMapper:
@staticmethod @staticmethod
def map_to_test_model(response: Dict[str, Any]) -> ListeningExam: def map_to_test_model(response: Dict[str, Any]) -> ListeningExam:
sections = [] question_parts = []
for section in response.get('sections', []): for section in response.get('parts', []):
section_exercises = [] section_exercises = []
for exercise in section['exercises']: for exercise in section['exercises']:
@@ -19,14 +80,33 @@ class ListeningMapper:
elif exercise_type == 'multipleChoice': elif exercise_type == 'multipleChoice':
section_exercises.append(MultipleChoiceExercise(**exercise)) section_exercises.append(MultipleChoiceExercise(**exercise))
elif exercise_type == 'writeBlanks': elif exercise_type == 'writeBlanks':
section_exercises.append(WriteBlanksExercise(**exercise)) question_model = WriteBlankProcessor.to_question_model(exercise)
section_exercises.append(question_model)
else: else:
raise ValueError(f"Unknown exercise type: {exercise_type}") raise ValueError(f"Unknown exercise type: {exercise_type}")
sections.append(ListeningSection(exercises=section_exercises)) question_parts.append(ListeningQuestionSection(exercises=section_exercises))
return ListeningExam( question_exam = ListeningQuestionExam(
sections=sections, parts=question_parts,
minTimer=response.get('minTimer'),
module="listening"
)
final_parts = []
for section in question_exam.parts:
final_exercises = []
for exercise in section.exercises:
if isinstance(exercise, WriteBlanksQuestionExercise):
final_exercises.append(WriteBlankProcessor.to_text_model(exercise))
else:
final_exercises.append(exercise)
final_parts.append(ListeningSection(exercises=final_exercises))
return ListeningExam(
parts=final_parts,
minTimer=response.get('minTimer'), minTimer=response.get('minTimer'),
module="listening" module="listening"
) )

View File

@@ -1,3 +1,4 @@
import json
from logging import getLogger from logging import getLogger
from typing import Dict, Any from typing import Dict, Any
from uuid import uuid4 from uuid import uuid4
@@ -18,7 +19,6 @@ class ImportListeningModule:
async def import_from_file( async def import_from_file(
self, self,
exercises: UploadFile, exercises: UploadFile,
audio: UploadFile,
solutions: UploadFile = None solutions: UploadFile = None
) -> Dict[str, Any] | None: ) -> Dict[str, Any] | None:
path_id = str(uuid4()) path_id = str(uuid4())
@@ -99,51 +99,36 @@ class ImportListeningModule:
} }
@staticmethod @staticmethod
def _write_blanks_questions_template() -> dict: def _write_blanks_template() -> dict:
return { return {
"type": "writeBlanks", "type": "writeBlanks",
"maxWords": "<number>", "maxWords": "<integer max words allowed per answer>",
"prompt": "<instructions>", "prompt": "<instructions>",
"text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>", "questions": [
"solutions": [
{ {
"id": "<question number as string>", "id": "<question number as string>",
"prompt": "<question text with blanks replaced with {{id}}>",
"solution": ["<acceptable answer(s)>"] "solution": ["<acceptable answer(s)>"]
} }
], ],
"variant": "questions" "variant": "<one of: questions, fill, form - chosen based on format:\n" +
"- questions: for numbered questions with blank at end\n" +
"- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH not separated related questions!\n" +
"- form: when questions and fill dont meet the requirements>"
} }
@staticmethod @staticmethod
def _write_blanks_fill_template() -> dict: def _true_false():
return { return {
"type": "writeBlanks", "questions": [
"maxWords": "<number>",
"prompt": "<instructions>",
"text": "<A summary with blanks denoted by {{id}}>",
"solutions": [
{ {
"id": "<blank number as string inside {{}}>", "id": "<question number>",
"solution": ["<correct word>"] "prompt": "<statement to evaluate>",
"solution": "<one of: true, false, not_given>",
} }
], ],
"variant": "fill" "type": "trueFalse",
} "prompt": "<specific instructions including T/F/NG marking scheme>"
@staticmethod
def _write_blanks_form_template() -> dict:
return {
"type": "writeBlanks",
"maxWords": "<number>",
"prompt": "<instructions>",
"text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
"solutions": [
{
"id": "<blank number as string inside {{}}>",
"solution": ["<correct word>"]
}
],
"variant": "form"
} }
def _instructions(self, has_solutions: bool = False) -> Dict[str, str]: def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
@@ -152,29 +137,47 @@ class ImportListeningModule:
"role": "system", "role": "system",
"content": ( "content": (
f"You are processing a listening test exercise sheet{solutions_str}. " f"You are processing a listening test exercise sheet{solutions_str}. "
"Structure each exercise exactly according to these json templates:\n\n" "Structure the test according to this json template:\n\n"
f"{self._listening_json_schema()}\n\n"
"Each exercise within a section should follow these templates:\n\n"
f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n" f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n" f"2. True/False Questions:\n{self._true_false()}\n\n"
f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n" f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
"\nImportant rules:\n" "\nImportant rules:\n"
"1. Keep exact question numbering from the original\n" "1. Keep exact question numbering from the original\n"
"2. Include all options for multiple choice questions\n" "2. Include all options for multiple choice questions\n"
"3. Mark blanks with {{id}} where id is the question number\n" "3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n"
"4. Set maxWords according to the instructions\n" "4. Set maxWords according to the instructions\n"
"5. Include all possible correct answers in solution arrays\n" "5. Include all possible correct answers in solution arrays\n"
"6. Maintain exact spacing and formatting from templates\n" "6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n"
"7. Use appropriate variant for writeBlanks (questions/fill/form)\n" "7. For writeBlanks, choose the appropriate variant:\n"
" - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n"
" - fill: for paragraph/summary with blanks\n"
" - form: when questions and fill dont meet the requirements\n"
"8. For text fields, use actual newlines between questions/sentences\n" "8. For text fields, use actual newlines between questions/sentences\n"
"9. Format text according to chosen variant:\n"
" - questions: each line should end with {{id}}\n"
" - fill: embed {{id}} naturally in the paragraph\n"
" - form: place {{id}} where blank should appear in text\n"
"10. For True/False, use exact values: true, false, or not_given\n\n"
"11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to "
"you and they are uppercase you should placed them in lowercase.\n\n"
"First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
"then for each section identify and structure its exercises according to the templates above."
) )
} }
def _listening_json_schema(self) -> Dict[str, Any]: def _listening_json_schema(self) -> Dict[str, Any]:
return { return {
"minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",
"parts": [
{
"intro": "<optional field that contains information about the section>",
"exercises": [ "exercises": [
self._multiple_choice_template(), self._multiple_choice_template(),
self._write_blanks_questions_template(), self._write_blanks_template(),
self._write_blanks_fill_template(), self._true_false()
self._write_blanks_form_template() ]
}
] ]
} }

View File

@@ -22,7 +22,7 @@ class OpenAI(ILLMService):
def __init__(self, client: AsyncOpenAI): def __init__(self, client: AsyncOpenAI):
self._client = client self._client = client
self._logger = logging.getLogger(__name__) self._logger = logging.getLogger(__name__)
self._default_model = "gpt-4o-2024-08-06" self._default_model = "gpt-4o"
async def prediction( async def prediction(
self, self,
@@ -125,8 +125,8 @@ class OpenAI(ILLMService):
result_content = result.choices[0].message.content result_content = result.choices[0].message.content
try: try:
print(result_content)
result_json = json.loads(result_content) result_json = json.loads(result_content)
print(str(result_json))
return map_to_model(result_json) return map_to_model(result_json)
except Exception as e: except Exception as e:
attempt += 1 attempt += 1