From 18103c931ef40860d823e58935adcf630dd549fd Mon Sep 17 00:00:00 2001 From: Carlos-Mesquita Date: Fri, 15 Nov 2024 02:47:37 +0000 Subject: [PATCH] Fixed listening import --- app/dtos/exams/listening.py | 13 ++- app/mappers/listening.py | 98 +++++++++++++++++-- .../impl/exam/listening/import_listening.py | 89 +++++++++-------- app/services/impl/third_parties/openai.py | 4 +- 4 files changed, 148 insertions(+), 56 deletions(-) diff --git a/app/dtos/exams/listening.py b/app/dtos/exams/listening.py index 1390da6..c580121 100644 --- a/app/dtos/exams/listening.py +++ b/app/dtos/exams/listening.py @@ -44,17 +44,26 @@ class MultipleChoiceExercise(ExerciseBase): questions: List[MCQuestion] +class WriteBlankQuestion(BaseModel): + id: str + prompt: str + solution: List[str] + class WriteBlanksVariant(str, Enum): QUESTIONS = "questions" FILL = "fill" FORM = "form" +class WriteBlanksQuestionExercise(ExerciseBase): + type: Literal["writeBlanks"] + maxWords: int + questions: List[WriteBlankQuestion] + variant: WriteBlanksVariant class WriteBlankSolution(BaseModel): id: str solution: List[str] - class WriteBlanksExercise(ExerciseBase): type: Literal["writeBlanks"] maxWords: int @@ -77,4 +86,4 @@ class ListeningSection(BaseModel): class ListeningExam(BaseModel): module: str = "listening" minTimer: Optional[int] - sections: List[ListeningSection] \ No newline at end of file + parts: List[ListeningSection] \ No newline at end of file diff --git a/app/mappers/listening.py b/app/mappers/listening.py index 5ee135b..ab0b3b5 100644 --- a/app/mappers/listening.py +++ b/app/mappers/listening.py @@ -1,14 +1,75 @@ -from typing import Dict, Any +from typing import Dict, Any, List, Union, Optional -from app.dtos.exams.listening import TrueFalseExercise, MultipleChoiceExercise, WriteBlanksExercise, ListeningExam, \ - ListeningSection +from pydantic import BaseModel + +from app.dtos.exams.listening import ( + TrueFalseExercise, + MultipleChoiceExercise, + WriteBlanksExercise, + ListeningExam, + ListeningSection, + WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion +) + +class ListeningQuestionSection(BaseModel): + exercises: List[Union[TrueFalseExercise, MultipleChoiceExercise, WriteBlanksQuestionExercise]] + +class ListeningQuestionExam(BaseModel): + parts: List[ListeningQuestionSection] + minTimer: Optional[int] + module: str = "listening" + +class WriteBlankProcessor: + @staticmethod + def to_question_model(exercise_data: Dict[str, Any]) -> WriteBlanksQuestionExercise: + questions = [ + WriteBlankQuestion( + id=q["id"], + prompt=q["prompt"], + solution=q["solution"] + ) + for q in exercise_data.get("questions", []) + ] + + return WriteBlanksQuestionExercise( + type="writeBlanks", + prompt=exercise_data.get("prompt"), + maxWords=exercise_data.get("maxWords"), + questions=questions, + variant=exercise_data.get("variant", "questions") + ) + + @staticmethod + def to_text_model(question_model: WriteBlanksQuestionExercise) -> WriteBlanksExercise: + if question_model.variant == WriteBlanksVariant.QUESTIONS: + text = '\\n'.join(f"{q.prompt} {{{{{q.id}}}}}" for q in question_model.questions) + elif question_model.variant == WriteBlanksVariant.FILL: + text = ' '.join(f"{q.prompt}" for q in question_model.questions) + elif question_model.variant == WriteBlanksVariant.FORM: + text = '\\n'.join(f"{q.prompt}" for q in question_model.questions) + else: + raise ValueError(f"Unknown variant: {question_model.variant}") + + solutions = [ + WriteBlankSolution(id=q.id, solution=q.solution) + for q in question_model.questions + ] + + return WriteBlanksExercise( + type="writeBlanks", + prompt=question_model.prompt, + maxWords=question_model.maxWords, + text=text, + solutions=solutions, + variant=question_model.variant + ) class ListeningMapper: @staticmethod def map_to_test_model(response: Dict[str, Any]) -> ListeningExam: - sections = [] - for section in response.get('sections', []): + question_parts = [] + for section in response.get('parts', []): section_exercises = [] for exercise in section['exercises']: @@ -19,14 +80,33 @@ class ListeningMapper: elif exercise_type == 'multipleChoice': section_exercises.append(MultipleChoiceExercise(**exercise)) elif exercise_type == 'writeBlanks': - section_exercises.append(WriteBlanksExercise(**exercise)) + question_model = WriteBlankProcessor.to_question_model(exercise) + section_exercises.append(question_model) else: raise ValueError(f"Unknown exercise type: {exercise_type}") - sections.append(ListeningSection(exercises=section_exercises)) + question_parts.append(ListeningQuestionSection(exercises=section_exercises)) - return ListeningExam( - sections=sections, + question_exam = ListeningQuestionExam( + parts=question_parts, minTimer=response.get('minTimer'), module="listening" ) + + final_parts = [] + for section in question_exam.parts: + final_exercises = [] + + for exercise in section.exercises: + if isinstance(exercise, WriteBlanksQuestionExercise): + final_exercises.append(WriteBlankProcessor.to_text_model(exercise)) + else: + final_exercises.append(exercise) + + final_parts.append(ListeningSection(exercises=final_exercises)) + + return ListeningExam( + parts=final_parts, + minTimer=response.get('minTimer'), + module="listening" + ) \ No newline at end of file diff --git a/app/services/impl/exam/listening/import_listening.py b/app/services/impl/exam/listening/import_listening.py index 8a945dc..af330d4 100644 --- a/app/services/impl/exam/listening/import_listening.py +++ b/app/services/impl/exam/listening/import_listening.py @@ -1,3 +1,4 @@ +import json from logging import getLogger from typing import Dict, Any from uuid import uuid4 @@ -18,7 +19,6 @@ class ImportListeningModule: async def import_from_file( self, exercises: UploadFile, - audio: UploadFile, solutions: UploadFile = None ) -> Dict[str, Any] | None: path_id = str(uuid4()) @@ -99,51 +99,36 @@ class ImportListeningModule: } @staticmethod - def _write_blanks_questions_template() -> dict: + def _write_blanks_template() -> dict: return { "type": "writeBlanks", - "maxWords": "", + "maxWords": "", "prompt": "", - "text": "", - "solutions": [ + "questions": [ { "id": "", + "prompt": "", "solution": [""] } ], - "variant": "questions" + "variant": "" } @staticmethod - def _write_blanks_fill_template() -> dict: + def _true_false(): return { - "type": "writeBlanks", - "maxWords": "", - "prompt": "", - "text": "", - "solutions": [ + "questions": [ { - "id": "", - "solution": [""] + "id": "", + "prompt": "", + "solution": "", } ], - "variant": "fill" - } - - @staticmethod - def _write_blanks_form_template() -> dict: - return { - "type": "writeBlanks", - "maxWords": "", - "prompt": "", - "text": "", - "solutions": [ - { - "id": "", - "solution": [""] - } - ], - "variant": "form" + "type": "trueFalse", + "prompt": "" } def _instructions(self, has_solutions: bool = False) -> Dict[str, str]: @@ -152,29 +137,47 @@ class ImportListeningModule: "role": "system", "content": ( f"You are processing a listening test exercise sheet{solutions_str}. " - "Structure each exercise exactly according to these json templates:\n\n" + "Structure the test according to this json template:\n\n" + f"{self._listening_json_schema()}\n\n" + "Each exercise within a section should follow these templates:\n\n" f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n" - f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n" - f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n" - f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n" + f"2. True/False Questions:\n{self._true_false()}\n\n" + f"3. Write Blanks:\n{self._write_blanks_template()}\n\n" "\nImportant rules:\n" "1. Keep exact question numbering from the original\n" "2. Include all options for multiple choice questions\n" - "3. Mark blanks with {{id}} where id is the question number\n" + "3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n" "4. Set maxWords according to the instructions\n" "5. Include all possible correct answers in solution arrays\n" - "6. Maintain exact spacing and formatting from templates\n" - "7. Use appropriate variant for writeBlanks (questions/fill/form)\n" + "6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n" + "7. For writeBlanks, choose the appropriate variant:\n" + " - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n" + " - fill: for paragraph/summary with blanks\n" + " - form: when questions and fill dont meet the requirements\n" "8. For text fields, use actual newlines between questions/sentences\n" + "9. Format text according to chosen variant:\n" + " - questions: each line should end with {{id}}\n" + " - fill: embed {{id}} naturally in the paragraph\n" + " - form: place {{id}} where blank should appear in text\n" + "10. For True/False, use exact values: true, false, or not_given\n\n" + "11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to " + "you and they are uppercase you should placed them in lowercase.\n\n" + "First identify all sections/parts by looking for 'SECTION n' headers or similar ones, " + "then for each section identify and structure its exercises according to the templates above." ) } def _listening_json_schema(self) -> Dict[str, Any]: return { - "exercises": [ - self._multiple_choice_template(), - self._write_blanks_questions_template(), - self._write_blanks_fill_template(), - self._write_blanks_form_template() + "minTimer": "", + "parts": [ + { + "intro": "", + "exercises": [ + self._multiple_choice_template(), + self._write_blanks_template(), + self._true_false() + ] + } ] } \ No newline at end of file diff --git a/app/services/impl/third_parties/openai.py b/app/services/impl/third_parties/openai.py index e4ae820..48c0d18 100644 --- a/app/services/impl/third_parties/openai.py +++ b/app/services/impl/third_parties/openai.py @@ -22,7 +22,7 @@ class OpenAI(ILLMService): def __init__(self, client: AsyncOpenAI): self._client = client self._logger = logging.getLogger(__name__) - self._default_model = "gpt-4o-2024-08-06" + self._default_model = "gpt-4o" async def prediction( self, @@ -125,8 +125,8 @@ class OpenAI(ILLMService): result_content = result.choices[0].message.content try: - print(result_content) result_json = json.loads(result_content) + print(str(result_json)) return map_to_model(result_json) except Exception as e: attempt += 1