Fixed listening import

2024-11-15 02:47:37 +00:00
parent e99eda485e
commit 18103c931e
4 changed files with 148 additions and 56 deletions
--- a/app/dtos/exams/listening.py
+++ b/app/dtos/exams/listening.py
@@ -44,17 +44,26 @@ class MultipleChoiceExercise(ExerciseBase):
    questions: List[MCQuestion]


+class WriteBlankQuestion(BaseModel):
+    id: str
+    prompt: str
+    solution: List[str]
+
 class WriteBlanksVariant(str, Enum):
    QUESTIONS = "questions"
    FILL = "fill"
    FORM = "form"

+class WriteBlanksQuestionExercise(ExerciseBase):
+    type: Literal["writeBlanks"]
+    maxWords: int
+    questions: List[WriteBlankQuestion]
+    variant: WriteBlanksVariant

 class WriteBlankSolution(BaseModel):
    id: str
    solution: List[str]

-
 class WriteBlanksExercise(ExerciseBase):
    type: Literal["writeBlanks"]
    maxWords: int
@@ -77,4 +86,4 @@ class ListeningSection(BaseModel):
 class ListeningExam(BaseModel):
    module: str = "listening"
    minTimer: Optional[int]
-    sections: List[ListeningSection]
+    parts: List[ListeningSection]
--- a/app/mappers/listening.py
+++ b/app/mappers/listening.py
@@ -1,14 +1,75 @@
-from typing import Dict, Any
+from typing import Dict, Any, List, Union, Optional

-from app.dtos.exams.listening import TrueFalseExercise, MultipleChoiceExercise, WriteBlanksExercise, ListeningExam, \
-    ListeningSection
+from pydantic import BaseModel
+
+from app.dtos.exams.listening import (
+    TrueFalseExercise,
+    MultipleChoiceExercise,
+    WriteBlanksExercise,
+    ListeningExam,
+    ListeningSection,
+    WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion
+)
+
+class ListeningQuestionSection(BaseModel):
+    exercises: List[Union[TrueFalseExercise, MultipleChoiceExercise, WriteBlanksQuestionExercise]]
+
+class ListeningQuestionExam(BaseModel):
+    parts: List[ListeningQuestionSection]
+    minTimer: Optional[int]
+    module: str = "listening"
+
+class WriteBlankProcessor:
+    @staticmethod
+    def to_question_model(exercise_data: Dict[str, Any]) -> WriteBlanksQuestionExercise:
+        questions = [
+            WriteBlankQuestion(
+                id=q["id"],
+                prompt=q["prompt"],
+                solution=q["solution"]
+            )
+            for q in exercise_data.get("questions", [])
+        ]
+
+        return WriteBlanksQuestionExercise(
+            type="writeBlanks",
+            prompt=exercise_data.get("prompt"),
+            maxWords=exercise_data.get("maxWords"),
+            questions=questions,
+            variant=exercise_data.get("variant", "questions")
+        )
+
+    @staticmethod
+    def to_text_model(question_model: WriteBlanksQuestionExercise) -> WriteBlanksExercise:
+        if question_model.variant == WriteBlanksVariant.QUESTIONS:
+            text = '\\n'.join(f"{q.prompt} {{{{{q.id}}}}}" for q in question_model.questions)
+        elif question_model.variant == WriteBlanksVariant.FILL:
+            text = ' '.join(f"{q.prompt}" for q in question_model.questions)
+        elif question_model.variant == WriteBlanksVariant.FORM:
+            text = '\\n'.join(f"{q.prompt}" for q in question_model.questions)
+        else:
+            raise ValueError(f"Unknown variant: {question_model.variant}")
+
+        solutions = [
+            WriteBlankSolution(id=q.id, solution=q.solution)
+            for q in question_model.questions
+        ]
+
+        return WriteBlanksExercise(
+            type="writeBlanks",
+            prompt=question_model.prompt,
+            maxWords=question_model.maxWords,
+            text=text,
+            solutions=solutions,
+            variant=question_model.variant
+        )


 class ListeningMapper:
    @staticmethod
    def map_to_test_model(response: Dict[str, Any]) -> ListeningExam:
-        sections = []
-        for section in response.get('sections', []):
+        question_parts = []
+        for section in response.get('parts', []):
            section_exercises = []

            for exercise in section['exercises']:
@@ -19,14 +80,33 @@ class ListeningMapper:
                elif exercise_type == 'multipleChoice':
                    section_exercises.append(MultipleChoiceExercise(**exercise))
                elif exercise_type == 'writeBlanks':
-                    section_exercises.append(WriteBlanksExercise(**exercise))
+                    question_model = WriteBlankProcessor.to_question_model(exercise)
+                    section_exercises.append(question_model)
                else:
                    raise ValueError(f"Unknown exercise type: {exercise_type}")

-            sections.append(ListeningSection(exercises=section_exercises))
+            question_parts.append(ListeningQuestionSection(exercises=section_exercises))

-        return ListeningExam(
-            sections=sections,
+        question_exam = ListeningQuestionExam(
+            parts=question_parts,
            minTimer=response.get('minTimer'),
            module="listening"
        )
+
+        final_parts = []
+        for section in question_exam.parts:
+            final_exercises = []
+
+            for exercise in section.exercises:
+                if isinstance(exercise, WriteBlanksQuestionExercise):
+                    final_exercises.append(WriteBlankProcessor.to_text_model(exercise))
+                else:
+                    final_exercises.append(exercise)
+
+            final_parts.append(ListeningSection(exercises=final_exercises))
+
+        return ListeningExam(
+            parts=final_parts,
+            minTimer=response.get('minTimer'),
+            module="listening"
+        )
--- a/app/services/impl/exam/listening/import_listening.py
+++ b/app/services/impl/exam/listening/import_listening.py
@@ -1,3 +1,4 @@
+import json
 from logging import getLogger
 from typing import Dict, Any
 from uuid import uuid4
@@ -18,7 +19,6 @@ class ImportListeningModule:
    async def import_from_file(
            self,
            exercises: UploadFile,
-            audio: UploadFile,
            solutions: UploadFile = None
    ) -> Dict[str, Any] | None:
        path_id = str(uuid4())
@@ -99,51 +99,36 @@ class ImportListeningModule:
        }

    @staticmethod
-    def _write_blanks_questions_template() -> dict:
+    def _write_blanks_template() -> dict:
        return {
            "type": "writeBlanks",
-            "maxWords": "<number>",
+            "maxWords": "<integer max words allowed per answer>",
            "prompt": "<instructions>",
-            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>",
-            "solutions": [
+            "questions": [
                {
                    "id": "<question number as string>",
+                    "prompt": "<question text with blanks replaced with {{id}}>",
                    "solution": ["<acceptable answer(s)>"]
                }
            ],
-            "variant": "questions"
+            "variant": "<one of: questions, fill, form - chosen based on format:\n" +
+                       "- questions: for numbered questions with blank at end\n" +
+                       "- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH, not a set of separate related questions!\n" +
+                       "- form: when questions and fill don't meet the requirements>"
        }

    @staticmethod
-    def _write_blanks_fill_template() -> dict:
+    def _true_false():
        return {
-            "type": "writeBlanks",
-            "maxWords": "<number>",
-            "prompt": "<instructions>",
-            "text": "<A summary with blanks denoted by {{id}}>",
-            "solutions": [
+            "questions": [
                {
-                    "id": "<blank number as string inside {{}}>",
-                    "solution": ["<correct word>"]
+                    "id": "<question number>",
+                    "prompt": "<statement to evaluate>",
+                    "solution": "<one of: true, false, not_given>",
                }
            ],
-            "variant": "fill"
-        }
-
-    @staticmethod
-    def _write_blanks_form_template() -> dict:
-        return {
-            "type": "writeBlanks",
-            "maxWords": "<number>",
-            "prompt": "<instructions>",
-            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
-            "solutions": [
-                {
-                    "id": "<blank number as string inside {{}}>",
-                    "solution": ["<correct word>"]
-                }
-            ],
-            "variant": "form"
+            "type": "trueFalse",
+            "prompt": "<specific instructions including T/F/NG marking scheme>"
        }

    def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
@@ -152,29 +137,47 @@ class ImportListeningModule:
            "role": "system",
            "content": (
                f"You are processing a listening test exercise sheet{solutions_str}. "
-                "Structure each exercise exactly according to these json templates:\n\n"
+                "Structure the test according to this json template:\n\n"
+                f"{self._listening_json_schema()}\n\n"
+                "Each exercise within a section should follow these templates:\n\n"
                f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
-                f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n"
-                f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n"
-                f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
+                f"2. True/False Questions:\n{self._true_false()}\n\n"
+                f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
                "\nImportant rules:\n"
                "1. Keep exact question numbering from the original\n"
                "2. Include all options for multiple choice questions\n"
-                "3. Mark blanks with {{id}} where id is the question number\n"
+                "3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n"
                "4. Set maxWords according to the instructions\n"
                "5. Include all possible correct answers in solution arrays\n"
-                "6. Maintain exact spacing and formatting from templates\n"
-                "7. Use appropriate variant for writeBlanks (questions/fill/form)\n"
+                "6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n"
+                "7. For writeBlanks, choose the appropriate variant:\n"
+                "   - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n"
+                "   - fill: for paragraph/summary with blanks\n"
+                "   - form: when questions and fill don't meet the requirements\n"
                "8. For text fields, use actual newlines between questions/sentences\n"
+                "9. Format text according to chosen variant:\n"
+                "   - questions: each line should end with {{id}}\n"
+                "   - fill: embed {{id}} naturally in the paragraph\n"
+                "   - form: place {{id}} where blank should appear in text\n"
+                "10. For True/False, use exact values: true, false, or not_given\n\n"
+                "11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to "
+                "you and they are uppercase, you should convert them to lowercase.\n\n"
+                "First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
+                "then for each section identify and structure its exercises according to the templates above."
            )
        }

    def _listening_json_schema(self) -> Dict[str, Any]:
        return {
-            "exercises": [
-                self._multiple_choice_template(),
-                self._write_blanks_questions_template(),
-                self._write_blanks_fill_template(),
-                self._write_blanks_form_template()
+            "minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",
+            "parts": [
+                {
+                    "intro": "<optional field that contains information about the section>",
+                    "exercises": [
+                        self._multiple_choice_template(),
+                        self._write_blanks_template(),
+                        self._true_false()
+                    ]
+                }
            ]
        }
--- a/app/services/impl/third_parties/openai.py
+++ b/app/services/impl/third_parties/openai.py
@@ -22,7 +22,7 @@ class OpenAI(ILLMService):
    def __init__(self, client: AsyncOpenAI):
        self._client = client
        self._logger = logging.getLogger(__name__)
-        self._default_model = "gpt-4o-2024-08-06"
+        self._default_model = "gpt-4o"

    async def prediction(
            self,
@@ -125,8 +125,8 @@ class OpenAI(ILLMService):
            result_content = result.choices[0].message.content

            try:
-                print(result_content)
                result_json = json.loads(result_content)
+                print(str(result_json))
                return map_to_model(result_json)
            except Exception as e:
                attempt += 1