From 18103c931ef40860d823e58935adcf630dd549fd Mon Sep 17 00:00:00 2001
From: Carlos-Mesquita <cmesquita1999@gmail.com>
Date: Fri, 15 Nov 2024 02:47:37 +0000
Subject: [PATCH] Fixed listening import

---
 app/dtos/exams/listening.py                   | 13 ++-
 app/mappers/listening.py                      | 98 +++++++++++++++++--
 .../impl/exam/listening/import_listening.py   | 89 +++++++++--------
 app/services/impl/third_parties/openai.py     |  4 +-
 4 files changed, 148 insertions(+), 56 deletions(-)

diff --git a/app/dtos/exams/listening.py b/app/dtos/exams/listening.py
index 1390da6..c580121 100644
--- a/app/dtos/exams/listening.py
+++ b/app/dtos/exams/listening.py
@@ -44,17 +44,26 @@ class MultipleChoiceExercise(ExerciseBase):
     questions: List[MCQuestion]
 
 
+class WriteBlankQuestion(BaseModel):  # one writeBlanks question: its prompt plus accepted answers
+    id: str
+    prompt: str
+    solution: List[str]  # every accepted answer for this question
+
 class WriteBlanksVariant(str, Enum):
     QUESTIONS = "questions"
     FILL = "fill"
     FORM = "form"
 
+class WriteBlanksQuestionExercise(ExerciseBase):  # writeBlanks exercise kept as a list of questions
+    type: Literal["writeBlanks"]
+    maxWords: int  # presumably the per-answer word limit (the import template describes it that way)
+    questions: List[WriteBlankQuestion]
+    variant: WriteBlanksVariant  # "questions", "fill" or "form"
 
 class WriteBlankSolution(BaseModel):
     id: str
     solution: List[str]
 
-
 class WriteBlanksExercise(ExerciseBase):
     type: Literal["writeBlanks"]
     maxWords: int
@@ -77,4 +86,4 @@ class ListeningSection(BaseModel):
 class ListeningExam(BaseModel):
     module: str = "listening"
     minTimer: Optional[int]
-    sections: List[ListeningSection]
\ No newline at end of file
+    parts: List[ListeningSection]  # one ListeningSection per exam part
\ No newline at end of file
diff --git a/app/mappers/listening.py b/app/mappers/listening.py
index 5ee135b..ab0b3b5 100644
--- a/app/mappers/listening.py
+++ b/app/mappers/listening.py
@@ -1,14 +1,75 @@
-from typing import Dict, Any
+from typing import Dict, Any, List, Union, Optional
 
-from app.dtos.exams.listening import TrueFalseExercise, MultipleChoiceExercise, WriteBlanksExercise, ListeningExam, \
-    ListeningSection
+from pydantic import BaseModel
+
+from app.dtos.exams.listening import (
+    TrueFalseExercise,
+    MultipleChoiceExercise,
+    WriteBlanksExercise,
+    ListeningExam,
+    ListeningSection,
+    WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion
+)
+
+class ListeningQuestionSection(BaseModel):  # one exam part while writeBlanks exercises are still per-question
+    exercises: List[Union[TrueFalseExercise, MultipleChoiceExercise, WriteBlanksQuestionExercise]]
+
+class ListeningQuestionExam(BaseModel):  # intermediate exam that ListeningMapper builds before producing ListeningExam
+    parts: List[ListeningQuestionSection]
+    minTimer: Optional[int]
+    module: str = "listening"
+
+class WriteBlankProcessor:
+    """Converts writeBlanks exercises between per-question and flat-text shapes."""
+
+    @staticmethod
+    def to_question_model(exercise_data: Dict[str, Any]) -> WriteBlanksQuestionExercise:
+        """Build the per-question model; a missing "variant" defaults to "questions"."""
+        return WriteBlanksQuestionExercise(
+            type="writeBlanks",
+            prompt=exercise_data.get("prompt"),
+            maxWords=exercise_data.get("maxWords"),
+            questions=[
+                WriteBlankQuestion(id=q["id"], prompt=q["prompt"], solution=q["solution"])
+                for q in exercise_data.get("questions", [])
+            ],
+            variant=exercise_data.get("variant", "questions")
+        )
+
+    @staticmethod
+    def to_text_model(question_model: WriteBlanksQuestionExercise) -> WriteBlanksExercise:
+        """Flatten the questions into a single text plus a list of solutions.
+
+        For the "questions" variant a {{id}} blank is appended only when the prompt
+        does not already contain it, so prompts that embed the blank are not doubled.
+        Raises ValueError for a variant outside WriteBlanksVariant.
+        """
+        # NOTE(review): '\\n' is backslash + n, not a newline; confirm consumers split on it.
+        if question_model.variant == WriteBlanksVariant.QUESTIONS:
+            pairs = [(q.prompt, f"{{{{{q.id}}}}}") for q in question_model.questions]
+            text = '\\n'.join(p if blank in p else f"{p} {blank}" for p, blank in pairs)
+        elif question_model.variant == WriteBlanksVariant.FILL:
+            text = ' '.join(q.prompt for q in question_model.questions)
+        elif question_model.variant == WriteBlanksVariant.FORM:
+            text = '\\n'.join(q.prompt for q in question_model.questions)
+        else:
+            raise ValueError(f"Unknown variant: {question_model.variant}")
+
+        return WriteBlanksExercise(
+            type="writeBlanks",
+            prompt=question_model.prompt,
+            maxWords=question_model.maxWords,
+            text=text,
+            solutions=[WriteBlankSolution(id=q.id, solution=q.solution) for q in question_model.questions],
+            variant=question_model.variant
+        )
 
 
 class ListeningMapper:
     @staticmethod
     def map_to_test_model(response: Dict[str, Any]) -> ListeningExam:
-        sections = []
-        for section in response.get('sections', []):
+        question_parts = []
+        for section in response.get('parts', []):
             section_exercises = []
 
             for exercise in section['exercises']:
@@ -19,14 +80,33 @@ class ListeningMapper:
                 elif exercise_type == 'multipleChoice':
                     section_exercises.append(MultipleChoiceExercise(**exercise))
                 elif exercise_type == 'writeBlanks':
-                    section_exercises.append(WriteBlanksExercise(**exercise))
+                    question_model = WriteBlankProcessor.to_question_model(exercise)
+                    section_exercises.append(question_model)
                 else:
                     raise ValueError(f"Unknown exercise type: {exercise_type}")
 
-            sections.append(ListeningSection(exercises=section_exercises))
+            question_parts.append(ListeningQuestionSection(exercises=section_exercises))
 
-        return ListeningExam(
-            sections=sections,
+        question_exam = ListeningQuestionExam(
+            parts=question_parts,
             minTimer=response.get('minTimer'),
             module="listening"
         )
+
+        final_parts = []
+        for section in question_exam.parts:
+            final_exercises = []
+
+            for exercise in section.exercises:
+                if isinstance(exercise, WriteBlanksQuestionExercise):
+                    final_exercises.append(WriteBlankProcessor.to_text_model(exercise))
+                else:
+                    final_exercises.append(exercise)
+
+            final_parts.append(ListeningSection(exercises=final_exercises))
+
+        return ListeningExam(
+            parts=final_parts,
+            minTimer=response.get('minTimer'),
+            module="listening"
+        )
\ No newline at end of file
diff --git a/app/services/impl/exam/listening/import_listening.py b/app/services/impl/exam/listening/import_listening.py
index 8a945dc..af330d4 100644
--- a/app/services/impl/exam/listening/import_listening.py
+++ b/app/services/impl/exam/listening/import_listening.py
@@ -1,3 +1,4 @@
+import json
 from logging import getLogger
 from typing import Dict, Any
 from uuid import uuid4
@@ -18,7 +19,6 @@ class ImportListeningModule:
     async def import_from_file(
             self,
             exercises: UploadFile,
-            audio: UploadFile,
             solutions: UploadFile = None
     ) -> Dict[str, Any] | None:
         path_id = str(uuid4())
@@ -99,51 +99,36 @@ class ImportListeningModule:
         }
 
     @staticmethod
-    def _write_blanks_questions_template() -> dict:
+    def _write_blanks_template() -> dict:  # one writeBlanks template; the "variant" value is left for the model to choose
         return {
             "type": "writeBlanks",
-            "maxWords": "<number>",
+            "maxWords": "<integer max words allowed per answer>",
             "prompt": "<instructions>",
-            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>",
-            "solutions": [
+            "questions": [
                 {
                     "id": "<question number as string>",
+                    "prompt": "<question text with blanks replaced with {{id}}>",
                     "solution": ["<acceptable answer(s)>"]
                 }
             ],
-            "variant": "questions"
+            "variant": "<one of: questions, fill, form - chosen based on format:\n" +
+                       "- questions: for numbered questions with blank at end\n" +
+                       "- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH not separated related questions!\n" +
+                       "- form: when questions and fill dont meet the requirements>"
         }
 
     @staticmethod
-    def _write_blanks_fill_template() -> dict:
+    def _true_false() -> dict:  # prompt template describing one trueFalse exercise
         return {
-            "type": "writeBlanks",
-            "maxWords": "<number>",
-            "prompt": "<instructions>",
-            "text": "<A summary with blanks denoted by {{id}}>",
-            "solutions": [
+            "questions": [
                 {
-                    "id": "<blank number as string inside {{}}>",
-                    "solution": ["<correct word>"]
+                    "id": "<question number>",
+                    "prompt": "<statement to evaluate>",
+                    "solution": "<one of: true, false, not_given>",
                 }
             ],
-            "variant": "fill"
-        }
-
-    @staticmethod
-    def _write_blanks_form_template() -> dict:
-        return {
-            "type": "writeBlanks",
-            "maxWords": "<number>",
-            "prompt": "<instructions>",
-            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
-            "solutions": [
-                {
-                    "id": "<blank number as string inside {{}}>",
-                    "solution": ["<correct word>"]
-                }
-            ],
-            "variant": "form"
+            "type": "trueFalse",
+            "prompt": "<specific instructions including T/F/NG marking scheme>"
         }
 
     def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
@@ -152,29 +137,47 @@ class ImportListeningModule:
             "role": "system",
             "content": (
                 f"You are processing a listening test exercise sheet{solutions_str}. "
-                "Structure each exercise exactly according to these json templates:\n\n"
+                "Structure the test according to this json template:\n\n"
+                f"{self._listening_json_schema()}\n\n"
+                "Each exercise within a section should follow these templates:\n\n"
                 f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
-                f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n"
-                f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n"
-                f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
+                f"2. True/False Questions:\n{self._true_false()}\n\n"
+                f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
                 "\nImportant rules:\n"
                 "1. Keep exact question numbering from the original\n"
                 "2. Include all options for multiple choice questions\n"
-                "3. Mark blanks with {{id}} where id is the question number\n"
+                "3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n"
                 "4. Set maxWords according to the instructions\n"
                 "5. Include all possible correct answers in solution arrays\n"
-                "6. Maintain exact spacing and formatting from templates\n"
-                "7. Use appropriate variant for writeBlanks (questions/fill/form)\n"
+                "6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n"
+                "7. For writeBlanks, choose the appropriate variant:\n"
+                "   - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n"
+                "   - fill: for paragraph/summary with blanks\n"
+                "   - form: when questions and fill dont meet the requirements\n"
                 "8. For text fields, use actual newlines between questions/sentences\n"
+                "9. Format text according to chosen variant:\n"
+                "   - questions: each line should end with {{id}}\n"
+                "   - fill: embed {{id}} naturally in the paragraph\n"
+                "   - form: place {{id}} where blank should appear in text\n"
+                "10. For True/False, use exact values: true, false, or not_given\n\n"
+                "11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to "
+                "you and they are uppercase you should placed them in lowercase.\n\n"
+                "First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
+                "then for each section identify and structure its exercises according to the templates above."
             )
         }
 
     def _listening_json_schema(self) -> Dict[str, Any]:
         return {
-            "exercises": [
-                self._multiple_choice_template(),
-                self._write_blanks_questions_template(),
-                self._write_blanks_fill_template(),
-                self._write_blanks_form_template()
+            "minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",  # NOTE(review): "integer ... as string" is ambiguous; ListeningExam.minTimer is Optional[int] - confirm intended type
+            "parts": [
+                {
+                    "intro": "<optional field that contains information about the section>",  # NOTE(review): the mapper code visible in this patch builds sections from "exercises" only - confirm "intro" is consumed elsewhere
+                    "exercises": [
+                        self._multiple_choice_template(),
+                        self._write_blanks_template(),
+                        self._true_false()
+                    ]
+                }
             ]
         }
\ No newline at end of file
diff --git a/app/services/impl/third_parties/openai.py b/app/services/impl/third_parties/openai.py
index e4ae820..48c0d18 100644
--- a/app/services/impl/third_parties/openai.py
+++ b/app/services/impl/third_parties/openai.py
@@ -22,7 +22,7 @@ class OpenAI(ILLMService):
     def __init__(self, client: AsyncOpenAI):
         self._client = client
         self._logger = logging.getLogger(__name__)
-        self._default_model = "gpt-4o-2024-08-06"
+        self._default_model = "gpt-4o"
 
     async def prediction(
             self,
@@ -125,8 +125,8 @@ class OpenAI(ILLMService):
             result_content = result.choices[0].message.content
 
             try:
-                print(result_content)
                 result_json = json.loads(result_content)
+                print(str(result_json))
                 return map_to_model(result_json)
             except Exception as e:
                 attempt += 1