Merged in release/async (pull request #39)

Fixed listening import

Approved-by: Tiago Ribeiro
2024-11-15 11:21:29 +00:00
parent d04759d979 93044203f6
commit c74b2b9b7b
4 changed files with 148 additions and 56 deletions
--- a/app/dtos/exams/listening.py
+++ b/app/dtos/exams/listening.py
@@ -44,17 +44,26 @@ class MultipleChoiceExercise(ExerciseBase):
    questions: List[MCQuestion]
 class WriteBlankQuestion(BaseModel):
    """A single question of a writeBlanks exercise in its per-question form."""
    # Identifier of the question; used as the blank marker id when the
    # exercise is flattened to text.
    id: str
    # Text of the question.
    prompt: str
    # List of answers for this question.
    solution: List[str]
 class WriteBlanksVariant(str, Enum):
    """Layout variants of a writeBlanks exercise.

    Members are also plain strings, so they compare equal to their values.
    """
    # Each prompt is followed by its own {{id}} blank marker.
    QUESTIONS = "questions"
    # Prompts are joined with single spaces into one run of text.
    FILL = "fill"
    # Prompts are kept as separate entries without an appended marker.
    FORM = "form"
 class WriteBlanksQuestionExercise(ExerciseBase):
    """writeBlanks exercise expressed as a list of per-question entries.

    Other fields (e.g. ``prompt``) presumably come from ``ExerciseBase``,
    which is not visible here.
    """
    # Discriminator value identifying the exercise kind.
    type: Literal["writeBlanks"]
    # Word limit for an answer.
    maxWords: int
    # One entry per question, each carrying its own prompt and solutions.
    questions: List[WriteBlankQuestion]
    # Layout variant that decides how the questions are flattened to text.
    variant: WriteBlanksVariant
 class WriteBlankSolution(BaseModel):
    """Answers for one blank of a text-form writeBlanks exercise."""
    # Identifier of the blank / question these answers belong to.
    id: str
    # List of answers for that blank.
    solution: List[str]
 class WriteBlanksExercise(ExerciseBase):
    type: Literal["writeBlanks"]
    maxWords: int
@@ -77,4 +86,4 @@ class ListeningSection(BaseModel):
 class ListeningExam(BaseModel):
    module: str = "listening"
    minTimer: Optional[int]
-    sections: List[ListeningSection]
+    parts: List[ListeningSection]
--- a/app/mappers/listening.py
+++ b/app/mappers/listening.py
@@ -1,14 +1,75 @@
-from typing import Dict, Any
+from typing import Dict, Any, List, Union, Optional
-from app.dtos.exams.listening import TrueFalseExercise, MultipleChoiceExercise, WriteBlanksExercise, ListeningExam, \
+from pydantic import BaseModel
-    ListeningSection
+
 from app.dtos.exams.listening import (
    TrueFalseExercise,
    MultipleChoiceExercise,
    WriteBlanksExercise,
    ListeningExam,
    ListeningSection,
    WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion
 )
 class ListeningQuestionSection(BaseModel):
    """Intermediate section whose writeBlanks exercises are still in per-question form."""
    # Exercises of the section; writeBlanks entries use the question-list model.
    exercises: List[Union[TrueFalseExercise, MultipleChoiceExercise, WriteBlanksQuestionExercise]]
 class ListeningQuestionExam(BaseModel):
    """Intermediate listening exam built from the raw response before writeBlanks flattening."""
    # Sections of the exam, in response order.
    parts: List[ListeningQuestionSection]
    # Timer value copied from the response's 'minTimer' key; may be absent.
    minTimer: Optional[int]
    # Module name; defaults to "listening".
    module: str = "listening"
 class WriteBlankProcessor:
    """Translate writeBlanks exercises between their two representations.

    ``to_question_model`` parses a raw exercise dict into the per-question
    model; ``to_text_model`` flattens that model into one text body plus a
    list of solutions.
    """

    @staticmethod
    def to_question_model(exercise_data: Dict[str, Any]) -> WriteBlanksQuestionExercise:
        """Build a ``WriteBlanksQuestionExercise`` from a raw exercise dict.

        Args:
            exercise_data: Raw mapping. A missing ``questions`` key is
                treated as an empty list and a missing ``variant`` key as
                ``"questions"``.

        Returns:
            The exercise with one ``WriteBlankQuestion`` per raw entry.

        Raises:
            KeyError: If a question entry lacks ``id``, ``prompt`` or
                ``solution``.
        """
        parsed_questions = []
        for raw in exercise_data.get("questions", []):
            parsed_questions.append(
                WriteBlankQuestion(
                    id=raw["id"],
                    prompt=raw["prompt"],
                    solution=raw["solution"],
                )
            )

        return WriteBlanksQuestionExercise(
            type="writeBlanks",
            prompt=exercise_data.get("prompt"),
            maxWords=exercise_data.get("maxWords"),
            questions=parsed_questions,
            variant=exercise_data.get("variant", "questions"),
        )

    @staticmethod
    def to_text_model(question_model: WriteBlanksQuestionExercise) -> WriteBlanksExercise:
        """Flatten a per-question exercise into its text-plus-solutions form.

        Args:
            question_model: Exercise holding one entry per question.

        Returns:
            A ``WriteBlanksExercise`` whose ``text`` is assembled from the
            question prompts according to the variant and whose
            ``solutions`` mirror the questions one-to-one.

        Raises:
            ValueError: If the variant is none of the known members.
        """
        variant = question_model.variant
        entries = question_model.questions

        # Pick how the prompts are rendered and joined for this variant.
        # Note: '\\n' is a literal backslash followed by 'n', not a newline.
        if variant == WriteBlanksVariant.QUESTIONS:
            # Each prompt is followed by its own {{id}} marker.
            separator, line_format = '\\n', "{prompt} {{{{{id}}}}}"
        elif variant == WriteBlanksVariant.FILL:
            separator, line_format = ' ', "{prompt}"
        elif variant == WriteBlanksVariant.FORM:
            separator, line_format = '\\n', "{prompt}"
        else:
            raise ValueError(f"Unknown variant: {question_model.variant}")

        rendered = []
        for entry in entries:
            rendered.append(line_format.format(prompt=entry.prompt, id=entry.id))
        text = separator.join(rendered)

        solutions = []
        for entry in entries:
            solutions.append(WriteBlankSolution(id=entry.id, solution=entry.solution))

        return WriteBlanksExercise(
            type="writeBlanks",
            prompt=question_model.prompt,
            maxWords=question_model.maxWords,
            text=text,
            solutions=solutions,
            variant=question_model.variant,
        )
 class ListeningMapper:
    @staticmethod
    def map_to_test_model(response: Dict[str, Any]) -> ListeningExam:
-        sections = []
+        question_parts = []
-        for section in response.get('sections', []):
+        for section in response.get('parts', []):
            section_exercises = []
            for exercise in section['exercises']:
@@ -19,14 +80,33 @@ class ListeningMapper:
                elif exercise_type == 'multipleChoice':
                    section_exercises.append(MultipleChoiceExercise(**exercise))
                elif exercise_type == 'writeBlanks':
-                    section_exercises.append(WriteBlanksExercise(**exercise))
+                    question_model = WriteBlankProcessor.to_question_model(exercise)
                    section_exercises.append(question_model)
                else:
                    raise ValueError(f"Unknown exercise type: {exercise_type}")
-            sections.append(ListeningSection(exercises=section_exercises))
+            question_parts.append(ListeningQuestionSection(exercises=section_exercises))
-        return ListeningExam(
+        question_exam = ListeningQuestionExam(
-            sections=sections,
+            parts=question_parts,
            minTimer=response.get('minTimer'),
            module="listening"
        )
        final_parts = []
        for section in question_exam.parts:
            final_exercises = []
            for exercise in section.exercises:
                if isinstance(exercise, WriteBlanksQuestionExercise):
                    final_exercises.append(WriteBlankProcessor.to_text_model(exercise))
                else:
                    final_exercises.append(exercise)
            final_parts.append(ListeningSection(exercises=final_exercises))
        return ListeningExam(
            parts=final_parts,
            minTimer=response.get('minTimer'),
            module="listening"
        )
--- a/app/services/impl/exam/listening/import_listening.py
+++ b/app/services/impl/exam/listening/import_listening.py
@@ -1,3 +1,4 @@
 import json
 from logging import getLogger
 from typing import Dict, Any
 from uuid import uuid4
@@ -18,7 +19,6 @@ class ImportListeningModule:
    async def import_from_file(
            self,
            exercises: UploadFile,
            audio: UploadFile,
            solutions: UploadFile = None
    ) -> Dict[str, Any] | None:
        path_id = str(uuid4())
@@ -99,51 +99,36 @@ class ImportListeningModule:
        }
    @staticmethod
-    def _write_blanks_questions_template() -> dict:
+    def _write_blanks_template() -> dict:
        return {
            "type": "writeBlanks",
-            "maxWords": "<number>",
+            "maxWords": "<integer max words allowed per answer>",
            "prompt": "<instructions>",
-            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>",
+            "questions": [
            "solutions": [
                {
                    "id": "<question number as string>",
                    "prompt": "<question text with blanks replaced with {{id}}>",
                    "solution": ["<acceptable answer(s)>"]
                }
            ],
-            "variant": "questions"
+            "variant": "<one of: questions, fill, form - chosen based on format:\n" +
                       "- questions: for numbered questions with blank at end\n" +
                       "- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH not separated related questions!\n" +
                       "- form: when questions and fill dont meet the requirements>"
        }
    @staticmethod
-    def _write_blanks_fill_template() -> dict:
+    def _true_false():
        return {
-            "type": "writeBlanks",
+            "questions": [
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<A summary with blanks denoted by {{id}}>",
            "solutions": [
                {
-                    "id": "<blank number as string inside {{}}>",
+                    "id": "<question number>",
-                    "solution": ["<correct word>"]
+                    "prompt": "<statement to evaluate>",
                    "solution": "<one of: true, false, not_given>",
                }
            ],
-            "variant": "fill"
+            "type": "trueFalse",
-        }
+            "prompt": "<specific instructions including T/F/NG marking scheme>"
    @staticmethod
    def _write_blanks_form_template() -> dict:
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
            "solutions": [
                {
                    "id": "<blank number as string inside {{}}>",
                    "solution": ["<correct word>"]
                }
            ],
            "variant": "form"
        }
    def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
@@ -152,29 +137,47 @@ class ImportListeningModule:
            "role": "system",
            "content": (
                f"You are processing a listening test exercise sheet{solutions_str}. "
-                "Structure each exercise exactly according to these json templates:\n\n"
+                "Structure the test according to this json template:\n\n"
                f"{self._listening_json_schema()}\n\n"
                "Each exercise within a section should follow these templates:\n\n"
                f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
-                f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n"
+                f"2. True/False Questions:\n{self._true_false()}\n\n"
-                f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n"
+                f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
                f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
                "\nImportant rules:\n"
                "1. Keep exact question numbering from the original\n"
                "2. Include all options for multiple choice questions\n"
-                "3. Mark blanks with {{id}} where id is the question number\n"
+                "3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n"
                "4. Set maxWords according to the instructions\n"
                "5. Include all possible correct answers in solution arrays\n"
-                "6. Maintain exact spacing and formatting from templates\n"
+                "6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n"
-                "7. Use appropriate variant for writeBlanks (questions/fill/form)\n"
+                "7. For writeBlanks, choose the appropriate variant:\n"
                "   - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n"
                "   - fill: for paragraph/summary with blanks\n"
                "   - form: when questions and fill dont meet the requirements\n"
                "8. For text fields, use actual newlines between questions/sentences\n"
                "9. Format text according to chosen variant:\n"
                "   - questions: each line should end with {{id}}\n"
                "   - fill: embed {{id}} naturally in the paragraph\n"
                "   - form: place {{id}} where blank should appear in text\n"
                "10. For True/False, use exact values: true, false, or not_given\n\n"
                "11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to "
                "you and they are uppercase you should placed them in lowercase.\n\n"
                "First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
                "then for each section identify and structure its exercises according to the templates above."
            )
        }
    def _listening_json_schema(self) -> Dict[str, Any]:
        return {
            "minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",
            "parts": [
                {
                    "intro": "<optional field that contains information about the section>",
                    "exercises": [
                        self._multiple_choice_template(),
-                self._write_blanks_questions_template(),
+                        self._write_blanks_template(),
-                self._write_blanks_fill_template(),
+                        self._true_false()
-                self._write_blanks_form_template()
+                    ]
                }
            ]
        }
--- a/app/services/impl/third_parties/openai.py
+++ b/app/services/impl/third_parties/openai.py
@@ -22,7 +22,7 @@ class OpenAI(ILLMService):
    def __init__(self, client: AsyncOpenAI):
        self._client = client
        self._logger = logging.getLogger(__name__)
-        self._default_model = "gpt-4o-2024-08-06"
+        self._default_model = "gpt-4o"
    async def prediction(
            self,
@@ -125,8 +125,8 @@ class OpenAI(ILLMService):
            result_content = result.choices[0].message.content
            try:
                print(result_content)
                result_json = json.loads(result_content)
                print(str(result_json))
                return map_to_model(result_json)
            except Exception as e:
                attempt += 1