Merged in release/async (pull request #43)

ENCOA-255: GPT was grouping parts by sections, and the reading passages were still using the old context field instead of text.content. Approved-by: Tiago Ribeiro
2024-12-04 09:18:03 +00:00
parent 12376d422d d64cb929c7
commit 06471e9fab
10 changed files with 83 additions and 57 deletions
--- a/app/api/level.py
+++ b/app/api/level.py
@@ -54,7 +54,6 @@ async def import_level(
 ):
    return await level_controller.upload_level(exercises, solutions)

-
@level_router.post(
    '/custom/',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
--- a/app/configs/constants.py
+++ b/app/configs/constants.py
@@ -47,6 +47,7 @@ class ReadingExerciseType(str, Enum):
    trueFalse = "trueFalse"
    paragraphMatch = "paragraphMatch"
    ideaMatch = "ideaMatch"
+    multipleChoice = "multipleChoice"


 class ListeningExerciseType(str, Enum):
--- a/app/dtos/exams/level.py
+++ b/app/dtos/exams/level.py
@@ -47,10 +47,13 @@ class FillBlanksExercise(BaseModel):

 Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]

+class Text(BaseModel):
+    content: str
+    title: str

 class Part(BaseModel):
    exercises: List[Exercise]
-    context: Optional[str] = Field(default=None)
+    text: Optional[Text] = Field(default=None)


 class Exam(BaseModel):
--- a/app/mappers/level.py
+++ b/app/mappers/level.py
@@ -5,7 +5,7 @@ from pydantic import ValidationError
 from app.dtos.exams.level import (
    MultipleChoiceExercise,
    FillBlanksExercise,
-    Part, Exam
+    Part, Exam, Text
 )
 from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord

@@ -17,7 +17,7 @@ class LevelMapper:
        parts = []
        for part in response['parts']:
            part_exercises = part['exercises']
-            context = part.get('context', None)
+            text = part.get('text', None)

            exercises = []
            for exercise in part_exercises:
@@ -32,8 +32,13 @@ class LevelMapper:
                exercises.append(exercise_model)

            part_kwargs = {"exercises": exercises}
-            if context is not None:
-                part_kwargs["context"] = context
+            if text is not None and text.get('content', None):
+                title = text.get('title', 'Untitled')
+                if title == '':
+                    title = 'Untitled'
+                part_kwargs["text"] = Text(title=title, content=text['content'])
+            else:
+                part_kwargs["text"] = None

            part_model = Part(**part_kwargs)
            parts.append(part_model)
--- a/app/services/impl/exam/level/upload.py
+++ b/app/services/impl/exam/level/upload.py
@@ -35,7 +35,7 @@ class UploadLevelModule:
        #completion: Coroutine[Any, Any, Exam] = (
        #    self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
        #)
-        response = await self._html_completion(path_id)
+        response = await self._html_completion(path_id, solutions is not None)

        FileHelper.remove_directory(f'./tmp/{path_id}')

@@ -57,7 +57,10 @@ class UploadLevelModule:
        return {
            "parts": [
                {
-                    "context": "<this attribute is optional you may exclude it if not required>",
+                    "text": {
+                        "content": "<this attribute is mandatory if there is a text passage else this 'text' field is omitted>",
+                        "title": "<this attribute is optional you may exclude it if not required>",
+                    },
                    "exercises": [
                        self._multiple_choice_html(),
                        self._passage_blank_space_html()
@@ -66,16 +69,26 @@ class UploadLevelModule:
            ]
        }

-    async def _html_completion(self, path_id: str) -> Exam:
+    async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam:
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            html = await f.read()

+        solutions = []
+        if solutions_provided:
+            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
+                solutions_html = await f.read()
+                solutions.append({
+                    "role": "user",
+                    "content": f'The solutions to the question sheet are the following:\n\n{solutions_html}'
+                })
+
        return await self._llm.pydantic_prediction(
            [self._gpt_instructions_html(),
             {
                 "role": "user",
                 "content": html
-             }
+             },
+             *solutions
             ],
            LevelMapper.map_to_exam_model,
            str(self._level_json_schema())
@@ -86,7 +99,7 @@ class UploadLevelModule:
            "role": "system",
            "content": (
                'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
-                'Your current task is to scrape html english questions sheets.\n\n'
+                'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n'

                'In the question sheet you will only see 4 types of question:\n'
                '- blank space multiple choice\n'
@@ -111,23 +124,26 @@ class UploadLevelModule:
                'out the best paragraph separation possible.'

                'You will place all the information in a single JSON: '
-                '{"parts": [{"exercises": [{...}], "context": ""}]}\n '
+                '{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""} ]}\n '
                'Where {...} are the exercises templates for each part of a question sheet and the optional field '
-                'context.'
-
-                'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
-                'so that you can group the exercises by the parts that are in the html, this is crucial since only '
-                'reading passage multiple choice require context and if the context is included in parts where it '
-                'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'
+                'text, which contains the reading passages that are required in order to solve the part questions, '
+                '(if there are passages) place them in text.content and if there is a title place it in text.title '
+                'else omit the title field.\n'
+                
+                'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means '
+                'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST '
+                'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections '
+                'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the'
+                ' word Part x which is in the html. '
+                'You must strictly adhere to this instruction, do not mistake sections for parts!\n'

                'The templates for the exercises are the following:\n'
                '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
                f'{self._multiple_choice_html()}\n'
                f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'

-                'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
-                'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
-                'exercises exclude the context field.'
+                'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one)'
+                'without paragraphs or line numbers, with 2 newlines between paragraphs.'
            )
        }

@@ -135,30 +151,19 @@ class UploadLevelModule:
    def _multiple_choice_html():
        return {
            "type": "multipleChoice",
-            "prompt": "Select the appropriate option.",
+            "prompt": "<general instructions for this section>",
            "questions": [
                {
-                    "id": "<the question id>",
-                    "prompt": "<the question>",
-                    "solution": "<the option id solution>",
+                    "id": "<question number as string>",
+                    "prompt": "<question text>",
                    "options": [
                        {
-                            "id": "A",
-                            "text": "<the a option>"
-                        },
-                        {
-                            "id": "B",
-                            "text": "<the b option>"
-                        },
-                        {
-                            "id": "C",
-                            "text": "<the c option>"
-                        },
-                        {
-                            "id": "D",
-                            "text": "<the d option>"
+                            "id": "<A/B/C/D>",
+                            "text": "<option text>"
                        }
-                    ]
+                    ],
+                    "solution": "<correct option letter>",
+                    "variant": "text"
                }
            ]
        }
@@ -171,17 +176,17 @@ class UploadLevelModule:
            "prompt": "Click a blank to select the appropriate word for it.",
            "text": (
                "<The whole text for the exercise with replacements for blank spaces and their "
-                "ids with {{<question id>}} with 2 newlines between paragraphs>"
+                "ids with {{<question id/number>}} with 2 newlines between paragraphs>"
            ),
            "solutions": [
                {
-                    "id": "<question id>",
+                    "id": "<question number>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
-                    "id": "<question id>",
+                    "id": "<question number>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
@@ -205,7 +210,7 @@ class UploadLevelModule:
                self._multiple_choice_png(),
                {"type": "blanksPassage", "text": (
                    "<The whole text for the exercise with replacements for blank spaces and their "
-                    "ids with {{<question id>}} with 2 newlines between paragraphs>"
+                    "ids with {{<question number>}} with 2 newlines between paragraphs>"
                )},
                {"type": "passage", "context": (
                    "<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
--- a/app/services/impl/exam/listening/init.py
+++ b/app/services/impl/exam/listening/init.py
@@ -14,11 +14,10 @@ from app.configs.constants import (
 )
 from app.helpers import FileHelper
 from .import_listening import ImportListeningModule
-from .multiple_choice import MultipleChoice
 from .write_blank_forms import WriteBlankForms
 from .write_blanks import WriteBlanks
 from .write_blank_notes import WriteBlankNotes
-from ..shared import TrueFalse
+from ..shared import TrueFalse, MultipleChoice


 class ListeningService(IListeningService):
@@ -128,7 +127,7 @@ class ListeningService(IListeningService):
        if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options":
            n_options = 4 if req_exercise.type == "multipleChoice" else 3
            question = await self._multiple_choice.gen_multiple_choice(
-                dialog_type, text, req_exercise.quantity, start_id, difficulty, n_options
+                text, req_exercise.quantity, start_id, difficulty, n_options
            )
            self._logger.info(f"Added multiple choice: {question}")
            return question
--- a/app/services/impl/exam/reading/init.py
+++ b/app/services/impl/exam/reading/init.py
@@ -10,7 +10,7 @@ from app.services.abc import IReadingService, ILLMService
 from .fill_blanks import FillBlanks
 from .idea_match import IdeaMatch
 from .paragraph_match import ParagraphMatch
-from ..shared import TrueFalse
+from ..shared import TrueFalse, MultipleChoice
 from .import_reading import ImportReadingModule
 from .write_blanks import WriteBlanks

@@ -24,6 +24,7 @@ class ReadingService(IReadingService):
        self._paragraph_match = ParagraphMatch(llm)
        self._true_false = TrueFalse(llm)
        self._write_blanks = WriteBlanks(llm)
+        self._multiple_choice = MultipleChoice(llm)
        self._logger = getLogger(__name__)
        self._import = ImportReadingModule(llm)

@@ -119,6 +120,12 @@ class ReadingService(IReadingService):
            question["variant"] = "ideaMatch"
            self._logger.info(f"Added idea match: {question}")
            return question
+        elif req_exercise.type == "multipleChoice":
+            question = await self._multiple_choice.gen_multiple_choice(
+                text, req_exercise.quantity, start_id, difficulty, 4
+            )
+            self._logger.info(f"Added multiple choice: {question}")
+            return question

    async def generate_reading_exercises(self, dto: ReadingDTO):
        exercise_tasks = []
--- a/app/services/impl/exam/reading/import_reading.py
+++ b/app/services/impl/exam/reading/import_reading.py
@@ -98,7 +98,11 @@ class ImportReadingModule:
                    ]
                }
            ],
-            "text": "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] notice how there is a double backslash before the n -> I want an escaped newline in your output> ",
+            "text": (
+                "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] "
+                "- notice how there the question number inside {{}} -> the text MUST always contain the question number in that format "
+                "- and notice how there is a double backslash before the n -> I want an escaped newline in your output> "
+                     ),
            "type": "writeBlanks",
            "prompt": "<specific instructions for this exercise section>"
        }
@@ -192,13 +196,14 @@ class ImportReadingModule:
                + (
                    "Solutions were not provided - analyze the passage carefully to determine correct answers."
                    if not solutions else
-                    "Use the provided solutions to fill in all answer fields accurately."
+                    "Use the provided solutions to fill in all answer fields accurately, if word answers have all letters "
+                    "uppercase convert them to lowercase before assigning them."
                )
                +
-                "Pay extra attention to fillblanks exercises the solution and option wording must match in case!"
-                "There can't be options in lowercase and solutions in uppercase!"
+                "Pay extra attention to fillblanks exercises the solution and option wording must match in case! "
+                "There can't be options in lowercase and solutions in uppercase! "
                "Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
-                "should be a text, if there isn't a title for it choose a reasonable one based on its contents."
+                "should be a text, if there isn't a title for it choose a reasonable one based on its contents. "
        )

        return {
--- a/app/services/impl/exam/shared/init.py
+++ b/app/services/impl/exam/shared/init.py
@@ -1,5 +1,7 @@
 from .true_false import TrueFalse
+from .multiple_choice import MultipleChoice

 __all__ = [
-    "TrueFalse"
-]
+    "TrueFalse",
+    "MultipleChoice"
+]
--- a/app/services/impl/exam/listening/multiple_choice.py
+++ b/app/services/impl/exam/listening/multiple_choice.py
@@ -11,7 +11,7 @@ class MultipleChoice:
        self._llm = llm

    async def gen_multiple_choice(
-            self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
+            self, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
    ):
        messages = [
            {
@@ -27,7 +27,7 @@ class MultipleChoice:
                "role": "user",
                "content": (
                        f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
-                        f'options for this {dialog_type}:\n"' + text + '"')
+                        f'options for this text:\n"' + text + '"')

            }
        ]