Changes to the endpoints so that they allow getting only the context and then the exercises, plus a bit of tidying up

2024-11-04 23:31:48 +00:00
parent 2a032c5aba
commit 84ed2f2f6a
83 changed files with 4229 additions and 1843 deletions
--- a/app/services/impl/exam/level/upload.py
+++ b/app/services/impl/exam/level/upload.py
@@ -1,18 +1,17 @@
 import aiofiles
 import os
-import uuid
 from logging import getLogger

-from typing import Dict, Any, Tuple, Coroutine
+from typing import Dict, Any, Coroutine

 import pdfplumber
 from fastapi import UploadFile

 from app.services.abc import ILLMService
 from app.helpers import LoggerHelper, FileHelper
-from app.mappers import ExamMapper
+from app.mappers import LevelMapper

-from app.dtos.exam import Exam
+from app.dtos.exams.level import Exam
 from app.dtos.sheet import Sheet


@@ -21,17 +20,15 @@ class UploadLevelModule:
        self._logger = getLogger(__name__)
        self._llm = openai

-    # TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
-    #  firestore, return the id right away, in generation view poll for the id
    async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
-        ext, path_id = await self._save_upload(file)
+        ext, path_id = await FileHelper.save_upload(file)
        FileHelper.convert_file_to_pdf(
-            f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
+            f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.pdf'
        )
        file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')

        if not file_has_images:
-            FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
+            FileHelper.convert_file_to_html(f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.html')

        completion: Coroutine[Any, Any, Exam] = (
            self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
@@ -41,7 +38,7 @@ class UploadLevelModule:
        FileHelper.remove_directory(f'./tmp/{path_id}')

        if response:
-            return self.fix_ids(response.dict(exclude_none=True))
+            return self.fix_ids(response.model_dump(exclude_none=True))
        return None

    @staticmethod
@@ -53,20 +50,6 @@ class UploadLevelModule:
                    return True
        return False

-    @staticmethod
-    async def _save_upload(file: UploadFile) -> Tuple[str, str]:
-        ext = file.filename.split('.')[-1]
-        path_id = str(uuid.uuid4())
-        os.makedirs(f'./tmp/{path_id}', exist_ok=True)
-
-        tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
-        file_bytes: bytes = await file.read()
-
-        async with aiofiles.open(tmp_filename, 'wb') as file:
-            await file.write(file_bytes)
-
-        return ext, path_id
-
    def _level_json_schema(self):
        return {
            "parts": [
@@ -91,7 +74,7 @@ class UploadLevelModule:
                 "content": html
             }
             ],
-            ExamMapper.map_to_exam_model,
+            LevelMapper.map_to_exam_model,
            str(self._level_json_schema())
        )

@@ -237,7 +220,7 @@ class UploadLevelModule:

            sheet = await self._png_batch(path_id, batch, json_schema)
            sheet.batch = i + 1
-            components.append(sheet.dict())
+            components.append(sheet.model_dump())

        batches = {"batches": components}

@@ -253,7 +236,7 @@ class UploadLevelModule:
                 ]
             }
             ],
-            ExamMapper.map_to_sheet,
+            LevelMapper.map_to_sheet,
            str(json_schema)
        )

@@ -326,67 +309,10 @@ class UploadLevelModule:
                 "content": str(batches)
             }
             ],
-            ExamMapper.map_to_exam_model,
+            LevelMapper.map_to_exam_model,
            str(self._level_json_schema())
        )

-    def _gpt_instructions_batches(self):
-        return {
-            "role": "system",
-            "content": (
-                'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
-                'components and solve the questions. Each batch may contain overlapping content with the previous '
-                'batch, or close enough content which needs to be excluded. The components are as follows:'
-
-                '- Part, a standalone part or part of a section of the question sheet: '
-                '{"type": "part", "part": "<name or number of the part>"}\n'
-
-                '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
-                'the prompt field of the template: blanks, underlines and normal. '
-
-                'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
-                'appropriate option to solve it.'
-
-                'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
-                'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
-                'the order in which the underlines appear in the prompt you will need to fix it.'
-
-                'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
-                'select the appropriate solution.'
-
-                f'The template for these questions is the same: {self._multiple_choice_png()}\n'
-
-                '- Reading Passages, there are two types of reading passages with different templates. The one with '
-                'type "blanksPassage" where the text field holds the passage and a blank is represented by '
-                '{{<some number>}} and the other one with type "passage" that has the context field with just '
-                'reading passages. For both of these components you will have to remove any additional data that might '
-                'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
-                ' if there are any. These components are used in conjunction with other ones.'
-
-                '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
-                'options with the question id and the options from a to d. The template is: '
-                f'{self._passage_blank_space_png()}\n\n'
-
-                'Now that you know the possible components here\'s what I want you to do:\n'
-                '1. Remove duplicates. A batch will have duplicates of other batches and the components of '
-                'the next batch should always take precedence over the previous one batch, what I mean by this is that '
-                'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
-                'you pick the next one.\n'
-                '2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
-                'exercise. For the multiple choice question follow the previous instruction to solve them and place '
-                f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
-                'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
-                f'the template for this exercise: {self._passage_blank_space_html()}.\n'
-                f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
-                f'You must group the exercises by  the parts in the order they appear in the batches components. '
-                f'The context field of a part is the context of a passage component that has text relevant to normal '
-                f'multiple choice questions.\n'
-
-                'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
-                'in your response and correctly structure the JSON.'
-            )
-        }
-
    @staticmethod
    def fix_ids(response):
        counter = 1