Changes to endpoints so they allow to only get context and then the exercises as well as tidying up a bit
This commit is contained in:
@@ -1,18 +1,17 @@
|
||||
import aiofiles
|
||||
import os
|
||||
import uuid
|
||||
from logging import getLogger
|
||||
|
||||
from typing import Dict, Any, Tuple, Coroutine
|
||||
from typing import Dict, Any, Coroutine
|
||||
|
||||
import pdfplumber
|
||||
from fastapi import UploadFile
|
||||
|
||||
from app.services.abc import ILLMService
|
||||
from app.helpers import LoggerHelper, FileHelper
|
||||
from app.mappers import ExamMapper
|
||||
from app.mappers import LevelMapper
|
||||
|
||||
from app.dtos.exam import Exam
|
||||
from app.dtos.exams.level import Exam
|
||||
from app.dtos.sheet import Sheet
|
||||
|
||||
|
||||
@@ -21,17 +20,15 @@ class UploadLevelModule:
|
||||
self._logger = getLogger(__name__)
|
||||
self._llm = openai
|
||||
|
||||
# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
|
||||
# firestore, return the id right away, in generation view poll for the id
|
||||
async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
|
||||
ext, path_id = await self._save_upload(file)
|
||||
ext, path_id = await FileHelper.save_upload(file)
|
||||
FileHelper.convert_file_to_pdf(
|
||||
f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
|
||||
f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.pdf'
|
||||
)
|
||||
file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
|
||||
|
||||
if not file_has_images:
|
||||
FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
|
||||
FileHelper.convert_file_to_html(f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.html')
|
||||
|
||||
completion: Coroutine[Any, Any, Exam] = (
|
||||
self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
|
||||
@@ -41,7 +38,7 @@ class UploadLevelModule:
|
||||
FileHelper.remove_directory(f'./tmp/{path_id}')
|
||||
|
||||
if response:
|
||||
return self.fix_ids(response.dict(exclude_none=True))
|
||||
return self.fix_ids(response.model_dump(exclude_none=True))
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
@@ -53,20 +50,6 @@ class UploadLevelModule:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
async def _save_upload(file: UploadFile) -> Tuple[str, str]:
|
||||
ext = file.filename.split('.')[-1]
|
||||
path_id = str(uuid.uuid4())
|
||||
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
|
||||
|
||||
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
|
||||
file_bytes: bytes = await file.read()
|
||||
|
||||
async with aiofiles.open(tmp_filename, 'wb') as file:
|
||||
await file.write(file_bytes)
|
||||
|
||||
return ext, path_id
|
||||
|
||||
def _level_json_schema(self):
|
||||
return {
|
||||
"parts": [
|
||||
@@ -91,7 +74,7 @@ class UploadLevelModule:
|
||||
"content": html
|
||||
}
|
||||
],
|
||||
ExamMapper.map_to_exam_model,
|
||||
LevelMapper.map_to_exam_model,
|
||||
str(self._level_json_schema())
|
||||
)
|
||||
|
||||
@@ -237,7 +220,7 @@ class UploadLevelModule:
|
||||
|
||||
sheet = await self._png_batch(path_id, batch, json_schema)
|
||||
sheet.batch = i + 1
|
||||
components.append(sheet.dict())
|
||||
components.append(sheet.model_dump())
|
||||
|
||||
batches = {"batches": components}
|
||||
|
||||
@@ -253,7 +236,7 @@ class UploadLevelModule:
|
||||
]
|
||||
}
|
||||
],
|
||||
ExamMapper.map_to_sheet,
|
||||
LevelMapper.map_to_sheet,
|
||||
str(json_schema)
|
||||
)
|
||||
|
||||
@@ -326,67 +309,10 @@ class UploadLevelModule:
|
||||
"content": str(batches)
|
||||
}
|
||||
],
|
||||
ExamMapper.map_to_exam_model,
|
||||
LevelMapper.map_to_exam_model,
|
||||
str(self._level_json_schema())
|
||||
)
|
||||
|
||||
def _gpt_instructions_batches(self):
|
||||
return {
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
|
||||
'components and solve the questions. Each batch may contain overlapping content with the previous '
|
||||
'batch, or close enough content which needs to be excluded. The components are as follows:'
|
||||
|
||||
'- Part, a standalone part or part of a section of the question sheet: '
|
||||
'{"type": "part", "part": "<name or number of the part>"}\n'
|
||||
|
||||
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
|
||||
'the prompt field of the template: blanks, underlines and normal. '
|
||||
|
||||
'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
|
||||
'appropriate option to solve it.'
|
||||
|
||||
'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
|
||||
'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
|
||||
'the order in which the underlines appear in the prompt you will need to fix it.'
|
||||
|
||||
'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
|
||||
'select the appropriate solution.'
|
||||
|
||||
f'The template for these questions is the same: {self._multiple_choice_png()}\n'
|
||||
|
||||
'- Reading Passages, there are two types of reading passages with different templates. The one with '
|
||||
'type "blanksPassage" where the text field holds the passage and a blank is represented by '
|
||||
'{{<some number>}} and the other one with type "passage" that has the context field with just '
|
||||
'reading passages. For both of these components you will have to remove any additional data that might '
|
||||
'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
|
||||
' if there are any. These components are used in conjunction with other ones.'
|
||||
|
||||
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
|
||||
'options with the question id and the options from a to d. The template is: '
|
||||
f'{self._passage_blank_space_png()}\n\n'
|
||||
|
||||
'Now that you know the possible components here\'s what I want you to do:\n'
|
||||
'1. Remove duplicates. A batch will have duplicates of other batches and the components of '
|
||||
'the next batch should always take precedence over the previous one batch, what I mean by this is that '
|
||||
'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
|
||||
'you pick the next one.\n'
|
||||
'2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
|
||||
'exercise. For the multiple choice question follow the previous instruction to solve them and place '
|
||||
f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
|
||||
'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
|
||||
f'the template for this exercise: {self._passage_blank_space_html()}.\n'
|
||||
f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
|
||||
f'You must group the exercises by the parts in the order they appear in the batches components. '
|
||||
f'The context field of a part is the context of a passage component that has text relevant to normal '
|
||||
f'multiple choice questions.\n'
|
||||
|
||||
'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
|
||||
'in your response and correctly structure the JSON.'
|
||||
)
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def fix_ids(response):
|
||||
counter = 1
|
||||
|
||||
Reference in New Issue
Block a user