from logging import getLogger
from typing import Dict, Any
from uuid import uuid4

import aiofiles
from fastapi import UploadFile

from app.helpers import FileHelper
from app.mappers.reading import ReadingMapper
from app.services.abc import ILLMService
from app.dtos.exams.reading import Exam


class ImportReadingModule:
    """Turn an uploaded reading exam (and optional solutions) into structured data.

    The uploads are converted to HTML, handed to an LLM together with JSON
    templates describing the expected output, and the LLM answer is mapped
    to an ``Exam`` through ``ReadingMapper.map_to_exam_model``.
    """

    def __init__(self, openai: ILLMService):
        """Store the LLM service used for the structured prediction."""
        self._logger = getLogger(__name__)
        self._llm = openai

    async def import_from_file(
        self,
        exercises: UploadFile,
        solutions: UploadFile | None = None
    ) -> Dict[str, Any] | None:
        """Import a reading exam from uploaded files.

        Args:
            exercises: Upload containing the exam question sheet.
            solutions: Optional upload containing the solutions.

        Returns:
            The exam dumped with ``model_dump(exclude_none=True)``, or ``None``
            when the LLM step yields a falsy result.
        """
        path_id = str(uuid4())
        work_dir = f'./tmp/{path_id}'
        # One flag drives both "save the solutions" and "read the solutions",
        # so the two steps can never disagree.
        has_solutions = solutions is not None

        try:
            ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
            FileHelper.convert_file_to_html(
                f'{work_dir}/exercises.{ext}', f'{work_dir}/exercises.html'
            )

            if has_solutions:
                ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
                FileHelper.convert_file_to_html(
                    f'{work_dir}/solutions.{ext}', f'{work_dir}/solutions.html'
                )

            response = await self._get_reading_parts(path_id, has_solutions)
        finally:
            # Always drop the scratch directory, including when the save,
            # the conversion or the LLM call raises.
            # NOTE(review): assumes FileHelper.remove_directory tolerates a
            # directory that was never created - confirm.
            FileHelper.remove_directory(work_dir)

        if response:
            return response.model_dump(exclude_none=True)
        return None

    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
        """Build the chat messages from the converted HTML and query the LLM.

        Args:
            path_id: Name of the scratch directory under ``./tmp``.
            solutions: Whether ``solutions.html`` exists and must be sent too.

        Returns:
            Whatever ``pydantic_prediction`` returns for the mapper
            (annotated as ``Exam``; the caller also copes with a falsy value).
        """
        async with aiofiles.open(
            f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
        ) as f:
            exercises_html = await f.read()

        messages = [
            # Forward the flag so the system prompt matches the messages sent.
            self._instructions(solutions),
            {
                "role": "user",
                "content": f"Exam question sheet:\n\n{exercises_html}"
            }
        ]

        if solutions:
            async with aiofiles.open(
                f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8'
            ) as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })

        return await self._llm.pydantic_prediction(
            messages,
            ReadingMapper.map_to_exam_model,
            str(self._reading_json_schema())
        )

    def _reading_json_schema(self):
        """Return the exam template with one sample of every exercise type."""
        schema = self._reading_exam_template()
        schema["parts"][0]["exercises"] = [
            self._write_blanks(),
            self._fill_blanks(),
            self._match_sentences(),
            self._true_false()
        ]
        # The schema has to be returned: the caller stringifies the result
        # and sends it to the LLM.
        return schema

    @staticmethod
    def _reading_exam_template():
        """Return the empty exam skeleton: a timer and one part without exercises."""
        return {
            "minTimer": "",
            "parts": [
                {
                    "text": {
                        "title": "",
                        "content": "<the text of the passage>",
                    },
                    "exercises": []
                }
            ]
        }

    @staticmethod
    def _write_blanks():
        """Return the template describing a ``writeBlanks`` exercise."""
        return {
            "maxWords": "<number of max words return the int value not string>",
            "solutions": [
                {
                    "id": "<number of the question as string>",
                    "solution": [
                        "<at least one solution can have alternative solutions (that dont exceed maxWords)>"
                    ]
                },
            ],
            "text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
            "type": "writeBlanks"
        }

    @staticmethod
    def _match_sentences():
        """Return the template describing a ``matchSentences`` exercise."""
        return {
            "options": [
                {
                    "id": "<uppercase letter that identifies a paragraph>",
                    "sentence": "<either a heading or an idea>"
                }
            ],
            "sentences": [
                {
                    "id": "<the question id not the option id>",
                    "solution": "<id in options>",
                    "sentence": "<heading or an idea>",
                }
            ],
            "type": "matchSentences",
            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
        }

    @staticmethod
    def _true_false():
        """Return the template describing a ``trueFalse`` exercise."""
        return {
            "questions": [
                {
                    "prompt": "<question>",
                    "solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
                    "id": "<the question id>"
                }
            ],
            "type": "trueFalse"
        }

    @staticmethod
    def _fill_blanks():
        """Return the template describing a ``fillBlanks`` exercise."""
        return {
            "solutions": [
                {
                    "id": "<blank id>",
                    "solution": "<word>"
                }
            ],
            "text": "<section of text with blanks denoted by {{<blank id>}}>",
            "type": "fillBlanks",
            "words": [
                {
                    "letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
                    "word": "<word>"
                }
            ]
        }

    def _instructions(self, solutions = False):
        """Build the system message that explains the task and the templates.

        Args:
            solutions: ``True`` when a solutions sheet accompanies the exam;
                this changes both the opening sentence and the closing one.

        Returns:
            A chat message dict with ``role`` set to ``"system"``.
        """
        solutions_str = " and its solutions" if solutions else ""
        # The first literal ends with a space so the implicit concatenation
        # does not glue "and" and "all" together.
        tail = (
            "The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and "
            "all the solutions right!"
            if not solutions else
            "Do your utmost to correctly identify the sections, its exercises and respective solutions"
        )

        return {
            "role": "system",
            "content": (
                f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
                f"structure the data into a single json with this template: {self._reading_exam_template()}\n"

                "You will need find out how many parts the exam has a correctly place its exercises. You will "
                "encounter 4 types of exercises:\n"
                " - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
                " - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
                " - \"trueFalse\": questions that its answers can only be true false or not given\n"
                " - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
                "contains the solutions and sometimes random words to throw off the students\n"

                "These 4 types of exercises will need to be placed in the correct json template inside each part, "
                "the templates are as follows:\n "

                f"writeBlanks: {self._write_blanks()}\n"
                f"matchSentences: {self._match_sentences()}\n"
                f"trueFalse: {self._true_false()}\n"
                f"fillBlanks: {self._fill_blanks()}\n\n"

                f"{tail}"
            )
        }