191 lines
6.9 KiB
Python
191 lines
6.9 KiB
Python
from logging import getLogger
|
|
from typing import Dict, Any
|
|
from uuid import uuid4
|
|
|
|
import aiofiles
|
|
from fastapi import UploadFile
|
|
|
|
from app.helpers import FileHelper
|
|
from app.mappers.reading import ReadingMapper
|
|
from app.services.abc import ILLMService
|
|
from app.dtos.exams.reading import Exam
|
|
|
|
|
|
class ImportReadingModule:
    """Import a reading exam from uploaded files by asking an LLM to structure it.

    The uploaded question sheet (and the optional solutions sheet) is saved
    under ``./tmp/<uuid>/``, converted to HTML through ``FileHelper``, and the
    HTML is sent to the LLM service together with the JSON templates defined
    by the static methods of this class.
    """

    def __init__(self, openai: ILLMService):
        """Keep a module logger and the LLM service used for predictions.

        Args:
            openai: LLM service; this class only calls its
                ``pydantic_prediction`` coroutine.
        """
        self._logger = getLogger(__name__)
        self._llm = openai

    async def import_from_file(
        self, exercises: UploadFile, solutions: UploadFile | None = None
    ) -> Dict[str, Any] | None:
        """Turn an uploaded exam (and optional solutions) into a dict.

        Args:
            exercises: Uploaded exam question sheet.
            solutions: Uploaded solutions sheet, or ``None`` when not supplied.

        Returns:
            ``response.model_dump(exclude_none=True)`` of the LLM prediction,
            or ``None`` when the prediction is falsy.
        """
        path_id = str(uuid4())
        workdir = f'./tmp/{path_id}'
        # Single source of truth for "were solutions uploaded?", used both to
        # decide whether to convert them and to tell the prompt builder.
        has_solutions = solutions is not None

        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
        # Cleanup is guarded from here on: presumably the working directory
        # exists once the first upload has been saved (the code below reads
        # from it). Previously a failure in conversion or in the LLM call
        # left the directory behind.
        try:
            FileHelper.convert_file_to_html(
                f'{workdir}/exercises.{ext}', f'{workdir}/exercises.html'
            )

            if has_solutions:
                ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
                FileHelper.convert_file_to_html(
                    f'{workdir}/solutions.{ext}', f'{workdir}/solutions.html'
                )

            response = await self._get_reading_parts(path_id, has_solutions)
        finally:
            FileHelper.remove_directory(workdir)

        if response:
            return response.model_dump(exclude_none=True)
        return None

    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
        """Read the converted HTML and request a structured exam from the LLM.

        Args:
            path_id: Name of the directory under ``./tmp`` holding
                ``exercises.html`` (and ``solutions.html`` when applicable).
            solutions: Whether ``solutions.html`` exists and should be sent.

        Returns:
            Whatever ``pydantic_prediction`` returns for the mapper
            ``ReadingMapper.map_to_exam_model`` (annotated as ``Exam``; the
            caller also tolerates a falsy result).
        """
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            exercises_html = await f.read()

        messages = [
            # Forward the flag so the system prompt matches what is actually
            # sent; it used to always claim that no solutions were supplied.
            self._instructions(solutions),
            {
                "role": "user",
                "content": f"Exam question sheet:\n\n{exercises_html}"
            }
        ]

        if solutions:
            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })

        return await self._llm.pydantic_prediction(
            messages,
            ReadingMapper.map_to_exam_model,
            str(self._reading_json_schema())
        )

    def _reading_json_schema(self):
        """Return the exam template with one example of every exercise type.

        The result is the dict from ``_reading_exam_template`` whose first
        part lists the four exercise templates.
        """
        schema = self._reading_exam_template()
        schema["parts"][0]["exercises"] = [
            self._write_blanks(),
            self._fill_blanks(),
            self._match_sentences(),
            self._true_false()
        ]
        # The original built the schema but fell off the end of the function,
        # so callers received None (and sent the string "None" to the LLM).
        return schema

    @staticmethod
    def _reading_exam_template():
        """Return the top-level exam template: a timer and a list of parts."""
        return {
            "minTimer": "<number of minutes as int not string>",
            "parts": [
                {
                    "text": {
                        "title": "<title of the passage>",
                        "content": "<the text of the passage>",
                    },
                    "exercises": []
                }
            ]
        }

    @staticmethod
    def _write_blanks():
        """Return the template describing a ``writeBlanks`` exercise."""
        return {
            "maxWords": "<number of max words return the int value not string>",
            "solutions": [
                {
                    "id": "<number of the question as string>",
                    "solution": [
                        "<at least one solution can have alternative solutions (that dont exceed maxWords)>"
                    ]
                },
            ],
            "text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
            "type": "writeBlanks"
        }

    @staticmethod
    def _match_sentences():
        """Return the template describing a ``matchSentences`` exercise."""
        return {
            "options": [
                {
                    "id": "<uppercase letter that identifies a paragraph>",
                    "sentence": "<either a heading or an idea>"
                }
            ],
            "sentences": [
                {
                    "id": "<the question id not the option id>",
                    "solution": "<id in options>",
                    "sentence": "<heading or an idea>",
                }
            ],
            "type": "matchSentences",
            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
        }

    @staticmethod
    def _true_false():
        """Return the template describing a ``trueFalse`` exercise."""
        return {
            "questions": [
                {
                    "prompt": "<question>",
                    "solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
                    "id": "<the question id>"
                }
            ],
            "type": "trueFalse"
        }

    @staticmethod
    def _fill_blanks():
        """Return the template describing a ``fillBlanks`` exercise."""
        return {
            "solutions": [
                {
                    "id": "<blank id>",
                    "solution": "<word>"
                }
            ],
            "text": "<section of text with blanks denoted by {{<blank id>}}>",
            "type": "fillBlanks",
            "words": [
                {
                    "letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
                    "word": "<word>"
                }
            ]
        }

    def _instructions(self, solutions: bool = False):
        """Build the system message that explains the task and the templates.

        Args:
            solutions: ``True`` when a solutions sheet accompanies the exam;
                this changes the opening sentence and the closing remark.

        Returns:
            A ``{"role": "system", "content": ...}`` message dict.
        """
        solutions_str = " and its solutions" if solutions else ""
        tail = (
            # The trailing space after "and" matters: the two literals are
            # concatenated and previously produced "andall".
            "The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and "
            "all the solutions right!"
            if not solutions else
            "Do your utmost to correctly identify the sections, its exercises and respective solutions"
        )

        return {
            "role": "system",
            "content": (
                f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
                f"structure the data into a single json with this template: {self._reading_exam_template()}\n"

                "You will need find out how many parts the exam has a correctly place its exercises. You will "
                "encounter 4 types of exercises:\n"
                " - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
                " - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
                " - \"trueFalse\": questions that its answers can only be true false or not given\n"
                " - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
                "contains the solutions and sometimes random words to throw off the students\n"

                "These 4 types of exercises will need to be placed in the correct json template inside each part, "
                "the templates are as follows:\n "

                f"writeBlanks: {self._write_blanks()}\n"
                f"matchSentences: {self._match_sentences()}\n"
                f"trueFalse: {self._true_false()}\n"
                f"fillBlanks: {self._fill_blanks()}\n\n"

                f"{tail}"
            )
        }
|
|
|
|
|