232 lines
8.7 KiB
Python
232 lines
8.7 KiB
Python
from logging import getLogger
|
|
from typing import Dict, Any
|
|
from uuid import uuid4
|
|
|
|
import aiofiles
|
|
from fastapi import UploadFile
|
|
|
|
from app.helpers import FileHelper
|
|
from app.mappers.reading import ReadingMapper
|
|
from app.services.abc import ILLMService
|
|
from app.dtos.exams.reading import Exam
|
|
|
|
|
|
class ImportReadingModule:
    """Turn uploaded reading-exam documents into a structured exam dict.

    The uploaded files are saved under ``./tmp/<uuid>/``, converted to HTML
    through ``FileHelper`` and handed to the injected LLM service together
    with a system prompt describing the expected JSON layout.
    """

    def __init__(self, openai: ILLMService):
        """Store the LLM service used to run the structured prediction."""
        self._logger = getLogger(__name__)
        self._llm = openai

    async def import_from_file(
        self, exercises: UploadFile, solutions: UploadFile | None = None
    ) -> Dict[str, Any] | None:
        """Import an exam from an exercises file and an optional solutions file.

        Args:
            exercises: Uploaded exam question sheet.
            solutions: Optional uploaded answer sheet.

        Returns:
            The parsed exam dumped with ``exclude_none=True``, or ``None``
            when the LLM service returned a falsy result.
        """
        path_id = str(uuid4())
        workdir = f'./tmp/{path_id}'
        # One flag drives both "save the file" and "read the file" so the two
        # steps can never disagree about whether solutions.html exists.
        has_solutions = solutions is not None

        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
        # From here on the working directory holds at least the exercises
        # upload, so clean it up even when conversion or the LLM call fails.
        try:
            FileHelper.convert_file_to_html(
                f'{workdir}/exercises.{ext}', f'{workdir}/exercises.html'
            )

            if has_solutions:
                ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
                FileHelper.convert_file_to_html(
                    f'{workdir}/solutions.{ext}', f'{workdir}/solutions.html'
                )

            response = await self._get_reading_parts(path_id, has_solutions)
        finally:
            FileHelper.remove_directory(workdir)

        if not response:
            return None
        return response.model_dump(exclude_none=True)

    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam | None:
        """Build the chat messages from the converted HTML and query the LLM.

        Args:
            path_id: Name of the working directory under ``./tmp``.
            solutions: Whether ``solutions.html`` exists and should be sent too.

        Returns:
            Whatever ``pydantic_prediction`` yields for the reading mapper;
            the caller treats a falsy value as "no result".
        """
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            exercises_html = await f.read()

        messages = [
            self._instructions(solutions),
            {
                "role": "user",
                "content": f"Exam question sheet:\n\n{exercises_html}"
            }
        ]

        if solutions:
            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })

        return await self._llm.pydantic_prediction(
            messages,
            ReadingMapper.map_to_exam_model,
            str(self._reading_json_schema())
        )

    def _reading_json_schema(self):
        """Return the exam template with one sample of every exercise type."""
        schema = self._reading_exam_template()
        schema["parts"][0]["exercises"] = [
            self._write_blanks(),
            self._fill_blanks(),
            self._match_sentences(),
            self._true_false(),
            self._multiple_choice()
        ]
        return schema

    @staticmethod
    def _reading_exam_template():
        """Return the top-level exam skeleton with an empty exercise list."""
        return {
            "minTimer": "<integer representing minutes allowed for the exam>",
            "parts": [
                {
                    "text": {
                        "title": "<title of the reading passage>",
                        "content": "<full text content of the reading passage>",
                    },
                    "exercises": []
                }
            ]
        }

    @staticmethod
    def _write_blanks():
        """Return the placeholder template for a ``writeBlanks`` exercise."""
        return {
            "maxWords": "<integer max words allowed per answer>",
            "solutions": [
                {
                    "id": "<question number as string>",
                    "solution": [
                        "<acceptable answer(s) within maxWords limit>"
                    ]
                }
            ],
            "text": "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] notice how there is a double backslash before the n -> I want an escaped newline in your output> ",
            "type": "writeBlanks",
            "prompt": "<specific instructions for this exercise section>"
        }

    @staticmethod
    def _match_sentences():
        """Return the placeholder template for a ``matchSentences`` exercise."""
        return {
            "options": [
                {
                    "id": "<paragraph letter A-F>",
                    "sentence": "<THIS NEEDS TO BE A PARAGRAPH OF THE SECTION TEXT>"
                }
            ],
            "sentences": [
                {
                    "id": "<question number as string>",
                    "solution": "<matching paragraph letter>",
                    "sentence": "<A SHORT SENTENCE THAT CONVEYS AND IDEA OR HEADING>"
                }
            ],
            "type": "matchSentences",
            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>",
            "prompt": "<specific instructions for this exercise section>"
        }

    @staticmethod
    def _true_false():
        """Return the placeholder template for a ``trueFalse`` exercise."""
        return {
            "questions": [
                {
                    "id": "<question number>",
                    "prompt": "<statement to evaluate>",
                    "solution": "<one of: true, false, not_given>",
                }
            ],
            "type": "trueFalse",
            "prompt": "<specific instructions including T/F/NG marking scheme>"
        }

    @staticmethod
    def _multiple_choice():
        """Return the placeholder template for a ``multipleChoice`` exercise."""
        return {
            "questions": [
                {
                    "id": "<question number>",
                    "prompt": "<question text>",
                    "options": [
                        {
                            "id": "<A, B, or C>",
                            "text": "<option text>"
                        }
                    ],
                    "solution": "<correct option letter>",
                    "variant": "text"
                }
            ],
            "type": "multipleChoice",
            "prompt": "<specific instructions for this exercise section>"
        }

    @staticmethod
    def _fill_blanks():
        """Return the placeholder template for a ``fillBlanks`` exercise."""
        return {
            "solutions": [
                {
                    "id": "<blank number>",
                    "solution": "<correct word>"
                }
            ],
            "text": "<text passage with blanks marked as {{<blank number>}}>",
            "type": "fillBlanks",
            "words": [
                {
                    "letter": "<word identifier letter>",
                    "word": "<word from word bank>"
                }
            ],
            "prompt": "<specific instructions for this exercise section>"
        }

    def _instructions(self, solutions=False):
        """Build the system message describing the task and the JSON templates.

        Args:
            solutions: Whether a solutions document accompanies the exam; this
                switches the wording about where answers come from.

        Returns:
            A chat message dict with ``role`` set to ``"system"``.
        """
        solutions_str = " and its solutions" if solutions else ""
        if solutions:
            answer_guidance = "Use the provided solutions to fill in all answer fields accurately."
        else:
            answer_guidance = (
                "Solutions were not provided - analyze the passage carefully to determine correct answers."
            )

        # Each sentence ends with an explicit space: adjacent literals are
        # glued together verbatim, so without it the sentences run together.
        tail = (
            "Parse the exam carefully and identify:\n"
            "1. Time limit from instructions\n"
            "2. Reading passage title and full content\n"
            "3. All exercise sections and their specific instructions\n"
            "4. Question numbering and grouping\n"
            "5. Word limits and formatting requirements\n"
            "6. Specific marking schemes (e.g., T/F/NG)\n\n"
            f"{answer_guidance} "
            "Pay extra attention to fillblanks exercises the solution and option wording must match in case! "
            "There can't be options in lowercase and solutions in uppercase! "
            "Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
            "should be a text, if there isn't a title for it choose a reasonable one based on its contents."
        )

        return {
            "role": "system",
            "content": (
                f"You are processing an English reading comprehension exam{solutions_str}. Structure the data according "
                f"to this json template: {self._reading_exam_template()}\n\n"

                "The exam contains these exercise types:\n"
                "1. \"writeBlanks\": Short answer questions with strict word limits\n"
                "2. \"matchSentences\": Match headings or ideas with paragraphs, the sentences field\n"
                "3. \"trueFalse\": Evaluate statements as True/False/Not Given\n"
                "4. \"fillBlanks\": Complete text using provided word bank\n"
                "5. \"multipleChoice\": Select correct option from choices\n\n"

                "Exercise templates:\n"
                f"writeBlanks: {self._write_blanks()}\n"
                f"matchSentences: {self._match_sentences()}\n"
                f"trueFalse: {self._true_false()}\n"
                f"fillBlanks: {self._fill_blanks()}\n"
                f"multipleChoice: {self._multiple_choice()}\n\n"

                "Important details to capture:\n"
                "- Exercise section instructions and constraints\n"
                "- Question numbering and grouping\n"
                "- Word limits and formatting requirements\n"
                "- Marking schemes and answer formats\n\n"

                f"{tail}"
            )
        }