Mostly fixed the reading import; first attempt at the listening import
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import asyncio
|
||||
from logging import getLogger
|
||||
import random
|
||||
from typing import Dict
|
||||
from typing import Dict, Any
|
||||
|
||||
from starlette.datastructures import UploadFile
|
||||
|
||||
@@ -13,6 +13,7 @@ from app.configs.constants import (
|
||||
FieldsAndExercises
|
||||
)
|
||||
from app.helpers import FileHelper
|
||||
from .import_listening import ImportListeningModule
|
||||
from .multiple_choice import MultipleChoice
|
||||
from .write_blank_forms import WriteBlankForms
|
||||
from .write_blanks import WriteBlanks
|
||||
@@ -46,6 +47,7 @@ class ListeningService(IListeningService):
|
||||
self._write_blanks = WriteBlanks(llm)
|
||||
self._write_blanks_forms = WriteBlankForms(llm)
|
||||
self._write_blanks_notes = WriteBlankNotes(llm)
|
||||
self._import = ImportListeningModule(llm)
|
||||
self._sections = {
|
||||
"section_1": {
|
||||
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
|
||||
@@ -81,6 +83,12 @@ class ListeningService(IListeningService):
|
||||
}
|
||||
}
|
||||
|
||||
async def import_exam(
    self, exercises: UploadFile, solutions: UploadFile = None
) -> Dict[str, Any] | None:
    """Import a listening exam from an uploaded exercise sheet.

    Delegates to the import module held in ``self._import``.

    Args:
        exercises: Uploaded exercise sheet.
        solutions: Optional uploaded solutions sheet.

    Returns:
        Whatever ``import_from_file`` returns: a dict describing the exam,
        or ``None``.
    """
    # ``import_from_file`` takes (exercises, audio, solutions=None). Passing
    # ``solutions`` positionally would bind it to ``audio`` and the solutions
    # sheet would be ignored, so every optional argument is passed by keyword.
    # This entry point has no audio upload to forward, hence ``audio=None``.
    return await self._import.import_from_file(
        exercises, audio=None, solutions=solutions
    )
|
||||
|
||||
|
||||
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
    """Generate the dialogue for one listening section.

    Looks up the entry for ``section_<section>`` in ``self._sections`` and
    awaits its ``"generate_dialogue"`` callable with ``section`` and
    ``topic``. ``difficulty`` is accepted but not forwarded.

    Raises:
        KeyError: If no entry exists for the requested section.
    """
    section_config = self._sections[f'section_{section}']
    dialogue_generator = section_config["generate_dialogue"]
    dialogue = await dialogue_generator(section, topic)
    return dialogue
|
||||
|
||||
|
||||
180
app/services/impl/exam/listening/import_listening.py
Normal file
180
app/services/impl/exam/listening/import_listening.py
Normal file
@@ -0,0 +1,180 @@
|
||||
from logging import getLogger
|
||||
from typing import Dict, Any
|
||||
from uuid import uuid4
|
||||
import aiofiles
|
||||
from fastapi import UploadFile
|
||||
|
||||
from app.dtos.exams.listening import ListeningExam
|
||||
from app.helpers import FileHelper
|
||||
from app.mappers.listening import ListeningMapper
|
||||
from app.services.abc import ILLMService
|
||||
|
||||
|
||||
class ImportListeningModule:
    """Build a structured listening exam from uploaded documents via an LLM.

    The uploaded exercise sheet (and, optionally, a solutions sheet) is saved
    under ``./tmp/<uuid>/``, converted to HTML, and sent to the LLM together
    with JSON-like templates describing the supported exercise types.
    """

    def __init__(self, llm_service: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = llm_service

    async def import_from_file(
        self,
        exercises: UploadFile,
        audio: UploadFile,
        solutions: UploadFile = None
    ) -> Dict[str, Any] | None:
        """Import a listening exam from the uploaded files.

        Args:
            exercises: Uploaded exercise sheet.
            audio: Accepted for interface compatibility; not used here yet.
            solutions: Optional uploaded solutions sheet.

        Returns:
            The parsed exam dumped to a dict (``None`` fields excluded), or
            ``None`` when the LLM call produced a falsy result.
        """
        path_id = str(uuid4())
        # A single flag decides both whether the solutions file is written
        # and whether it is read back later, so the two can never disagree.
        has_solutions = solutions is not None

        try:
            await self._save_as_html(exercises, "exercises", path_id)
            if has_solutions:
                await self._save_as_html(solutions, "solutions", path_id)

            response = await self._get_listening_sections(path_id, has_solutions)
        finally:
            # Always drop the scratch directory, including when conversion or
            # the LLM call raises; otherwise ./tmp accumulates stale uploads.
            # NOTE(review): assumes remove_directory tolerates a directory
            # that was never created (e.g. the first save failed) - confirm.
            FileHelper.remove_directory(f'./tmp/{path_id}')

        if response:
            return response.model_dump(exclude_none=True)
        return None

    @staticmethod
    async def _save_as_html(upload: UploadFile, name: str, path_id: str) -> None:
        """Save ``upload`` as ``./tmp/<path_id>/<name>.<ext>`` and convert it to HTML."""
        # The first element returned by save_upload is used as the file
        # extension of the stored upload.
        ext, _ = await FileHelper.save_upload(upload, name, path_id)
        FileHelper.convert_file_to_html(
            f'./tmp/{path_id}/{name}.{ext}',
            f'./tmp/{path_id}/{name}.html'
        )

    @staticmethod
    async def _read_html(path_id: str, name: str) -> str:
        """Read the converted ``./tmp/<path_id>/<name>.html`` file as UTF-8 text."""
        async with aiofiles.open(
            f'./tmp/{path_id}/{name}.html', 'r', encoding='utf-8'
        ) as f:
            return await f.read()

    async def _get_listening_sections(
        self,
        path_id: str,
        has_solutions: bool = False
    ) -> ListeningExam:
        """Send the converted HTML to the LLM and return its mapped result.

        Args:
            path_id: Name of the scratch directory under ``./tmp``.
            has_solutions: Whether ``solutions.html`` exists and should be
                appended as an extra user message.
        """
        exercises_html = await self._read_html(path_id, "exercises")

        messages = [
            self._instructions(has_solutions),
            {
                "role": "user",
                "content": f"Listening exercise sheet:\n\n{exercises_html}"
            }
        ]

        if has_solutions:
            solutions_html = await self._read_html(path_id, "solutions")
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })

        return await self._llm.pydantic_prediction(
            messages,
            ListeningMapper.map_to_test_model,
            str(self._listening_json_schema())
        )

    @staticmethod
    def _multiple_choice_template() -> dict:
        """Return the placeholder template for a multiple choice exercise."""
        return {
            "type": "multipleChoice",
            "prompt": "<general instructions for this section>",
            "questions": [
                {
                    "id": "<question number as string>",
                    "prompt": "<question text>",
                    "options": [
                        {
                            "id": "<A/B/C/D>",
                            "text": "<option text>"
                        }
                    ],
                    "solution": "<correct option letter>",
                    "variant": "text"
                }
            ]
        }

    @staticmethod
    def _write_blanks_questions_template() -> dict:
        """Return the placeholder template for writeBlanks, ``questions`` variant."""
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>",
            "solutions": [
                {
                    "id": "<question number as string>",
                    "solution": ["<acceptable answer(s)>"]
                }
            ],
            "variant": "questions"
        }

    @staticmethod
    def _write_blanks_fill_template() -> dict:
        """Return the placeholder template for writeBlanks, ``fill`` variant."""
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<A summary with blanks denoted by {{id}}>",
            "solutions": [
                {
                    "id": "<blank number as string inside {{}}>",
                    "solution": ["<correct word>"]
                }
            ],
            "variant": "fill"
        }

    @staticmethod
    def _write_blanks_form_template() -> dict:
        """Return the placeholder template for writeBlanks, ``form`` variant."""
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
            "solutions": [
                {
                    "id": "<blank number as string inside {{}}>",
                    "solution": ["<correct word>"]
                }
            ],
            "variant": "form"
        }

    def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
        """Build the system message that describes the task and the templates.

        Args:
            has_solutions: When true, the opening sentence mentions that a
                solutions sheet accompanies the exercise sheet.
        """
        solutions_str = " and its solutions" if has_solutions else ""
        # Only the f-prefixed fragments are interpolated; the plain fragments
        # keep their literal ``{{id}}`` braces. The templates are embedded
        # using their dict string representation.
        return {
            "role": "system",
            "content": (
                f"You are processing a listening test exercise sheet{solutions_str}. "
                "Structure each exercise exactly according to these json templates:\n\n"
                f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
                f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n"
                f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n"
                f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
                "\nImportant rules:\n"
                "1. Keep exact question numbering from the original\n"
                "2. Include all options for multiple choice questions\n"
                "3. Mark blanks with {{id}} where id is the question number\n"
                "4. Set maxWords according to the instructions\n"
                "5. Include all possible correct answers in solution arrays\n"
                "6. Maintain exact spacing and formatting from templates\n"
                "7. Use appropriate variant for writeBlanks (questions/fill/form)\n"
                "8. For text fields, use actual newlines between questions/sentences\n"
            )
        }

    def _listening_json_schema(self) -> Dict[str, Any]:
        """Return the overall response shape: one of each exercise template."""
        return {
            "exercises": [
                self._multiple_choice_template(),
                self._write_blanks_questions_template(),
                self._write_blanks_fill_template(),
                self._write_blanks_form_template()
            ]
        }
|
||||
@@ -39,7 +39,7 @@ class ImportReadingModule:
|
||||
exercises_html = await f.read()
|
||||
|
||||
messages = [
|
||||
self._instructions(),
|
||||
self._instructions(solutions),
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"Exam question sheet:\n\n{exercises_html}"
|
||||
@@ -66,18 +66,20 @@ class ImportReadingModule:
|
||||
self._write_blanks(),
|
||||
self._fill_blanks(),
|
||||
self._match_sentences(),
|
||||
self._true_false()
|
||||
self._true_false(),
|
||||
self._multiple_choice()
|
||||
]
|
||||
return json
|
||||
|
||||
@staticmethod
|
||||
def _reading_exam_template():
|
||||
return {
|
||||
"minTimer": "<number of minutes as int not string>",
|
||||
"minTimer": "<integer representing minutes allowed for the exam>",
|
||||
"parts": [
|
||||
{
|
||||
"text": {
|
||||
"title": "<title of the passage>",
|
||||
"content": "<the text of the passage>",
|
||||
"title": "<title of the reading passage>",
|
||||
"content": "<full text content of the reading passage>",
|
||||
},
|
||||
"exercises": []
|
||||
}
|
||||
@@ -87,17 +89,18 @@ class ImportReadingModule:
|
||||
@staticmethod
|
||||
def _write_blanks():
|
||||
return {
|
||||
"maxWords": "<number of max words return the int value not string>",
|
||||
"maxWords": "<integer max words allowed per answer>",
|
||||
"solutions": [
|
||||
{
|
||||
"id": "<number of the question as string>",
|
||||
"id": "<question number as string>",
|
||||
"solution": [
|
||||
"<at least one solution can have alternative solutions (that dont exceed maxWords)>"
|
||||
"<acceptable answer(s) within maxWords limit>"
|
||||
]
|
||||
},
|
||||
}
|
||||
],
|
||||
"text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
|
||||
"type": "writeBlanks"
|
||||
"text": "<numbered questions with format: <question text>{{<question number>}}\\n>",
|
||||
"type": "writeBlanks",
|
||||
"prompt": "<specific instructions for this exercise section>"
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -105,19 +108,20 @@ class ImportReadingModule:
|
||||
return {
|
||||
"options": [
|
||||
{
|
||||
"id": "<uppercase letter that identifies a paragraph>",
|
||||
"sentence": "<either a heading or an idea>"
|
||||
"id": "<paragraph letter A-F>",
|
||||
"sentence": "<THIS NEEDS TO BE A PARAGRAPH OF THE SECTION TEXT>"
|
||||
}
|
||||
],
|
||||
"sentences": [
|
||||
{
|
||||
"id": "<the question id not the option id>",
|
||||
"solution": "<id in options>",
|
||||
"sentence": "<heading or an idea>",
|
||||
"id": "<question number as string>",
|
||||
"solution": "<matching paragraph letter>",
|
||||
"sentence": "<A SHORT SENTENCE THAT CONVEYS AND IDEA OR HEADING>"
|
||||
}
|
||||
],
|
||||
"type": "matchSentences",
|
||||
"variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
|
||||
"variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>",
|
||||
"prompt": "<specific instructions for this exercise section>"
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -125,12 +129,34 @@ class ImportReadingModule:
|
||||
return {
|
||||
"questions": [
|
||||
{
|
||||
"prompt": "<question>",
|
||||
"solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
|
||||
"id": "<the question id>"
|
||||
"id": "<question number>",
|
||||
"prompt": "<statement to evaluate>",
|
||||
"solution": "<one of: true, false, not_given>",
|
||||
}
|
||||
],
|
||||
"type": "trueFalse"
|
||||
"type": "trueFalse",
|
||||
"prompt": "<specific instructions including T/F/NG marking scheme>"
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _multiple_choice():
|
||||
return {
|
||||
"questions": [
|
||||
{
|
||||
"id": "<question number>",
|
||||
"prompt": "<question text>",
|
||||
"options": [
|
||||
{
|
||||
"id": "<A, B, or C>",
|
||||
"text": "<option text>"
|
||||
}
|
||||
],
|
||||
"solution": "<correct option letter>",
|
||||
"variant": "text"
|
||||
}
|
||||
],
|
||||
"type": "multipleChoice",
|
||||
"prompt": "<specific instructions for this exercise section>"
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -138,53 +164,69 @@ class ImportReadingModule:
|
||||
return {
|
||||
"solutions": [
|
||||
{
|
||||
"id": "<blank id>",
|
||||
"solution": "<word>"
|
||||
"id": "<blank number>",
|
||||
"solution": "<correct word>"
|
||||
}
|
||||
],
|
||||
"text": "<section of text with blanks denoted by {{<blank id>}}>",
|
||||
"text": "<text passage with blanks marked as {{<blank number>}}>",
|
||||
"type": "fillBlanks",
|
||||
"words": [
|
||||
{
|
||||
"letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
|
||||
"word": "<word>"
|
||||
"letter": "<word identifier letter>",
|
||||
"word": "<word from word bank>"
|
||||
}
|
||||
]
|
||||
],
|
||||
"prompt": "<specific instructions for this exercise section>"
|
||||
}
|
||||
|
||||
def _instructions(self, solutions = False):
|
||||
def _instructions(self, solutions=False):
|
||||
solutions_str = " and its solutions" if solutions else ""
|
||||
tail = (
|
||||
"The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and"
|
||||
"all the solutions right!"
|
||||
if not solutions else
|
||||
"Do your utmost to correctly identify the sections, its exercises and respective solutions"
|
||||
"Parse the exam carefully and identify:\n"
|
||||
"1. Time limit from instructions\n"
|
||||
"2. Reading passage title and full content\n"
|
||||
"3. All exercise sections and their specific instructions\n"
|
||||
"4. Question numbering and grouping\n"
|
||||
"5. Word limits and formatting requirements\n"
|
||||
"6. Specific marking schemes (e.g., T/F/NG)\n\n"
|
||||
+ (
|
||||
"Solutions were not provided - analyze the passage carefully to determine correct answers."
|
||||
if not solutions else
|
||||
"Use the provided solutions to fill in all answer fields accurately."
|
||||
)
|
||||
+
|
||||
"Pay extra attention to fillblanks exercises the solution and option wording must match in case!"
|
||||
"There can't be options in lowercase and solutions in uppercase!"
|
||||
"Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
|
||||
"should be a text, if there isn't a title for it choose a reasonable one based on its contents."
|
||||
)
|
||||
|
||||
return {
|
||||
"role": "system",
|
||||
"content": (
|
||||
f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
|
||||
f"structure the data into a single json with this template: {self._reading_exam_template()}\n"
|
||||
|
||||
"You will need find out how many parts the exam has a correctly place its exercises. You will "
|
||||
"encounter 4 types of exercises:\n"
|
||||
" - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
|
||||
" - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
|
||||
" - \"trueFalse\": questions that its answers can only be true false or not given\n"
|
||||
" - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
|
||||
"contains the solutions and sometimes random words to throw off the students\n"
|
||||
|
||||
"These 4 types of exercises will need to be placed in the correct json template inside each part, "
|
||||
"the templates are as follows:\n "
|
||||
|
||||
f"You are processing an English reading comprehension exam{solutions_str}. Structure the data according "
|
||||
f"to this json template: {self._reading_exam_template()}\n\n"
|
||||
|
||||
"The exam contains these exercise types:\n"
|
||||
"1. \"writeBlanks\": Short answer questions with strict word limits\n"
|
||||
"2. \"matchSentences\": Match headings or ideas with paragraphs, the sentences field\n"
|
||||
"3. \"trueFalse\": Evaluate statements as True/False/Not Given\n"
|
||||
"4. \"fillBlanks\": Complete text using provided word bank\n"
|
||||
"5. \"multipleChoice\": Select correct option from choices\n\n"
|
||||
|
||||
"Exercise templates:\n"
|
||||
f"writeBlanks: {self._write_blanks()}\n"
|
||||
f"matchSentences: {self._match_sentences()}\n"
|
||||
f"trueFalse: {self._true_false()}\n"
|
||||
f"fillBlanks: {self._fill_blanks()}\n\n"
|
||||
|
||||
f"fillBlanks: {self._fill_blanks()}\n"
|
||||
f"multipleChoice: {self._multiple_choice()}\n\n"
|
||||
|
||||
"Important details to capture:\n"
|
||||
"- Exercise section instructions and constraints\n"
|
||||
"- Question numbering and grouping\n"
|
||||
"- Word limits and formatting requirements\n"
|
||||
"- Marking schemes and answer formats\n\n"
|
||||
|
||||
f"{tail}"
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user