More or less fixed the reading import; attempted the listening import

This commit is contained in:
Carlos-Mesquita
2024-11-10 06:46:58 +00:00
parent 6909d75eb6
commit afeaf118c6
33 changed files with 3712 additions and 86 deletions

View File

@@ -1,7 +1,7 @@
import queue
from abc import ABC, abstractmethod
from queue import Queue
from typing import Dict, List
from typing import Dict, List, Any
from fastapi import UploadFile
@@ -23,3 +23,9 @@ class IListeningService(ABC):
@abstractmethod
async def get_dialog_from_audio(self, upload: UploadFile):
    """Abstract hook that concrete listening services must override.

    Args:
        upload: The uploaded file handed to the service.

    The base declaration carries no behaviour of its own; awaiting it
    simply produces ``None``.
    """
@abstractmethod
async def import_exam(
    self, exercises: UploadFile, solutions: UploadFile = None
) -> Dict[str, Any] | None:
    """Abstract hook for importing an exam from uploaded files.

    Args:
        exercises: Uploaded exercise sheet.
        solutions: Optional uploaded solutions sheet; defaults to ``None``.

    Returns:
        Implementations return a dictionary or ``None``. The base
        declaration has no behaviour and produces ``None`` when awaited.
    """

View File

@@ -1,7 +1,7 @@
import asyncio
from logging import getLogger
import random
from typing import Dict
from typing import Dict, Any
from starlette.datastructures import UploadFile
@@ -13,6 +13,7 @@ from app.configs.constants import (
FieldsAndExercises
)
from app.helpers import FileHelper
from .import_listening import ImportListeningModule
from .multiple_choice import MultipleChoice
from .write_blank_forms import WriteBlankForms
from .write_blanks import WriteBlanks
@@ -46,6 +47,7 @@ class ListeningService(IListeningService):
self._write_blanks = WriteBlanks(llm)
self._write_blanks_forms = WriteBlankForms(llm)
self._write_blanks_notes = WriteBlankNotes(llm)
self._import = ImportListeningModule(llm)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
@@ -81,6 +83,12 @@ class ListeningService(IListeningService):
}
}
async def import_exam(
    self, exercises: UploadFile, solutions: UploadFile = None
) -> Dict[str, Any] | None:
    """Import a listening exam from an uploaded exercise sheet.

    Delegates to ``self._import.import_from_file``.

    Args:
        exercises: Uploaded exercise sheet.
        solutions: Optional uploaded solutions sheet.

    Returns:
        Whatever the import module returns: a dictionary, or ``None``.
    """
    # The import module's second positional parameter is ``audio``, so a
    # positional call would hand the solutions file to ``audio`` and leave
    # ``solutions`` unset. Pass by keyword instead; no audio upload is
    # available at this level, so ``None`` is supplied explicitly.
    return await self._import.import_from_file(
        exercises, audio=None, solutions=solutions
    )
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)

View File

@@ -0,0 +1,180 @@
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from app.dtos.exams.listening import ListeningExam
from app.helpers import FileHelper
from app.mappers.listening import ListeningMapper
from app.services.abc import ILLMService
class ImportListeningModule:
    """Turn an uploaded listening exercise sheet into a structured exam dict.

    Uploads are stored through ``FileHelper.save_upload``, converted to HTML
    with ``FileHelper.convert_file_to_html``, and the HTML is sent to the LLM
    service together with the exercise templates defined on this class.
    """

    def __init__(self, llm_service: ILLMService):
        """Keep a module logger and the LLM service used for predictions."""
        self._logger = getLogger(__name__)
        self._llm = llm_service

    async def import_from_file(
        self,
        exercises: UploadFile,
        audio: UploadFile,
        solutions: UploadFile | None = None
    ) -> Dict[str, Any] | None:
        """Import an exam from an exercise sheet and optional solutions.

        Args:
            exercises: Uploaded exercise sheet.
            audio: Accepted but not read anywhere in this method.
                NOTE(review): because it is the second positional parameter,
                a two-argument positional call binds its second argument
                here rather than to ``solutions``.
            solutions: Optional uploaded solutions sheet.

        Returns:
            ``model_dump(exclude_none=True)`` of the LLM result, or ``None``
            when the LLM call produced a falsy result.
        """
        path_id = str(uuid4())
        work_dir = f'./tmp/{path_id}'
        has_solutions = solutions is not None

        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
        # Once the first upload has been stored, always clean the working
        # directory, including when conversion or the LLM call raises, so
        # failed imports do not leave files behind under ./tmp.
        try:
            FileHelper.convert_file_to_html(
                f'{work_dir}/exercises.{ext}',
                f'{work_dir}/exercises.html'
            )

            # Same test as the flag passed on below, so the solutions HTML
            # exists exactly when the reader is told to open it.
            if has_solutions:
                solutions_ext, _ = await FileHelper.save_upload(
                    solutions, "solutions", path_id
                )
                FileHelper.convert_file_to_html(
                    f'{work_dir}/solutions.{solutions_ext}',
                    f'{work_dir}/solutions.html'
                )

            response = await self._get_listening_sections(
                path_id, has_solutions
            )
        finally:
            FileHelper.remove_directory(work_dir)

        if response:
            return response.model_dump(exclude_none=True)
        return None

    async def _get_listening_sections(
        self,
        path_id: str,
        has_solutions: bool = False
    ) -> ListeningExam | None:
        """Read the converted HTML and ask the LLM for a structured exam.

        Args:
            path_id: Name of the working directory under ``./tmp``.
            has_solutions: When true, ``solutions.html`` is read as well and
                appended as a second user message.

        Returns:
            The result of ``self._llm.pydantic_prediction``; the caller
            treats a falsy result as "no exam".
        """
        async with aiofiles.open(
            f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
        ) as f:
            exercises_html = await f.read()

        messages = [
            self._instructions(has_solutions),
            {
                "role": "user",
                "content": f"Listening exercise sheet:\n\n{exercises_html}"
            }
        ]

        if has_solutions:
            async with aiofiles.open(
                f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8'
            ) as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })

        return await self._llm.pydantic_prediction(
            messages,
            ListeningMapper.map_to_test_model,
            str(self._listening_json_schema())
        )

    @staticmethod
    def _multiple_choice_template() -> dict:
        """Return the placeholder template for a multipleChoice exercise."""
        return {
            "type": "multipleChoice",
            "prompt": "<general instructions for this section>",
            "questions": [
                {
                    "id": "<question number as string>",
                    "prompt": "<question text>",
                    "options": [
                        {
                            "id": "<A/B/C/D>",
                            "text": "<option text>"
                        }
                    ],
                    "solution": "<correct option letter>",
                    "variant": "text"
                }
            ]
        }

    @staticmethod
    def _write_blanks_questions_template() -> dict:
        """Return the writeBlanks template for the "questions" variant."""
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>",
            "solutions": [
                {
                    "id": "<question number as string>",
                    "solution": ["<acceptable answer(s)>"]
                }
            ],
            "variant": "questions"
        }

    @staticmethod
    def _write_blanks_fill_template() -> dict:
        """Return the writeBlanks template for the "fill" variant."""
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<A summary with blanks denoted by {{id}}>",
            "solutions": [
                {
                    "id": "<blank number as string inside {{}}>",
                    "solution": ["<correct word>"]
                }
            ],
            "variant": "fill"
        }

    @staticmethod
    def _write_blanks_form_template() -> dict:
        """Return the writeBlanks template for the "form" variant."""
        return {
            "type": "writeBlanks",
            "maxWords": "<number>",
            "prompt": "<instructions>",
            "text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
            "solutions": [
                {
                    "id": "<blank number as string inside {{}}>",
                    "solution": ["<correct word>"]
                }
            ],
            "variant": "form"
        }

    def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
        """Build the system message that describes the exercise templates.

        Args:
            has_solutions: Adds " and its solutions" to the opening sentence.

        Returns:
            A chat message dict with ``role`` "system" and the prompt text.
            The templates are interpolated through their ``str()`` form.
        """
        solutions_str = " and its solutions" if has_solutions else ""
        return {
            "role": "system",
            "content": (
                f"You are processing a listening test exercise sheet{solutions_str}. "
                "Structure each exercise exactly according to these json templates:\n\n"
                f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
                f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n"
                f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n"
                f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
                "\nImportant rules:\n"
                "1. Keep exact question numbering from the original\n"
                "2. Include all options for multiple choice questions\n"
                "3. Mark blanks with {{id}} where id is the question number\n"
                "4. Set maxWords according to the instructions\n"
                "5. Include all possible correct answers in solution arrays\n"
                "6. Maintain exact spacing and formatting from templates\n"
                "7. Use appropriate variant for writeBlanks (questions/fill/form)\n"
                "8. For text fields, use actual newlines between questions/sentences\n"
            )
        }

    def _listening_json_schema(self) -> Dict[str, Any]:
        """Return one example of every exercise template under "exercises"."""
        return {
            "exercises": [
                self._multiple_choice_template(),
                self._write_blanks_questions_template(),
                self._write_blanks_fill_template(),
                self._write_blanks_form_template()
            ]
        }

View File

@@ -39,7 +39,7 @@ class ImportReadingModule:
exercises_html = await f.read()
messages = [
self._instructions(),
self._instructions(solutions),
{
"role": "user",
"content": f"Exam question sheet:\n\n{exercises_html}"
@@ -66,18 +66,20 @@ class ImportReadingModule:
self._write_blanks(),
self._fill_blanks(),
self._match_sentences(),
self._true_false()
self._true_false(),
self._multiple_choice()
]
return json
@staticmethod
def _reading_exam_template():
return {
"minTimer": "<number of minutes as int not string>",
"minTimer": "<integer representing minutes allowed for the exam>",
"parts": [
{
"text": {
"title": "<title of the passage>",
"content": "<the text of the passage>",
"title": "<title of the reading passage>",
"content": "<full text content of the reading passage>",
},
"exercises": []
}
@@ -87,17 +89,18 @@ class ImportReadingModule:
@staticmethod
def _write_blanks():
return {
"maxWords": "<number of max words return the int value not string>",
"maxWords": "<integer max words allowed per answer>",
"solutions": [
{
"id": "<number of the question as string>",
"id": "<question number as string>",
"solution": [
"<at least one solution can have alternative solutions (that dont exceed maxWords)>"
"<acceptable answer(s) within maxWords limit>"
]
},
}
],
"text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
"type": "writeBlanks"
"text": "<numbered questions with format: <question text>{{<question number>}}\\n>",
"type": "writeBlanks",
"prompt": "<specific instructions for this exercise section>"
}
@staticmethod
@@ -105,19 +108,20 @@ class ImportReadingModule:
return {
"options": [
{
"id": "<uppercase letter that identifies a paragraph>",
"sentence": "<either a heading or an idea>"
"id": "<paragraph letter A-F>",
"sentence": "<THIS NEEDS TO BE A PARAGRAPH OF THE SECTION TEXT>"
}
],
"sentences": [
{
"id": "<the question id not the option id>",
"solution": "<id in options>",
"sentence": "<heading or an idea>",
"id": "<question number as string>",
"solution": "<matching paragraph letter>",
"sentence": "<A SHORT SENTENCE THAT CONVEYS AND IDEA OR HEADING>"
}
],
"type": "matchSentences",
"variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
"variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>",
"prompt": "<specific instructions for this exercise section>"
}
@staticmethod
@@ -125,12 +129,34 @@ class ImportReadingModule:
return {
"questions": [
{
"prompt": "<question>",
"solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
"id": "<the question id>"
"id": "<question number>",
"prompt": "<statement to evaluate>",
"solution": "<one of: true, false, not_given>",
}
],
"type": "trueFalse"
"type": "trueFalse",
"prompt": "<specific instructions including T/F/NG marking scheme>"
}
@staticmethod
def _multiple_choice():
return {
"questions": [
{
"id": "<question number>",
"prompt": "<question text>",
"options": [
{
"id": "<A, B, or C>",
"text": "<option text>"
}
],
"solution": "<correct option letter>",
"variant": "text"
}
],
"type": "multipleChoice",
"prompt": "<specific instructions for this exercise section>"
}
@staticmethod
@@ -138,53 +164,69 @@ class ImportReadingModule:
return {
"solutions": [
{
"id": "<blank id>",
"solution": "<word>"
"id": "<blank number>",
"solution": "<correct word>"
}
],
"text": "<section of text with blanks denoted by {{<blank id>}}>",
"text": "<text passage with blanks marked as {{<blank number>}}>",
"type": "fillBlanks",
"words": [
{
"letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
"word": "<word>"
"letter": "<word identifier letter>",
"word": "<word from word bank>"
}
]
],
"prompt": "<specific instructions for this exercise section>"
}
def _instructions(self, solutions = False):
def _instructions(self, solutions=False):
solutions_str = " and its solutions" if solutions else ""
tail = (
"The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and"
"all the solutions right!"
if not solutions else
"Do your utmost to correctly identify the sections, its exercises and respective solutions"
"Parse the exam carefully and identify:\n"
"1. Time limit from instructions\n"
"2. Reading passage title and full content\n"
"3. All exercise sections and their specific instructions\n"
"4. Question numbering and grouping\n"
"5. Word limits and formatting requirements\n"
"6. Specific marking schemes (e.g., T/F/NG)\n\n"
+ (
"Solutions were not provided - analyze the passage carefully to determine correct answers."
if not solutions else
"Use the provided solutions to fill in all answer fields accurately."
)
+
"Pay extra attention to fillblanks exercises the solution and option wording must match in case!"
"There can't be options in lowercase and solutions in uppercase!"
"Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
"should be a text, if there isn't a title for it choose a reasonable one based on its contents."
)
return {
"role": "system",
"content": (
f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
f"structure the data into a single json with this template: {self._reading_exam_template()}\n"
"You will need find out how many parts the exam has a correctly place its exercises. You will "
"encounter 4 types of exercises:\n"
" - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
" - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
" - \"trueFalse\": questions that its answers can only be true false or not given\n"
" - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
"contains the solutions and sometimes random words to throw off the students\n"
"These 4 types of exercises will need to be placed in the correct json template inside each part, "
"the templates are as follows:\n "
f"You are processing an English reading comprehension exam{solutions_str}. Structure the data according "
f"to this json template: {self._reading_exam_template()}\n\n"
"The exam contains these exercise types:\n"
"1. \"writeBlanks\": Short answer questions with strict word limits\n"
"2. \"matchSentences\": Match headings or ideas with paragraphs, the sentences field\n"
"3. \"trueFalse\": Evaluate statements as True/False/Not Given\n"
"4. \"fillBlanks\": Complete text using provided word bank\n"
"5. \"multipleChoice\": Select correct option from choices\n\n"
"Exercise templates:\n"
f"writeBlanks: {self._write_blanks()}\n"
f"matchSentences: {self._match_sentences()}\n"
f"trueFalse: {self._true_false()}\n"
f"fillBlanks: {self._fill_blanks()}\n\n"
f"fillBlanks: {self._fill_blanks()}\n"
f"multipleChoice: {self._multiple_choice()}\n\n"
"Important details to capture:\n"
"- Exercise section instructions and constraints\n"
"- Question numbering and grouping\n"
"- Word limits and formatting requirements\n"
"- Marking schemes and answer formats\n\n"
f"{tail}"
)
}
}

View File

@@ -2,6 +2,9 @@ import json
import re
import logging
from typing import List, Optional, Callable, TypeVar
from numba.core.transforms import consolidate_multi_exit_withs
from numba.cuda import const
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
@@ -123,7 +126,9 @@ class OpenAI(ILLMService):
while attempt < 3:
result = await self._client.chat.completions.create(**params)
result_content = result.choices[0].message.content
try:
print(result_content)
result_json = json.loads(result_content)
return map_to_model(result_json)
except Exception as e: