Fixed listening import

This commit is contained in:
Carlos-Mesquita
2024-11-15 02:47:37 +00:00
parent e99eda485e
commit 18103c931e
4 changed files with 148 additions and 56 deletions

View File

@@ -44,17 +44,26 @@ class MultipleChoiceExercise(ExerciseBase):
questions: List[MCQuestion]
# One question of a write-blanks exercise in its per-question form:
# an identifier, the prompt text and the answers stored for it.
class WriteBlankQuestion(BaseModel):
# Identifier of the question/blank (a string, not an int).
id: str
# Text of the question; presumably contains the blank placeholder — TODO confirm.
prompt: str
# One or more answer strings for this question.
solution: List[str]
# Closed set of layout variants for a write-blanks exercise.
# The str mix-in makes each member compare equal to its raw string value.
class WriteBlanksVariant(str, Enum):
QUESTIONS = "questions"
FILL = "fill"
FORM = "form"
# Write-blanks exercise expressed as a list of individual questions.
# Other fields are inherited from ExerciseBase, which is not visible here.
class WriteBlanksQuestionExercise(ExerciseBase):
# Fixed type tag; only the literal "writeBlanks" is accepted.
type: Literal["writeBlanks"]
# Integer word limit; presumably the maximum words allowed per answer — TODO confirm.
maxWords: int
# The questions that make up the exercise.
questions: List[WriteBlankQuestion]
# Which layout variant the exercise uses.
variant: WriteBlanksVariant
# Answers for a single blank, keyed by the blank's identifier.
class WriteBlankSolution(BaseModel):
# Identifier of the blank these answers belong to.
id: str
# One or more answer strings for the blank.
solution: List[str]
class WriteBlanksExercise(ExerciseBase):
type: Literal["writeBlanks"]
maxWords: int
@@ -77,4 +86,4 @@ class ListeningSection(BaseModel):
class ListeningExam(BaseModel):
module: str = "listening"
minTimer: Optional[int]
sections: List[ListeningSection]
parts: List[ListeningSection]

View File

@@ -1,14 +1,75 @@
from typing import Dict, Any
from typing import Dict, Any, List, Union, Optional
from app.dtos.exams.listening import TrueFalseExercise, MultipleChoiceExercise, WriteBlanksExercise, ListeningExam, \
ListeningSection
from pydantic import BaseModel
from app.dtos.exams.listening import (
TrueFalseExercise,
MultipleChoiceExercise,
WriteBlanksExercise,
ListeningExam,
ListeningSection,
WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion
)
# One part of a listening exam whose write-blanks exercises are still in
# their per-question form (WriteBlanksQuestionExercise).
class ListeningQuestionSection(BaseModel):
# Exercises of this part; each entry is one of the three listed exercise models.
exercises: List[Union[TrueFalseExercise, MultipleChoiceExercise, WriteBlanksQuestionExercise]]
# Listening exam made of ListeningQuestionSection parts.
class ListeningQuestionExam(BaseModel):
# The parts of the exam, in order.
parts: List[ListeningQuestionSection]
# Optional integer timer value; presumably minutes allowed for the exam — TODO confirm unit.
minTimer: Optional[int]
# Module tag; defaults to "listening".
module: str = "listening"
class WriteBlankProcessor:
    """Convert a writeBlanks exercise between its two representations.

    ``to_question_model`` builds the per-question form from a raw dict;
    ``to_text_model`` collapses that form into one text plus a solutions list.
    """

    @staticmethod
    def to_question_model(exercise_data: Dict[str, Any]) -> WriteBlanksQuestionExercise:
        """Build a ``WriteBlanksQuestionExercise`` from a raw exercise mapping.

        Args:
            exercise_data: Raw exercise dict. A missing ``questions`` key is
                treated as an empty list and a missing ``variant`` as
                ``"questions"``; ``prompt`` and ``maxWords`` are passed through
                as-is (``None`` when absent).

        Returns:
            The exercise in its per-question form.

        Raises:
            KeyError: If a question entry lacks ``id``, ``prompt`` or
                ``solution``.
        """
        parsed_questions = []
        for raw_question in exercise_data.get("questions", []):
            parsed_questions.append(
                WriteBlankQuestion(
                    id=raw_question["id"],
                    prompt=raw_question["prompt"],
                    solution=raw_question["solution"],
                )
            )

        return WriteBlanksQuestionExercise(
            type="writeBlanks",
            prompt=exercise_data.get("prompt"),
            maxWords=exercise_data.get("maxWords"),
            questions=parsed_questions,
            variant=exercise_data.get("variant", "questions"),
        )

    @staticmethod
    def to_text_model(question_model: WriteBlanksQuestionExercise) -> WriteBlanksExercise:
        """Collapse a per-question exercise into the single-text form.

        The text is assembled from the question prompts according to the
        variant: ``questions`` appends a ``{{id}}`` marker to every prompt,
        while ``fill`` and ``form`` use the prompts unchanged.

        Args:
            question_model: Exercise in its per-question form.

        Returns:
            The equivalent ``WriteBlanksExercise`` with ``text`` and
            ``solutions`` populated.

        Raises:
            ValueError: If the variant is none of questions/fill/form.
        """
        entries = question_model.questions

        # NOTE: '\\n' is a two-character separator (backslash followed by
        # 'n'), not a real newline character.
        if question_model.variant == WriteBlanksVariant.QUESTIONS:
            separator = '\\n'
            pieces = [f"{entry.prompt} {{{{{entry.id}}}}}" for entry in entries]
        elif question_model.variant == WriteBlanksVariant.FILL:
            separator = ' '
            pieces = [f"{entry.prompt}" for entry in entries]
        elif question_model.variant == WriteBlanksVariant.FORM:
            separator = '\\n'
            pieces = [f"{entry.prompt}" for entry in entries]
        else:
            raise ValueError(f"Unknown variant: {question_model.variant}")
        text = separator.join(pieces)

        # One solution entry per question, in the same order as the questions.
        solutions = []
        for entry in entries:
            solutions.append(WriteBlankSolution(id=entry.id, solution=entry.solution))

        return WriteBlanksExercise(
            type="writeBlanks",
            prompt=question_model.prompt,
            maxWords=question_model.maxWords,
            text=text,
            solutions=solutions,
            variant=question_model.variant,
        )
class ListeningMapper:
@staticmethod
def map_to_test_model(response: Dict[str, Any]) -> ListeningExam:
sections = []
for section in response.get('sections', []):
question_parts = []
for section in response.get('parts', []):
section_exercises = []
for exercise in section['exercises']:
@@ -19,14 +80,33 @@ class ListeningMapper:
elif exercise_type == 'multipleChoice':
section_exercises.append(MultipleChoiceExercise(**exercise))
elif exercise_type == 'writeBlanks':
section_exercises.append(WriteBlanksExercise(**exercise))
question_model = WriteBlankProcessor.to_question_model(exercise)
section_exercises.append(question_model)
else:
raise ValueError(f"Unknown exercise type: {exercise_type}")
sections.append(ListeningSection(exercises=section_exercises))
question_parts.append(ListeningQuestionSection(exercises=section_exercises))
return ListeningExam(
sections=sections,
question_exam = ListeningQuestionExam(
parts=question_parts,
minTimer=response.get('minTimer'),
module="listening"
)
final_parts = []
for section in question_exam.parts:
final_exercises = []
for exercise in section.exercises:
if isinstance(exercise, WriteBlanksQuestionExercise):
final_exercises.append(WriteBlankProcessor.to_text_model(exercise))
else:
final_exercises.append(exercise)
final_parts.append(ListeningSection(exercises=final_exercises))
return ListeningExam(
parts=final_parts,
minTimer=response.get('minTimer'),
module="listening"
)

View File

@@ -1,3 +1,4 @@
import json
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
@@ -18,7 +19,6 @@ class ImportListeningModule:
async def import_from_file(
self,
exercises: UploadFile,
audio: UploadFile,
solutions: UploadFile = None
) -> Dict[str, Any] | None:
path_id = str(uuid4())
@@ -99,51 +99,36 @@ class ImportListeningModule:
}
@staticmethod
def _write_blanks_questions_template() -> dict:
def _write_blanks_template() -> dict:
return {
"type": "writeBlanks",
"maxWords": "<number>",
"maxWords": "<integer max words allowed per answer>",
"prompt": "<instructions>",
"text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can only occur at the end of sentence>",
"solutions": [
"questions": [
{
"id": "<question number as string>",
"prompt": "<question text with blanks replaced with {{id}}>",
"solution": ["<acceptable answer(s)>"]
}
],
"variant": "questions"
"variant": "<one of: questions, fill, form - chosen based on format:\n" +
"- questions: for numbered questions with blank at end\n" +
"- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH not separated related questions!\n" +
"- form: when questions and fill dont meet the requirements>"
}
@staticmethod
def _write_blanks_fill_template() -> dict:
def _true_false():
return {
"type": "writeBlanks",
"maxWords": "<number>",
"prompt": "<instructions>",
"text": "<A summary with blanks denoted by {{id}}>",
"solutions": [
"questions": [
{
"id": "<blank number as string inside {{}}>",
"solution": ["<correct word>"]
"id": "<question number>",
"prompt": "<statement to evaluate>",
"solution": "<one of: true, false, not_given>",
}
],
"variant": "fill"
}
@staticmethod
def _write_blanks_form_template() -> dict:
return {
"type": "writeBlanks",
"maxWords": "<number>",
"prompt": "<instructions>",
"text": "<questions separated by newlines '\n' and blanks {{id}} in them the blanks can happen mid text>",
"solutions": [
{
"id": "<blank number as string inside {{}}>",
"solution": ["<correct word>"]
}
],
"variant": "form"
"type": "trueFalse",
"prompt": "<specific instructions including T/F/NG marking scheme>"
}
def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
@@ -152,29 +137,47 @@ class ImportListeningModule:
"role": "system",
"content": (
f"You are processing a listening test exercise sheet{solutions_str}. "
"Structure each exercise exactly according to these json templates:\n\n"
"Structure the test according to this json template:\n\n"
f"{self._listening_json_schema()}\n\n"
"Each exercise within a section should follow these templates:\n\n"
f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
f"2. Write Blanks - Questions format:\n{self._write_blanks_questions_template()}\n\n"
f"3. Write Blanks - Fill format:\n{self._write_blanks_fill_template()}\n\n"
f"4. Write Blanks - Form format:\n{self._write_blanks_form_template()}\n\n"
f"2. True/False Questions:\n{self._true_false()}\n\n"
f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
"\nImportant rules:\n"
"1. Keep exact question numbering from the original\n"
"2. Include all options for multiple choice questions\n"
"3. Mark blanks with {{id}} where id is the question number\n"
"3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n"
"4. Set maxWords according to the instructions\n"
"5. Include all possible correct answers in solution arrays\n"
"6. Maintain exact spacing and formatting from templates\n"
"7. Use appropriate variant for writeBlanks (questions/fill/form)\n"
"6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n"
"7. For writeBlanks, choose the appropriate variant:\n"
" - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n"
" - fill: for paragraph/summary with blanks\n"
" - form: when questions and fill dont meet the requirements\n"
"8. For text fields, use actual newlines between questions/sentences\n"
"9. Format text according to chosen variant:\n"
" - questions: each line should end with {{id}}\n"
" - fill: embed {{id}} naturally in the paragraph\n"
" - form: place {{id}} where blank should appear in text\n"
"10. For True/False, use exact values: true, false, or not_given\n\n"
"11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to "
"you and they are uppercase you should placed them in lowercase.\n\n"
"First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
"then for each section identify and structure its exercises according to the templates above."
)
}
def _listening_json_schema(self) -> Dict[str, Any]:
return {
"exercises": [
self._multiple_choice_template(),
self._write_blanks_questions_template(),
self._write_blanks_fill_template(),
self._write_blanks_form_template()
"minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",
"parts": [
{
"intro": "<optional field that contains information about the section>",
"exercises": [
self._multiple_choice_template(),
self._write_blanks_template(),
self._true_false()
]
}
]
}

View File

@@ -22,7 +22,7 @@ class OpenAI(ILLMService):
def __init__(self, client: AsyncOpenAI):
self._client = client
self._logger = logging.getLogger(__name__)
self._default_model = "gpt-4o-2024-08-06"
self._default_model = "gpt-4o"
async def prediction(
self,
@@ -125,8 +125,8 @@ class OpenAI(ILLMService):
result_content = result.choices[0].message.content
try:
print(result_content)
result_json = json.loads(result_content)
print(str(result_json))
return map_to_model(result_json)
except Exception as e:
attempt += 1