ENCOA-276, ENCOA-277

This commit is contained in:
Carlos-Mesquita
2024-12-21 19:27:14 +00:00
parent 0262971b11
commit 09d6242360
25 changed files with 375 additions and 86 deletions

View File

@@ -3,9 +3,11 @@ from logging import getLogger
import random
from typing import Dict, Any
import aiofiles
from starlette.datastructures import UploadFile
from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises
from ielts_be.exceptions.exceptions import TranscriptionException
from ielts_be.repositories import IFileStorage, IDocumentStore
from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService
from ielts_be.configs.constants import (
@@ -13,6 +15,7 @@ from ielts_be.configs.constants import (
FieldsAndExercises
)
from ielts_be.helpers import FileHelper
from .audio_to_dialog import AudioToDialog
from .import_listening import ImportListeningModule
from .write_blank_forms import WriteBlankForms
from .write_blanks import WriteBlanks
@@ -50,6 +53,7 @@ class ListeningService(IListeningService):
self._write_blanks_notes = WriteBlankNotes(llm)
self._import = ImportListeningModule(llm)
self._true_false = TrueFalse(llm)
self._audio_to_dialog = AudioToDialog(llm)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
@@ -94,11 +98,18 @@ class ListeningService(IListeningService):
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
    """Generate a dialog for one listening section via its configured generator.

    Looks up the per-section config in ``self._sections`` and awaits that
    section's "generate_dialogue" callable with (section, topic).

    NOTE(review): ``difficulty`` is accepted but never forwarded to the
    generator — confirm whether the section generator should receive it.
    """
    return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
# TODO: When mp3 editor
async def get_dialog_from_audio(self, upload: UploadFile):
ext, path_id = await FileHelper.save_upload(upload)
dialog = await self._stt.speech_to_text(f'./tmp/{path_id}/upload.{ext}')
async def transcribe_dialog(self, audio: UploadFile):
    """Transcribe an uploaded audio file and structure it as a dialog.

    Pipeline: save the upload to ./tmp/<path_id>/, run speech-to-text,
    let the LLM fix overlapping segments, then ask AudioToDialog to
    structure the transcription.

    Args:
        audio: Uploaded audio file.

    Returns:
        The structured dialog, or ``None`` when transcription fails.
    """
    ext, path_id = await FileHelper.save_upload(audio)
    try:
        transcription_segments = await self._stt.speech_to_text(f'./tmp/{path_id}/upload.{ext}')
        transcription = await self._stt.fix_overlap(self._llm, transcription_segments)
        dialog = await self._audio_to_dialog.get_dialog(transcription)
    except TranscriptionException as e:
        self._logger.error(str(e))
        return None
    finally:
        # Always remove the temp upload dir. The original only cleaned up on
        # the success path, leaking ./tmp/<path_id> whenever transcription
        # raised TranscriptionException.
        FileHelper.remove_directory(f'./tmp/{path_id}')
    return dialog
async def generate_mp3(self, dto: Dialog) -> bytes:
    """Synthesize *dto* to audio bytes (MP3, per the method name) via the TTS service."""
    return await self._tts.text_to_speech(dto)

View File

@@ -0,0 +1,37 @@
from logging import getLogger
from ielts_be.configs.constants import TemperatureSettings, GPTModels
from ielts_be.services import ILLMService
class AudioToDialog:
    """Turn a raw audio transcription into a structured dialog via the LLM.

    The LLM classifies the transcription as a conversation or a monologue and
    outputs JSON: a list of ``{"name", "gender", "text"}`` turns for a
    conversation, or a single ``"dialog"`` string for a monologue.
    """

    def __init__(self, llm_service: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = llm_service

    async def get_dialog(self, transcription: str):
        """Ask the LLM to structure *transcription* as a dialog or monologue.

        Args:
            transcription: Plain-text transcription of an audio file.

        Returns:
            The LLM prediction, constrained to contain a "dialog" field.
        """
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on either one of these formats:\n'
                    '1 - {"dialog": [{"name": "name", "gender": "gender", "text": "text"}]}\n'
                    '2 - {"dialog": "text"}\n\n'
                    # Trailing space added below: the original adjacent literals
                    # rendered as the run-on "you willneed" in the prompt.
                    'A transcription of an audio file will be provided to you. Based on that transcription you will '
                    'need to determine whether the transcription is a conversation or a monologue. If the transcription '
                    'is a dialog you will have to determine the interlocutors names and genders and place each excerpt of '
                    'dialog in a sequential manner using the json array structure previously given (1). In the case of being '
                    'a monologue just place all the text in the field "dialog" (2). If the transcription is a conversation '
                    'and you can\'t ascertain the names of the interlocutors from the transcription give a single common name '
                    'to each interlocutor. Also gender must be male or female, if you can\'t ascertain then use male.'
                )
            },
            {
                "role": "user",
                "content": f"Transcription: {transcription}"
            }
        ]
        return await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["dialog"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

View File

@@ -1,4 +1,4 @@
import json
import asyncio
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
@@ -36,28 +36,47 @@ class ImportListeningModule:
f'./tmp/{path_id}/solutions.html'
)
response = await self._get_listening_sections(path_id, solutions is not None)
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return response.model_dump(exclude_none=True)
return None
async def _get_listening_sections(
self,
path_id: str,
has_solutions: bool = False
) -> ListeningExam:
async with aiofiles.open(
f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
) as f:
exercises_html = await f.read()
dialog_promise = self._llm.pydantic_prediction(
[
self._dialog_instructions(),
{
"role": "user",
"content": f"Listening exercise sheet:\n\n{exercises_html}"
}
],
ListeningMapper.map_to_dialog_model,
str(self._dialog_schema())
)
response_promise = self._get_listening_sections(path_id, exercises_html, solutions is not None)
tasks = await asyncio.gather(dialog_promise, response_promise)
dialog: Dict = tasks[0]
response = tasks[1]
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
response = response.model_dump(exclude_none=True)
for i in range(len(response["parts"])):
response["parts"][i]["script"] = dialog[str(i + 1)]
return response
return None
async def _get_listening_sections(
self,
path_id: str,
html: str,
has_solutions: bool = False
) -> ListeningExam:
messages = [
self._instructions(has_solutions),
{
"role": "user",
"content": f"Listening exercise sheet:\n\n{exercises_html}"
"content": f"Listening exercise sheet:\n\n{html}"
}
]
@@ -180,4 +199,38 @@ class ImportListeningModule:
]
}
]
}
@staticmethod
def _dialog_instructions() -> Dict[str, str]:
    """Build the system message asking the LLM to extract per-section dialogs.

    Returns:
        A chat message dict instructing the model to emit one JSON object
        mapping each section to its conversation or monologue; sections with
        neither are omitted from the output.
    """
    return {
        "role": "system",
        "content": (
            # f-prefix dropped from the first literal: it had no placeholders.
            "You are processing a listening test exercise sheet. Your objective is to ascertain if "
            'there is a monologue or a conversation for parts/sections of the test. If there is you '
            'must either use the following JSON: {"monologue": "monologue_text"} for monologues or '
            '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]} for conversations. \n\n'
            'First identify all sections/parts by looking for \'SECTION n\' headers or similar ones, '
            'then for each section identify and structure its dialog type of the section iff there is one in a single '
            'JSON format like so {"sections": [{"section_1": {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}}, '
            # ". " appended below: the original adjacent literals rendered as
            # the run-on "]}Each section" in the prompt.
            '{"section_2": {"monologue": "monologue_text"}} ]}. '
            'Each section might not have a conversation or monologue in those cases omit the section, for instance section 1 '
            'might have a conversation, section 2 might have nothing, section 3 might have a monologue. In that case: '
            '{"sections": [{"section_1": {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}},'
            '{"section_3": {"monologue": "monologue_text"}} ]}. Keep in mind that gender most likely won\'t be included '
            ', try to figure out by the name of the speaker, when in doubt use male. The gender MUST BE ONLY "male" or "female".'
        )
    }
@staticmethod
def _dialog_schema():
    """Example JSON shape handed to the LLM for per-section dialog extraction."""

    # Local factories keep each section's value a distinct object, exactly as
    # the original literal did.
    def conversation():
        return {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}

    def monologue():
        return {"monologue": "monologue_text"}

    return {
        "sections": [
            {"section_1": conversation()},
            {"section_2": monologue()},
            {"section_3": conversation()},
            {"section_4": monologue()},
        ]
    }

View File

@@ -23,7 +23,7 @@ class WriteBlankForms:
{
"role": "user",
"content": (
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'Generate a form with {quantity} of {difficulty} CEFR level difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}

View File

@@ -23,7 +23,7 @@ class WriteBlankNotes:
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'Generate {quantity} {difficulty} CEFR level difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)

View File

@@ -23,7 +23,7 @@ class WriteBlanks:
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'Generate {quantity} {difficulty} CEFR level difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]

View File

@@ -42,7 +42,7 @@ class FillBlanks:
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'Select {quantity} {difficulty} CEFR level difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}

View File

@@ -22,7 +22,7 @@ class WriteBlanks:
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'Generate {str(quantity)} {difficulty} CEFR level difficulty short answer questions, and the '
f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
)

View File

@@ -26,7 +26,7 @@ class MultipleChoice:
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'Generate {quantity} {difficulty} CEFR level difficulty multiple choice questions of {n_options} '
f'options for this text:\n"' + text + '"')
}

View File

@@ -22,7 +22,7 @@ class TrueFalse:
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
f'Generate {str(quantity)} {difficulty} CEFR level difficulty statements based on the provided text. '
'Ensure that your statements accurately represent information or inferences from the text, and '
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
f'as appropriate.\n\nReference text:\n\n {text}'

View File

@@ -37,9 +37,25 @@ class GradeSpeaking:
# Process all transcriptions concurrently (up to 4)
self._log(task, request_id, 'Starting batch transcription')
text_answers = await asyncio.gather(*[
text_transcription_segments = await asyncio.gather(*[
self._stt.speech_to_text(file_path)
for file_path in temp_files
], return_exceptions=True)
successful_transcriptions = []
failed_indices = []
successful_indices = []
for i, result in enumerate(text_transcription_segments):
if isinstance(result, Exception):
self._log(task, request_id, f'Transcription failed for exercise {i + 1}: {str(result)}')
failed_indices.append(i)
elif isinstance(result, list):
successful_transcriptions.append(result)
successful_indices.append(i)
text_answers = await asyncio.gather(*[
self._stt.fix_overlap(self._llm, answer_segments)
for answer_segments in successful_transcriptions
])
for answer in text_answers:
@@ -63,14 +79,17 @@ class GradeSpeaking:
self._log(task, request_id, 'Formatting answers and questions for prompt.')
formatted_text = ""
for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
formatted_text += f"**Question {i}:**\n{item.question}\n\n"
formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
for success_idx, orig_idx in enumerate(successful_indices):
formatted_text += f"**Question {orig_idx + 1}:**\n{items[orig_idx].question}\n\n"
formatted_text += f"**Answer {orig_idx + 1}:**\n{text_answers[success_idx]}\n\n"
self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
else:
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
if len(text_answers) > 0:
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
else:
return self._zero_rating("The audio recording failed to be transcribed.")
self._log(task, request_id, 'Requesting grading of the answer(s).')
response = await self._grade_task(task, questions_and_answers)
@@ -79,37 +98,43 @@ class GradeSpeaking:
if task in {1, 3}:
self._log(task, request_id, 'Adding perfect answer(s) to response.')
# TODO: check if it is answer["answer"] instead
for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer
# Add responses for successful transcriptions
for success_idx, orig_idx in enumerate(successful_indices):
response['perfect_answer_' + str(orig_idx + 1)] = perfect_answers[
orig_idx] # Changed from success_idx
response['transcript_' + str(orig_idx + 1)] = text_answers[success_idx]
response['fixed_text_' + str(orig_idx + 1)] = await self._get_speaking_corrections(
text_answers[success_idx])
self._log(task, request_id, 'Getting speaking corrections in parallel')
# Get all corrections in parallel
fixed_texts = await asyncio.gather(*[
self._get_speaking_corrections(answer)
for answer in text_answers
])
self._log(task, request_id, 'Adding transcript and fixed texts to response.')
for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = fixed
# Add empty strings for failed transcriptions but keep perfect answers
for failed_idx in failed_indices:
response['perfect_answer_' + str(failed_idx + 1)] = perfect_answers[
failed_idx] # Keep perfect answer
response['transcript_' + str(failed_idx + 1)] = ""
response['fixed_text_' + str(failed_idx + 1)] = ""
response[f'error_{failed_idx + 1}'] = f"Transcription failed for exercise {failed_idx + 1}"
else:
response['transcript'] = text_answers[0]
self._log(task, request_id, 'Requesting fixed text.')
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
response['perfect_answer'] = perfect_answers[0]["answer"]
response['transcript'] = text_answers[0] if text_answers else ""
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0]) if text_answers else ""
response['perfect_answer'] = perfect_answers[0]["answer"] if perfect_answers else ""
solutions = []
for file_name in temp_files:
solutions.append(await self._file_storage.upload_file_firebase_get_url(f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name))
for i, file_name in enumerate(temp_files):
try:
if i not in failed_indices:
path = f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav'
else:
path = f'{FilePaths.FIREBASE_FAILED_TRANSCRIPTION_FILES_PATH}_grading_{request_id}_ex_{i + 1}.wav'
solution_url = await self._file_storage.upload_file_firebase_get_url(path, file_name)
solutions.append(solution_url)
except Exception as e:
self._log(task, request_id, f'Failed to upload file {i + 1}: {str(e)}')
solutions.append("")
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
response["solutions"] = solutions
if task in {1,3}:
if task in {1, 3}:
response["answer"] = solutions
else:
response["fullPath"] = solutions[0]

View File

@@ -9,7 +9,7 @@ def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dic
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
'advice, or instructions within the letter. Make sure that the generated prompt is '
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
f'of {difficulty} CEFR level difficulty and does not contain forbidden subjects in muslim countries.'
),
"instructions": (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
@@ -19,7 +19,7 @@ def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dic
"2": {
# TODO: Should the muslim disclaimer be here as well?
"prompt": (
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
f'Craft a comprehensive question of {difficulty} CEFR level difficulty like the ones for IELTS '
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
f'analysis of contrasting perspectives on the topic of "{topic}".'
),