ENCOA-276, ENCOA-277

This commit is contained in:
Carlos-Mesquita
2024-12-21 19:27:14 +00:00
parent 0262971b11
commit 09d6242360
25 changed files with 375 additions and 86 deletions

View File

@@ -37,9 +37,25 @@ class GradeSpeaking:
# Process all transcriptions concurrently (up to 4)
self._log(task, request_id, 'Starting batch transcription')
text_answers = await asyncio.gather(*[
text_transcription_segments = await asyncio.gather(*[
self._stt.speech_to_text(file_path)
for file_path in temp_files
], return_exceptions=True)
successful_transcriptions = []
failed_indices = []
successful_indices = []
for i, result in enumerate(text_transcription_segments):
if isinstance(result, Exception):
self._log(task, request_id, f'Transcription failed for exercise {i + 1}: {str(result)}')
failed_indices.append(i)
elif isinstance(result, list):
successful_transcriptions.append(result)
successful_indices.append(i)
text_answers = await asyncio.gather(*[
self._stt.fix_overlap(self._llm, answer_segments)
for answer_segments in successful_transcriptions
])
for answer in text_answers:
@@ -63,14 +79,17 @@ class GradeSpeaking:
self._log(task, request_id, 'Formatting answers and questions for prompt.')
formatted_text = ""
for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
formatted_text += f"**Question {i}:**\n{item.question}\n\n"
formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
for success_idx, orig_idx in enumerate(successful_indices):
formatted_text += f"**Question {orig_idx + 1}:**\n{items[orig_idx].question}\n\n"
formatted_text += f"**Answer {orig_idx + 1}:**\n{text_answers[success_idx]}\n\n"
self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
else:
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
if len(text_answers) > 0:
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
else:
return self._zero_rating("The audio recording failed to be transcribed.")
self._log(task, request_id, 'Requesting grading of the answer(s).')
response = await self._grade_task(task, questions_and_answers)
@@ -79,37 +98,43 @@ class GradeSpeaking:
if task in {1, 3}:
self._log(task, request_id, 'Adding perfect answer(s) to response.')
# TODO: check if it is answer["answer"] instead
for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer
# Add responses for successful transcriptions
for success_idx, orig_idx in enumerate(successful_indices):
response['perfect_answer_' + str(orig_idx + 1)] = perfect_answers[
orig_idx] # Changed from success_idx
response['transcript_' + str(orig_idx + 1)] = text_answers[success_idx]
response['fixed_text_' + str(orig_idx + 1)] = await self._get_speaking_corrections(
text_answers[success_idx])
self._log(task, request_id, 'Getting speaking corrections in parallel')
# Get all corrections in parallel
fixed_texts = await asyncio.gather(*[
self._get_speaking_corrections(answer)
for answer in text_answers
])
self._log(task, request_id, 'Adding transcript and fixed texts to response.')
for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = fixed
# Add empty strings for failed transcriptions but keep perfect answers
for failed_idx in failed_indices:
response['perfect_answer_' + str(failed_idx + 1)] = perfect_answers[
failed_idx] # Keep perfect answer
response['transcript_' + str(failed_idx + 1)] = ""
response['fixed_text_' + str(failed_idx + 1)] = ""
response[f'error_{failed_idx + 1}'] = f"Transcription failed for exercise {failed_idx + 1}"
else:
response['transcript'] = text_answers[0]
self._log(task, request_id, 'Requesting fixed text.')
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
response['perfect_answer'] = perfect_answers[0]["answer"]
response['transcript'] = text_answers[0] if text_answers else ""
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0]) if text_answers else ""
response['perfect_answer'] = perfect_answers[0]["answer"] if perfect_answers else ""
solutions = []
for file_name in temp_files:
solutions.append(await self._file_storage.upload_file_firebase_get_url(f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name))
for i, file_name in enumerate(temp_files):
try:
if i not in failed_indices:
path = f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav'
else:
path = f'{FilePaths.FIREBASE_FAILED_TRANSCRIPTION_FILES_PATH}_grading_{request_id}_ex_{i + 1}.wav'
solution_url = await self._file_storage.upload_file_firebase_get_url(path, file_name)
solutions.append(solution_url)
except Exception as e:
self._log(task, request_id, f'Failed to upload file {i + 1}: {str(e)}')
solutions.append("")
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
response["solutions"] = solutions
if task in {1,3}:
if task in {1, 3}:
response["answer"] = solutions
else:
response["fullPath"] = solutions[0]