ENCOA-276, ENCOA-277
This commit is contained in:
@@ -37,9 +37,25 @@ class GradeSpeaking:
|
||||
|
||||
# Process all transcriptions concurrently (up to 4)
|
||||
self._log(task, request_id, 'Starting batch transcription')
|
||||
text_answers = await asyncio.gather(*[
|
||||
text_transcription_segments = await asyncio.gather(*[
|
||||
self._stt.speech_to_text(file_path)
|
||||
for file_path in temp_files
|
||||
], return_exceptions=True)
|
||||
|
||||
successful_transcriptions = []
|
||||
failed_indices = []
|
||||
successful_indices = []
|
||||
for i, result in enumerate(text_transcription_segments):
|
||||
if isinstance(result, Exception):
|
||||
self._log(task, request_id, f'Transcription failed for exercise {i + 1}: {str(result)}')
|
||||
failed_indices.append(i)
|
||||
elif isinstance(result, list):
|
||||
successful_transcriptions.append(result)
|
||||
successful_indices.append(i)
|
||||
|
||||
text_answers = await asyncio.gather(*[
|
||||
self._stt.fix_overlap(self._llm, answer_segments)
|
||||
for answer_segments in successful_transcriptions
|
||||
])
|
||||
|
||||
for answer in text_answers:
|
||||
@@ -63,14 +79,17 @@ class GradeSpeaking:
|
||||
self._log(task, request_id, 'Formatting answers and questions for prompt.')
|
||||
|
||||
formatted_text = ""
|
||||
for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
|
||||
formatted_text += f"**Question {i}:**\n{item.question}\n\n"
|
||||
formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
|
||||
for success_idx, orig_idx in enumerate(successful_indices):
|
||||
formatted_text += f"**Question {orig_idx + 1}:**\n{items[orig_idx].question}\n\n"
|
||||
formatted_text += f"**Answer {orig_idx + 1}:**\n{text_answers[success_idx]}\n\n"
|
||||
|
||||
self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
|
||||
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
|
||||
else:
|
||||
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
|
||||
if len(text_answers) > 0:
|
||||
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
|
||||
else:
|
||||
return self._zero_rating("The audio recording failed to be transcribed.")
|
||||
|
||||
self._log(task, request_id, 'Requesting grading of the answer(s).')
|
||||
response = await self._grade_task(task, questions_and_answers)
|
||||
@@ -79,37 +98,43 @@ class GradeSpeaking:
|
||||
if task in {1, 3}:
|
||||
self._log(task, request_id, 'Adding perfect answer(s) to response.')
|
||||
|
||||
# TODO: check if it is answer["answer"] instead
|
||||
for i, answer in enumerate(perfect_answers, start=1):
|
||||
response['perfect_answer_' + str(i)] = answer
|
||||
# Add responses for successful transcriptions
|
||||
for success_idx, orig_idx in enumerate(successful_indices):
|
||||
response['perfect_answer_' + str(orig_idx + 1)] = perfect_answers[
|
||||
orig_idx] # Changed from success_idx
|
||||
response['transcript_' + str(orig_idx + 1)] = text_answers[success_idx]
|
||||
response['fixed_text_' + str(orig_idx + 1)] = await self._get_speaking_corrections(
|
||||
text_answers[success_idx])
|
||||
|
||||
self._log(task, request_id, 'Getting speaking corrections in parallel')
|
||||
# Get all corrections in parallel
|
||||
fixed_texts = await asyncio.gather(*[
|
||||
self._get_speaking_corrections(answer)
|
||||
for answer in text_answers
|
||||
])
|
||||
|
||||
self._log(task, request_id, 'Adding transcript and fixed texts to response.')
|
||||
for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
|
||||
response['transcript_' + str(i)] = answer
|
||||
response['fixed_text_' + str(i)] = fixed
|
||||
# Add empty strings for failed transcriptions but keep perfect answers
|
||||
for failed_idx in failed_indices:
|
||||
response['perfect_answer_' + str(failed_idx + 1)] = perfect_answers[
|
||||
failed_idx] # Keep perfect answer
|
||||
response['transcript_' + str(failed_idx + 1)] = ""
|
||||
response['fixed_text_' + str(failed_idx + 1)] = ""
|
||||
response[f'error_{failed_idx + 1}'] = f"Transcription failed for exercise {failed_idx + 1}"
|
||||
else:
|
||||
response['transcript'] = text_answers[0]
|
||||
|
||||
self._log(task, request_id, 'Requesting fixed text.')
|
||||
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
|
||||
self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
|
||||
|
||||
response['perfect_answer'] = perfect_answers[0]["answer"]
|
||||
response['transcript'] = text_answers[0] if text_answers else ""
|
||||
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0]) if text_answers else ""
|
||||
response['perfect_answer'] = perfect_answers[0]["answer"] if perfect_answers else ""
|
||||
|
||||
solutions = []
|
||||
for file_name in temp_files:
|
||||
solutions.append(await self._file_storage.upload_file_firebase_get_url(f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name))
|
||||
for i, file_name in enumerate(temp_files):
|
||||
try:
|
||||
if i not in failed_indices:
|
||||
path = f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav'
|
||||
else:
|
||||
path = f'{FilePaths.FIREBASE_FAILED_TRANSCRIPTION_FILES_PATH}_grading_{request_id}_ex_{i + 1}.wav'
|
||||
|
||||
solution_url = await self._file_storage.upload_file_firebase_get_url(path, file_name)
|
||||
solutions.append(solution_url)
|
||||
except Exception as e:
|
||||
self._log(task, request_id, f'Failed to upload file {i + 1}: {str(e)}')
|
||||
solutions.append("")
|
||||
|
||||
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
|
||||
response["solutions"] = solutions
|
||||
if task in {1,3}:
|
||||
if task in {1, 3}:
|
||||
response["answer"] = solutions
|
||||
else:
|
||||
response["fullPath"] = solutions[0]
|
||||
|
||||
Reference in New Issue
Block a user