Files
encoach_backend/app/services/impl/speaking.py
Carlos Mesquita 3cf9fa5cba Async release
2024-07-23 08:40:35 +01:00

522 lines
22 KiB
Python

import logging
import os
import re
import uuid
import random
from typing import Dict, List
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
from app.configs.constants import (
FieldsAndExercises, GPTModels, TemperatureSettings,
AvatarEnum, FilePaths
)
from app.helpers import TextHelper
class SpeakingService(ISpeakingService):
def __init__(
    self, llm: ILLMService, vid_gen: IVideoGeneratorService,
    file_storage: IFileStorage, document_store: IDocumentStore,
    stt: ISpeechToTextService
):
    """Wire up the collaborators used to generate, render and grade IELTS speaking tasks.

    Args:
        llm: Chat-completion service used for question generation and grading.
        vid_gen: Avatar video generator used to render question videos.
        file_storage: File storage backend (Firebase paths are used throughout)
            for answer audio and generated videos.
        document_store: Document database used to persist generated exercises.
        stt: Speech-to-text service used to transcribe candidate answers.
    """
    self._llm = llm
    self._vid_gen = vid_gen
    self._file_storage = file_storage
    self._document_store = document_store
    self._stt = stt
    self._logger = logging.getLogger(__name__)
    # Per-part prompt configuration: for each speaking part, the JSON shape the
    # LLM is instructed to emit and the user-prompt template. {difficulty} and
    # {topic} placeholders are filled in by get_speaking_task().
    self._tasks = {
        "task_1": {
            "get": {
                "json_template": (
                    '{"topic": "topic", "question": "question"}'
                ),
                "prompt": (
                    'Craft a thought-provoking question of {difficulty} difficulty for IELTS Speaking Part 1 '
                    'that encourages candidates to delve deeply into personal experiences, preferences, or '
                    'insights on the topic of "{topic}". Instruct the candidate to offer not only detailed '
                    'descriptions but also provide nuanced explanations, examples, or anecdotes to enrich '
                    'their response. Make sure that the generated question does not contain forbidden subjects in '
                    'muslim countries.'
                )
            }
        },
        "task_2": {
            "get": {
                # Part 2 additionally asks for three guiding prompts.
                "json_template": (
                    '{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}'
                ),
                "prompt": (
                    'Create a question of {difficulty} difficulty for IELTS Speaking Part 2 '
                    'that encourages candidates to narrate a personal experience or story related to the topic '
                    'of "{topic}". Include 3 prompts that guide the candidate to describe '
                    'specific aspects of the experience, such as details about the situation, '
                    'their actions, and the reasons it left a lasting impression. Make sure that the '
                    'generated question does not contain forbidden subjects in muslim countries.'
                )
            }
        },
        "task_3": {
            "get": {
                # Part 3 produces a list of questions ("questions"), not a single "question".
                "json_template": (
                    '{"topic": "topic", "questions": ["question", "question", "question"]}'
                ),
                "prompt": (
                    'Formulate a set of 3 questions of {difficulty} difficulty for IELTS Speaking Part 3 '
                    'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
                    'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
                    'related to the topic. Make sure that the generated question does not contain forbidden '
                    'subjects in muslim countries.'
                )
            }
        },
    }
async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
    """Generate an IELTS speaking exercise for the given part via the LLM.

    Args:
        task_id: Speaking part number (1, 2 or 3); selects the prompt and JSON
            template from ``self._tasks``. Any other value raises ``KeyError``.
        topic: Topic the generated question(s) should cover.
        difficulty: Difficulty level interpolated into the prompt.

    Returns:
        The LLM's JSON response dict, augmented with ``type``, ``difficulty``
        and ``topic`` keys.
    """
    task_values = self._tasks[f'task_{task_id}']['get']
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: ' +
                task_values["json_template"]
            )
        },
        {
            "role": "user",
            # "prompt" is already a str; no cast needed before .format().
            "content": task_values["prompt"].format(topic=topic, difficulty=difficulty)
        }
    ]
    response = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, FieldsAndExercises.GEN_FIELDS, TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    # TODO: this was on GET /speaking_task_3 don't know if it is intentional only for 3
    if task_id == 3:
        # Strip a leading "<number>. " the model sometimes prepends to each
        # question. re.sub leaves non-matching strings unchanged, so the
        # previous re.match guard was redundant.
        response["questions"] = [
            re.sub(r"^\d+\.\s*", "", question) for question in response["questions"]
        ]
    response["type"] = task_id
    response["difficulty"] = difficulty
    response["topic"] = topic
    return response
async def grade_speaking_task_1_and_2(
    self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
):
    """Download, transcribe and grade an IELTS Speaking Part 1 or 2 answer.

    Args:
        task: Speaking part number (1 or 2); used in prompts and log tags.
        question: The question the candidate was asked.
        answer_firebase_path: Firebase path of the recorded answer audio.
        sound_file_name: Local file path the audio is downloaded to.

    Returns:
        The grading dict (comment, overall, task_response) augmented with
        ``perfect_answer``, ``transcript`` and ``fixed_text`` keys, or a
        zero-rating dict when the transcript has fewer than 20 words.
    """
    request_id = uuid.uuid4()
    req_data = {
        "question": question,
        "answer": answer_firebase_path
    }
    self._logger.info(
        f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
        f'Use this id to track the logs: {str(request_id)} - Request data: {str(req_data)}'
    )
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {answer_firebase_path}')
    await self._file_storage.download_firebase_file(answer_firebase_path, sound_file_name)
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloaded file {answer_firebase_path} to {sound_file_name}')
    answer = await self._stt.speech_to_text(sound_file_name)
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer}')
    if not TextHelper.has_x_words(answer, 20):
        # Guard clause: too little English speech to grade meaningfully.
        self._logger.info(
            f'POST - speaking_task_{task} - {str(request_id)} - '
            f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
        )
        return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"comment": "comment about answer quality", "overall": 0.0, '
                '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
                '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
        },
        {
            "role": "user",
            "content": (
                f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
                'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
                'assign a score of 0 if the response fails to address the question. Additionally, provide '
                'detailed commentary highlighting both strengths and weaknesses in the response.'
                f'\n Question: "{question}" \n Answer: "{answer}"')
        }
    ]
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answer.')
    response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        messages,
        ["comment"],
        TemperatureSettings.GRADING_TEMPERATURE
    )
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answer graded: {str(response)}')
    perfect_answer_messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"answer": "perfect answer"}'
            )
        },
        {
            "role": "user",
            "content": (
                'Provide a perfect answer according to ielts grading system to the following '
                f'Speaking Part {task} question: "{question}"')
        }
    ]
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting perfect answer.')
    # BUG FIX: this prediction was previously assigned to `response`, clobbering
    # the grading result (comment/overall/task_response) obtained above. Keep it
    # in its own variable and only copy the answer text across.
    perfect_answer_response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        perfect_answer_messages,
        ["answer"],
        TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    response['perfect_answer'] = perfect_answer_response["answer"]
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Perfect answer: ' + response['perfect_answer'])
    response['transcript'] = answer
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting fixed text.')
    response['fixed_text'] = await self._get_speaking_corrections(answer)
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Fixed text: ' + response['fixed_text'])
    if response["overall"] == "0.0" or response["overall"] == 0.0:
        # The model sometimes echoes the placeholder overall; derive it from the
        # four criterion scores instead.
        response["overall"] = self._calculate_overall(response)
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
    return response
# TODO: When there's more time grade_speaking_task_1_2 can be merged with this, when there's more time
async def grade_speaking_task_3(self, answers: List[Dict], task: int = 3):
    """Download, transcribe and grade a set of IELTS Speaking Part 3 answers.

    Args:
        answers: List of dicts with ``question`` and ``answer`` keys, where
            ``answer`` is a Firebase path to the recorded audio. Each item's
            ``answer`` is replaced in place with its transcript.
            (Annotation fixed: this was declared ``Dict`` but is iterated and
            ``len()``-ed as a list.)
        task: Speaking part number, parameterized for log tags and prompts.

    Returns:
        The grading dict augmented with ``perfect_answer_N``, ``transcript_N``
        and ``fixed_text_N`` keys per answer (1-based), or a zero-rating dict
        as soon as one transcript has fewer than 20 words.
    """
    request_id = uuid.uuid4()
    self._logger.info(
        f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
        f'Use this id to track the logs: {str(request_id)} - Request data: {str(answers)}'
    )
    text_answers = []
    perfect_answers = []
    self._logger.info(
        f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
    )
    for item in answers:
        # Unique temp file per answer; removed right after transcription.
        sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
        self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {item["answer"]}')
        await self._file_storage.download_firebase_file(item["answer"], sound_file_name)
        self._logger.info(
            f'POST - speaking_task_{task} - {str(request_id)} - '
            'Downloaded file ' + item["answer"] + f' to {sound_file_name}'
        )
        answer_text = await self._stt.speech_to_text(sound_file_name)
        self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer_text}')
        text_answers.append(answer_text)
        item["answer"] = answer_text
        os.remove(sound_file_name)
        if not TextHelper.has_x_words(answer_text, 20):
            # Abort the whole grading as soon as any answer is too short.
            self._logger.info(
                f'POST - speaking_task_{task} - {str(request_id)} - '
                f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}')
            return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
        perfect_answer_messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"answer": "perfect answer"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Provide a perfect answer according to ielts grading system to the following '
                    f'Speaking Part {task} question: "{item["question"]}"'
                )
            }
        ]
        self._logger.info(
            f'POST - speaking_task_{task} - {str(request_id)} - '
            f'Requesting perfect answer for question: {item["question"]}'
        )
        # BUG FIX: store the answer text itself rather than the whole prediction
        # dict, matching grade_speaking_task_1_and_2's perfect_answer handling.
        perfect_answer_response = await self._llm.prediction(
            GPTModels.GPT_3_5_TURBO,
            perfect_answer_messages,
            ["answer"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        perfect_answers.append(perfect_answer_response["answer"])
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"comment": "comment about answer quality", "overall": 0.0, '
                '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
                '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
        }
    ]
    message = (
        f"Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a "
        "strict assessment that penalizes errors. Deduct points for deviations from the task, and "
        "assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
        "commentary highlighting both strengths and weaknesses in the response."
        # BUG FIX: removed a stray trailing quote that used to leak into the prompt here.
        "\n\n The questions and answers are: \n\n")
    self._logger.info(
        f'POST - speaking_task_{task} - {str(request_id)} - Formatting answers and questions for prompt.'
    )
    formatted_text = ""
    for i, entry in enumerate(answers, start=1):
        formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
        formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
    self._logger.info(
        f'POST - speaking_task_{task} - {str(request_id)} - Formatted answers and questions for prompt: {formatted_text}'
    )
    message += formatted_text
    messages.append({
        "role": "user",
        "content": message
    })
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answers.')
    response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
    )
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answers graded: {str(response)}')
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Adding perfect answers to response.')
    for i, answer in enumerate(perfect_answers, start=1):
        response['perfect_answer_' + str(i)] = answer
    self._logger.info(
        f'POST - speaking_task_{task} - {str(request_id)} - Adding transcript and fixed texts to response.'
    )
    for i, answer in enumerate(text_answers, start=1):
        response['transcript_' + str(i)] = answer
        response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
    if response["overall"] == "0.0" or response["overall"] == 0.0:
        # The model sometimes echoes the placeholder overall; derive it from the criteria.
        response["overall"] = self._calculate_overall(response)
    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
    return response
# ==================================================================================================================
# grade_speaking_task helpers
# ==================================================================================================================
@staticmethod
def _zero_rating(comment: str):
return {
"comment": comment,
"overall": 0,
"task_response": {
"Fluency and Coherence": 0,
"Lexical Resource": 0,
"Grammatical Range and Accuracy": 0,
"Pronunciation": 0
}
}
@staticmethod
def _calculate_overall(response: Dict):
return round(
(
response["task_response"]["Fluency and Coherence"] +
response["task_response"]["Lexical Resource"] +
response["task_response"]["Grammatical Range and Accuracy"] +
response["task_response"]["Pronunciation"]
) / 4, 1
)
async def _get_speaking_corrections(self, text):
    """Ask the LLM to fix transcription errors in ``text`` and return the corrected text."""
    system_message = {
        "role": "system",
        "content": (
            'You are a helpful assistant designed to output JSON on this format: '
            '{"fixed_text": "fixed transcription with no misspelling errors"}'
        )
    }
    user_message = {
        "role": "user",
        "content": (
            'Fix the errors in the provided transcription and put it in a JSON. '
            f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
        )
    }
    prediction = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        [system_message, user_message],
        ["fixed_text"],
        0.2,
        False
    )
    return prediction["fixed_text"]
async def create_videos_and_save_to_db(self, exercises, template, req_id):
template = await self._create_video_per_part(exercises, template, 1)
template = await self._create_video_per_part(exercises, template, 2)
template = await self._create_video_per_part(exercises, template, 3)
await self._document_store.save_to_db_with_id("speaking", template, req_id)
self._logger.info(f'Saved speaking to DB with id {req_id} : {str(template)}')
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
    """Render avatar video(s) for the exercise matching ``part`` and fill the template slot.

    Parts 1 and 2 produce a single question video (part 2 also copies the
    guidance prompts); part 3 produces one video per question. When no
    exercise of the given part exists, its template slot is removed.

    Returns:
        The (mutated) template dict.
    """
    template_index = part - 1
    # Find the exercise whose 'type' field matches the requested speaking part.
    found_exercises = [element for element in exercises if element.get('type') == part]
    if not found_exercises:
        # No exercise for this part: drop its placeholder slot from the template.
        template["exercises"].pop(template_index)
        return template
    exercise = found_exercises[0]
    self._logger.info(f'Creating video for speaking part {part}')
    if part in {1, 2}:
        result = await self._create_video(
            exercise["question"],
            (random.choice(list(AvatarEnum))).value,
            f'Failed to create video for part {part} question: {str(exercise["question"])}'
        )
        if result is not None:
            if part == 2:
                template["exercises"][template_index]["prompts"] = exercise["prompts"]
            template["exercises"][template_index]["text"] = exercise["question"]
            template["exercises"][template_index]["title"] = exercise["topic"]
            template["exercises"][template_index]["video_url"] = result["video_url"]
            template["exercises"][template_index]["video_path"] = result["video_path"]
    else:
        questions = []
        for question in exercise["questions"]:
            result = await self._create_video(
                question,
                (random.choice(list(AvatarEnum))).value,
                # BUG FIX: part-3 exercises have a "questions" list, not a
                # "question" key; the old f-string read exercise["question"]
                # and raised KeyError before the call was even made.
                f'Failed to create video for part {part} question: {question}'
            )
            if result is not None:
                questions.append({
                    "text": question,
                    "video_path": result["video_path"],
                    "video_url": result["video_url"]
                })
        template["exercises"][template_index]["prompts"] = questions
        template["exercises"][template_index]["title"] = exercise["topic"]
    return template
# TODO: Check if it is intended to log the original question
async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
if len(prompts) > 0:
question = original_question + " In your answer you should consider: " + " ".join(prompts)
else:
question = original_question
error_msg = f'Failed to create video for part 1 question: {original_question}'
result = await self._create_video(
question,
avatar,
error_msg
)
if result is not None:
return {
"text": original_question,
"prompts": prompts,
"title": topic,
**result,
"type": "speaking",
"id": uuid.uuid4()
}
else:
return str(error_msg)
async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
sp_questions = []
self._logger.info('Creating videos for speaking part 3')
for question in questions:
result = await self._create_video(
question,
avatar,
f'Failed to create video for part 3 question: {question}'
)
if result is not None:
video = {
"text": question,
**result
}
sp_questions.append(video)
return {
"prompts": sp_questions,
"title": topic,
"type": "interactiveSpeaking",
"id": uuid.uuid4()
}
async def _create_video(self, question: str, avatar: str, error_message: str):
    """Generate an avatar video for ``question``, upload it, and return its path/URL.

    Returns ``{"video_path": ..., "video_url": ...}`` on success; logs
    ``error_message`` and returns ``None`` when generation fails.
    """
    generated_file = await self._vid_gen.create_video(question, avatar)
    if generated_file is None:
        self._logger.error(error_message)
        return None
    local_video_path = FilePaths.VIDEO_FILES_PATH + generated_file
    firebase_file_path = FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH + generated_file
    url = await self._file_storage.upload_file_firebase_get_url(firebase_file_path, local_video_path)
    return {
        "video_path": firebase_file_path,
        "video_url": url
    }