import logging
import os
import re
import uuid
import random
from typing import Dict, List, Optional
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
from app.configs.constants import (
    FieldsAndExercises,
    GPTModels,
    TemperatureSettings,
    FilePaths
)
from app.helpers import TextHelper


class SpeakingService(ISpeakingService):
    """Generates IELTS Speaking questions and grades recorded spoken answers.

    Question generation and grading are delegated to the injected LLM service;
    audio answers are downloaded from file storage, transcribed through the
    injected speech-to-text service, then graded and corrected by the LLM.
    """

    def __init__(
        self,
        llm: ILLMService,
        file_storage: IFileStorage,
        stt: ISpeechToTextService
    ):
        """Store the injected collaborators and build the per-part prompt table.

        :param llm: LLM client used for question generation, grading and corrections.
        :param file_storage: storage used to download the recorded audio answers.
        :param stt: speech-to-text client used to transcribe the audio answers.
        """
        self._llm = llm
        self._file_storage = file_storage
        self._stt = stt
        self._logger = logging.getLogger(__name__)
        # TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
        # either the difficulty in the request or a random one yet the prompt doesn't change
        self._tasks = {
            "task_1": {
                "get": {
                    "json_template": {
                        "first_topic": "topic 1",
                        "second_topic": "topic 2",
                        "questions": [
                            (
                                "Introductory question about the first topic, starting the topic with "
                                "'Let's talk about x' and then the question."
                            ),
                            "Follow up question about the first topic",
                            "Follow up question about the first topic",
                            "Question about second topic",
                            "Follow up question about the second topic",
                        ]
                    },
                    "prompt": (
                        'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
                        'that encourages candidates to delve deeply into personal experiences, preferences, or '
                        'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
                        'Make sure that the generated question does not contain forbidden subjects in '
                        'muslim countries.'
                    )
                }
            },
            "task_2": {
                "get": {
                    "json_template": {
                        "topic": "topic",
                        "question": "question",
                        "prompts": [
                            "prompt_1",
                            "prompt_2",
                            "prompt_3"
                        ],
                        "suffix": "And explain why..."
                    },
                    "prompt": (
                        'Create a question of medium difficulty for IELTS Speaking Part 2 '
                        'that encourages candidates to narrate a personal experience or story related to the topic '
                        'of "{topic}". Include 3 prompts that guide the candidate to describe '
                        'specific aspects of the experience, such as details about the situation, '
                        'their actions, and the reasons it left a lasting impression. Make sure that the '
                        'generated question does not contain forbidden subjects in muslim countries.'
                    )
                }
            },
            "task_3": {
                "get": {
                    "json_template": {
                        "topic": "topic",
                        "questions": [
                            "Introductory question about the topic.",
                            "Follow up question about the topic",
                            "Follow up question about the topic",
                            "Follow up question about the topic",
                            "Follow up question about the topic"
                        ]
                    },
                    # FIX: the original adjacent literals concatenated to "Part 3that encourage"
                    # (missing space between the two string fragments).
                    "prompt": (
                        'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3 '
                        'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
                        'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
                        'related to the topic. Make sure that the generated question does not contain forbidden '
                        'subjects in muslim countries.'
                    )
                }
            },
        }

    async def get_speaking_part(
        self,
        part: int,
        topic: str,
        second_topic: str,
        difficulty: str
    ) -> Dict:
        """Generate the questions/cue card for one IELTS Speaking part.

        :param part: speaking part number (1, 2 or 3); selects the prompt template.
        :param topic: main topic to generate questions about.
        :param second_topic: second topic, used only by part 1.
        :param difficulty: difficulty label echoed back in the response (not used in the prompt).
        :return: the LLM JSON response enriched with ``type``, ``difficulty`` and (parts 2/3) ``topic``.
        """
        task_values = self._tasks[f'task_{part}']['get']
        # Part 1 prompts take two topics; parts 2 and 3 take a single one.
        if part == 1:
            task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
        else:
            task_prompt = task_values["prompt"].format(topic=topic)

        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    f'{task_values["json_template"]}'
                )
            },
            {
                "role": "user",
                "content": task_prompt
            }
        ]

        # Extra per-part instructions appended as additional user messages.
        part_specific = {
            "1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
            "2": (
                'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
                'that start with "And explain why".'
            )
        }
        if part in {1, 2}:
            messages.append({
                "role": "user",
                "content": part_specific[str(part)]
            })
        if part in {1, 3}:
            messages.append({
                "role": "user",
                "content": 'They must be 1 single question each and not be double-barreled questions.'
            })

        # Part 1 responses are validated on "first_topic"; others on the shared generation fields.
        fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS
        response = await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            fields_to_check,
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if part == 3:
            # Strip a leading "1. "-style number the model sometimes prepends.
            # re.sub is a no-op when the pattern does not match, so no pre-check is needed.
            response["questions"] = [
                re.sub(r"^\d+\.\s*", "", question) for question in response["questions"]
            ]

        response["type"] = part
        response["difficulty"] = difficulty
        if part in {2, 3}:
            response["topic"] = topic
        return response

    async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
        """Transcribe and grade the recorded answers for one speaking task.

        Each entry of ``answers`` is expected to carry a ``question`` and an
        ``answer`` (a file-storage reference to the audio recording); the
        ``answer`` field is replaced in place with its transcription.

        :param task: speaking task number (1, 2 or 3).
        :param answers: list of question/audio-answer dicts (task 2 uses a single entry).
        :return: the grading response enriched with transcripts, corrected texts
            and perfect answers, or a zero rating if an answer is too short.
        """
        request_id = uuid.uuid4()
        self._logger.info(
            f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
            f'Use this id to track the logs: {str(request_id)} - Request data: {str(answers)}'
        )
        text_answers = []
        perfect_answers = []
        if task != 2:
            self._logger.info(
                f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
            )
        for item in answers:
            sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
            self._logger.info(f'POST - speaking_task_{task} - {request_id} - Downloading file {item["answer"]}')
            await self._file_storage.download_firebase_file(item["answer"], sound_file_name)
            self._logger.info(
                f'POST - speaking_task_{task} - {request_id} - '
                f'Downloaded file {item["answer"]} to {sound_file_name}'
            )
            try:
                answer_text = await self._stt.speech_to_text(sound_file_name)
            finally:
                # Always delete the temporary audio file, even when transcription
                # fails — previously a failed STT call leaked the file on disk.
                os.remove(sound_file_name)
            self._logger.info(f'POST - speaking_task_{task} - {request_id} - Transcripted answer: {answer_text}')
            text_answers.append(answer_text)
            item["answer"] = answer_text
            # NOTE: a single too-short answer aborts the grading of ALL answers
            # and returns a zero rating — confirm this is the intended product behavior.
            if not TextHelper.has_x_words(answer_text, 20):
                self._logger.info(
                    f'POST - speaking_task_{task} - {request_id} - '
                    f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}'
                )
                return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
            self._logger.info(
                f'POST - speaking_task_{task} - {request_id} - '
                f'Requesting perfect answer for question: {item["question"]}'
            )
            perfect_answers.append(await self._get_perfect_answer(task, item["question"]))

        if task in {1, 3}:
            self._logger.info(
                f'POST - speaking_task_{task} - {request_id} - Formatting answers and questions for prompt.'
            )
            formatted_text = ""
            for i, entry in enumerate(answers, start=1):
                formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
                formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
            self._logger.info(
                f'POST - speaking_task_{task} - {request_id} - '
                f'Formatted answers and questions for prompt: {formatted_text}'
            )
            questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
        else:
            questions_and_answers = f'\n Question: "{answers[0]["question"]}" \n Answer: "{answers[0]["answer"]}"'

        self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting grading of the answer(s).')
        response = await self._grade_task(task, questions_and_answers)
        self._logger.info(f'POST - speaking_task_{task} - {request_id} - Answer(s) graded: {response}')

        if task in {1, 3}:
            self._logger.info(
                f'POST - speaking_task_{task} - {request_id} - Adding perfect answer(s) to response.')
            # _get_perfect_answer returns {"answer": ...}; store the text itself,
            # matching the task-2 branch below (resolves the previous TODO about
            # whether this should be answer["answer"]).
            for i, answer in enumerate(perfect_answers, start=1):
                response['perfect_answer_' + str(i)] = answer["answer"]
            self._logger.info(
                f'POST - speaking_task_{task} - {request_id} - Adding transcript and fixed texts to response.'
            )
            for i, answer in enumerate(text_answers, start=1):
                response['transcript_' + str(i)] = answer
                response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
        else:
            response['transcript'] = answers[0]["answer"]
            self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting fixed text.')
            response['fixed_text'] = await self._get_speaking_corrections(answers[0]["answer"])
            self._logger.info(f'POST - speaking_task_{task} - {request_id} - Fixed text: {response["fixed_text"]}')
            response['perfect_answer'] = perfect_answers[0]["answer"]

        response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
        self._logger.info(f'POST - speaking_task_{task} - {request_id} - Final response: {response}')
        return response

    # ==================================================================================================================
    # grade_speaking_task helpers
    # ==================================================================================================================
    async def _get_perfect_answer(self, task: int, question: str) -> Dict:
        """Ask the LLM for a model answer to one question; returns ``{"answer": ...}``."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Provide a perfect answer according to ielts grading system to the following '
                    f'Speaking Part {task} question: "{question}"'
                )
            }
        ]
        if task == 1:
            messages.append({
                "role": "user",
                "content": 'The answer must be 2 or 3 sentences long.'
            })
        # Part 1 uses the stronger model; parts 2/3 use the cheaper one.
        gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO
        return await self._llm.prediction(
            gpt_model,
            messages,
            ["answer"],
            TemperatureSettings.GRADING_TEMPERATURE
        )

    async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
        """Grade the formatted Q&A text with the LLM using the IELTS rubric template."""
        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
                    'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
                    'assign a score of 0 if the response fails to address the question. Additionally, provide '
                    'detailed commentary highlighting both strengths and weaknesses in the response.'
                ) + questions_and_answers
            }
        ]
        task_specific = {
            "1": (
                'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
                'student that they should be.'
            ),
            "2": 'Address the student as "you"',
            "3": 'Address the student as "you" and pay special attention to coherence between the answers.'
        }
        messages.append({
            "role": "user",
            "content": task_specific[str(task)]
        })
        if task in {1, 3}:
            messages.extend([
                {
                    "role": "user",
                    "content": (
                        'For pronunciations act as if you heard the answers and they were transcripted '
                        'as you heard them.'
                    )
                },
                {
                    "role": "user",
                    "content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
                }
            ])
        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            ["comment"],
            TemperatureSettings.GRADING_TEMPERATURE
        )

    @staticmethod
    def _fix_speaking_overall(overall: float, task_response: dict) -> float:
        """Clamp an implausible overall grade.

        If the LLM's overall score falls outside the range of the per-category
        grades, replace it with the rounded average of those grades.
        """
        grades = [category["grade"] for category in task_response.values()]
        if overall > max(grades) or overall < min(grades):
            total_sum = sum(grades)
            average = total_sum / len(grades)
            rounded_average = round(average, 0)
            return rounded_average
        return overall

    @staticmethod
    def _zero_rating(comment: str) -> Dict:
        """Build a zero-score grading response carrying ``comment`` as the reason."""
        return {
            "comment": comment,
            "overall": 0,
            "task_response": {
                "Fluency and Coherence": {
                    "grade": 0.0,
                    "comment": ""
                },
                "Lexical Resource": {
                    "grade": 0.0,
                    "comment": ""
                },
                "Grammatical Range and Accuracy": {
                    "grade": 0.0,
                    "comment": ""
                },
                "Pronunciation": {
                    "grade": 0.0,
                    "comment": ""
                }
            }
        }

    async def _get_speaking_corrections(self, text: str) -> str:
        """Ask the LLM to fix transcription errors in ``text``; returns the fixed text."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"fixed_text": "fixed transcription with no misspelling errors"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Fix the errors in the provided transcription and put it in a JSON. '
                    f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
                )
            }
        ]
        response = await self._llm.prediction(
            GPTModels.GPT_3_5_TURBO,
            messages,
            ["fixed_text"],
            0.2,
            False
        )
        return response["fixed_text"]

    @staticmethod
    def _grade_template() -> Dict:
        """Return the JSON skeleton the grading LLM is instructed to fill in."""
        return {
            "comment": "extensive comment about answer quality",
            "overall": 0.0,
            "task_response": {
                "Fluency and Coherence": {
                    "grade": 0.0,
                    "comment": (
                        "extensive comment about fluency and coherence, use examples to justify the grade awarded."
                    )
                },
                "Lexical Resource": {
                    "grade": 0.0,
                    "comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
                },
                "Grammatical Range and Accuracy": {
                    "grade": 0.0,
                    "comment": (
                        "extensive comment about grammatical range and accuracy, use examples to justify the "
                        "grade awarded."
                    )
                },
                "Pronunciation": {
                    "grade": 0.0,
                    "comment": (
                        "extensive comment about pronunciation on the transcribed answer, use examples to justify the "
                        "grade awarded."
                    )
                }
            }
        }