Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274
This commit is contained in:
168
ielts_be/services/impl/exam/speaking/__init__.py
Normal file
168
ielts_be/services/impl/exam/speaking/__init__.py
Normal file
@@ -0,0 +1,168 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from ielts_be.configs.constants import (
|
||||
FieldsAndExercises, GPTModels, TemperatureSettings
|
||||
)
|
||||
from ielts_be.dtos.speaking import GradeSpeakingItem
|
||||
from ielts_be.repositories import IFileStorage
|
||||
from ielts_be.services import ISpeakingService, ILLMService, ISpeechToTextService
|
||||
from .grade import GradeSpeaking
|
||||
|
||||
|
||||
class SpeakingService(ISpeakingService):
    """Generates IELTS Speaking part 1–3 question sets via the LLM and delegates grading.

    Question generation builds a JSON-template + prompt pair per part (held in
    ``self._tasks``) and asks the LLM for a structured response; grading is
    forwarded to :class:`GradeSpeaking`.
    """

    def __init__(
            self, llm: ILLMService,
            file_storage: IFileStorage,
            stt: ISpeechToTextService
    ):
        self._llm = llm
        self._file_storage = file_storage
        self._stt = stt
        self._logger = logging.getLogger(__name__)
        # Grading is delegated to a dedicated collaborator built on the same services.
        self._grade = GradeSpeaking(llm, file_storage, stt)

        # TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
        # either the difficulty in the request or a random one yet the prompt doesn't change
        # (part 1 prompts "easy", part 2 "medium", part 3 "hard" regardless of the argument).
        self._tasks = {
            "task_1": {
                "get": {
                    # Shape the LLM is instructed to emit for part 1: two topics
                    # plus five questions spread across them.
                    "json_template": {
                        "first_topic": "topic 1",
                        "second_topic": "topic 2",
                        "questions": [
                            (
                                "Introductory question about the first topic, starting the topic with "
                                "'Let's talk about x' and then the question."
                            ),
                            "Follow up question about the first topic",
                            "Follow up question about the first topic",
                            "Question about second topic",
                            "Follow up question about the second topic",
                        ]
                    },
                    "prompt": (
                        'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
                        'that encourages candidates to delve deeply into personal experiences, preferences, or '
                        'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
                        'Make sure that the generated question does not contain forbidden subjects in '
                        'muslim countries.'
                    )
                }
            },
            "task_2": {
                "get": {
                    # Part 2 (cue card): one question, three descriptive prompts,
                    # and the customary "And explain why..." suffix.
                    "json_template": {
                        "topic": "topic",
                        "question": "question",
                        "prompts": [
                            "prompt_1",
                            "prompt_2",
                            "prompt_3"
                        ],
                        "suffix": "And explain why..."
                    },
                    "prompt": (
                        'Create a question of medium difficulty for IELTS Speaking Part 2 '
                        'that encourages candidates to narrate a personal experience or story related to the topic '
                        'of "{topic}". Include 3 prompts that guide the candidate to describe '
                        'specific aspects of the experience, such as details about the situation, '
                        'their actions, and the reasons it left a lasting impression. Make sure that the '
                        'generated question does not contain forbidden subjects in muslim countries.'
                    )
                }
            },
            "task_3": {
                "get": {
                    # Part 3 (discussion): a single topic with five related questions.
                    "json_template": {
                        "topic": "topic",
                        "questions": [
                            "Introductory question about the topic.",
                            "Follow up question about the topic",
                            "Follow up question about the topic",
                            "Follow up question about the topic",
                            "Follow up question about the topic"
                        ]
                    },
                    # Fixed: the original implicit concatenation was missing a space,
                    # producing "...Part 3that encourage..." in the prompt sent to the LLM.
                    "prompt": (
                        'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3 '
                        'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
                        'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
                        'related to the topic. Make sure that the generated question does not contain forbidden '
                        'subjects in muslim countries.'
                    )
                }
            },
        }

    async def get_speaking_part(
            self, part: int, topic: str, second_topic: str, difficulty: str
    ) -> Dict:
        """Generate a question set for one IELTS speaking part.

        :param part: speaking part number, 1, 2 or 3.
        :param topic: main topic (part 1's first topic).
        :param second_topic: second topic; only used when ``part == 1``.
        :param difficulty: echoed back on the response; does not alter the prompt
            (see the TODO in ``__init__``).
        :return: the LLM's JSON response augmented with ``type``, ``difficulty``
            and (parts 2/3) ``topic``.
        :raises ValueError: if ``part`` is not 1, 2 or 3.
        """
        # Fail fast with a clear message instead of a KeyError on the task lookup.
        if part not in (1, 2, 3):
            raise ValueError(f"Unsupported speaking part: {part}. Expected 1, 2 or 3.")

        task_values = self._tasks[f'task_{part}']['get']

        # Part 1 prompts are parameterized by two topics; parts 2/3 by one.
        if part == 1:
            task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
        else:
            task_prompt = task_values["prompt"].format(topic=topic)

        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    f'{task_values["json_template"]}'
                )
            },
            {
                "role": "user",
                "content": task_prompt
            }
        ]

        # Extra per-part instructions appended as additional user messages.
        part_specific = {
            "1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
            "2": (
                'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
                'that start with "And explain why".'
            )
        }

        if part in {1, 2}:
            messages.append({
                "role": "user",
                "content": part_specific[str(part)]
            })

        if part in {1, 3}:
            messages.append({
                "role": "user",
                "content": 'They must be 1 single question each and not be double-barreled questions.'
            })

        # Fields the LLM response must contain for it to be accepted.
        fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if part == 3:
            # Remove the numbers from the questions only if the string starts with a number
            # (the model sometimes emits "1. ..." prefixes despite the template).
            response["questions"] = [
                re.sub(r"^\d+\.\s*", "", question)
                if re.match(r"^\d+\.", question) else question
                for question in response["questions"]
            ]

        response["type"] = part
        response["difficulty"] = difficulty

        if part in {2, 3}:
            response["topic"] = topic

        return response

    async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
        """Grade recorded answers for a speaking task; see :class:`GradeSpeaking`."""
        return await self._grade.grade_speaking_task(task, items)
|
||||
316
ielts_be/services/impl/exam/speaking/grade.py
Normal file
316
ielts_be/services/impl/exam/speaking/grade.py
Normal file
@@ -0,0 +1,316 @@
|
||||
import asyncio
|
||||
import os
|
||||
import uuid
|
||||
from logging import getLogger
|
||||
from typing import Dict, List
|
||||
|
||||
import aiofiles
|
||||
|
||||
from ielts_be.configs.constants import GPTModels, TemperatureSettings, FilePaths
|
||||
from ielts_be.dtos.speaking import GradeSpeakingItem
|
||||
from ielts_be.helpers import TextHelper
|
||||
from ielts_be.repositories import IFileStorage
|
||||
from ielts_be.services import ILLMService, ISpeechToTextService
|
||||
|
||||
|
||||
class GradeSpeaking:
    """Grades recorded IELTS speaking answers.

    Pipeline per request: save uploaded audio to temp files, transcribe them,
    reject answers under a 20-word threshold, generate "perfect" reference
    answers, grade via the LLM, attach corrected transcripts, upload the audio
    to Firebase, and normalize the overall score.
    """

    def __init__(self, llm: ILLMService, file_storage: IFileStorage, stt: ISpeechToTextService):
        self._llm = llm
        self._file_storage = file_storage
        self._stt = stt
        self._logger = getLogger(__name__)

    async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
        """Grade the answers of one speaking task.

        :param task: speaking task number (1, 2 or 3). Tasks 1/3 carry several
            answers; task 2 a single one.
        :param items: question/audio pairs to grade.
        :return: the LLM grading response enriched with transcripts, fixed
            texts, perfect answers, uploaded-audio URLs and a corrected overall.
        """
        request_id = str(uuid.uuid4())
        self._log(task, request_id, f"Received request to grade speaking task {task}.")

        if task != 2:
            self._log(task, request_id, f'Received {len(items)} total answers.')

        temp_files = []
        try:
            # Save all files first
            temp_files = await asyncio.gather(*[
                self.save_file(item) for item in items
            ])

            # Process all transcriptions concurrently (up to 4)
            self._log(task, request_id, 'Starting batch transcription')
            text_answers = await asyncio.gather(*[
                self._stt.speech_to_text(file_path)
                for file_path in temp_files
            ])

            # Any answer below the word threshold short-circuits to a zero rating.
            for answer in text_answers:
                self._log(task, request_id, f'Transcribed answer: {answer}')
                if not TextHelper.has_x_words(answer, 20):
                    self._log(
                        task, request_id,
                        f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
                    )
                    return self._zero_rating("The audio recorded does not contain enough english words to be graded.")

            # Get perfect answers
            self._log(task, request_id, 'Requesting perfect answers')
            perfect_answers = await asyncio.gather(*[
                self._get_perfect_answer(task, item.question)
                for item in items
            ])

            # Format the responses: multi-answer tasks get a markdown-style Q/A list,
            # task 2 a single inline pair.
            if task in {1, 3}:
                self._log(task, request_id, 'Formatting answers and questions for prompt.')

                formatted_text = ""
                for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
                    formatted_text += f"**Question {i}:**\n{item.question}\n\n"
                    formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"

                self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
                questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
            else:
                questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'

            self._log(task, request_id, 'Requesting grading of the answer(s).')
            response = await self._grade_task(task, questions_and_answers)
            self._log(task, request_id, f'Answer(s) graded: {response}')

            if task in {1, 3}:
                self._log(task, request_id, 'Adding perfect answer(s) to response.')

                # TODO: check if it is answer["answer"] instead
                # NOTE(review): the task-2 branch below stores perfect_answers[0]["answer"],
                # so perfect_answer_N here holds the whole dict while perfect_answer holds
                # the string — confirm which shape consumers expect.
                for i, answer in enumerate(perfect_answers, start=1):
                    response['perfect_answer_' + str(i)] = answer

                self._log(task, request_id, 'Getting speaking corrections in parallel')
                # Get all corrections in parallel
                fixed_texts = await asyncio.gather(*[
                    self._get_speaking_corrections(answer)
                    for answer in text_answers
                ])

                self._log(task, request_id, 'Adding transcript and fixed texts to response.')
                for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
                    response['transcript_' + str(i)] = answer
                    response['fixed_text_' + str(i)] = fixed
            else:
                response['transcript'] = text_answers[0]

                self._log(task, request_id, 'Requesting fixed text.')
                response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
                self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')

                response['perfect_answer'] = perfect_answers[0]["answer"]

            # Upload all recordings concurrently (was a sequential await-per-file
            # loop; gather preserves input order, matching the old behavior).
            solutions = await asyncio.gather(*[
                self._file_storage.upload_file_firebase_get_url(
                    f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name
                )
                for file_name in temp_files
            ])

            # Clamp a hallucinated overall back into the per-category grade range.
            response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
            response["solutions"] = solutions
            if task in {1, 3}:
                response["answer"] = solutions
            else:
                response["fullPath"] = solutions[0]

            self._log(task, request_id, f'Final response: {response}')
            return response

        finally:
            # Best-effort cleanup of the temp audio files, even on early return/error.
            for file_path in temp_files:
                try:
                    if os.path.exists(file_path):
                        os.remove(file_path)
                except Exception as e:
                    self._log(task, request_id, f'Error cleaning up temp file {file_path}: {str(e)}')

    def _log(self, task: int, request_id: str, message: str):
        """Log with a consistent per-request prefix for correlation."""
        self._logger.info(f'POST - speaking_task_{task} - {request_id} - {message}')

    async def _get_perfect_answer(self, task: int, question: str):
        """Ask the LLM for a model answer to *question* for the given task.

        Returns the LLM prediction (a dict expected to contain an "answer" key).
        """
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Provide a perfect answer according to ielts grading system to the following '
                    f'Speaking Part {task} question: "{question}"'
                )
            }
        ]

        if task == 1:
            messages.append({
                "role": "user",
                "content": 'The answer must be 2 or 3 sentences long.'
            })

        # Task 1 answers use the stronger model; the rest fall back to the cheaper one.
        gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO

        return await self._llm.prediction(
            gpt_model, messages, ["answer"], TemperatureSettings.GRADING_TEMPERATURE
        )

    async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
        """Ask the LLM to grade the formatted Q/A text against the IELTS rubric.

        Returns the prediction shaped like :meth:`_grade_template`.
        """
        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
                    'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
                    'assign a score of 0 if the response fails to address the question. Additionally, provide '
                    'detailed commentary highlighting both strengths and weaknesses in the response.'
                ) + questions_and_answers
            }
        ]

        # Per-task instruction appended as a separate user message.
        task_specific = {
            "1": (
                'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
                'student that they should be.'
            ),
            "2": 'Address the student as "you"',
            "3": 'Address the student as "you" and pay special attention to coherence between the answers.'
        }

        messages.append({
            "role": "user",
            "content": task_specific[str(task)]
        })

        if task in {1, 3}:
            messages.extend([
                {
                    "role": "user",
                    "content": (
                        'For pronunciations act as if you heard the answers and they were transcribed '
                        'as you heard them.'
                    )
                },
                {
                    "role": "user",
                    "content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
                }
            ])

        return await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
        )

    @staticmethod
    def _fix_speaking_overall(overall: float, task_response: dict):
        """Return *overall*, or the rounded average of the category grades when
        the LLM's overall falls outside the [min, max] range of those grades."""
        grades = [category["grade"] for category in task_response.values()]

        if overall > max(grades) or overall < min(grades):
            total_sum = sum(grades)
            average = total_sum / len(grades)
            rounded_average = round(average, 0)
            return rounded_average

        return overall

    @staticmethod
    def _zero_rating(comment: str):
        """Build an all-zero grading response carrying *comment* as the reason."""
        return {
            "comment": comment,
            "overall": 0,
            "task_response": {
                "Fluency and Coherence": {
                    "grade": 0.0,
                    "comment": ""
                },
                "Lexical Resource": {
                    "grade": 0.0,
                    "comment": ""
                },
                "Grammatical Range and Accuracy": {
                    "grade": 0.0,
                    "comment": ""
                },
                "Pronunciation": {
                    "grade": 0.0,
                    "comment": ""
                }
            }
        }

    async def _get_speaking_corrections(self, text):
        """Ask the LLM to fix transcription errors in *text*; returns the fixed string."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"fixed_text": "fixed transcription with no misspelling errors"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Fix the errors in the provided transcription and put it in a JSON. '
                    f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
                )
            }
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_3_5_TURBO,
            messages,
            ["fixed_text"],
            0.2,
            False
        )
        return response["fixed_text"]

    @staticmethod
    def _grade_template():
        """JSON template the grading LLM must follow (see :meth:`_grade_task`)."""
        return {
            "comment": "extensive comment about answer quality",
            "overall": 0.0,
            "task_response": {
                "Fluency and Coherence": {
                    "grade": 0.0,
                    "comment": (
                        "extensive comment about fluency and coherence, use examples to justify the grade awarded."
                    )
                },
                "Lexical Resource": {
                    "grade": 0.0,
                    "comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
                },
                "Grammatical Range and Accuracy": {
                    "grade": 0.0,
                    "comment": (
                        "extensive comment about grammatical range and accuracy, use examples to justify the "
                        "grade awarded."
                    )
                },
                "Pronunciation": {
                    "grade": 0.0,
                    "comment": (
                        "extensive comment about pronunciation on the transcribed answer, use examples to justify the "
                        "grade awarded."
                    )
                }
            }
        }

    @staticmethod
    async def save_file(item: GradeSpeakingItem) -> str:
        """Persist the uploaded answer audio to a unique temp file and return its path."""
        # The relative tmp/ directory may not exist on a fresh deployment;
        # create it so aiofiles.open does not fail with FileNotFoundError.
        os.makedirs("tmp", exist_ok=True)
        sound_file_name = "tmp/" + str(uuid.uuid4())
        content = await item.answer.read()
        async with aiofiles.open(sound_file_name, 'wb') as f:
            await f.write(content)
        return sound_file_name
|
||||
Reference in New Issue
Block a user