Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274

This commit is contained in:
Carlos-Mesquita
2024-12-10 22:24:40 +00:00
parent 68cab80851
commit 6982068864
167 changed files with 1411 additions and 1229 deletions

View File

@@ -0,0 +1,11 @@
from .user import UserService
from .training import *
from .third_parties import *
from .exam import *
__all__ = [
"UserService"
]
__all__.extend(third_parties.__all__)
__all__.extend(training.__all__)
__all__.extend(exam.__all__)

View File

@@ -0,0 +1,18 @@
from .level import LevelService
from .listening import ListeningService
from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
from .evaluation import EvaluationService
__all__ = [
"LevelService",
"ListeningService",
"ReadingService",
"SpeakingService",
"WritingService",
"GradeService",
"EvaluationService"
]

View File

@@ -0,0 +1,104 @@
import logging
from typing import Union, List
from fastapi import BackgroundTasks
from ielts_be.dtos.evaluation import EvaluationType
from ielts_be.dtos.speaking import GradeSpeakingItem
from ielts_be.dtos.writing import WritingGradeTaskDTO
from ielts_be.repositories import IDocumentStore
from ielts_be.services import IWritingService, ISpeakingService, IEvaluationService
class EvaluationService(IEvaluationService):
def __init__(self, db: IDocumentStore, writing_service: IWritingService, speaking_service: ISpeakingService):
self._db = db
self._writing_service = writing_service
self._speaking_service = speaking_service
self._logger = logging.getLogger(__name__)
async def create_evaluation(
self,
user_id: str,
session_id: str,
exercise_id: str,
eval_type: EvaluationType,
task: int
):
await self._db.save_to_db(
"evaluation",
{
"user": user_id,
"session_id": session_id,
"exercise_id": exercise_id,
"type": eval_type,
"task": task,
"status": "pending"
}
)
async def begin_evaluation(
self,
user_id: str, session_id: str, task: int,
exercise_id: str, exercise_type: str,
solution: Union[WritingGradeTaskDTO, List[GradeSpeakingItem]],
background_tasks: BackgroundTasks
):
background_tasks.add_task(
self._begin_evaluation,
user_id, session_id, task,
exercise_id, exercise_type,
solution
)
async def _begin_evaluation(
self, user_id: str, session_id: str, task: int,
exercise_id: str, exercise_type: str,
solution: Union[WritingGradeTaskDTO, List[GradeSpeakingItem]]
):
try:
if exercise_type == EvaluationType.WRITING:
result = await self._writing_service.grade_writing_task(
task,
solution.question,
solution.answer,
solution.attachment
)
else:
result = await self._speaking_service.grade_speaking_task(
task,
solution
)
await self._db.update(
"evaluation",
{
"user": user_id,
"exercise_id": exercise_id,
"session_id": session_id,
},
{
"$set": {
"status": "completed",
"result": result,
}
}
)
except Exception as e:
self._logger.error(f"Error processing evaluation {session_id} - {exercise_id}: {str(e)}")
await self._db.update(
"evaluation",
{
"user": user_id,
"exercise_id": exercise_id,
"session_id": session_id
},
{
"$set": {
"status": "error",
"error": str(e),
}
}
)

View File

@@ -0,0 +1,200 @@
import json
from typing import List, Dict
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.services import ILLMService, IGradeService
class GradeService(IGradeService):
def __init__(self, llm: ILLMService):
self._llm = llm
async def grade_short_answers(self, data: Dict):
json_format = {
"exercises": [
{
"id": 1,
"correct": True,
"correct_answer": " correct answer if wrong"
}
]
}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
'Grade these answers according to the text content and write a correct answer if they are '
f'wrong. Text, questions and answers:\n {data}'
)
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["exercises"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def calculate_grading_summary(self, extracted_sections: List):
ret = []
for section in extracted_sections:
openai_response_dict = await self._calculate_section_grade_summary(section)
ret.append(
{
'code': section['code'],
'name': section['name'],
'grade': section['grade'],
'evaluation': openai_response_dict['evaluation'],
'suggestions': openai_response_dict['suggestions'],
'bullet_points': self._parse_bullet_points(openai_response_dict['bullet_points'], section['grade'])
}
)
return {'sections': ret}
async def _calculate_section_grade_summary(self, section):
section_name = section['name']
section_grade = section['grade']
messages = [
{
"role": "user",
"content": (
'You are a IELTS test section grade evaluator. You will receive a IELTS test section name and the '
'grade obtained in the section. You should offer a evaluation comment on this grade and separately '
'suggestions on how to possibly get a better grade.'
)
},
{
"role": "user",
"content": f'Section: {str(section_name)} Grade: {str(section_grade)}',
},
{
"role": "user",
"content": "Speak in third person."
},
{
"role": "user",
"content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."
},
{
"role": "user",
"content": (
"Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, "
"be one paragraph long."
)
},
{
"role": "user",
"content": "Please save the evaluation comment and suggestions generated."
},
{
"role": "user",
"content": f"Offer bullet points to improve the english {str(section_name)} ability."
},
]
if section['code'] == "level":
messages[2:2] = [{
"role": "user",
"content": (
"This section is comprised of multiple choice questions that measure the user's overall english "
"level. These multiple choice questions are about knowledge on vocabulary, syntax, grammar rules, "
"and contextual usage. The grade obtained measures the ability in these areas and english language "
"overall."
)
}]
elif section['code'] == "speaking":
messages[2:2] = [{
"role": "user",
"content": (
"This section is s designed to assess the English language proficiency of individuals who want to "
"study or work in English-speaking countries. The speaking section evaluates a candidate's ability "
"to communicate effectively in spoken English."
)
}]
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
tools = self.get_tools()
res = await self._llm.prediction_override(
model="gpt-3.5-turbo",
max_tokens=chat_config['max_tokens'],
temperature=chat_config['temperature'],
tools=tools,
messages=messages
)
return self._parse_openai_response(res)
@staticmethod
def _parse_openai_response(response):
if 'choices' in response and len(response['choices']) > 0 and 'message' in response['choices'][
0] and 'tool_calls' in response['choices'][0]['message'] and isinstance(
response['choices'][0]['message']['tool_calls'], list) and len(
response['choices'][0]['message']['tool_calls']) > 0 and \
response['choices'][0]['message']['tool_calls'][0]['function']['arguments']:
return json.loads(response['choices'][0]['message']['tool_calls'][0]['function']['arguments'])
else:
return {'evaluation': "", 'suggestions': "", 'bullet_points': []}
@staticmethod
def _parse_bullet_points(bullet_points_str, grade):
max_grade_for_suggestions = 9
if isinstance(bullet_points_str, str) and grade < max_grade_for_suggestions:
# Split the string by '\n'
lines = bullet_points_str.split('\n')
# Remove '-' and trim whitespace from each line
cleaned_lines = [line.replace('-', '').strip() for line in lines]
# Add '.' to lines that don't end with it
return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
else:
return []
@staticmethod
def get_tools():
return [
{
"type": "function",
"function": {
"name": "save_evaluation_and_suggestions",
"description": "Saves the evaluation and suggestions requested by input.",
"parameters": {
"type": "object",
"properties": {
"evaluation": {
"type": "string",
"description": (
"A comment on the IELTS section grade obtained in the specific section and what "
"it could mean without suggestions."
),
},
"suggestions": {
"type": "string",
"description": (
"A small paragraph text with suggestions on how to possibly get a better grade "
"than the one obtained."
),
},
"bullet_points": {
"type": "string",
"description": (
"Text with four bullet points to improve the english speaking ability. Only "
"include text for the bullet points separated by a paragraph."
),
},
},
"required": ["evaluation", "suggestions"],
},
}
}
]

View File

@@ -0,0 +1,210 @@
from asyncio import gather
from typing import Dict, Optional
from uuid import uuid4
from fastapi import UploadFile
import random
from ielts_be.configs.constants import EducationalContent
from ielts_be.dtos.level import LevelExercisesDTO
from ielts_be.repositories import IDocumentStore
from ielts_be.services import (
ILevelService, ILLMService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
from .exercises import MultipleChoice, BlankSpace, PassageUtas, FillBlanks
from .full_exams import CustomLevelModule, LevelUtas
from .upload import UploadLevelModule
class LevelService(ILevelService):
def __init__(
self,
llm: ILLMService,
document_store: IDocumentStore,
mc_variants: Dict,
reading_service: IReadingService,
writing_service: IWritingService,
speaking_service: ISpeakingService,
listening_service: IListeningService
):
self._llm = llm
self._document_store = document_store
self._reading_service = reading_service
self._upload_module = UploadLevelModule(llm)
self._mc_variants = mc_variants
self._mc = MultipleChoice(llm, mc_variants)
self._blank_space = BlankSpace(llm, mc_variants)
self._passage_utas = PassageUtas(llm, reading_service, mc_variants)
self._fill_blanks = FillBlanks(llm)
self._level_utas = LevelUtas(llm, self, mc_variants)
self._custom = CustomLevelModule(
llm, self, reading_service, listening_service, writing_service, speaking_service
)
async def upload_level(self, upload: UploadFile, solutions: Optional[UploadFile] = None) -> Dict:
return await self._upload_module.generate_level_from_file(upload, solutions)
async def _generate_exercise(self, req_exercise, start_id):
if req_exercise.type == "mcBlank":
questions = await self._mc.gen_multiple_choice("blank_space", req_exercise.quantity, start_id)
questions["variant"] = "mcBlank"
questions["type"] = "multipleChoice"
questions["prompt"] = "Choose the correct word or group of words that completes the sentences."
return questions
elif req_exercise.type == "mcUnderline":
questions = await self._mc.gen_multiple_choice("underline", req_exercise.quantity, start_id)
questions["variant"] = "mcUnderline"
questions["type"] = "multipleChoice"
questions["prompt"] = "Choose the underlined word or group of words that is not correct."
return questions
elif req_exercise.type == "passageUtas":
topic = req_exercise.topic if req_exercise.topic else random.choice(EducationalContent.TOPICS)
exercise = await self._passage_utas.gen_reading_passage_utas(
start_id,
req_exercise.quantity,
topic,
req_exercise.text_size
)
exercise["prompt"] = "Read the text and answer the questions below."
return exercise
elif req_exercise.type == "fillBlanksMC":
exercise = await self._fill_blanks.gen_fill_blanks(
start_id,
req_exercise.quantity,
req_exercise.text_size,
req_exercise.topic
)
exercise["prompt"] = "Read the text below and choose the correct word for each space."
return exercise
async def generate_exercises(self, dto: LevelExercisesDTO):
start_ids = []
current_id = 1
for req_exercise in dto.exercises:
start_ids.append(current_id)
current_id += req_exercise.quantity
tasks = [
self._generate_exercise(req_exercise, start_id)
for req_exercise, start_id in zip(dto.exercises, start_ids)
]
questions = await gather(*tasks)
questions = [{'id': str(uuid4()), **exercise} for exercise in questions]
return {"exercises": questions}
# Just here to support other modules that I don't know if they are supposed to still be used
async def gen_multiple_choice(self, mc_variant: str, quantity: int, start_id: int = 1):
return await self._mc.gen_multiple_choice(mc_variant, quantity, start_id)
async def gen_reading_passage_utas(self, start_id, mc_quantity: int, topic=Optional[str]): # sa_quantity: int,
return await self._passage_utas.gen_reading_passage_utas(start_id, mc_quantity, topic)
async def gen_blank_space_text_utas(self, quantity: int, start_id: int, size: int, topic: str):
return await self._blank_space.gen_blank_space_text_utas(quantity, start_id, size, topic)
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
pass
async def get_level_utas(self):
return await self._level_utas.get_level_utas()
async def get_custom_level(self, data: Dict):
return await self._custom.get_custom_level(data)
"""
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
mc_template = self._mc_variants[mc_variant]["questions"][0]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": (
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
f'it can be easy, intermediate or advanced.'
)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
"""
"""
async def _replace_exercise_if_exists(
self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
mc_variant, utas
)
else:
seen_keys.add(key)
if not utas:
for exam in all_exams:
exam_dict = exam.to_dict()
if len(exam_dict.get("parts", [])) > 0:
exercise_dict = exam_dict.get("parts", [])[0]
if len(exercise_dict.get("exercises", [])) > 0:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exercise_dict.get("exercises", [])[0]["questions"]
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
else:
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
return current_exercise, seen_keys
"""

View File

@@ -0,0 +1,11 @@
from .multiple_choice import MultipleChoice
from .blank_space import BlankSpace
from .passage_utas import PassageUtas
from .fill_blanks import FillBlanks
__all__ = [
"MultipleChoice",
"BlankSpace",
"PassageUtas",
"FillBlanks"
]

View File

@@ -0,0 +1,44 @@
import random
from ielts_be.configs.constants import EducationalContent, GPTModels, TemperatureSettings
from ielts_be.services import ILLMService
class BlankSpace:
def __init__(self, llm: ILLMService, mc_variants: dict):
self._llm = llm
self._mc_variants = mc_variants
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=None
):
if not topic:
topic = random.choice(EducationalContent.MTI_TOPICS)
json_template = self._mc_variants["blank_space_text"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. '
'Put the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]

View File

@@ -0,0 +1,73 @@
import random
from ielts_be.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from ielts_be.services import ILLMService
class FillBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_fill_blanks(
self, start_id: int, quantity: int, size: int = 300, topic=None
):
if not topic:
topic = random.choice(EducationalContent.MTI_TOPICS)
print(quantity)
print(start_id)
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {self._fill_blanks_mc_template()}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose exactly {quantity} words (cannot be sequential words) replace '
'each with {{id}} (starting from ' + str(start_id) + ' and incrementing), then generate a '
'JSON object containing: the modified text, a solutions array with each word\'s correct '
'letter (A-D), and a words array containing each id with four options where one is '
'the original word (matching the solution) and three are plausible but incorrect '
'alternatives that maintain grammatical consistency. '
'You cannot use repeated words!' #TODO: Solve this after
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, [], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
**question,
"type": "fillBlanks",
"variant": "mc",
"prompt": "Click a blank to select the appropriate word for it.",
}
@staticmethod
def _fill_blanks_mc_template():
return {
"text": "",
"solutions": [
{
"id": "",
"solution": "<A,B,C or D>"
}
],
"words": [
{
"id": "",
"options": {
"A": "",
"B": "",
"C": "",
"D": ""
}
}
]
}

View File

@@ -0,0 +1,84 @@
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class MultipleChoice:
def __init__(self, llm: ILLMService, mc_variants: dict):
self._llm = llm
self._mc_variants = mc_variants
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1
):
mc_template = self._mc_variants[mc_variant]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
gen_multiple_choice_for_text: str = (
'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
'punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return ExercisesHelper.fix_exercise_ids(questions, start_id)
"""
if len(question["questions"]) != quantity:
return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
else:
if not utas:
all_exams = await self._document_store.get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
"type": "multipleChoice",
}
else:
if all_exams is not None:
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response
"""

View File

@@ -0,0 +1,91 @@
from typing import Optional
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService, IReadingService
class PassageUtas:
def __init__(self, llm: ILLMService, reading_service: IReadingService, mc_variants: dict):
self._llm = llm
self._reading_service = reading_service
self._mc_variants = mc_variants
async def gen_reading_passage_utas(
self, start_id, mc_quantity: int, topic: Optional[str], word_size: Optional[int] # sa_quantity: int,
):
passage = await self._reading_service.generate_reading_passage(1, topic, word_size)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id, mc_quantity)
mc_exercises["type"] = "multipleChoice"
"""
exercises: {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"""
return {
**mc_exercises,
"passage": {
"content": passage["text"],
"title": passage["title"]
},
"mcVariant": "passageUtas"
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
f'maximum 3 words per answer, about this text:\n"{text}"'
)
},
{
"role": "user",
"content": f'The id starts at {start_id}.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_template = self._mc_variants["text_mc_utas"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_text_multiple_choice_utas(text, mc_quantity, start_id)
else:
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response

View File

@@ -0,0 +1,7 @@
from .custom import CustomLevelModule
from .level_utas import LevelUtas
__all__ = [
"CustomLevelModule",
"LevelUtas"
]

View File

@@ -0,0 +1,335 @@
import queue
import random
from typing import Dict
from ielts_be.configs.constants import CustomLevelExerciseTypes, EducationalContent
from ielts_be.services import (
ILLMService, ILevelService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
class CustomLevelModule:
def __init__(
self,
llm: ILLMService,
level: ILevelService,
reading: IReadingService,
listening: IListeningService,
writing: IWritingService,
speaking: ISpeakingService
):
self._llm = llm
self._level = level
self._reading = reading
self._listening = listening
self._writing = writing
self._speaking = speaking
# TODO: I've changed this to retrieve the args from the body request and not request query args
async def get_custom_level(self, data: Dict):
nr_exercises = int(data.get('nr_exercises'))
exercise_id = 1
response = {
"exercises": {},
"module": "level"
}
for i in range(1, nr_exercises + 1, 1):
exercise_type = data.get(f'exercise_{i}_type')
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"normal", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"blank_space", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"underline", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
exercise_qty, exercise_id, exercise_text_size
)
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
)
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
1, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
1, exercise_topic, exercise_difficulty, exercise_topic_2
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
3, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
if exercise_ideamatch_qty != -1:
exercises.append('ideaMatch')
exercise_qty_q.put(exercise_ideamatch_qty)
total_qty = total_qty + exercise_ideamatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc3_qty != -1:
exercises.append('multipleChoice3Options')
exercise_qty_q.put(exercise_mc3_qty)
total_qty = total_qty + exercise_mc3_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
return response

View File

@@ -0,0 +1,119 @@
import json
import uuid
from ielts_be.services import ILLMService
class LevelUtas:
def __init__(self, llm: ILLMService, level_service, mc_variants: dict):
self._llm = llm
self._mc_variants = mc_variants
self._level_service = level_service
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
# await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
mc_exercises1 = await self._level_service.gen_multiple_choice(
"blank_space", 15, 1, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self._level_service.gen_multiple_choice(
"blank_space", 15, 16, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self._level_service.gen_multiple_choice(
"blank_space", 15, 31, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self._level_service.gen_multiple_choice(
"underline", 15, 46, utas=True, all_exams=all_mc_questions
)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self._level_service.gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self._level_service.gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self._level_service.gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}

View File

@@ -0,0 +1,137 @@
{
"normal": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"blank_space": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "This"
},
{
"id": "B",
"text": "Those"
},
{
"id": "C",
"text": "These"
},
{
"id": "D",
"text": "That"
}
],
"prompt": "_____ man there is very kind.",
"solution": "A",
"variant": "text"
}
]
},
"underline": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "was"
},
{
"id": "B",
"text": "for work"
},
{
"id": "C",
"text": "because"
},
{
"id": "D",
"text": "could"
}
],
"prompt": "I <u>was</u> late <u>for work</u> yesterday <u>because</u> I <u>could</u> start my car.",
"solution": "D",
"variant": "text"
}
]
},
"blank_space_text": {
"question": {
"words": [
{
"id": "1",
"text": "a"
},
{
"id": "2",
"text": "b"
},
{
"id": "3",
"text": "c"
},
{
"id": "4",
"text": "d"
}
],
"text": "text"
}
},
"text_mc_utas": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
}

View File

@@ -0,0 +1,338 @@
from uuid import uuid4
import aiofiles
import os
from logging import getLogger
from typing import Dict, Any, Optional
import pdfplumber
from fastapi import UploadFile
from ielts_be.services import ILLMService
from ielts_be.helpers import FileHelper
from ielts_be.mappers import LevelMapper
from ielts_be.dtos.exams.level import Exam
from ielts_be.dtos.sheet import Sheet
from ielts_be.utils import suppress_loggers
class UploadLevelModule:
def __init__(self, openai: ILLMService):
self._logger = getLogger(__name__)
self._llm = openai
async def generate_level_from_file(self, exercises: UploadFile, solutions: Optional[UploadFile]) -> Dict[str, Any] | None:
path_id = str(uuid4())
ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
if solutions:
ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
#completion: Coroutine[Any, Any, Exam] = (
# self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
#)
response = await self._html_completion(path_id, solutions is not None)
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return self.fix_ids(response.model_dump(exclude_none=True))
return None
@staticmethod
@suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool:
with pdfplumber.open(pdf_path) as pdf:
for page in pdf.pages:
if page.images:
return True
return False
def _level_json_schema(self):
return {
"parts": [
{
"text": {
"content": "<this attribute is mandatory if there is a text passage else this 'text' field is omitted>",
"title": "<this attribute is optional you may exclude it if not required>",
},
"exercises": [
self._multiple_choice_html(),
self._passage_blank_space_html()
]
}
]
}
async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam:
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
html = await f.read()
solutions = []
if solutions_provided:
async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
solutions_html = await f.read()
solutions.append({
"role": "user",
"content": f'The solutions to the question sheet are the following:\n\n{solutions_html}'
})
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": html
},
*solutions
],
LevelMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_html(self):
return {
"role": "system",
"content": (
'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n'
'In the question sheet you will only see 4 types of question:\n'
'- blank space multiple choice\n'
'- underline multiple choice\n'
'- reading passage blank space multiple choice\n'
'- reading passage multiple choice\n\n'
'For the first two types of questions the template is the same but the question prompts differ, '
'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
'multiple "_", in the underline you must include in the prompt the <u></u> to '
'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
'For the reading passage exercise you must handle the formatting of the passages. If it is a '
'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
'between paragraphs.\n\n'
'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
'out the best paragraph separation possible.'
'You will place all the information in a single JSON: '
'{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""} ]}\n '
'Where {...} are the exercises templates for each part of a question sheet and the optional field '
'text, which contains the reading passages that are required in order to solve the part questions, '
'(if there are passages) place them in text.content and if there is a title place it in text.title '
'else omit the title field.\n'
'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means '
'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST '
'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections '
'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the'
' word Part x which is in the html. '
'You must strictly adhere to this instruction, do not mistake sections for parts!\n'
'The templates for the exercises are the following:\n'
'- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
f'{self._multiple_choice_html()}\n'
f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one)'
'without paragraphs or line numbers, with 2 newlines between paragraphs.'
)
}
@staticmethod
def _multiple_choice_html():
return {
"type": "multipleChoice",
"prompt": "<general instructions for this section>",
"questions": [
{
"id": "<question number as string>",
"prompt": "<question text>",
"options": [
{
"id": "<A/B/C/D>",
"text": "<option text>"
}
],
"solution": "<correct option letter>",
"variant": "text"
}
]
}
@staticmethod
def _passage_blank_space_html():
return {
"type": "fillBlanks",
"variant": "mc",
"prompt": "Click a blank to select the appropriate word for it.",
"text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id/number>}} with 2 newlines between paragraphs>"
),
"solutions": [
{
"id": "<question number>",
"solution": "<the option that holds the solution>"
}
],
"words": [
{
"id": "<question number>",
"options": {
"A": "<a option>",
"B": "<b option>",
"C": "<c option>",
"D": "<d option>"
}
}
]
}
async def _png_completion(self, path_id: str) -> Exam:
FileHelper.pdf_to_png(path_id)
tmp_files = os.listdir(f'./tmp/{path_id}')
pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
json_schema = {
"components": [
{"type": "part", "part": "<name or number of the part>"},
self._multiple_choice_png(),
{"type": "blanksPassage", "text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question number>}} with 2 newlines between paragraphs>"
)},
{"type": "passage", "context": (
"<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
)},
self._passage_blank_space_png()
]
}
components = []
for i in range(len(pages)):
current_page = pages[i]
next_page = pages[i + 1] if i + 1 < len(pages) else None
batch = [current_page, next_page] if next_page else [current_page]
sheet = await self._png_batch(path_id, batch, json_schema)
sheet.batch = i + 1
components.append(sheet.model_dump())
batches = {"batches": components}
return await self._batches_to_exam_completion(batches)
async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
return await self._llm.pydantic_prediction(
[self._gpt_instructions_png(),
{
"role": "user",
"content": [
*FileHelper.b64_pngs(path_id, files)
]
}
],
LevelMapper.map_to_sheet,
str(json_schema)
)
def _gpt_instructions_png(self):
return {
"role": "system",
"content": (
'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
'Your current task is to scan english questions sheets.\n\n'
'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
'sheet components you will retrieve from the images, the components and their corresponding JSON '
'templates are as follows:\n'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In the blanks prompt you must leave 5 underscores to represent the blank space. '
'In the underlines questions the objective is to pick the words that are incorrect in the given '
'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
'in the order they appear in the question for the options A to D, disreguard options that might be '
'included underneath the underlines question and use the ones you wrapped in <u></u>.'
'In normal you just leave the question as is. '
f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
'- Reading Passages, there are two types of reading passages. Reading passages where you will see '
'blanks represented by a (question id) followed by a line, you must format these types of reading '
'passages to be only the text with the brackets that have the question id and line replaced with '
'"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
'and place 2 newlines between paragraphs. '
'For the reading passages with blanks the template is: {"type": "blanksPassage", '
'"text": "<The whole text for the exercise with replacements for blank spaces and their '
'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '
'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n'
'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
'reading passages are cut off don\'t include them in the JSON.'
)
}
def _multiple_choice_png(self):
multiple_choice = self._multiple_choice_html()["questions"][0]
multiple_choice["type"] = "multipleChoice"
multiple_choice.pop("solution")
return multiple_choice
def _passage_blank_space_png(self):
passage_blank_space = self._passage_blank_space_html()["words"][0]
passage_blank_space["type"] = "fillBlanks"
return passage_blank_space
async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": str(batches)
}
],
LevelMapper.map_to_exam_model,
str(self._level_json_schema())
)
@staticmethod
def fix_ids(response):
counter = 1
for part in response["parts"]:
for exercise in part["exercises"]:
if exercise["type"] == "multipleChoice":
for question in exercise["questions"]:
question["id"] = counter
counter += 1
if exercise["type"] == "fillBlanks":
for i in range(len(exercise["words"])):
exercise["words"][i]["id"] = counter
exercise["solutions"][i]["id"] = counter
counter += 1
return response

View File

@@ -0,0 +1,290 @@
import asyncio
from logging import getLogger
import random
from typing import Dict, Any
from starlette.datastructures import UploadFile
from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises
from ielts_be.repositories import IFileStorage, IDocumentStore
from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService
from ielts_be.configs.constants import (
NeuralVoices, GPTModels, TemperatureSettings, EducationalContent,
FieldsAndExercises
)
from ielts_be.helpers import FileHelper
from .import_listening import ImportListeningModule
from .write_blank_forms import WriteBlankForms
from .write_blanks import WriteBlanks
from .write_blank_notes import WriteBlankNotes
from ..shared import TrueFalse, MultipleChoice
class ListeningService(IListeningService):
CONVERSATION_TAIL = (
"Please include random names and genders for the characters in your dialogue. "
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
)
MONOLOGUE_TAIL = (
"Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
)
def __init__(
self, llm: ILLMService,
tts: ITextToSpeechService,
stt: ISpeechToTextService,
file_storage: IFileStorage,
document_store: IDocumentStore
):
self._llm = llm
self._tts = tts
self._stt = stt
self._file_storage = file_storage
self._document_store = document_store
self._logger = getLogger(__name__)
self._multiple_choice = MultipleChoice(llm)
self._write_blanks = WriteBlanks(llm)
self._write_blanks_forms = WriteBlankForms(llm)
self._write_blanks_notes = WriteBlankNotes(llm)
self._import = ImportListeningModule(llm)
self._true_false = TrueFalse(llm)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue",
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
async def import_exam(
self, exercises: UploadFile, solutions: UploadFile = None
) -> Dict[str, Any] | None:
return await self._import.import_from_file(exercises, solutions)
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
# TODO: When mp3 editor
async def get_dialog_from_audio(self, upload: UploadFile):
ext, path_id = await FileHelper.save_upload(upload)
dialog = await self._stt.speech_to_text(f'./tmp/{path_id}/upload.{ext}')
FileHelper.remove_directory(f'./tmp/{path_id}')
async def generate_mp3(self, dto: Dialog) -> bytes:
return await self._tts.text_to_speech(dto)
async def get_listening_question(self, dto: GenerateListeningExercises):
start_id = 1
exercise_tasks = []
for req_exercise in dto.exercises:
exercise_tasks.append(
self._generate_exercise(
req_exercise,
"dialog or monologue",
dto.text,
start_id,
dto.difficulty
)
)
start_id += req_exercise.quantity
return {"exercises": await asyncio.gather(*exercise_tasks) }
async def _generate_exercise(
self, req_exercise: ListeningExercises, dialog_type: str, text: str, start_id: int, difficulty: str
):
if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options":
n_options = 4 if req_exercise.type == "multipleChoice" else 3
question = await self._multiple_choice.gen_multiple_choice(
text, req_exercise.quantity, start_id, difficulty, n_options
)
self._logger.info(f"Added multiple choice: {question}")
return question
elif req_exercise.type == "writeBlanksQuestions":
question = await self._write_blanks.gen_write_blanks_questions(
dialog_type, text, req_exercise.quantity, start_id, difficulty
)
question["variant"] = "questions"
self._logger.info(f"Added write blanks questions: {question}")
return question
elif req_exercise.type == "writeBlanksFill":
question = await self._write_blanks_notes.gen_write_blanks_notes(
dialog_type, text, req_exercise.quantity, start_id, difficulty
)
question["variant"] = "fill"
self._logger.info(f"Added write blanks notes: {question}")
return question
elif req_exercise.type == "writeBlanksForm":
question = await self._write_blanks_forms.gen_write_blanks_form(
dialog_type, text, req_exercise.quantity, start_id, difficulty
)
question["variant"] = "form"
self._logger.info(f"Added write blanks form: {question}")
return question
elif req_exercise.type == "trueFalse":
question = await self._true_false.gen_true_false_not_given_exercise(
text, req_exercise.quantity, start_id, difficulty, "listening"
)
self._logger.info(f"Added trueFalse: {question}")
return question
# ==================================================================================================================
# generate_listening_question helpers
# ==================================================================================================================
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
head = (
'Compose an authentic conversation between two individuals in the everyday social context of "'
if section == 1 else
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
f'{head}{topic}". {self.CONVERSATION_TAIL}'
)
}
]
if section == 1:
messages.extend([
{
"role": "user",
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
},
{
"role": "user",
"content": 'Try to have spelling of names (cities, people, etc)'
}
])
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["conversation"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
conversation = self._get_conversation_voices(response, True)
return {"dialog": conversation["conversation"]}
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
head = (
'Generate a comprehensive monologue set in the social context of'
if section == 2 else
'Generate a comprehensive and complex monologue on the academic subject of'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["monologue"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {"dialog": response["monologue"]}
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
chosen_voices = []
name_to_voice = {}
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
voice = None
# section 1
if unique_voices_across_segments:
while voice is None:
chosen_voice = self._get_random_voice(segment['gender'])
if chosen_voice not in chosen_voices:
voice = chosen_voice
chosen_voices.append(voice)
# section 3
else:
voice = self._get_random_voice(segment['gender'])
name_to_voice[name] = voice
segment['voice'] = voice
return response
@staticmethod
def _get_random_voice(gender: str):
if gender.lower() == 'male':
available_voices = NeuralVoices.MALE_NEURAL_VOICES
else:
available_voices = NeuralVoices.FEMALE_NEURAL_VOICES
return random.choice(available_voices)['Id']
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
return "\n".join(readable_text)

View File

@@ -0,0 +1,183 @@
import json
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from ielts_be.dtos.exams.listening import ListeningExam
from ielts_be.helpers import FileHelper
from ielts_be.mappers.listening import ListeningMapper
from ielts_be.services import ILLMService
class ImportListeningModule:
def __init__(self, llm_service: ILLMService):
self._logger = getLogger(__name__)
self._llm = llm_service
async def import_from_file(
self,
exercises: UploadFile,
solutions: UploadFile = None
) -> Dict[str, Any] | None:
path_id = str(uuid4())
ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
FileHelper.convert_file_to_html(
f'./tmp/{path_id}/exercises.{ext}',
f'./tmp/{path_id}/exercises.html'
)
if solutions:
ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
FileHelper.convert_file_to_html(
f'./tmp/{path_id}/solutions.{ext}',
f'./tmp/{path_id}/solutions.html'
)
response = await self._get_listening_sections(path_id, solutions is not None)
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return response.model_dump(exclude_none=True)
return None
async def _get_listening_sections(
self,
path_id: str,
has_solutions: bool = False
) -> ListeningExam:
async with aiofiles.open(
f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
) as f:
exercises_html = await f.read()
messages = [
self._instructions(has_solutions),
{
"role": "user",
"content": f"Listening exercise sheet:\n\n{exercises_html}"
}
]
if has_solutions:
async with aiofiles.open(
f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8'
) as f:
solutions_html = await f.read()
messages.append({
"role": "user",
"content": f"Solutions:\n\n{solutions_html}"
})
return await self._llm.pydantic_prediction(
messages,
ListeningMapper.map_to_test_model,
str(self._listening_json_schema())
)
@staticmethod
def _multiple_choice_template() -> dict:
return {
"type": "multipleChoice",
"prompt": "<general instructions for this section>",
"questions": [
{
"id": "<question number as string>",
"prompt": "<question text>",
"options": [
{
"id": "<A/B/C/D>",
"text": "<option text>"
}
],
"solution": "<correct option letter>",
"variant": "text"
}
]
}
@staticmethod
def _write_blanks_template() -> dict:
return {
"type": "writeBlanks",
"maxWords": "<integer max words allowed per answer>",
"prompt": "<instructions>",
"questions": [
{
"id": "<question number as string>",
"prompt": "<question text with blanks replaced with {{id}}>",
"solution": ["<acceptable answer(s)>"]
}
],
"variant": "<one of: questions, fill, form - chosen based on format:\n" +
"- questions: for numbered questions with blank at end\n" +
"- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH not separated related questions!\n" +
"- form: when questions and fill dont meet the requirements>"
}
@staticmethod
def _true_false():
return {
"questions": [
{
"id": "<question number>",
"prompt": "<statement to evaluate>",
"solution": "<one of: true, false, not_given>",
}
],
"type": "trueFalse",
"prompt": "<specific instructions including T/F/NG marking scheme>"
}
def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
solutions_str = " and its solutions" if has_solutions else ""
return {
"role": "system",
"content": (
f"You are processing a listening test exercise sheet{solutions_str}. "
"Structure the test according to this json template:\n\n"
f"{self._listening_json_schema()}\n\n"
"Each exercise within a section should follow these templates:\n\n"
f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
f"2. True/False Questions:\n{self._true_false()}\n\n"
f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
"\nImportant rules:\n"
"1. Keep exact question numbering from the original\n"
"2. Include all options for multiple choice questions\n"
"3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n"
"4. Set maxWords according to the instructions\n"
"5. Include all possible correct answers in solution arrays\n"
"6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n"
"7. For writeBlanks, choose the appropriate variant:\n"
" - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n"
" - fill: for paragraph/summary with blanks\n"
" - form: when questions and fill dont meet the requirements\n"
"8. For text fields, use actual newlines between questions/sentences\n"
"9. Format text according to chosen variant:\n"
" - questions: each line should end with {{id}}\n"
" - fill: embed {{id}} naturally in the paragraph\n"
" - form: place {{id}} where blank should appear in text\n"
"10. For True/False, use exact values: true, false, or not_given\n\n"
"11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to "
"you and they are uppercase you should placed them in lowercase.\n\n"
"First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
"then for each section identify and structure its exercises according to the templates above."
)
}
def _listening_json_schema(self) -> Dict[str, Any]:
return {
"minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",
"parts": [
{
"intro": "<optional field that contains information about the section>",
"exercises": [
self._multiple_choice_template(),
self._write_blanks_template(),
self._true_false()
]
}
]
}

View File

@@ -0,0 +1,55 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class WriteBlankForms:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_form(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
if dialog_type == "conversation":
messages.append({
"role": "user",
"content": (
'It must be a form and not questions. '
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
)
})
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
parsed_form = parsed_form["form"][:quantity]
replaced_form, words = ExercisesHelper.build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Fill the form with words/numbers missing.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": replaced_form,
"type": "writeBlanks"
}

View File

@@ -0,0 +1,68 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class WriteBlankNotes:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_notes(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["notes"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this '
'format: {"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = await self._llm.prediction(
GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
words = words["words"][:quantity]
replaced_notes = ExercisesHelper.replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Fill the blank space with the word missing from the audio.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": "\\n".join(replaced_notes),
"type": "writeBlanks"
}

View File

@@ -0,0 +1,43 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class WriteBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_questions(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Answer the questions below using no more than three words or a number accordingly.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}

View File

@@ -0,0 +1,147 @@
import asyncio
from logging import getLogger
from fastapi import UploadFile
from ielts_be.configs.constants import GPTModels, FieldsAndExercises, TemperatureSettings
from ielts_be.dtos.reading import ReadingDTO
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import IReadingService, ILLMService
from .fill_blanks import FillBlanks
from .idea_match import IdeaMatch
from .paragraph_match import ParagraphMatch
from ..shared import TrueFalse, MultipleChoice
from .import_reading import ImportReadingModule
from .write_blanks import WriteBlanks
class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
self._fill_blanks = FillBlanks(llm)
self._idea_match = IdeaMatch(llm)
self._paragraph_match = ParagraphMatch(llm)
self._true_false = TrueFalse(llm)
self._write_blanks = WriteBlanks(llm)
self._multiple_choice = MultipleChoice(llm)
self._logger = getLogger(__name__)
self._import = ImportReadingModule(llm)
async def import_exam(self, exercises: UploadFile, solutions: UploadFile = None):
return await self._import.import_from_file(exercises, solutions)
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"title": "title of the text", "text": "generated text"}')
},
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on your generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
FieldsAndExercises.GEN_TEXT_FIELDS,
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def _generate_single_exercise(self, req_exercise, text: str, start_id: int, difficulty: str) -> dict:
if req_exercise.type == "fillBlanks":
question = await self._fill_blanks.gen_summary_fill_blanks_exercise(
text, req_exercise.quantity, start_id, difficulty, req_exercise.num_random_words
)
self._logger.info(f"Added fill blanks: {question}")
return question
elif req_exercise.type == "trueFalse":
question = await self._true_false.gen_true_false_not_given_exercise(
text, req_exercise.quantity, start_id, difficulty, "reading"
)
self._logger.info(f"Added trueFalse: {question}")
return question
elif req_exercise.type == "writeBlanks":
question = await self._write_blanks.gen_write_blanks_exercise(
text, req_exercise.quantity, start_id, difficulty, req_exercise.max_words
)
if ExercisesHelper.answer_word_limit_ok(question):
self._logger.info(f"Added write blanks: {question}")
return question
else:
self._logger.info("Did not add write blanks because it did not respect word limit")
return {}
elif req_exercise.type == "paragraphMatch":
question = await self._paragraph_match.gen_paragraph_match_exercise(
text, req_exercise.quantity, start_id
)
self._logger.info(f"Added paragraph match: {question}")
return question
elif req_exercise.type == "ideaMatch":
question = await self._idea_match.gen_idea_match_exercise(
text, req_exercise.quantity, start_id
)
question["variant"] = "ideaMatch"
self._logger.info(f"Added idea match: {question}")
return question
elif req_exercise.type == "multipleChoice":
question = await self._multiple_choice.gen_multiple_choice(
text, req_exercise.quantity, start_id, difficulty, 4
)
self._logger.info(f"Added multiple choice: {question}")
return question
async def generate_reading_exercises(self, dto: ReadingDTO):
exercise_tasks = []
start_id = 1
for req_exercise in dto.exercises:
exercise_tasks.append(
self._generate_single_exercise(
req_exercise,
dto.text,
start_id,
dto.difficulty
)
)
start_id += req_exercise.quantity
return {
"exercises": await asyncio.gather(*exercise_tasks)
}

View File

@@ -0,0 +1,73 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class FillBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": f'Summarize this text: "{text}"'
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}

View File

@@ -0,0 +1,46 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class IdeaMatch:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_idea_match_exercise(self, text: str, quantity: int, start_id: int):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"ideas": [ '
'{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
'{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
']}'
)
},
{
"role": "user",
"content": (
f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
f'The text: {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
ideas = response["ideas"]
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": ExercisesHelper.build_options(ideas),
"prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
"sentences": ExercisesHelper.build_sentences(ideas, start_id),
"type": "matchSentences"
}

View File

@@ -0,0 +1,237 @@
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from ielts_be.helpers import FileHelper
from ielts_be.mappers.reading import ReadingMapper
from ielts_be.services import ILLMService
from ielts_be.dtos.exams.reading import Exam
class ImportReadingModule:
def __init__(self, openai: ILLMService):
self._logger = getLogger(__name__)
self._llm = openai
async def import_from_file(
self, exercises: UploadFile, solutions: UploadFile = None
) -> Dict[str, Any] | None:
path_id = str(uuid4())
ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
if solutions:
ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
response = await self._get_reading_parts(path_id, solutions is not None)
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return response.model_dump(exclude_none=True)
return None
async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
exercises_html = await f.read()
messages = [
self._instructions(solutions),
{
"role": "user",
"content": f"Exam question sheet:\n\n{exercises_html}"
}
]
if solutions:
async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
solutions_html = await f.read()
messages.append({
"role": "user",
"content": f"Solutions:\n\n{solutions_html}"
})
return await self._llm.pydantic_prediction(
messages,
ReadingMapper.map_to_exam_model,
str(self._reading_json_schema())
)
def _reading_json_schema(self):
json = self._reading_exam_template()
json["parts"][0]["exercises"] = [
self._write_blanks(),
self._fill_blanks(),
self._match_sentences(),
self._true_false(),
self._multiple_choice()
]
return json
@staticmethod
def _reading_exam_template():
return {
"minTimer": "<integer representing minutes allowed for the exam>",
"parts": [
{
"text": {
"title": "<title of the reading passage>",
"content": "<full text content of the reading passage>",
},
"exercises": []
}
]
}
@staticmethod
def _write_blanks():
return {
"maxWords": "<integer max words allowed per answer>",
"solutions": [
{
"id": "<question number as string>",
"solution": [
"<acceptable answer(s) within maxWords limit>"
]
}
],
"text": (
"<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] "
"- notice how there the question number inside {{}} -> the text MUST always contain the question number in that format "
"- and notice how there is a double backslash before the n -> I want an escaped newline in your output> "
),
"type": "writeBlanks",
"prompt": "<specific instructions for this exercise section>"
}
@staticmethod
def _match_sentences():
return {
"options": [
{
"id": "<paragraph letter A-F>",
"sentence": "<THIS NEEDS TO BE A PARAGRAPH OF THE SECTION TEXT>"
}
],
"sentences": [
{
"id": "<question number as string>",
"solution": "<matching paragraph letter>",
"sentence": "<A SHORT SENTENCE THAT CONVEYS AND IDEA OR HEADING>"
}
],
"type": "matchSentences",
"variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>",
"prompt": "<specific instructions for this exercise section>"
}
@staticmethod
def _true_false():
return {
"questions": [
{
"id": "<question number>",
"prompt": "<statement to evaluate>",
"solution": "<one of: true, false, not_given>",
}
],
"type": "trueFalse",
"prompt": "<specific instructions including T/F/NG marking scheme>"
}
@staticmethod
def _multiple_choice():
return {
"questions": [
{
"id": "<question number>",
"prompt": "<question text>",
"options": [
{
"id": "<A, B, or C>",
"text": "<option text>"
}
],
"solution": "<correct option letter>",
"variant": "text"
}
],
"type": "multipleChoice",
"prompt": "<specific instructions for this exercise section>"
}
@staticmethod
def _fill_blanks():
return {
"solutions": [
{
"id": "<blank number>",
"solution": "<correct word>"
}
],
"text": "<text passage with blanks marked as {{<blank number>}}>",
"type": "fillBlanks",
"words": [
{
"letter": "<word identifier letter>",
"word": "<word from word bank>"
}
],
"prompt": "<specific instructions for this exercise section>"
}
def _instructions(self, solutions=False):
solutions_str = " and its solutions" if solutions else ""
tail = (
"Parse the exam carefully and identify:\n"
"1. Time limit from instructions\n"
"2. Reading passage title and full content\n"
"3. All exercise sections and their specific instructions\n"
"4. Question numbering and grouping\n"
"5. Word limits and formatting requirements\n"
"6. Specific marking schemes (e.g., T/F/NG)\n\n"
+ (
"Solutions were not provided - analyze the passage carefully to determine correct answers."
if not solutions else
"Use the provided solutions to fill in all answer fields accurately, if word answers have all letters "
"uppercase convert them to lowercase before assigning them."
)
+
"Pay extra attention to fillblanks exercises the solution and option wording must match in case! "
"There can't be options in lowercase and solutions in uppercase! "
"Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
"should be a text, if there isn't a title for it choose a reasonable one based on its contents. "
)
return {
"role": "system",
"content": (
f"You are processing an English reading comprehension exam{solutions_str}. Structure the data according "
f"to this json template: {self._reading_exam_template()}\n\n"
"The exam contains these exercise types:\n"
"1. \"writeBlanks\": Short answer questions with strict word limits\n"
"2. \"matchSentences\": Match headings or ideas with paragraphs, the sentences field\n"
"3. \"trueFalse\": Evaluate statements as True/False/Not Given\n"
"4. \"fillBlanks\": Complete text using provided word bank\n"
"5. \"multipleChoice\": Select correct option from choices\n\n"
"Exercise templates:\n"
f"writeBlanks: {self._write_blanks()}\n"
f"matchSentences: {self._match_sentences()}\n"
f"trueFalse: {self._true_false()}\n"
f"fillBlanks: {self._fill_blanks()}\n"
f"multipleChoice: {self._multiple_choice()}\n\n"
"Important details to capture:\n"
"- Exercise section instructions and constraints\n"
"- Question numbering and grouping\n"
"- Word limits and formatting requirements\n"
"- Marking schemes and answer formats\n\n"
f"{tail}"
)
}

View File

@@ -0,0 +1,63 @@
import random
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class ParagraphMatch:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_paragraph_match_exercise(self, text: str, quantity: int, start_id: int):
paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. '
f'The paragraphs are these: {str(paragraphs)}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
headings = response["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
})
random.shuffle(paragraphs)
sentences = []
for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({
"id": i,
"sentence": paragraph["heading"],
"solution": paragraph["letter"]
})
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": options,
"prompt": "Choose the correct heading for paragraphs from the list of headings below.",
"sentences": sentences[:quantity],
"type": "matchSentences"
}

View File

@@ -0,0 +1,44 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class WriteBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, max_words: int = 3):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": max_words,
"prompt": f"Choose no more than {max_words} words and/or a number from the passage for each answer.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}

View File

@@ -0,0 +1,7 @@
from .true_false import TrueFalse
from .multiple_choice import MultipleChoice
__all__ = [
"TrueFalse",
"MultipleChoice"
]

View File

@@ -0,0 +1,46 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class MultipleChoice:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_multiple_choice(
self, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this text:\n"' + text + '"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["questions"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(questions, start_id)["questions"],
"type": "multipleChoice",
}

View File

@@ -0,0 +1,55 @@
import uuid
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import ExercisesHelper
from ielts_be.services import ILLMService
class TrueFalse:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, module: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
'{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
'Ensure that your statements accurately represent information or inferences from the text, and '
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
f'as appropriate.\n\nReference text:\n\n {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["prompts"]
if len(questions) > quantity:
questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)
for i, question in enumerate(questions, start=start_id):
question["id"] = str(i)
tail = (
"the information given in the Reading Passage"
if module == "reading" else
"what you've heard"
)
return {
"id": str(uuid.uuid4()),
"prompt": f"Do the following statements agree with {tail}?",
"questions": questions,
"type": "trueFalse"
}

View File

@@ -0,0 +1,168 @@
import logging
import re
from typing import Dict, List
from ielts_be.configs.constants import (
FieldsAndExercises, GPTModels, TemperatureSettings
)
from ielts_be.dtos.speaking import GradeSpeakingItem
from ielts_be.repositories import IFileStorage
from ielts_be.services import ISpeakingService, ILLMService, ISpeechToTextService
from .grade import GradeSpeaking
class SpeakingService(ISpeakingService):
def __init__(
self, llm: ILLMService,
file_storage: IFileStorage,
stt: ISpeechToTextService
):
self._llm = llm
self._file_storage = file_storage
self._stt = stt
self._logger = logging.getLogger(__name__)
self._grade = GradeSpeaking(llm, file_storage, stt)
# TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
# either the difficulty in the request or a random one yet the prompt doesn't change
self._tasks = {
"task_1": {
"get": {
"json_template": {
"first_topic": "topic 1",
"second_topic": "topic 2",
"questions": [
(
"Introductory question about the first topic, starting the topic with "
"'Let's talk about x' and then the question."
),
"Follow up question about the first topic",
"Follow up question about the first topic",
"Question about second topic",
"Follow up question about the second topic",
]
},
"prompt": (
'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
'that encourages candidates to delve deeply into personal experiences, preferences, or '
'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
'Make sure that the generated question does not contain forbidden subjects in '
'muslim countries.'
)
}
},
"task_2": {
"get": {
"json_template": {
"topic": "topic",
"question": "question",
"prompts": [
"prompt_1",
"prompt_2",
"prompt_3"
],
"suffix": "And explain why..."
},
"prompt": (
'Create a question of medium difficulty for IELTS Speaking Part 2 '
'that encourages candidates to narrate a personal experience or story related to the topic '
'of "{topic}". Include 3 prompts that guide the candidate to describe '
'specific aspects of the experience, such as details about the situation, '
'their actions, and the reasons it left a lasting impression. Make sure that the '
'generated question does not contain forbidden subjects in muslim countries.'
)
}
},
"task_3": {
"get": {
"json_template": {
"topic": "topic",
"questions": [
"Introductory question about the topic.",
"Follow up question about the topic",
"Follow up question about the topic",
"Follow up question about the topic",
"Follow up question about the topic"
]
},
"prompt": (
'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3'
'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
'related to the topic. Make sure that the generated question does not contain forbidden '
'subjects in muslim countries.'
)
}
},
}
async def get_speaking_part(
self, part: int, topic: str, second_topic: str, difficulty: str
) -> Dict:
task_values = self._tasks[f'task_{part}']['get']
if part == 1:
task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
else:
task_prompt = task_values["prompt"].format(topic=topic)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
f'{task_values["json_template"]}'
)
},
{
"role": "user",
"content": task_prompt
}
]
part_specific = {
"1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
"2": (
'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
'that start with "And explain why".'
)
}
if part in {1, 2}:
messages.append({
"role": "user",
"content": part_specific[str(part)]
})
if part in {1, 3}:
messages.append({
"role": "user",
"content": 'They must be 1 single question each and not be double-barreled questions.'
})
fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if part == 3:
# Remove the numbers from the questions only if the string starts with a number
response["questions"] = [
re.sub(r"^\d+\.\s*", "", question)
if re.match(r"^\d+\.", question) else question
for question in response["questions"]
]
response["type"] = part
response["difficulty"] = difficulty
if part in {2, 3}:
response["topic"] = topic
return response
async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
return await self._grade.grade_speaking_task(task, items)

View File

@@ -0,0 +1,316 @@
import asyncio
import os
import uuid
from logging import getLogger
from typing import Dict, List
import aiofiles
from ielts_be.configs.constants import GPTModels, TemperatureSettings, FilePaths
from ielts_be.dtos.speaking import GradeSpeakingItem
from ielts_be.helpers import TextHelper
from ielts_be.repositories import IFileStorage
from ielts_be.services import ILLMService, ISpeechToTextService
class GradeSpeaking:
def __init__(self, llm: ILLMService, file_storage: IFileStorage, stt: ISpeechToTextService):
self._llm = llm
self._file_storage = file_storage
self._stt = stt
self._logger = getLogger(__name__)
async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
request_id = str(uuid.uuid4())
self._log(task, request_id, f"Received request to grade speaking task {task}.")
if task != 2:
self._log(task, request_id, f'Received {len(items)} total answers.')
temp_files = []
try:
# Save all files first
temp_files = await asyncio.gather(*[
self.save_file(item) for item in items
])
# Process all transcriptions concurrently (up to 4)
self._log(task, request_id, 'Starting batch transcription')
text_answers = await asyncio.gather(*[
self._stt.speech_to_text(file_path)
for file_path in temp_files
])
for answer in text_answers:
self._log(task, request_id, f'Transcribed answer: {answer}')
if not TextHelper.has_x_words(answer, 20):
self._log(
task, request_id,
f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
)
return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
# Get perfect answers
self._log(task, request_id, 'Requesting perfect answers')
perfect_answers = await asyncio.gather(*[
self._get_perfect_answer(task, item.question)
for item in items
])
# Format the responses
if task in {1, 3}:
self._log(task, request_id, 'Formatting answers and questions for prompt.')
formatted_text = ""
for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
formatted_text += f"**Question {i}:**\n{item.question}\n\n"
formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
else:
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
self._log(task, request_id, 'Requesting grading of the answer(s).')
response = await self._grade_task(task, questions_and_answers)
self._log(task, request_id, f'Answer(s) graded: {response}')
if task in {1, 3}:
self._log(task, request_id, 'Adding perfect answer(s) to response.')
# TODO: check if it is answer["answer"] instead
for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer
self._log(task, request_id, 'Getting speaking corrections in parallel')
# Get all corrections in parallel
fixed_texts = await asyncio.gather(*[
self._get_speaking_corrections(answer)
for answer in text_answers
])
self._log(task, request_id, 'Adding transcript and fixed texts to response.')
for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = fixed
else:
response['transcript'] = text_answers[0]
self._log(task, request_id, 'Requesting fixed text.')
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
response['perfect_answer'] = perfect_answers[0]["answer"]
solutions = []
for file_name in temp_files:
solutions.append(await self._file_storage.upload_file_firebase_get_url(f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name))
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
response["solutions"] = solutions
if task in {1,3}:
response["answer"] = solutions
else:
response["fullPath"] = solutions[0]
self._log(task, request_id, f'Final response: {response}')
return response
finally:
for file_path in temp_files:
try:
if os.path.exists(file_path):
os.remove(file_path)
except Exception as e:
self._log(task, request_id, f'Error cleaning up temp file {file_path}: {str(e)}')
def _log(self, task: int, request_id: str, message: str):
self._logger.info(f'POST - speaking_task_{task} - {request_id} - {message}')
async def _get_perfect_answer(self, task: int, question: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
)
},
{
"role": "user",
"content": (
'Provide a perfect answer according to ielts grading system to the following '
f'Speaking Part {task} question: "{question}"'
)
}
]
if task == 1:
messages.append({
"role": "user",
"content": 'The answer must be 2 or 3 sentences long.'
})
gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO
return await self._llm.prediction(
gpt_model, messages, ["answer"], TemperatureSettings.GRADING_TEMPERATURE
)
async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}'
)
},
{
"role": "user",
"content": (
f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
'assign a score of 0 if the response fails to address the question. Additionally, provide '
'detailed commentary highlighting both strengths and weaknesses in the response.'
) + questions_and_answers
}
]
task_specific = {
"1": (
'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
'student that they should be.'
),
"2": 'Address the student as "you"',
"3": 'Address the student as "you" and pay special attention to coherence between the answers.'
}
messages.append({
"role": "user",
"content": task_specific[str(task)]
})
if task in {1, 3}:
messages.extend([
{
"role": "user",
"content": (
'For pronunciations act as if you heard the answers and they were transcribed '
'as you heard them.'
)
},
{
"role": "user",
"content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
}
])
return await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
)
@staticmethod
def _fix_speaking_overall(overall: float, task_response: dict):
grades = [category["grade"] for category in task_response.values()]
if overall > max(grades) or overall < min(grades):
total_sum = sum(grades)
average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
return overall
@staticmethod
def _zero_rating(comment: str):
return {
"comment": comment,
"overall": 0,
"task_response": {
"Fluency and Coherence": {
"grade": 0.0,
"comment": ""
},
"Lexical Resource": {
"grade": 0.0,
"comment": ""
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": ""
},
"Pronunciation": {
"grade": 0.0,
"comment": ""
}
}
}
async def _get_speaking_corrections(self, text):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"fixed_text": "fixed transcription with no misspelling errors"}'
)
},
{
"role": "user",
"content": (
'Fix the errors in the provided transcription and put it in a JSON. '
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
messages,
["fixed_text"],
0.2,
False
)
return response["fixed_text"]
@staticmethod
def _grade_template():
return {
"comment": "extensive comment about answer quality",
"overall": 0.0,
"task_response": {
"Fluency and Coherence": {
"grade": 0.0,
"comment": (
"extensive comment about fluency and coherence, use examples to justify the grade awarded."
)
},
"Lexical Resource": {
"grade": 0.0,
"comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": (
"extensive comment about grammatical range and accuracy, use examples to justify the "
"grade awarded."
)
},
"Pronunciation": {
"grade": 0.0,
"comment": (
"extensive comment about pronunciation on the transcribed answer, use examples to justify the "
"grade awarded."
)
}
}
}
@staticmethod
async def save_file(item: GradeSpeakingItem) -> str:
sound_file_name = "tmp/" + str(uuid.uuid4())
content = await item.answer.read()
async with aiofiles.open(sound_file_name, 'wb') as f:
await f.write(content)
return sound_file_name

View File

@@ -0,0 +1,80 @@
from typing import List, Dict, Optional
from fastapi import UploadFile
from ielts_be.repositories import IFileStorage
from ielts_be.services import IWritingService, ILLMService, IAIDetectorService
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from .academic import get_writing_args_academic
from .general import get_writing_args_general
from .grade import GradeWriting
class WritingService(IWritingService):
def __init__(self, llm: ILLMService, ai_detector: IAIDetectorService, file_storage: IFileStorage):
self._llm = llm
self._grade = GradeWriting(llm, file_storage, ai_detector)
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
*get_writing_args_general(task, topic, difficulty)
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
response = await self._llm.prediction(
llm_model,
messages,
["prompt"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
question = response["prompt"].strip()
return {
"question": self._add_newline_before_hyphen(question) if task == 1 else question,
"difficulty": difficulty,
"topic": topic
}
async def get_writing_task_academic_question(self, task: int, file: UploadFile, difficulty: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
*(await get_writing_args_academic(task, file))
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
response = await self._llm.prediction(
llm_model,
messages,
["prompt"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
question = response["prompt"].strip()
return {
"question": self._add_newline_before_hyphen(question) if task == 1 else question,
"difficulty": difficulty,
}
async def grade_writing_task(self, task: int, question: str, answer: str, attachment: Optional[str] = None):
return await self._grade.grade_writing_task(task, question, answer, attachment)
@staticmethod
def _add_newline_before_hyphen(s):
return s.replace(" -", "\n-")

View File

@@ -0,0 +1,48 @@
from base64 import b64encode
from typing import List, Dict
from fastapi.datastructures import UploadFile
async def get_writing_args_academic(task: int, attachment: UploadFile) -> List[Dict]:
writing_args = {
"1": {
"prompt": (
'Analyze the uploaded image and create a detailed IELTS Writing Task 1 Academic prompt.\n'
'Based on the visual data presented, craft a prompt that accurately reflects the image\'s '
'content, complexity, and academic nature.\n'
),
"instructions": (
'The generated prompt must:\n'
'1. Clearly describe the type of visual representation in the image\n'
'2. Provide a concise context for the data shown\n'
'3. End with the standard IELTS Task 1 Academic instruction:\n'
'"Summarise the information by selecting and reporting the main features, and make comparisons where relevant."'
)
},
}
if task == 2:
raise NotImplemented("Task 2 academic isn't implemented yet, current implementation still uses General Task 2 prompts.")
messages = [
{
"role": "user",
"content": writing_args[str(task)]["prompt"]
},
{
"role": "user",
"content": writing_args[str(task)]["instructions"]
}
]
if task == 1:
attachment_bytes = await attachment.read()
messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/{attachment.filename.split('.')[-1]};base64,{b64encode(attachment_bytes).decode('utf-8')}"
}
})
return messages

View File

@@ -0,0 +1,44 @@
from typing import List, Dict
def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dict]:
writing_args = {
"1": {
"prompt": (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
'advice, or instructions within the letter. Make sure that the generated prompt is '
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
),
"instructions": (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
'the answer should include.'
)
},
"2": {
# TODO: Should the muslim disclaimer be here as well?
"prompt": (
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
f'analysis of contrasting perspectives on the topic of "{topic}".'
),
"instructions": (
'The question should lead to an answer with either "theories", "complicated information" or '
'be "very descriptive" on the topic.'
)
}
}
messages = [
{
"role": "user",
"content": writing_args[str(task)]["prompt"]
},
{
"role": "user",
"content": writing_args[str(task)]["instructions"]
}
]
return messages

View File

@@ -0,0 +1,207 @@
import asyncio
from typing import Dict, Optional
from uuid import uuid4
from ielts_be.configs.constants import GPTModels, TemperatureSettings
from ielts_be.helpers import TextHelper, ExercisesHelper, FileHelper
from ielts_be.repositories import IFileStorage
from ielts_be.services import ILLMService, IAIDetectorService
class GradeWriting:
def __init__(self, llm: ILLMService, file_storage: IFileStorage, ai_detector: IAIDetectorService):
self._llm = llm
self._file_storage = file_storage
self._ai_detector = ai_detector
async def grade_writing_task(self, task: int, question: str, answer: str, attachment: Optional[str] = None):
bare_minimum = 100 if task == 1 else 180
if not TextHelper.has_words(answer):
return self._zero_rating("The answer does not contain enough english words.")
elif not TextHelper.has_x_words(answer, bare_minimum):
return self._zero_rating("The answer is insufficient and too small to be graded.")
else:
template = self._get_writing_template()
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {template}'
)
},
{
"role": "user",
"content": (
f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
'Additionally, provide a detailed commentary highlighting both strengths and '
'weaknesses in the response. '
f'\n Question: "{question}" \n Answer: "{answer}"')
}
]
if task == 1:
if attachment is None:
messages.append({
"role": "user",
"content": (
'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
'"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
)
})
else:
uuid = str(uuid4())
name = attachment.split('/')[-1]
out_path = f'./tmp/{uuid}/{name}'
path = await self._file_storage.download_firebase_file(attachment, out_path)
messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/{name.split('.')[-1]};base64,{FileHelper.encode_image(path)}"
}
})
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
temperature = (
TemperatureSettings.GRADING_TEMPERATURE
if task == 1 else
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
evaluation_promise = self._llm.prediction(
llm_model,
messages,
["comment"],
temperature
)
perfect_answer_minimum = 150 if task == 1 else 250
perfect_answer_promise = self._get_perfect_answer(question, perfect_answer_minimum)
fixed_text_promise = self._get_fixed_text(answer)
ai_detection_promise = self._ai_detector.run_detection(answer)
prediction_result, perfect_answer_result, fixed_text_result, ai_detection_result = await asyncio.gather(
evaluation_promise,
perfect_answer_promise,
fixed_text_promise,
ai_detection_promise
)
response = prediction_result
response["perfect_answer"] = perfect_answer_result["perfect_answer"]
response["overall"] = ExercisesHelper.fix_writing_overall(
response["overall"],
response["task_response"]
)
response['fixed_text'] = fixed_text_result
if ai_detection_result is not None:
response['ai_detection'] = ai_detection_result
return response
async def _get_fixed_text(self, text):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"fixed_text": "fixed test with no misspelling errors"}'
)
},
{
"role": "user",
"content": (
'Fix the errors in the given text and put it in a JSON. '
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
messages,
["fixed_text"],
0.2,
False
)
return response["fixed_text"]
async def _get_perfect_answer(self, question: str, size: int) -> Dict:
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"perfect_answer": "perfect answer for the question"}'
)
},
{
"role": "user",
"content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
},
{
"role": "user",
"content": f'The answer must have at least {size} words'
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["perfect_answer"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
@staticmethod
def _zero_rating(comment: str):
return {
'comment': comment,
'overall': 0,
'task_response': {
'Task Achievement': {
"grade": 0.0,
"comment": ""
},
'Coherence and Cohesion': {
"grade": 0.0,
"comment": ""
},
'Lexical Resource': {
"grade": 0.0,
"comment": ""
},
'Grammatical Range and Accuracy': {
"grade": 0.0,
"comment": ""
}
}
}
@staticmethod
def _get_writing_template():
return {
"comment": "comment about student's response quality",
"overall": 0.0,
"task_response": {
"Task Achievement": {
"grade": 0.0,
"comment": "comment about Task Achievement of the student's response"
},
"Coherence and Cohesion": {
"grade": 0.0,
"comment": "comment about Coherence and Cohesion of the student's response"
},
"Lexical Resource": {
"grade": 0.0,
"comment": "comment about Lexical Resource of the student's response"
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": "comment about Grammatical Range and Accuracy of the student's response"
}
}
}

View File

@@ -0,0 +1,15 @@
from .aws_polly import AWSPolly
from .heygen import Heygen
from .openai import OpenAI
from .whisper import OpenAIWhisper
from .gpt_zero import GPTZero
from .elai import ELAI
__all__ = [
"AWSPolly",
"Heygen",
"OpenAI",
"OpenAIWhisper",
"GPTZero",
"ELAI"
]

View File

@@ -0,0 +1,86 @@
import random
from aiobotocore.client import BaseClient
from ielts_be.dtos.listening import Dialog
from ielts_be.services import ITextToSpeechService
from ielts_be.configs.constants import NeuralVoices
class AWSPolly(ITextToSpeechService):
def __init__(self, client: BaseClient):
self._client = client
async def synthesize_speech(self, text: str, voice: str, engine: str = "neural", output_format: str = "mp3"):
tts_response = await self._client.synthesize_speech(
Engine=engine,
Text=text,
OutputFormat=output_format,
VoiceId=voice
)
return await tts_response['AudioStream'].read()
async def text_to_speech(self, dialog: Dialog) -> bytes:
if not dialog.conversation and not dialog.monologue:
raise ValueError("Unsupported argument for text_to_speech")
if not dialog.conversation:
audio_segments = await self._text_to_speech(dialog.monologue)
else:
audio_segments = await self._conversation_to_speech(dialog)
final_message = await self.synthesize_speech(
"This audio recording, for the listening exercise, has finished.",
"Stephen"
)
# Add finish message
audio_segments.append(final_message)
# Combine the audio segments into a single audio file
combined_audio = b"".join(audio_segments)
return combined_audio
# Save the combined audio to a single file
#async with aiofiles.open(file_name, "wb") as f:
# await f.write(combined_audio)
#print("Speech segments saved to " + file_name)
async def _text_to_speech(self, text: str):
voice = random.choice(NeuralVoices.ALL_NEURAL_VOICES)['Id']
audio_segments = []
for part in self._divide_text(text):
audio_segments.append(await self.synthesize_speech(part, voice))
return audio_segments
async def _conversation_to_speech(self, dialog: Dialog):
audio_segments = []
for convo_payload in dialog.conversation:
audio_segments.append(await self.synthesize_speech(convo_payload.text, convo_payload.voice))
return audio_segments
@staticmethod
def _divide_text(text, max_length=3000):
if len(text) <= max_length:
return [text]
divisions = []
current_position = 0
while current_position < len(text):
next_position = min(current_position + max_length, len(text))
next_period_position = text.rfind('.', current_position, next_position)
if next_period_position != -1 and next_period_position > current_position:
divisions.append(text[current_position:next_period_position + 1])
current_position = next_period_position + 1
else:
# If no '.' found in the next chunk, split at max_length
divisions.append(text[current_position:next_position])
current_position = next_position
return divisions

View File

@@ -0,0 +1,84 @@
from copy import deepcopy
from logging import getLogger
from httpx import AsyncClient
from ielts_be.dtos.video import Task, TaskStatus
from ielts_be.services import IVideoGeneratorService
class ELAI(IVideoGeneratorService):
_ELAI_ENDPOINT = 'https://apis.elai.io/api/v1/videos'
def __init__(self, client: AsyncClient, token: str, avatars: dict, *, conf: dict):
super().__init__(deepcopy(avatars))
self._http_client = client
self._conf = deepcopy(conf)
self._logger = getLogger(__name__)
self._GET_HEADER = {
"accept": "application/json",
"Authorization": f"Bearer {token}"
}
self._POST_HEADER = {
"accept": "application/json",
"content-type": "application/json",
"Authorization": f"Bearer {token}"
}
async def create_video(self, text: str, avatar: str):
avatar_url = self._avatars[avatar].get("avatar_url")
avatar_code = self._avatars[avatar].get("avatar_code")
avatar_gender = self._avatars[avatar].get("avatar_gender")
avatar_canvas = self._avatars[avatar].get("avatar_canvas")
voice_id = self._avatars[avatar].get("voice_id")
voice_provider = self._avatars[avatar].get("voice_provider")
self._conf["slides"][0]["canvas"]["objects"][0]["src"] = avatar_url
self._conf["slides"]["avatar"] = {
"code": avatar_code,
"gender": avatar_gender,
"canvas": avatar_canvas
}
self._conf["slides"]["speech"] = text
self._conf["slides"]["voice"] = voice_id
self._conf["slides"]["voiceProvider"] = voice_provider
response = await self._http_client.post(self._ELAI_ENDPOINT, headers=self._POST_HEADER, json=self._conf)
self._logger.info(response.status_code)
self._logger.info(response.json())
video_id = response.json()["_id"]
if video_id:
await self._http_client.post(f'{self._ELAI_ENDPOINT}/render/{video_id}', headers=self._GET_HEADER)
return Task(
result=video_id,
status=TaskStatus.STARTED,
)
else:
return Task(status=TaskStatus.ERROR)
async def pool_status(self, video_id: str) -> Task:
response = await self._http_client.get(f'{self._ELAI_ENDPOINT}/{video_id}', headers=self._GET_HEADER)
response_data = response.json()
if response_data['status'] == 'ready':
self._logger.info(response_data)
return Task(
status=TaskStatus.COMPLETED,
result=response_data.get('url')
)
elif response_data['status'] == 'failed':
self._logger.error('Video creation failed.')
return Task(
status=TaskStatus.ERROR,
result=response_data.get('url')
)
else:
self._logger.info('Video is still processing.')
return Task(
status=TaskStatus.IN_PROGRESS,
result=video_id
)

View File

@@ -0,0 +1,58 @@
{
"Gia": {
"avatar_code": "gia.business",
"avatar_gender": "female",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/gia/business/gia_business.png",
"avatar_canvas": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/gia/business/gia_business.png",
"voice_id": "EXAVITQu4vr4xnSDxMaL",
"voice_provider": "elevenlabs"
},
"Vadim": {
"avatar_code": "vadim.business",
"avatar_gender": "male",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/vadim/business/vadim_business.png",
"avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/vadim/business/vadim_business.png",
"voice_id": "flq6f7yk4E4fJM5XTYuZ",
"voice_provider": "elevenlabs"
},
"Orhan": {
"avatar_code": "orhan.business",
"avatar_gender": "male",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/orhan/business/orhan.png",
"avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/orhan/business/orhan.png",
"voice_id": "en-US-AndrewMultilingualNeural",
"voice_provider": "azure"
},
"Flora": {
"avatar_code": "flora.business",
"avatar_gender": "female",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/flora/business/flora_business.png",
"avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/flora/business/flora_business.png",
"voice_id": "en-US-JaneNeural",
"voice_provider": "azure"
},
"Scarlett": {
"avatar_code": "scarlett.business",
"avatar_gender": "female",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/scarlett/business/scarlett_business.png",
"avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/scarlett/business/scarlett_business.png",
"voice_id": "en-US-NancyNeural",
"voice_provider": "azure"
},
"Parker": {
"avatar_code": "parker.casual",
"avatar_gender": "male",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/parker/casual/parker_casual.png",
"avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/parker/casual/parker_casual.png",
"voice_id": "en-US-TonyNeural",
"voice_provider": "azure"
},
"Ethan": {
"avatar_code": "ethan.business",
"avatar_gender": "male",
"avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/ethan/business/ethan_business_low.png",
"avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/ethan/business/ethan_business_low.png",
"voice_id": "en-US-JasonNeural",
"voice_provider": "azure"
}
}

View File

@@ -0,0 +1,72 @@
{
"name": "API test",
"slides": [
{
"id": 1,
"canvas": {
"objects": [
{
"type": "avatar",
"left": 151.5,
"top": 36,
"fill": "#4868FF",
"scaleX": 0.3,
"scaleY": 0.3,
"width": 1080,
"height": 1080,
"avatarType": "transparent",
"animation": {
"type": null,
"exitType": null
}
},
{
"type": "image",
"version": "5.3.0",
"originX": "left",
"originY": "top",
"left": 30,
"top": 30,
"width": 800,
"height": 600,
"fill": "rgb(0,0,0)",
"stroke": null,
"strokeWidth": 0,
"strokeDashArray": null,
"strokeLineCap": "butt",
"strokeDashOffset": 0,
"strokeLineJoin": "miter",
"strokeUniform": false,
"strokeMiterLimit": 4,
"scaleX": 0.18821429,
"scaleY": 0.18821429,
"angle": 0,
"flipX": false,
"flipY": false,
"opacity": 1,
"shadow": null,
"visible": true,
"backgroundColor": "",
"fillRule": "nonzero",
"paintFirst": "fill",
"globalCompositeOperation": "source-over",
"skewX": 0,
"skewY": 0,
"cropX": 0,
"cropY": 0,
"id": 676845479989,
"src": "https://d3u63mhbhkevz8.cloudfront.net/production/uploads/66f5190349f943682dd776ff/en-coach-main-logo-800x600_sm1ype.jpg?Expires=1727654400&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9kM3U2M21oYmhrZXZ6OC5jbG91ZGZyb250Lm5ldC9wcm9kdWN0aW9uL3VwbG9hZHMvNjZmNTE5MDM0OWY5NDM2ODJkZDc3NmZmL2VuLWNvYWNoLW1haW4tbG9nby04MDB4NjAwX3NtMXlwZS5qcGciLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3Mjc2NTQ0MDB9fX1dfQ__&Signature=kTVzlDeS7cua2HiAE5G%7E-yFqbhu0bHraFH5SauUln7yuNXoX7vtiKIBYiL%7Eps3LCLEZS77arSZ7H%7EG8CKzabHDjAR-Y6Uc%7ELD5KQaMmk0jbAxbC3Wdoq6cfd0qIwEuodQYlC0It2WBidP8KsgOy3uUQ%7EvcBoqlb255yMFw4pHuptOBB1kPs%7EFyzDV0fnRNsKaYRcy0Fn2EFUp13axm0CZQclazuLFM622AyCydKMy0vfxV%7Etny3sskwPaUe2OANGMFg07Q1pRuy6fUON0DsbhAh1tA2H6-nnem5KbFwiZK3IIwwYGBx3H41ovzC6Ejt80Fd0%7EPSHw7GzVBnUmtP-IA__&Key-Pair-Id=K1Y7U91AR6T7E5",
"crossOrigin": "anonymous",
"filters": [],
"_exists": true
}
],
"background": "#ffffff",
"version": "4.4.0"
},
"animation": "fade_in",
"language": "English",
"voiceType": "text"
}
]
}

View File

@@ -0,0 +1,52 @@
from logging import getLogger
from typing import Dict, Optional
from httpx import AsyncClient
from ielts_be.services import IAIDetectorService
class GPTZero(IAIDetectorService):
_GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
def __init__(self, client: AsyncClient, gpt_zero_key: str):
self._header = {
'x-api-key': gpt_zero_key
}
self._http_client = client
self._logger = getLogger(__name__)
async def run_detection(self, text: str):
data = {
'document': text,
'version': '',
'multilingual': False
}
response = await self._http_client.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=data)
if response.status_code != 200:
return None
return self._parse_detection(response.json())
def _parse_detection(self, response: Dict) -> Optional[Dict]:
try:
text_scan = response["documents"][0]
filtered_sentences = [
{
"sentence": item["sentence"],
"highlight_sentence_for_ai": item["highlight_sentence_for_ai"]
}
for item in text_scan["sentences"]
]
return {
"class_probabilities": text_scan["class_probabilities"],
"confidence_category": text_scan["confidence_category"],
"predicted_class": text_scan["predicted_class"],
"sentences": filtered_sentences
}
except Exception as e:
self._logger.error(f'Failed to parse GPT\'s Zero response: {str(e)}')
return None

View File

@@ -0,0 +1,82 @@
import logging
from copy import deepcopy
from httpx import AsyncClient
from ielts_be.dtos.video import Task, TaskStatus
from ielts_be.services import IVideoGeneratorService
class Heygen(IVideoGeneratorService):
_GET_VIDEO_URL = 'https://api.heygen.com/v1/video_status.get'
def __init__(self, client: AsyncClient, token: str, avatars: dict):
super().__init__(deepcopy(avatars))
self._get_header = {
'X-Api-Key': token
}
self._post_header = {
'X-Api-Key': token,
'Content-Type': 'application/json'
}
self._http_client = client
self._logger = logging.getLogger(__name__)
async def create_video(self, text: str, avatar: str):
avatar = self._avatars[avatar]["id"]
create_video_url = f'https://api.heygen.com/v2/template/{avatar}/generate'
data = {
"test": False,
"caption": False,
"title": "video_title",
"variables": {
"script_here": {
"name": "script_here",
"type": "text",
"properties": {
"content": text
}
}
}
}
response = await self._http_client.post(create_video_url, headers=self._post_header, json=data)
self._logger.info(response.status_code)
self._logger.info(response.json())
video_id = response.json()["data"]["video_id"]
return Task(
result=video_id,
status=TaskStatus.STARTED,
)
async def poll_status(self, video_id: str) -> Task:
response = await self._http_client.get(self._GET_VIDEO_URL, headers=self._get_header, params={
'video_id': video_id
})
response_data = response.json()
status = response_data["data"]["status"]
error = response_data["data"]["error"]
if status != "completed" and error is None:
self._logger.info(f"Status: {status}")
return Task(
status=TaskStatus.IN_PROGRESS,
result=video_id
)
if error:
self._logger.error('Video creation failed.')
return Task(
status=TaskStatus.ERROR,
result=response_data.get('url')
)
url = response.json()['data']['video_url']
self._logger.info(f'Successfully generated video: {url}')
return Task(
status=TaskStatus.COMPLETED,
result=url
)

View File

@@ -0,0 +1,30 @@
{
"Matthew Noah": {
"id": "5912afa7c77c47d3883af3d874047aaf",
"avatar_gender": "male"
},
"Vera Cerise": {
"id": "9e58d96a383e4568a7f1e49df549e0e4",
"avatar_gender": "female"
},
"Edward Tony": {
"id": "d2cdd9c0379a4d06ae2afb6e5039bd0c",
"avatar_gender": "male"
},
"Tanya Molly": {
"id": "045cb5dcd00042b3a1e4f3bc1c12176b",
"avatar_gender": "female"
},
"Kayla Abbi": {
"id": "1ae1e5396cc444bfad332155fdb7a934",
"avatar_gender": "female"
},
"Jerome Ryan": {
"id": "0ee6aa7cc1084063a630ae514fccaa31",
"avatar_gender": "male"
},
"Tyler Christopher": {
"id": "5772cff935844516ad7eeff21f839e43",
"avatar_gender": "male"
}
}

View File

@@ -0,0 +1,153 @@
import json
import re
import logging
from typing import List, Optional, Callable, TypeVar
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from ielts_be.services.abc import ILLMService
from ielts_be.helpers import count_tokens
from ielts_be.configs.constants import BLACKLISTED_WORDS
from pydantic import BaseModel
T = TypeVar('T', bound=BaseModel)
class OpenAI(ILLMService):
MAX_TOKENS = 4097
TRY_LIMIT = 2
def __init__(self, client: AsyncOpenAI):
self._client = client
self._logger = logging.getLogger(__name__)
self._default_model = "gpt-4o"
async def prediction(
self,
model: str,
messages: List[ChatCompletionMessageParam],
fields_to_check: Optional[List[str]],
temperature: float,
check_blacklisted: bool = True,
token_count: int = -1
):
if token_count == -1:
token_count = self._count_total_tokens(messages)
return await self._prediction(model, messages, token_count, fields_to_check, temperature, 0, check_blacklisted)
async def _prediction(
self,
model: str,
messages: List[ChatCompletionMessageParam],
token_count: int,
fields_to_check: Optional[List[str]],
temperature: float,
try_count: int,
check_blacklisted: bool,
):
result = await self._client.chat.completions.create(
model=model,
max_tokens=int(self.MAX_TOKENS - token_count - 300),
temperature=float(temperature),
messages=messages,
response_format={"type": "json_object"}
)
result = result.choices[0].message.content
if check_blacklisted:
found_blacklisted_word = self._get_found_blacklisted_words(result)
if found_blacklisted_word is not None and try_count < self.TRY_LIMIT:
self._logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
return await self._prediction(
model, messages, token_count, fields_to_check, temperature, (try_count + 1), check_blacklisted
)
elif found_blacklisted_word is not None and try_count >= self.TRY_LIMIT:
return ""
if fields_to_check is None:
return json.loads(result)
if not self._check_fields(result, fields_to_check) and try_count < self.TRY_LIMIT:
return await self._prediction(
model, messages, token_count, fields_to_check, temperature, (try_count + 1), check_blacklisted
)
return json.loads(result)
async def prediction_override(self, **kwargs):
return await self._client.chat.completions.create(
**kwargs
)
@staticmethod
def _get_found_blacklisted_words(text: str):
text_lower = text.lower()
for word in BLACKLISTED_WORDS:
if re.search(r'\b' + re.escape(word) + r'\b', text_lower):
return word
return None
@staticmethod
def _count_total_tokens(messages):
total_tokens = 0
for message in messages:
total_tokens += count_tokens(message["content"])["n_tokens"]
return total_tokens
@staticmethod
def _check_fields(obj, fields):
return all(field in obj for field in fields)
async def pydantic_prediction(
self,
messages: List[ChatCompletionMessageParam],
map_to_model: Callable,
json_scheme: str,
*,
model: Optional[str] = None,
temperature: Optional[float] = None,
max_retries: int = 3
) -> List[T] | T | None:
params = {
"messages": messages,
"response_format": {"type": "json_object"},
"model": model if model else self._default_model
}
if temperature:
params["temperature"] = temperature
attempt = 0
while attempt < 3:
result = await self._client.chat.completions.create(**params)
result_content = result.choices[0].message.content
try:
result_json = json.loads(result_content)
print(str(result_json))
return map_to_model(result_json)
except Exception as e:
attempt += 1
self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
params["messages"] = [
{
"role": "user",
"content": (
"Your previous response wasn't in the json format I've explicitly told you to output. "
f"In your next response, you will fix it and return me just the json I've asked."
)
},
{
"role": "user",
"content": (
f"Previous response: {result_content}\n"
f"JSON format: {json_scheme}"
f"Validation errors: {e}"
)
}
]
if attempt >= max_retries:
self._logger.error(f"Max retries exceeded!")
return None

View File

@@ -0,0 +1,106 @@
import threading
import whisper
import asyncio
import numpy as np
import soundfile as sf
import librosa
from concurrent.futures import ThreadPoolExecutor
from typing import Dict
from logging import getLogger
from whisper import Whisper
from ielts_be.services import ISpeechToTextService
"""
The whisper model is not thread safe, a thread pool
with 4 whisper models will be created so it can
process up to 4 transcriptions at a time.
The base model requires ~1GB so 4 instances is the safe bet:
https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages
"""
class OpenAIWhisper(ISpeechToTextService):
def __init__(self, model_name: str = "base", num_models: int = 4):
self._model_name = model_name
self._num_models = num_models
self._models: Dict[int, 'Whisper'] = {}
self._lock = threading.Lock()
self._next_model_id = 0
self._is_closed = False
self._logger = getLogger(__name__)
for i in range(num_models):
self._models[i] = whisper.load_model(self._model_name, in_memory=True)
self._executor = ThreadPoolExecutor(
max_workers=num_models,
thread_name_prefix="whisper_worker"
)
def get_model(self) -> 'Whisper':
with self._lock:
model_id = self._next_model_id
self._next_model_id = (self._next_model_id + 1) % self._num_models
return self._models[model_id]
async def speech_to_text(self, path: str) -> str:
def transcribe():
try:
audio, sr = sf.read(path)
# Convert to mono first to reduce memory usage
if len(audio.shape) > 1:
audio = audio.mean(axis=1)
# Resample from 48kHz to 16kHz
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
# Normalize to [-1, 1] range
audio = audio.astype(np.float32)
if np.max(np.abs(audio)) > 0:
audio = audio / np.max(np.abs(audio))
# Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
max_samples = 480000
if len(audio) > max_samples:
chunks = []
for i in range(0, len(audio), max_samples):
chunk = audio[i:i + max_samples]
chunks.append(chunk)
model = self.get_model()
texts = []
for chunk in chunks:
result = model.transcribe(
chunk,
fp16=False,
language='English',
verbose=False
)["text"]
texts.append(result)
return " ".join(texts)
else:
model = self.get_model()
return model.transcribe(
audio,
fp16=False,
language='English',
verbose=False
)["text"]
except Exception as e:
raise
loop = asyncio.get_running_loop()
return await loop.run_in_executor(self._executor, transcribe)
def close(self):
with self._lock:
if not self._is_closed:
self._is_closed = True
if self._executor:
self._executor.shutdown(wait=True, cancel_futures=True)
def __del__(self):
self.close()

View File

@@ -0,0 +1,7 @@
from .training import TrainingService
from .kb import TrainingContentKnowledgeBase
__all__ = [
"TrainingService",
"TrainingContentKnowledgeBase"
]

View File

@@ -0,0 +1,88 @@
import json
import os
from logging import getLogger
from typing import Dict, List
import faiss
import pickle
from ielts_be.services import IKnowledgeBase
class TrainingContentKnowledgeBase(IKnowledgeBase):
def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
self._embedding_model = embeddings
self._tips = None # self._read_json(path)
self._category_metadata = None
self._indices = None
self.load_indices_and_metadata()
self._logger = getLogger(__name__)
@staticmethod
def _read_json(path: str) -> Dict[str, any]:
with open(path, 'r', encoding="utf-8") as json_file:
return json.loads(json_file.read())
def print_category_count(self):
category_tips = {}
for unit in self._tips['units']:
for page in unit['pages']:
for tip in page['tips']:
category = tip['category'].lower().replace(" ", "_")
if category not in category_tips:
category_tips[category] = 0
else:
category_tips[category] = category_tips[category] + 1
print(category_tips)
def create_embeddings_and_save_them(self) -> None:
category_embeddings = {}
category_metadata = {}
for unit in self._tips['units']:
for page in unit['pages']:
for tip in page['tips']:
category = tip['category'].lower().replace(" ", "_")
if category not in category_embeddings:
category_embeddings[category] = []
category_metadata[category] = []
category_embeddings[category].append(tip['embedding'])
category_metadata[category].append({"id": tip['id'], "text": tip['text']})
category_indices = {}
for category, embeddings in category_embeddings.items():
embeddings_array = self._embedding_model.encode(embeddings)
index = faiss.IndexFlatL2(embeddings_array.shape[1])
index.add(embeddings_array)
category_indices[category] = index
faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")
with open("./faiss/tips_metadata.pkl", "wb") as f:
pickle.dump(category_metadata, f)
def load_indices_and_metadata(
self,
directory: str = './faiss',
suffix: str = '_tips_index.faiss',
metadata_path: str = './faiss/tips_metadata.pkl'
):
files = os.listdir(directory)
self._indices = {}
for file in files:
if file.endswith(suffix):
self._indices[file[:-len(suffix)]] = faiss.read_index(f'{directory}/{file}')
self._logger.info(f'Loaded embeddings for {file[:-len(suffix)]} category.')
with open(metadata_path, 'rb') as f:
self._category_metadata = pickle.load(f)
self._logger.info("Loaded tips metadata")
def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
query_embedding = self._embedding_model.encode([query])
index = self._indices[category]
D, I = index.search(query_embedding, top_k)
results = [self._category_metadata[category][i] for i in I[0]]
return results

View File

@@ -0,0 +1,458 @@
import re
from datetime import datetime
from functools import reduce
from logging import getLogger
from typing import Dict
from ielts_be.configs.constants import TemperatureSettings, GPTModels
from ielts_be.helpers import count_tokens
from ielts_be.repositories import IDocumentStore
from ielts_be.services import ILLMService, ITrainingService, IKnowledgeBase
from ielts_be.dtos.training import *
class TrainingService(ITrainingService):
TOOLS = [
'critical_thinking',
'language_for_writing',
'reading_skills',
'strategy',
'words',
'writing_skills'
]
# strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
def __init__(self, llm: ILLMService, document_store: IDocumentStore, training_kb: IKnowledgeBase):
self._llm = llm
self._db = document_store
self._kb = training_kb
self._logger = getLogger(__name__)
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
messages = self._get_question_tips(question, answer, correct_answer, context)
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
messages,
None,
TemperatureSettings.TIPS_TEMPERATURE,
token_count=token_count
)
if isinstance(response, str):
response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
return response
@staticmethod
def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
messages = [
{
"role": "user",
"content": (
"You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
"help students understand why it was a wrong answer and gives helpful insight for the future. "
"The tip should refer to the context and question."
),
}
]
if not (context is None or context == ""):
messages.append({
"role": "user",
"content": f"This is the context for the question: {context}",
})
messages.extend([
{
"role": "user",
"content": f"This is the question: {question}",
},
{
"role": "user",
"content": f"This is the answer: {answer}",
},
{
"role": "user",
"content": f"This is the correct answer: {correct_answer}",
}
])
return messages
async def get_training_content(self, training_content: Dict) -> Dict:
user, stats = training_content["userID"], training_content["stats"]
exam_data, exam_map = await self._sort_out_solutions(stats)
training_content = await self._get_exam_details_and_tips(exam_data)
tips = self._query_kb(training_content.queries)
usefull_tips = await self._get_usefull_tips(exam_data, tips)
exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)
weak_areas = {"weak_areas": []}
for area in training_content.weak_areas:
weak_areas["weak_areas"].append(area.dict())
training_doc = {
'created_at': int(datetime.now().timestamp() * 1000),
**exam_map,
**usefull_tips.dict(),
**weak_areas,
"user": user
}
new_id = await self._db.save_to_db('training', training_doc)
return {
"id": new_id
}
@staticmethod
def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
new_exam_map = {"exams": []}
for detail in details:
new_exam_map["exams"].append({
"id": detail.exam_id,
"date": detail.date,
"performance_comment": detail.performance_comment,
"detailed_summary": detail.detailed_summary,
**exam_map[detail.exam_id]
})
return new_exam_map
def _query_kb(self, queries: List[QueryDTO]):
map_categories = {
"critical_thinking": "ct_focus",
"language_for_writing": "language_for_writing",
"reading_skills": "reading_skill",
"strategy": "strategy",
"writing_skills": "writing_skill"
}
tips = {"tips": []}
for query in queries:
if query.category == "words":
tips["tips"].extend(
self._kb.query_knowledge_base(query.text, "word_link")
)
tips["tips"].extend(
self._kb.query_knowledge_base(query.text, "word_partners")
)
else:
if query.category in map_categories:
tips["tips"].extend(
self._kb.query_knowledge_base(query.text, map_categories[query.category])
)
else:
self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
return tips
async def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
json_schema = (
'{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
)
messages = [
{
"role": "user",
"content": (
f"I'm going to provide you with exam data, you will take the exam data and fill this json "
f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
'summary of the student\'s performance, "weak_areas" are identified areas'
' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
' with sentence structure and punctuation.", the "queries" field is where you will write queries '
'for tips that will be displayed to the student, the category attribute is a collection of '
'embeddings and the text will be the text used to query the knowledge base. The categories are '
f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
'"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
)
},
{
"role": "user",
"content": f'Exam Data: {str(exam_data)}'
}
]
return await self._llm.pydantic_prediction(messages, self._map_gpt_response, json_schema)
async def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
json_schema = (
'{ "tip_ids": [] }'
)
messages = [
{
"role": "user",
"content": (
f"I'm going to provide you with tips and I want you to return to me the tips that "
f"can be usefull for the student that made the exam that I'm going to send you, return "
f"me the tip ids in this json format {json_schema}."
)
},
{
"role": "user",
"content": f'Exam Data: {str(exam_data)}'
},
{
"role": "user",
"content": f'Tips: {str(tips)}'
}
]
return await self._llm.pydantic_prediction(messages, lambda response: TipsDTO(**response), json_schema)
@staticmethod
def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
parsed_response = {
"details": [DetailsDTO(**detail) for detail in response["details"]],
"weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
"queries": [QueryDTO(**query) for query in response["queries"]]
}
return TrainingContentDTO(**parsed_response)
async def _sort_out_solutions(self, stats):
grouped_stats = {}
for stat in stats:
session_key = f'{str(stat["date"])}-{stat["user"]}'
module = stat["module"]
exam_id = stat["exam"]
if session_key not in grouped_stats:
grouped_stats[session_key] = {}
if module not in grouped_stats[session_key]:
grouped_stats[session_key][module] = {
"stats": [],
"exam_id": exam_id
}
grouped_stats[session_key][module]["stats"].append(stat)
exercises = {}
exam_map = {}
for session_key, modules in grouped_stats.items():
exercises[session_key] = {}
for module, module_stats in modules.items():
exercises[session_key][module] = {}
exam_id = module_stats["exam_id"]
if exam_id not in exercises[session_key][module]:
exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
exam_total_questions = 0
exam_total_correct = 0
for stat in module_stats["stats"]:
exam_total_questions += stat["score"]["total"]
exam_total_correct += stat["score"]["correct"]
exercises[session_key][module][exam_id]["date"] = stat["date"]
if session_key not in exam_map:
exam_map[session_key] = {"stat_ids": [], "score": 0}
exam_map[session_key]["stat_ids"].append(stat["id"])
exam = await self._db.get_doc_by_id(module, exam_id)
if module == "listening":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_listening_solutions(stat, exam))
elif module == "reading":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_reading_solutions(stat, exam))
elif module == "writing":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_writing_prompts_and_answers(stat, exam)
)
elif module == "speaking":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_speaking_solutions(stat, exam)
)
elif module == "level":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_level_solutions(stat, exam)
)
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
exam_map[session_key]["module"] = module
return {"exams": exercises}, exam_map
def _get_writing_prompts_and_answers(self, stat, exam):
result = []
try:
exercises = []
for solution in stat['solutions']:
answer = solution['solution']
exercise_id = solution['id']
exercises.append({
"exercise_id": exercise_id,
"answer": answer
})
for exercise in exercises:
for exam_exercise in exam["exercises"]:
if exam_exercise["id"] == exercise["exercise_id"]:
result.append({
"exercise": exam_exercise["prompt"],
"answer": exercise["answer"]
})
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
@staticmethod
def _get_mc_question(exercise, stat):
shuffle_maps = stat.get("shuffleMaps", [])
answer = stat["solutions"] if len(shuffle_maps) == 0 else []
if len(shuffle_maps) != 0:
for solution in stat["solutions"]:
shuffle_map = [
item["map"] for item in shuffle_maps
if item["questionID"] == solution["question"]
]
answer.append({
"question": solution["question"],
"option": shuffle_map[solution["option"]]
})
return {
"question": exercise["prompt"],
"exercise": exercise["questions"],
"answer": stat["solutions"]
}
@staticmethod
def _swap_key_name(d, original_key, new_key):
d[new_key] = d.pop(original_key)
return d
def _get_level_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "fillBlanks":
result.append({
"prompt": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": [
self._swap_key_name(item, 'solution', 'option')
for item in stat["solutions"]
]
})
elif stat["type"] == "multipleChoice":
result.append(self._get_mc_question(exercise, stat))
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
def _get_listening_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "writeBlanks":
result.append({
"question": exercise["prompt"],
"template": exercise["text"],
"solution": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "fillBlanks":
result.append({
"question": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "multipleChoice":
result.append(self._get_mc_question(exercise, stat))
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
@staticmethod
def _find_shuffle_map(shuffle_maps, question_id):
return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
def _get_speaking_solutions(self, stat, exam):
result = {}
try:
result = {
"comments": {
key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
,
"exercises": {}
}
for exercise in exam["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "interactiveSpeaking":
for i in range(len(exercise["prompts"])):
result["exercises"][f"exercise_{i+1}"] = {
"question": exercise["prompts"][i]["text"]
}
for i in range(len(exercise["prompts"])):
answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
result["exercises"][f"exercise_{i+1}"]["answer"] = answer
elif stat["type"] == "speaking":
result["exercises"]["exercise_1"] = {
"question": exercise["text"],
"answer": stat['solutions'][0]["evaluation"].get(f'transcript', '')
}
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return [result]
def _get_reading_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
text = part["text"]
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "fillBlanks":
result.append({
"text": text,
"question": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "writeBlanks":
result.append({
"text": text,
"question": exercise["prompt"],
"template": exercise["text"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "trueFalse":
result.append({
"text": text,
"questions": exercise["questions"],
"answer": stat["solutions"]
})
elif stat["type"] == "matchSentences":
result.append({
"text": text,
"question": exercise["prompt"],
"sentences": exercise["sentences"],
"options": exercise["options"],
"answer": stat["solutions"]
})
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result

View File

@@ -0,0 +1,188 @@
import os
import subprocess
import time
import uuid
from datetime import datetime
from logging import getLogger
import pandas as pd
import shortuuid
from ielts_be.dtos.user_batch import BatchUsersDTO, UserDTO
from ielts_be.helpers import FileHelper
from ielts_be.repositories import IDocumentStore
from ielts_be.services import IUserService
class UserService(IUserService):
_DEFAULT_DESIRED_LEVELS = {
"reading": 9,
"listening": 9,
"writing": 9,
"speaking": 9,
}
_DEFAULT_LEVELS = {
"reading": 0,
"listening": 0,
"writing": 0,
"speaking": 0,
}
def __init__(self, document_store: IDocumentStore):
self._db = document_store
self._logger = getLogger(__name__)
def batch_users(self, batch_dto: BatchUsersDTO):
file_name = f'{uuid.uuid4()}.csv'
path = f'./tmp/{file_name}'
self._generate_firebase_auth_csv(batch_dto, path)
result = self._upload_users('./tmp', file_name)
if result.returncode != 0:
error_msg = f"Couldn't upload users. Failed to run command firebase auth import -> ```cmd {result.stdout}```"
self._logger.error(error_msg)
return error_msg
self._init_users(batch_dto)
FileHelper.remove_file(path)
return {"ok": True}
@staticmethod
def _generate_firebase_auth_csv(batch_dto: BatchUsersDTO, path: str):
# https://firebase.google.com/docs/cli/auth#file_format
columns = [
'UID', 'Email', 'Email Verified', 'Password Hash', 'Password Salt', 'Name',
'Photo URL', 'Google ID', 'Google Email', 'Google Display Name', 'Google Photo URL',
'Facebook ID', 'Facebook Email', 'Facebook Display Name', 'Facebook Photo URL',
'Twitter ID', 'Twitter Email', 'Twitter Display Name', 'Twitter Photo URL',
'GitHub ID', 'GitHub Email', 'GitHub Display Name', 'GitHub Photo URL',
'User Creation Time', 'Last Sign-In Time', 'Phone Number'
]
users_data = []
current_time = int(time.time() * 1000)
for user in batch_dto.users:
user_data = {
'UID': str(user.id),
'Email': user.email,
'Email Verified': False,
'Password Hash': user.passwordHash,
'Password Salt': user.passwordSalt,
'Name': '',
'Photo URL': '',
'Google ID': '',
'Google Email': '',
'Google Display Name': '',
'Google Photo URL': '',
'Facebook ID': '',
'Facebook Email': '',
'Facebook Display Name': '',
'Facebook Photo URL': '',
'Twitter ID': '',
'Twitter Email': '',
'Twitter Display Name': '',
'Twitter Photo URL': '',
'GitHub ID': '',
'GitHub Email': '',
'GitHub Display Name': '',
'GitHub Photo URL': '',
'User Creation Time': current_time,
'Last Sign-In Time': '',
'Phone Number': ''
}
users_data.append(user_data)
df = pd.DataFrame(users_data, columns=columns)
df.to_csv(path, index=False, header=False)
@staticmethod
def _upload_users(directory: str, file_name: str):
command = (
f'firebase auth:import {file_name} '
f'--hash-algo=SCRYPT '
f'--hash-key={os.getenv("FIREBASE_SCRYPT_B64_SIGNER_KEY")} '
f'--salt-separator={os.getenv("FIREBASE_SCRYPT_B64_SALT_SEPARATOR")} '
f'--rounds={os.getenv("FIREBASE_SCRYPT_ROUNDS")} '
f'--mem-cost={os.getenv("FIREBASE_SCRYPT_MEM_COST")} '
f'--project={os.getenv("FIREBASE_PROJECT_ID")} '
)
result = subprocess.run(command, shell=True, cwd=directory, capture_output=True, text=True)
return result
async def _init_users(self, batch_users: BatchUsersDTO):
maker_id = batch_users.makerID
for user in batch_users.users:
await self._insert_new_user(user)
await self._create_code(user, maker_id)
if user.groupName and len(user.groupName.strip()) > 0:
await self._assign_user_to_group_by_name(user, maker_id)
async def _insert_new_user(self, user: UserDTO):
new_user = {
**user.dict(exclude={
'passport_id', 'groupName', 'expiryDate',
'corporate', 'passwordHash', 'passwordSalt'
}),
'bio': "",
'focus': "academic",
'status': "active",
'desiredLevels': self._DEFAULT_DESIRED_LEVELS,
'profilePicture': "/defaultAvatar.png",
'levels': self._DEFAULT_LEVELS,
'isFirstLogin': False,
'isVerified': True,
'registrationDate': datetime.now(),
'subscriptionExpirationDate': user.expiryDate,
'entities': user.entities
}
await self._db.save_to_db("users", new_user, str(user.id))
async def _create_code(self, user: UserDTO, maker_id: str) -> str:
code = shortuuid.ShortUUID().random(length=6)
await self._db.save_to_db("codes", {
'id': code,
'code': code,
'creator': maker_id,
'expiryDate': user.expiryDate,
'type': user.type,
'creationDate': datetime.now(),
'userId': str(user.id),
'email': user.email,
'name': user.name,
'passport_id': user.passport_id
}, code)
return code
async def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
user_id = str(user.id)
groups = await self._db.find("groups", {
"admin": maker_id,
"name": user.groupName.strip()
})
if len(groups) == 0:
new_group = {
'admin': maker_id,
'name': user.groupName.strip(),
'participants': [user_id],
'disableEditing': False,
}
await self._db.save_to_db("groups", new_group, str(uuid.uuid4()))
else:
group = groups[0]
participants = group["participants"]
if user_id not in participants:
participants.append(user_id)
await self._db.update(
"groups",
{"id": group["id"]},
{"$set": {"participants": participants}}
)