Changes to endpoints so they allow to only get context and then the exercises as well as tidying up a bit

This commit is contained in:
Carlos-Mesquita
2024-11-04 23:31:48 +00:00
parent 2a032c5aba
commit 84ed2f2f6a
83 changed files with 4229 additions and 1843 deletions

View File

@@ -1,5 +1,191 @@
from .level import LevelService
from typing import Dict, Optional
from fastapi import UploadFile
__all__ = [
"LevelService"
]
from app.dtos.level import LevelExercisesDTO
from app.repositories.abc import IDocumentStore
from app.services.abc import (
ILevelService, ILLMService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
from .exercises import MultipleChoice, BlankSpace, PassageUtas, FillBlanks
from .full_exams import CustomLevelModule, LevelUtas
from .upload import UploadLevelModule
class LevelService(ILevelService):
def __init__(
self,
llm: ILLMService,
document_store: IDocumentStore,
mc_variants: Dict,
reading_service: IReadingService,
writing_service: IWritingService,
speaking_service: ISpeakingService,
listening_service: IListeningService
):
self._llm = llm
self._document_store = document_store
self._reading_service = reading_service
self._upload_module = UploadLevelModule(llm)
self._mc_variants = mc_variants
self._mc = MultipleChoice(llm, mc_variants)
self._blank_space = BlankSpace(llm, mc_variants)
self._passage_utas = PassageUtas(llm, reading_service, mc_variants)
self._fill_blanks = FillBlanks(llm)
self._level_utas = LevelUtas(llm, self, mc_variants)
self._custom = CustomLevelModule(
llm, self, reading_service, listening_service, writing_service, speaking_service
)
async def upload_level(self, upload: UploadFile) -> Dict:
return await self._upload_module.generate_level_from_file(upload)
async def generate_exercises(self, dto: LevelExercisesDTO):
exercises = []
start_id = 1
for req_exercise in dto.exercises:
if req_exercise.type == "multipleChoice":
questions = await self._mc.gen_multiple_choice("normal", req_exercise.quantity, start_id)
exercises.append(questions)
elif req_exercise.type == "mcBlank":
questions = await self._mc.gen_multiple_choice("blank_space", req_exercise.quantity, start_id)
questions["variant"] = "mc"
exercises.append(questions)
elif req_exercise.type == "mcUnderline":
questions = await self._mc.gen_multiple_choice("underline", req_exercise.quantity, start_id)
exercises.append(questions)
elif req_exercise.type == "blankSpaceText":
questions = await self._blank_space.gen_blank_space_text_utas(
req_exercise.quantity, start_id, req_exercise.text_size, req_exercise.topic
)
exercises.append(questions)
elif req_exercise.type == "passageUtas":
questions = await self._passage_utas.gen_reading_passage_utas(
start_id, req_exercise.mc_qty, req_exercise.text_size
)
exercises.append(questions)
elif req_exercise.type == "fillBlanksMC":
questions = await self._passage_utas.gen_reading_passage_utas(
start_id, req_exercise.mc_qty, req_exercise.text_size
)
exercises.append(questions)
start_id = start_id + req_exercise.quantity
return exercises
# Just here to support other modules that I don't know if they are supposed to still be used
async def gen_multiple_choice(self, mc_variant: str, quantity: int, start_id: int = 1):
return await self._mc.gen_multiple_choice(mc_variant, quantity, start_id)
async def gen_reading_passage_utas(self, start_id, mc_quantity: int, topic=Optional[str]): # sa_quantity: int,
return await self._passage_utas.gen_reading_passage_utas(start_id, mc_quantity, topic)
async def gen_blank_space_text_utas(self, quantity: int, start_id: int, size: int, topic: str):
return await self._blank_space.gen_blank_space_text_utas(quantity, start_id, size, topic)
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
pass
async def get_level_utas(self):
return await self._level_utas.get_level_utas()
async def get_custom_level(self, data: Dict):
return await self._custom.get_custom_level(data)
"""
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
mc_template = self._mc_variants[mc_variant]["questions"][0]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": (
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
f'it can be easy, intermediate or advanced.'
)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
"""
"""
async def _replace_exercise_if_exists(
self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
mc_variant, utas
)
else:
seen_keys.add(key)
if not utas:
for exam in all_exams:
exam_dict = exam.to_dict()
if len(exam_dict.get("parts", [])) > 0:
exercise_dict = exam_dict.get("parts", [])[0]
if len(exercise_dict.get("exercises", [])) > 0:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exercise_dict.get("exercises", [])[0]["questions"]
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
else:
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
return current_exercise, seen_keys
"""

View File

@@ -0,0 +1,11 @@
from .multiple_choice import MultipleChoice
from .blank_space import BlankSpace
from .passage_utas import PassageUtas
from .fillBlanks import FillBlanks
__all__ = [
"MultipleChoice",
"BlankSpace",
"PassageUtas",
"FillBlanks"
]

View File

@@ -0,0 +1,44 @@
import random
from app.configs.constants import EducationalContent, GPTModels, TemperatureSettings
from app.services.abc import ILLMService
class BlankSpace:
def __init__(self, llm: ILLMService, mc_variants: dict):
self._llm = llm
self._mc_variants = mc_variants
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=None
):
if not topic:
topic = random.choice(EducationalContent.MTI_TOPICS)
json_template = self._mc_variants["blank_space_text"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. '
'Put the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]

View File

@@ -0,0 +1,73 @@
import random
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from app.services.abc import ILLMService
class FillBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_fill_blanks(
self, quantity: int, start_id: int, size: int, topic=None
):
if not topic:
topic = random.choice(EducationalContent.MTI_TOPICS)
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {self._fill_blanks_mc_template()}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. '
'For each removed word you will place it in the solutions array and assign a letter from A to D,'
' then you will place that removed word and the chosen letter on the words array along with '
' other 3 other words for the remaining letter. This is a fill blanks question for an english '
'exam, so don\'t choose words completely at random.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
**question,
"type": "fillBlanks",
"variant": "mc",
"prompt": "Click a blank to select the appropriate word for it.",
}
@staticmethod
def _fill_blanks_mc_template():
return {
"text": "",
"solutions": [
{
"id": "",
"solution": ""
}
],
"words": [
{
"id": "",
"options": {
"A": "",
"B": "",
"C": "",
"D": ""
}
}
]
}

View File

@@ -0,0 +1,84 @@
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class MultipleChoice:
def __init__(self, llm: ILLMService, mc_variants: dict):
self._llm = llm
self._mc_variants = mc_variants
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1
):
mc_template = self._mc_variants[mc_variant]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
gen_multiple_choice_for_text: str = (
'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
'punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return ExercisesHelper.fix_exercise_ids(questions, start_id)
"""
if len(question["questions"]) != quantity:
return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
else:
if not utas:
all_exams = await self._document_store.get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
"type": "multipleChoice",
}
else:
if all_exams is not None:
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response
"""

View File

@@ -0,0 +1,93 @@
from typing import Optional
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService, IReadingService
class PassageUtas:
def __init__(self, llm: ILLMService, reading_service: IReadingService, mc_variants: dict):
self._llm = llm
self._reading_service = reading_service
self._mc_variants = mc_variants
async def gen_reading_passage_utas(
self, start_id, mc_quantity: int, topic: Optional[str] # sa_quantity: int,
):
passage = await self._reading_service.generate_reading_passage(1, topic)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id, mc_quantity)
#short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
# + sa_quantity, mc_quantity)
"""
exercises: {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"""
return {
"exercises": mc_exercises,
"text": {
"content": passage["text"],
"title": passage["title"]
}
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
f'maximum 3 words per answer, about this text:\n"{text}"'
)
},
{
"role": "user",
"content": f'The id starts at {start_id}.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_template = self._mc_variants["text_mc_utas"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_text_multiple_choice_utas(text, mc_quantity, start_id)
else:
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response

View File

@@ -0,0 +1,7 @@
from .custom import CustomLevelModule
from .level_utas import LevelUtas
__all__ = [
"CustomLevelModule",
"LevelUtas"
]

View File

@@ -1,335 +1,335 @@
import queue
import random
from typing import Dict
from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
from app.services.abc import (
ILLMService, ILevelService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
class CustomLevelModule:
def __init__(
self,
llm: ILLMService,
level: ILevelService,
reading: IReadingService,
listening: IListeningService,
writing: IWritingService,
speaking: ISpeakingService
):
self._llm = llm
self._level = level
self._reading = reading
self._listening = listening
self._writing = writing
self._speaking = speaking
# TODO: I've changed this to retrieve the args from the body request and not request query args
async def get_custom_level(self, data: Dict):
nr_exercises = int(data.get('nr_exercises'))
exercise_id = 1
response = {
"exercises": {},
"module": "level"
}
for i in range(1, nr_exercises + 1, 1):
exercise_type = data.get(f'exercise_{i}_type')
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"normal", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"blank_space", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"underline", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
exercise_qty, exercise_id, exercise_text_size
)
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
)
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
1, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
1, exercise_topic, exercise_difficulty, exercise_topic_2
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
3, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
if exercise_ideamatch_qty != -1:
exercises.append('ideaMatch')
exercise_qty_q.put(exercise_ideamatch_qty)
total_qty = total_qty + exercise_ideamatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc3_qty != -1:
exercises.append('multipleChoice3Options')
exercise_qty_q.put(exercise_mc3_qty)
total_qty = total_qty + exercise_mc3_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
return response
import queue
import random
from typing import Dict
from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
from app.services.abc import (
ILLMService, ILevelService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
class CustomLevelModule:
def __init__(
self,
llm: ILLMService,
level: ILevelService,
reading: IReadingService,
listening: IListeningService,
writing: IWritingService,
speaking: ISpeakingService
):
self._llm = llm
self._level = level
self._reading = reading
self._listening = listening
self._writing = writing
self._speaking = speaking
# TODO: I've changed this to retrieve the args from the body request and not request query args
async def get_custom_level(self, data: Dict):
nr_exercises = int(data.get('nr_exercises'))
exercise_id = 1
response = {
"exercises": {},
"module": "level"
}
for i in range(1, nr_exercises + 1, 1):
exercise_type = data.get(f'exercise_{i}_type')
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"normal", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"blank_space", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"underline", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
exercise_qty, exercise_id, exercise_text_size
)
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
)
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
1, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
1, exercise_topic, exercise_difficulty, exercise_topic_2
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
3, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
if exercise_ideamatch_qty != -1:
exercises.append('ideaMatch')
exercise_qty_q.put(exercise_ideamatch_qty)
total_qty = total_qty + exercise_ideamatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc3_qty != -1:
exercises.append('multipleChoice3Options')
exercise_qty_q.put(exercise_mc3_qty)
total_qty = total_qty + exercise_mc3_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
return response

View File

@@ -0,0 +1,119 @@
import json
import uuid
from app.services.abc import ILLMService
class LevelUtas:
def __init__(self, llm: ILLMService, level_service, mc_variants: dict):
self._llm = llm
self._mc_variants = mc_variants
self._level_service = level_service
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
# await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
mc_exercises1 = await self._level_service.gen_multiple_choice(
"blank_space", 15, 1, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self._level_service.gen_multiple_choice(
"blank_space", 15, 16, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self._level_service.gen_multiple_choice(
"blank_space", 15, 31, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self._level_service.gen_multiple_choice(
"underline", 15, 46, utas=True, all_exams=all_mc_questions
)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self._level_service.gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self._level_service.gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self._level_service.gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}

View File

@@ -1,417 +0,0 @@
import json
import random
import uuid
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
IListeningService
from .custom import CustomLevelModule
from .upload import UploadLevelModule
class LevelService(ILevelService):
def __init__(
self,
llm: ILLMService,
document_store: IDocumentStore,
mc_variants: Dict,
reading_service: IReadingService,
writing_service: IWritingService,
speaking_service: ISpeakingService,
listening_service: IListeningService
):
self._llm = llm
self._document_store = document_store
self._reading_service = reading_service
self._custom_module = CustomLevelModule(
llm, self, reading_service, listening_service, writing_service, speaking_service
)
self._upload_module = UploadLevelModule(llm)
# TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
# mc_variants are stored in ./mc_variants.json
self._mc_variants = mc_variants
async def upload_level(self, upload: UploadFile) -> Dict:
return await self._upload_module.generate_level_from_file(upload)
async def get_custom_level(self, data: Dict):
return await self._custom_module.get_custom_level(data)
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
return {
"exercises": [exercises],
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
# await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
mc_exercises1 = await self.gen_multiple_choice(
"blank_space", 15, 1, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self.gen_multiple_choice(
"blank_space", 15, 16, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self.gen_multiple_choice(
"blank_space", 15, 31, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self.gen_multiple_choice(
"underline", 15, 46, utas=True, all_exams=all_mc_questions
)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self.gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
mc_template = self._mc_variants[mc_variant]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
gen_multiple_choice_for_text: str = (
'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
'punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
else:
if not utas:
all_exams = await self._document_store.get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
"type": "multipleChoice",
}
else:
if all_exams is not None:
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
mc_template = self._mc_variants[mc_variant]["questions"][0]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": (
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
f'it can be easy, intermediate or advanced.'
)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
async def _replace_exercise_if_exists(
self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
mc_variant, utas
)
else:
seen_keys.add(key)
if not utas:
for exam in all_exams:
exam_dict = exam.to_dict()
if len(exam_dict.get("parts", [])) > 0:
exercise_dict = exam_dict.get("parts", [])[0]
if len(exercise_dict.get("exercises", [])) > 0:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exercise_dict.get("exercises", [])[0]["questions"]
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
else:
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
return current_exercise, seen_keys
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
json_template = self._mc_variants["blank_space_text"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. '
'Put the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]
async def gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
passage = await self._reading_service.generate_reading_passage(1, topic)
short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
"content": passage["text"],
"title": passage["title"]
}
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
f'maximum 3 words per answer, about this text:\n"{text}"'
)
},
{
"role": "user",
"content": f'The id starts at {start_id}.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_template = self._mc_variants["text_mc_utas"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_text_multiple_choice_utas(text, mc_quantity, start_id)
else:
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response

View File

@@ -34,22 +34,22 @@
"options": [
{
"id": "A",
"text": "And"
"text": "This"
},
{
"id": "B",
"text": "Cat"
"text": "Those"
},
{
"id": "C",
"text": "Happy"
"text": "These"
},
{
"id": "D",
"text": "Jump"
"text": "That"
}
],
"prompt": "Which of the following is a conjunction?",
"prompt": "_____ man there is very kind.",
"solution": "A",
"variant": "text"
}
@@ -62,23 +62,23 @@
"options": [
{
"id": "A",
"text": "a"
"text": "was"
},
{
"id": "B",
"text": "b"
"text": "for work"
},
{
"id": "C",
"text": "c"
"text": "because"
},
{
"id": "D",
"text": "d"
"text": "could"
}
],
"prompt": "prompt",
"solution": "A",
"prompt": "I <u>was</u> late <u>for work</u> yesterday <u>because</u> I <u>could</u> start my car.",
"solution": "D",
"variant": "text"
}
]

View File

@@ -1,18 +1,17 @@
import aiofiles
import os
import uuid
from logging import getLogger
from typing import Dict, Any, Tuple, Coroutine
from typing import Dict, Any, Coroutine
import pdfplumber
from fastapi import UploadFile
from app.services.abc import ILLMService
from app.helpers import LoggerHelper, FileHelper
from app.mappers import ExamMapper
from app.mappers import LevelMapper
from app.dtos.exam import Exam
from app.dtos.exams.level import Exam
from app.dtos.sheet import Sheet
@@ -21,17 +20,15 @@ class UploadLevelModule:
self._logger = getLogger(__name__)
self._llm = openai
# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
# firestore, return the id right away, in generation view poll for the id
async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
ext, path_id = await self._save_upload(file)
ext, path_id = await FileHelper.save_upload(file)
FileHelper.convert_file_to_pdf(
f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.pdf'
)
file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
if not file_has_images:
FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
FileHelper.convert_file_to_html(f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.html')
completion: Coroutine[Any, Any, Exam] = (
self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
@@ -41,7 +38,7 @@ class UploadLevelModule:
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return self.fix_ids(response.dict(exclude_none=True))
return self.fix_ids(response.model_dump(exclude_none=True))
return None
@staticmethod
@@ -53,20 +50,6 @@ class UploadLevelModule:
return True
return False
@staticmethod
async def _save_upload(file: UploadFile) -> Tuple[str, str]:
ext = file.filename.split('.')[-1]
path_id = str(uuid.uuid4())
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
file_bytes: bytes = await file.read()
async with aiofiles.open(tmp_filename, 'wb') as file:
await file.write(file_bytes)
return ext, path_id
def _level_json_schema(self):
return {
"parts": [
@@ -91,7 +74,7 @@ class UploadLevelModule:
"content": html
}
],
ExamMapper.map_to_exam_model,
LevelMapper.map_to_exam_model,
str(self._level_json_schema())
)
@@ -237,7 +220,7 @@ class UploadLevelModule:
sheet = await self._png_batch(path_id, batch, json_schema)
sheet.batch = i + 1
components.append(sheet.dict())
components.append(sheet.model_dump())
batches = {"batches": components}
@@ -253,7 +236,7 @@ class UploadLevelModule:
]
}
],
ExamMapper.map_to_sheet,
LevelMapper.map_to_sheet,
str(json_schema)
)
@@ -326,67 +309,10 @@ class UploadLevelModule:
"content": str(batches)
}
],
ExamMapper.map_to_exam_model,
LevelMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_batches(self):
return {
"role": "system",
"content": (
'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
'components and solve the questions. Each batch may contain overlapping content with the previous '
'batch, or close enough content which needs to be excluded. The components are as follows:'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
'appropriate option to solve it.'
'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
'the order in which the underlines appear in the prompt you will need to fix it.'
'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
'select the appropriate solution.'
f'The template for these questions is the same: {self._multiple_choice_png()}\n'
'- Reading Passages, there are two types of reading passages with different templates. The one with '
'type "blanksPassage" where the text field holds the passage and a blank is represented by '
'{{<some number>}} and the other one with type "passage" that has the context field with just '
'reading passages. For both of these components you will have to remove any additional data that might '
'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
' if there are any. These components are used in conjunction with other ones.'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n\n'
'Now that you know the possible components here\'s what I want you to do:\n'
'1. Remove duplicates. A batch will have duplicates of other batches and the components of '
'the next batch should always take precedence over the previous one batch, what I mean by this is that '
'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
'you pick the next one.\n'
'2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
'exercise. For the multiple choice question follow the previous instruction to solve them and place '
f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
f'the template for this exercise: {self._passage_blank_space_html()}.\n'
f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
f'You must group the exercises by the parts in the order they appear in the batches components. '
f'The context field of a part is the context of a passage component that has text relevant to normal '
f'multiple choice questions.\n'
'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
'in your response and correctly structure the JSON.'
)
}
@staticmethod
def fix_ids(response):
counter = 1

View File

@@ -1,492 +0,0 @@
import queue
import uuid
from logging import getLogger
from queue import Queue
import random
from typing import Dict, List
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
from app.configs.constants import (
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
FieldsAndExercises
)
from app.helpers import ExercisesHelper, FileHelper
class ListeningService(IListeningService):
CONVERSATION_TAIL = (
"Please include random names and genders for the characters in your dialogue. "
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
)
MONOLOGUE_TAIL = (
"Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
)
def __init__(
self, llm: ILLMService,
tts: ITextToSpeechService,
file_storage: IFileStorage,
document_store: IDocumentStore
):
self._llm = llm
self._tts = tts
self._file_storage = file_storage
self._document_store = document_store
self._logger = getLogger(__name__)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"start_id": 1,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"start_id": 11,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue",
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"start_id": 21,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"start_id": 31,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
section = self._sections[f"section_{section_id}"]
if not topic:
topic = random.choice(section["topic"])
if len(req_exercises) == 0:
req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"])
if number_of_exercises_q.empty():
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
section["total_exercises"], len(req_exercises)
)
if start_id == -1:
start_id = section["start_id"]
dialog = await self.generate_listening_question(section_id, topic)
if section_id in {1, 3}:
dialog = self.parse_conversation(dialog)
self._logger.info(f'Generated {section["type"]}: {dialog}')
exercises = await self.generate_listening_exercises(
section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty
)
return {
"exercises": exercises,
"text": dialog,
"difficulty": difficulty
}
async def generate_listening_question(self, section: int, topic: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
async def generate_listening_exercises(
self, section: int, dialog: str,
req_exercises: list[str], number_of_exercises_q: Queue,
start_id: int, difficulty: str
):
dialog_type = self._sections[f'section_{section}']["type"]
exercises = []
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options":
n_options = 4 if "multipleChoice" else 3
question = await self._gen_multiple_choice_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options
)
exercises.append(question)
print("Added multiple choice: " + str(question))
elif req_exercise == "writeBlanksQuestions":
question = await self._gen_write_blanks_questions_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks questions: " + str(question))
elif req_exercise == "writeBlanksFill":
question = await self._gen_write_blanks_notes_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks notes: " + str(question))
elif req_exercise == "writeBlanksForm":
question = await self._gen_write_blanks_form_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks form: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
template = getListeningTemplate()
template['difficulty'] = difficulty
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
file_name = str(uuid.uuid4()) + ".mp3"
sound_file_path = FilePaths.AUDIO_FILES_PATH + file_name
firebase_file_path = FilePaths.FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
if "conversation" in part["text"]:
await self._tts.text_to_speech(part["text"]["conversation"], sound_file_path)
else:
await self._tts.text_to_speech(part["text"], sound_file_path)
file_url = await self._file_storage.upload_file_firebase_get_url(firebase_file_path, sound_file_path)
part_template["audio"]["source"] = file_url
part_template["exercises"] = part["exercises"]
template['parts'].append(part_template)
if min_timer != MinTimers.LISTENING_MIN_TIMER_DEFAULT:
template["minTimer"] = min_timer
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id)
if listening_id:
return {**template, "id": listening_id}
else:
raise Exception("Failed to save question: " + str(parts))
# ==================================================================================================================
# generate_listening_question helpers
# ==================================================================================================================
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
head = (
'Compose an authentic conversation between two individuals in the everyday social context of "'
if section == 1 else
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
f'{head}{topic}". {self.CONVERSATION_TAIL}'
)
}
]
if section == 1:
messages.extend([
{
"role": "user",
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
},
{
"role": "user",
"content": 'Try to have spelling of names (cities, people, etc)'
}
])
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["conversation"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return self._get_conversation_voices(response, True)
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
head = (
'Generate a comprehensive monologue set in the social context of'
if section == 2 else
'Generate a comprehensive and complex monologue on the academic subject of'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["monologue"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return response["monologue"]
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
chosen_voices = []
name_to_voice = {}
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
voice = None
# section 1
if unique_voices_across_segments:
while voice is None:
chosen_voice = self._get_random_voice(segment['gender'])
if chosen_voice not in chosen_voices:
voice = chosen_voice
chosen_voices.append(voice)
# section 3
else:
voice = self._get_random_voice(segment['gender'])
name_to_voice[name] = voice
segment['voice'] = voice
return response
@staticmethod
def _get_random_voice(gender: str):
if gender.lower() == 'male':
available_voices = NeuralVoices.MALE_NEURAL_VOICES
else:
available_voices = NeuralVoices.FEMALE_NEURAL_VOICES
return random.choice(available_voices)['Id']
# ==================================================================================================================
# generate_listening_exercises helpers
# ==================================================================================================================
async def _gen_multiple_choice_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this {dialog_type}:\n"' + text + '"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["questions"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(questions, start_id)["questions"],
"type": "multipleChoice",
}
async def _gen_write_blanks_questions_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Answer the questions below using no more than three words or a number accordingly.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}
async def _gen_write_blanks_notes_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["notes"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this '
'format: {"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = await self._llm.prediction(
GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
words = words["words"][:quantity]
replaced_notes = ExercisesHelper.replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Fill the blank space with the word missing from the audio.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": "\\n".join(replaced_notes),
"type": "writeBlanks"
}
async def _gen_write_blanks_form_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
if dialog_type == "conversation":
messages.append({
"role": "user",
"content": (
'It must be a form and not questions. '
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
)
})
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
parsed_form = parsed_form["form"][:quantity]
replaced_form, words = ExercisesHelper.build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Fill the form with words/numbers missing.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": replaced_form,
"type": "writeBlanks"
}
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
return "\n".join(readable_text)

View File

@@ -0,0 +1,294 @@
import queue
import uuid
from logging import getLogger
from queue import Queue
import random
from typing import Dict, List
from starlette.datastructures import UploadFile
from app.dtos.listening import GenerateListeningExercises
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
from app.configs.constants import (
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
FieldsAndExercises
)
from app.helpers import ExercisesHelper, FileHelper
from .multiple_choice import MultipleChoice
from .write_blank_forms import WriteBlankForms
from .write_blanks import WriteBlanks
from .write_blank_notes import WriteBlankNotes
class ListeningService(IListeningService):
CONVERSATION_TAIL = (
"Please include random names and genders for the characters in your dialogue. "
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
)
MONOLOGUE_TAIL = (
"Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
)
def __init__(
self, llm: ILLMService,
tts: ITextToSpeechService,
stt: ISpeechToTextService,
file_storage: IFileStorage,
document_store: IDocumentStore
):
self._llm = llm
self._tts = tts
self._stt = stt
self._file_storage = file_storage
self._document_store = document_store
self._logger = getLogger(__name__)
self._multiple_choice = MultipleChoice(llm)
self._write_blanks = WriteBlanks(llm)
self._write_blanks_forms = WriteBlankForms(llm)
self._write_blanks_notes = WriteBlankNotes(llm)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue",
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
async def get_dialog_from_audio(self, upload: UploadFile):
ext, path_id = await FileHelper.save_upload(upload)
dialog = await self._stt.speech_to_text(f'./tmp/{path_id}/upload.{ext}')
FileHelper.remove_directory(f'./tmp/{path_id}')
async def get_listening_question(self, section: int, dto: GenerateListeningExercises):
dialog_type = self._sections[f'section_{section}']["type"]
exercises = []
start_id = 1
for req_exercise in dto.exercises:
if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options":
n_options = 4 if "multipleChoice" else 3
question = await self._multiple_choice.gen_multiple_choice(
dialog_type, dto.text, req_exercise.quantity, start_id, dto.difficulty, n_options
)
exercises.append(question)
self._logger.info(f"Added multiple choice: {question}")
elif req_exercise.type == "writeBlanksQuestions":
question = await self._write_blanks.gen_write_blanks_questions(
dialog_type, dto.text, req_exercise.quantity, start_id, dto.difficulty
)
question["variant"] = "questions"
exercises.append(question)
self._logger.info(f"Added write blanks questions: {question}")
elif req_exercise.type == "writeBlanksFill":
question = await self._write_blanks_notes.gen_write_blanks_notes(
dialog_type, dto.text, req_exercise.quantity, start_id, dto.difficulty
)
question["variant"] = "fill"
exercises.append(question)
self._logger.info(f"Added write blanks notes: {question}")
elif req_exercise.type == "writeBlanksForm":
question = await self._write_blanks_forms.gen_write_blanks_form(
dialog_type, dto.text, req_exercise.quantity, start_id, dto.difficulty
)
question["variant"] = "form"
exercises.append(question)
self._logger.info(f"Added write blanks form: {question}")
start_id = start_id + req_exercise.quantity
return {"exercises": exercises}
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
template = getListeningTemplate()
template['difficulty'] = difficulty
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
file_name = str(uuid.uuid4()) + ".mp3"
sound_file_path = FilePaths.AUDIO_FILES_PATH + file_name
firebase_file_path = FilePaths.FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
if "conversation" in part["text"]:
await self._tts.text_to_speech(part["text"]["conversation"], sound_file_path)
else:
await self._tts.text_to_speech(part["text"], sound_file_path)
file_url = await self._file_storage.upload_file_firebase_get_url(firebase_file_path, sound_file_path)
part_template["audio"]["source"] = file_url
part_template["exercises"] = part["exercises"]
template['parts'].append(part_template)
if min_timer != MinTimers.LISTENING_MIN_TIMER_DEFAULT:
template["minTimer"] = min_timer
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
listening_id = await self._document_store.save_to_db("listening", template, listening_id)
if listening_id:
return {**template, "id": listening_id}
else:
raise Exception("Failed to save question: " + str(parts))
# ==================================================================================================================
# generate_listening_question helpers
# ==================================================================================================================
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
head = (
'Compose an authentic conversation between two individuals in the everyday social context of "'
if section == 1 else
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
f'{head}{topic}". {self.CONVERSATION_TAIL}'
)
}
]
if section == 1:
messages.extend([
{
"role": "user",
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
},
{
"role": "user",
"content": 'Try to have spelling of names (cities, people, etc)'
}
])
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["conversation"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
conversation = self._get_conversation_voices(response, True)
return {"dialog": conversation["conversation"]}
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
head = (
'Generate a comprehensive monologue set in the social context of'
if section == 2 else
'Generate a comprehensive and complex monologue on the academic subject of'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["monologue"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {"dialog": response["monologue"]}
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
chosen_voices = []
name_to_voice = {}
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
voice = None
# section 1
if unique_voices_across_segments:
while voice is None:
chosen_voice = self._get_random_voice(segment['gender'])
if chosen_voice not in chosen_voices:
voice = chosen_voice
chosen_voices.append(voice)
# section 3
else:
voice = self._get_random_voice(segment['gender'])
name_to_voice[name] = voice
segment['voice'] = voice
return response
@staticmethod
def _get_random_voice(gender: str):
if gender.lower() == 'male':
available_voices = NeuralVoices.MALE_NEURAL_VOICES
else:
available_voices = NeuralVoices.FEMALE_NEURAL_VOICES
return random.choice(available_voices)['Id']
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
return "\n".join(readable_text)

View File

@@ -0,0 +1,46 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class MultipleChoice:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_multiple_choice(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this {dialog_type}:\n"' + text + '"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["questions"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(questions, start_id)["questions"],
"type": "multipleChoice",
}

View File

@@ -0,0 +1,55 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class WriteBlankForms:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_form(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
if dialog_type == "conversation":
messages.append({
"role": "user",
"content": (
'It must be a form and not questions. '
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
)
})
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
parsed_form = parsed_form["form"][:quantity]
replaced_form, words = ExercisesHelper.build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Fill the form with words/numbers missing.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": replaced_form,
"type": "writeBlanks"
}

View File

@@ -0,0 +1,68 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class WriteBlankNotes:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_notes(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["notes"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this '
'format: {"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = await self._llm.prediction(
GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
words = words["words"][:quantity]
replaced_notes = ExercisesHelper.replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Fill the blank space with the word missing from the audio.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": "\\n".join(replaced_notes),
"type": "writeBlanks"
}

View File

@@ -0,0 +1,43 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class WriteBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_questions(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Answer the questions below using no more than three words or a number accordingly.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}

View File

@@ -1,349 +0,0 @@
import random
import uuid
from queue import Queue
from typing import List
from app.services.abc import IReadingService, ILLMService
from app.configs.constants import QuestionType, TemperatureSettings, FieldsAndExercises, GPTModels
from app.helpers import ExercisesHelper
class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_reading_passage(
self,
part: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str,
start_id: int
):
passage = await self.generate_reading_passage(part, topic)
exercises = await self._generate_reading_exercises(
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
)
if ExercisesHelper.contains_empty_dict(exercises):
return await self.gen_reading_passage(
part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
)
return {
"exercises": exercises,
"text": {
"content": passage["text"],
"title": passage["title"]
},
"difficulty": difficulty
}
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"title": "title of the text", "text": "generated text"}')
},
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on you generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
FieldsAndExercises.GEN_TEXT_FIELDS,
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def _generate_reading_exercises(
self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
):
exercises = []
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "fillBlanks":
question = await self._gen_summary_fill_blanks_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added fill blanks: " + str(question))
elif req_exercise == "trueFalse":
question = await self._gen_true_false_not_given_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added trueFalse: " + str(question))
elif req_exercise == "writeBlanks":
question = await self._gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
if ExercisesHelper.answer_word_limit_ok(question):
exercises.append(question)
print("Added write blanks: " + str(question))
else:
exercises.append({})
print("Did not add write blanks because it did not respect word limit")
elif req_exercise == "paragraphMatch":
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def _gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": f'Summarize this text: "{text}"'
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
'{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
'Ensure that your statements accurately represent information or inferences from the text, and '
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
f'as appropriate.\n\nReference text:\n\n {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["prompts"]
if len(questions) > quantity:
questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)
for i, question in enumerate(questions, start=start_id):
question["id"] = str(i)
return {
"id": str(uuid.uuid4()),
"prompt": "Do the following statements agree with the information given in the Reading Passage?",
"questions": questions,
"type": "trueFalse"
}
async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'possible answers, must have maximum 3 words per answer, about this text:\n"{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Choose no more than three words and/or a number from the passage for each answer.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}
async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. '
f'The paragraphs are these: {str(paragraphs)}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
headings = response["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
})
random.shuffle(paragraphs)
sentences = []
for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({
"id": i,
"sentence": paragraph["heading"],
"solution": paragraph["letter"]
})
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": options,
"prompt": "Choose the correct heading for paragraphs from the list of headings below.",
"sentences": sentences[:quantity],
"type": "matchSentences"
}
async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"ideas": [ '
'{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
'{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
']}'
)
},
{
"role": "user",
"content": (
f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
f'The text: {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
ideas = response["ideas"]
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": ExercisesHelper.build_options(ideas),
"prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
"sentences": ExercisesHelper.build_sentences(ideas, start_id),
"type": "matchSentences"
}

View File

@@ -0,0 +1,131 @@
from logging import getLogger
from fastapi import UploadFile
from app.configs.constants import GPTModels, FieldsAndExercises, TemperatureSettings
from app.dtos.reading import ReadingDTO
from app.helpers import ExercisesHelper
from app.services.abc import IReadingService, ILLMService
from .fill_blanks import FillBlanks
from .idea_match import IdeaMatch
from .paragraph_match import ParagraphMatch
from .true_false import TrueFalse
from .import_reading import ImportReadingModule
from .write_blanks import WriteBlanks
class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
self._fill_blanks = FillBlanks(llm)
self._idea_match = IdeaMatch(llm)
self._paragraph_match = ParagraphMatch(llm)
self._true_false = TrueFalse(llm)
self._write_blanks = WriteBlanks(llm)
self._logger = getLogger(__name__)
self._import = ImportReadingModule(llm)
async def import_exam(self, exercises: UploadFile, solutions: UploadFile = None):
return await self._import.import_from_file(exercises, solutions)
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"title": "title of the text", "text": "generated text"}')
},
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on your generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
FieldsAndExercises.GEN_TEXT_FIELDS,
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def generate_reading_exercises(self, dto: ReadingDTO):
exercises = []
start_id = 1
for req_exercise in dto.exercises:
if req_exercise.type == "fillBlanks":
question = await self._fill_blanks.gen_summary_fill_blanks_exercise(
dto.text, req_exercise.quantity, start_id, dto.difficulty, req_exercise.num_random_words
)
exercises.append(question)
self._logger.info(f"Added fill blanks: {question}")
elif req_exercise.type == "trueFalse":
question = await self._true_false.gen_true_false_not_given_exercise(
dto.text, req_exercise.quantity, start_id, dto.difficulty
)
exercises.append(question)
self._logger.info(f"Added trueFalse: {question}")
elif req_exercise.type == "writeBlanks":
question = await self._write_blanks.gen_write_blanks_exercise(
dto.text, req_exercise.quantity, start_id, dto.difficulty, req_exercise.max_words
)
if ExercisesHelper.answer_word_limit_ok(question):
exercises.append(question)
self._logger.info(f"Added write blanks: {question}")
else:
exercises.append({})
self._logger.info("Did not add write blanks because it did not respect word limit")
elif req_exercise.type == "paragraphMatch":
question = await self._paragraph_match.gen_paragraph_match_exercise(
dto.text, req_exercise.quantity, start_id
)
exercises.append(question)
self._logger.info(f"Added paragraph match: {question}")
elif req_exercise.type == "ideaMatch":
question = await self._idea_match.gen_idea_match_exercise(
dto.text, req_exercise.quantity, start_id
)
question["variant"] = "ideaMatch"
exercises.append(question)
self._logger.info(f"Added idea match: {question}")
start_id = start_id + req_exercise.quantity
return {
"exercises": exercises
}

View File

@@ -0,0 +1,73 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class FillBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": f'Summarize this text: "{text}"'
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}

View File

@@ -0,0 +1,46 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class IdeaMatch:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_idea_match_exercise(self, text: str, quantity: int, start_id: int):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"ideas": [ '
'{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
'{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
']}'
)
},
{
"role": "user",
"content": (
f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
f'The text: {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
ideas = response["ideas"]
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": ExercisesHelper.build_options(ideas),
"prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
"sentences": ExercisesHelper.build_sentences(ideas, start_id),
"type": "matchSentences"
}

View File

@@ -0,0 +1,190 @@
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from app.helpers import FileHelper
from app.mappers.reading import ReadingMapper
from app.services.abc import ILLMService
from app.dtos.exams.reading import Exam
class ImportReadingModule:
def __init__(self, openai: ILLMService):
self._logger = getLogger(__name__)
self._llm = openai
async def import_from_file(
self, exercises: UploadFile, solutions: UploadFile = None
) -> Dict[str, Any] | None:
path_id = str(uuid4())
ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
if solutions:
ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
response = await self._get_reading_parts(path_id, solutions is not None)
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return response.model_dump(exclude_none=True)
return None
async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
exercises_html = await f.read()
messages = [
self._instructions(),
{
"role": "user",
"content": f"Exam question sheet:\n\n{exercises_html}"
}
]
if solutions:
async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
solutions_html = await f.read()
messages.append({
"role": "user",
"content": f"Solutions:\n\n{solutions_html}"
})
return await self._llm.pydantic_prediction(
messages,
ReadingMapper.map_to_exam_model,
str(self._reading_json_schema())
)
def _reading_json_schema(self):
json = self._reading_exam_template()
json["parts"][0]["exercises"] = [
self._write_blanks(),
self._fill_blanks(),
self._match_sentences(),
self._true_false()
]
@staticmethod
def _reading_exam_template():
return {
"minTimer": "<number of minutes as int not string>",
"parts": [
{
"text": {
"title": "<title of the passage>",
"content": "<the text of the passage>",
},
"exercises": []
}
]
}
@staticmethod
def _write_blanks():
return {
"maxWords": "<number of max words return the int value not string>",
"solutions": [
{
"id": "<number of the question as string>",
"solution": [
"<at least one solution can have alternative solutions (that dont exceed maxWords)>"
]
},
],
"text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
"type": "writeBlanks"
}
@staticmethod
def _match_sentences():
return {
"options": [
{
"id": "<uppercase letter that identifies a paragraph>",
"sentence": "<either a heading or an idea>"
}
],
"sentences": [
{
"id": "<the question id not the option id>",
"solution": "<id in options>",
"sentence": "<heading or an idea>",
}
],
"type": "matchSentences",
"variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
}
@staticmethod
def _true_false():
return {
"questions": [
{
"prompt": "<question>",
"solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
"id": "<the question id>"
}
],
"type": "trueFalse"
}
@staticmethod
def _fill_blanks():
return {
"solutions": [
{
"id": "<blank id>",
"solution": "<word>"
}
],
"text": "<section of text with blanks denoted by {{<blank id>}}>",
"type": "fillBlanks",
"words": [
{
"letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
"word": "<word>"
}
]
}
def _instructions(self, solutions = False):
solutions_str = " and its solutions" if solutions else ""
tail = (
"The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and"
"all the solutions right!"
if not solutions else
"Do your utmost to correctly identify the sections, its exercises and respective solutions"
)
return {
"role": "system",
"content": (
f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
f"structure the data into a single json with this template: {self._reading_exam_template()}\n"
"You will need find out how many parts the exam has a correctly place its exercises. You will "
"encounter 4 types of exercises:\n"
" - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
" - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
" - \"trueFalse\": questions that its answers can only be true false or not given\n"
" - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
"contains the solutions and sometimes random words to throw off the students\n"
"These 4 types of exercises will need to be placed in the correct json template inside each part, "
"the templates are as follows:\n "
f"writeBlanks: {self._write_blanks()}\n"
f"matchSentences: {self._match_sentences()}\n"
f"trueFalse: {self._true_false()}\n"
f"fillBlanks: {self._fill_blanks()}\n\n"
f"{tail}"
)
}

View File

@@ -0,0 +1,63 @@
import random
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class ParagraphMatch:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_paragraph_match_exercise(self, text: str, quantity: int, start_id: int):
paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. '
f'The paragraphs are these: {str(paragraphs)}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
headings = response["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
})
random.shuffle(paragraphs)
sentences = []
for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({
"id": i,
"sentence": paragraph["heading"],
"solution": paragraph["letter"]
})
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": options,
"prompt": "Choose the correct heading for paragraphs from the list of headings below.",
"sentences": sentences[:quantity],
"type": "matchSentences"
}

View File

@@ -0,0 +1,49 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class TrueFalse:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id: int, difficulty: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
'{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
'Ensure that your statements accurately represent information or inferences from the text, and '
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
f'as appropriate.\n\nReference text:\n\n {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["prompts"]
if len(questions) > quantity:
questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)
for i, question in enumerate(questions, start=start_id):
question["id"] = str(i)
return {
"id": str(uuid.uuid4()),
"prompt": "Do the following statements agree with the information given in the Reading Passage?",
"questions": questions,
"type": "trueFalse"
}

View File

@@ -0,0 +1,44 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class WriteBlanks:
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_write_blanks_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, max_words: int = 3):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": max_words,
"prompt": f"Choose no more than {max_words} words and/or a number from the passage for each answer.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}

View File

@@ -9,7 +9,7 @@ from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
from app.configs.constants import (
FieldsAndExercises, GPTModels, TemperatureSettings,
AvatarEnum, FilePaths
ELAIAvatars, FilePaths
)
from app.helpers import TextHelper
@@ -425,7 +425,7 @@ class SpeakingService(ISpeakingService):
self._logger.info(f'Saved speaking to DB with id {req_id} : {str(template)}')
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
avatar = (random.choice(list(AvatarEnum))).value
avatar = (random.choice(list(ELAIAvatars))).name
template_index = part - 1
# Using list comprehension to find the element with the desired value in the 'type' field

View File

@@ -19,7 +19,7 @@ class WritingService(IWritingService):
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
*self._get_writing_messages(task, topic, difficulty)
*self._get_writing_args(task, topic, difficulty)
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
@@ -40,36 +40,43 @@ class WritingService(IWritingService):
}
@staticmethod
def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]:
# TODO: Should the muslim disclaimer be added to task 2?
task_prompt = (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
'advice, or instructions within the letter. Make sure that the generated prompt is '
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
) if task == 1 else (
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
f'analysis of contrasting perspectives on the topic of "{topic}".'
)
task_instructions = (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
'the answer should include.'
) if task == 1 else (
'The question should lead to an answer with either "theories", "complicated information" or '
'be "very descriptive" on the topic.'
)
def _get_writing_args(task: int, topic: str, difficulty: str) -> List[Dict]:
writing_args = {
"1": {
"prompt": (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
'advice, or instructions within the letter. Make sure that the generated prompt is '
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
),
"instructions": (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
'the answer should include.'
)
},
"2": {
# TODO: Should the muslim disclaimer be here as well?
"prompt": (
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
f'analysis of contrasting perspectives on the topic of "{topic}".'
),
"instructions": (
'The question should lead to an answer with either "theories", "complicated information" or '
'be "very descriptive" on the topic.'
)
}
}
messages = [
{
"role": "user",
"content": task_prompt
"content": writing_args[str(task)]["prompt"]
},
{
"role": "user",
"content": task_instructions
"content": writing_args[str(task)]["instructions"]
}
]

View File

@@ -3,11 +3,13 @@ from .heygen import Heygen
from .openai import OpenAI
from .whisper import OpenAIWhisper
from .gpt_zero import GPTZero
from .elai import ELAI
__all__ = [
"AWSPolly",
"Heygen",
"OpenAI",
"OpenAIWhisper",
"GPTZero"
"GPTZero",
"ELAI"
]

View File

@@ -0,0 +1,95 @@
import asyncio
import os
import logging
from asyncio import sleep
from copy import deepcopy
import aiofiles
from charset_normalizer.md import getLogger
from httpx import AsyncClient
from app.configs.constants import ELAIAvatars
from app.services.abc import IVideoGeneratorService
class ELAI(IVideoGeneratorService):
_ELAI_ENDPOINT = 'https://apis.elai.io/api/v1/videos'
def __init__(self, client: AsyncClient, token: str, conf: dict):
self._http_client = client
self._conf = deepcopy(conf)
self._logger = getLogger(__name__)
self._GET_HEADER = {
"accept": "application/json",
"Authorization": f"Bearer {token}"
}
self._POST_HEADER = {
"accept": "application/json",
"content-type": "application/json",
"Authorization": f"Bearer {token}"
}
async def create_video(self, text: str, avatar: str):
avatar_url = ELAIAvatars[avatar].value.get("avatar_url")
avatar_code = ELAIAvatars[avatar].value.get("avatar_code")
avatar_gender = ELAIAvatars[avatar].value.get("avatar_gender")
avatar_canvas = ELAIAvatars[avatar].value.get("avatar_canvas")
voice_id = ELAIAvatars[avatar].value.get("voice_id")
voice_provider = ELAIAvatars[avatar].value.get("voice_provider")
self._conf["slides"][0]["canvas"]["objects"][0]["src"] = avatar_url
self._conf["slides"]["avatar"] = {
"code": avatar_code,
"gender": avatar_gender,
"canvas": avatar_canvas
}
self._conf["slides"]["speech"] = text
self._conf["slides"]["voice"] = voice_id
self._conf["slides"]["voiceProvider"] = voice_provider
response = await self._http_client.post(self._ELAI_ENDPOINT, headers=self._POST_HEADER, json=self._conf)
self._logger.info(response.status_code)
self._logger.info(response.json())
video_id = response.json()["_id"]
if video_id:
await self._http_client.post(f'{self._ELAI_ENDPOINT}/render/{video_id}', headers=self._GET_HEADER)
while True:
response = await self._http_client.get(f'{self._ELAI_ENDPOINT}/{video_id}', headers=self._GET_HEADER)
response_data = response.json()
if response_data['status'] == 'ready':
self._logger.info(response_data)
download_url = response_data.get('url')
output_directory = 'download-video/'
output_filename = video_id + '.mp4'
response = await self._http_client.get(download_url)
if response.status_code == 200:
os.makedirs(output_directory, exist_ok=True)
output_path = os.path.join(output_directory, output_filename)
with open(output_path, 'wb') as f:
f.write(response.content)
self._logger.info(f"File '{output_filename}' downloaded successfully.")
return output_filename
else:
self._logger.error(f"Failed to download file. Status code: {response.status_code}")
return None
elif response_data['status'] == 'failed':
self._logger.error('Video creation failed.')
break
else:
self._logger.info('Video is still processing. Checking again in 10 seconds...')
await sleep(10)

View File

@@ -0,0 +1,72 @@
{
"name": "API test",
"slides": [
{
"id": 1,
"canvas": {
"objects": [
{
"type": "avatar",
"left": 151.5,
"top": 36,
"fill": "#4868FF",
"scaleX": 0.3,
"scaleY": 0.3,
"width": 1080,
"height": 1080,
"avatarType": "transparent",
"animation": {
"type": null,
"exitType": null
}
},
{
"type": "image",
"version": "5.3.0",
"originX": "left",
"originY": "top",
"left": 30,
"top": 30,
"width": 800,
"height": 600,
"fill": "rgb(0,0,0)",
"stroke": null,
"strokeWidth": 0,
"strokeDashArray": null,
"strokeLineCap": "butt",
"strokeDashOffset": 0,
"strokeLineJoin": "miter",
"strokeUniform": false,
"strokeMiterLimit": 4,
"scaleX": 0.18821429,
"scaleY": 0.18821429,
"angle": 0,
"flipX": false,
"flipY": false,
"opacity": 1,
"shadow": null,
"visible": true,
"backgroundColor": "",
"fillRule": "nonzero",
"paintFirst": "fill",
"globalCompositeOperation": "source-over",
"skewX": 0,
"skewY": 0,
"cropX": 0,
"cropY": 0,
"id": 676845479989,
"src": "https://d3u63mhbhkevz8.cloudfront.net/production/uploads/66f5190349f943682dd776ff/en-coach-main-logo-800x600_sm1ype.jpg?Expires=1727654400&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9kM3U2M21oYmhrZXZ6OC5jbG91ZGZyb250Lm5ldC9wcm9kdWN0aW9uL3VwbG9hZHMvNjZmNTE5MDM0OWY5NDM2ODJkZDc3NmZmL2VuLWNvYWNoLW1haW4tbG9nby04MDB4NjAwX3NtMXlwZS5qcGciLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3Mjc2NTQ0MDB9fX1dfQ__&Signature=kTVzlDeS7cua2HiAE5G%7E-yFqbhu0bHraFH5SauUln7yuNXoX7vtiKIBYiL%7Eps3LCLEZS77arSZ7H%7EG8CKzabHDjAR-Y6Uc%7ELD5KQaMmk0jbAxbC3Wdoq6cfd0qIwEuodQYlC0It2WBidP8KsgOy3uUQ%7EvcBoqlb255yMFw4pHuptOBB1kPs%7EFyzDV0fnRNsKaYRcy0Fn2EFUp13axm0CZQclazuLFM622AyCydKMy0vfxV%7Etny3sskwPaUe2OANGMFg07Q1pRuy6fUON0DsbhAh1tA2H6-nnem5KbFwiZK3IIwwYGBx3H41ovzC6Ejt80Fd0%7EPSHw7GzVBnUmtP-IA__&Key-Pair-Id=K1Y7U91AR6T7E5",
"crossOrigin": "anonymous",
"filters": [],
"_exists": true
}
],
"background": "#ffffff",
"version": "4.4.0"
},
"animation": "fade_in",
"language": "English",
"voiceType": "text"
}
]
}

View File

@@ -10,12 +10,11 @@ from app.services.abc import IVideoGeneratorService
class Heygen(IVideoGeneratorService):
# TODO: Not used, remove if not necessary
# CREATE_VIDEO_URL = 'https://api.heygen.com/v1/template.generate'
_GET_VIDEO_URL = 'https://api.heygen.com/v1/video_status.get'
def __init__(self, client: AsyncClient, heygen_token: str):
def __init__(self, client: AsyncClient, token: str):
pass
"""
self._get_header = {
'X-Api-Key': heygen_token
}
@@ -25,9 +24,12 @@ class Heygen(IVideoGeneratorService):
}
self._http_client = client
self._logger = logging.getLogger(__name__)
"""
async def create_video(self, text: str, avatar: str):
pass
# POST TO CREATE VIDEO
"""
create_video_url = 'https://api.heygen.com/v2/template/' + avatar + '/generate'
data = {
"test": False,
@@ -87,4 +89,5 @@ class Heygen(IVideoGeneratorService):
else:
self._logger.error(f"Failed to download file. Status code: {response.status_code}")
return None
"""

View File

@@ -120,7 +120,7 @@ class OpenAI(ILLMService):
params["temperature"] = temperature
attempt = 0
while attempt < max_retries:
while attempt < 3:
result = await self._client.chat.completions.create(**params)
result_content = result.choices[0].message.content
try:
@@ -142,6 +142,7 @@ class OpenAI(ILLMService):
"content": (
f"Previous response: {result_content}\n"
f"JSON format: {json_scheme}"
f"Validation errors: {e}"
)
}
]