Files
encoach_backend/app/services/impl/level.py
Carlos Mesquita 3cf9fa5cba Async release
2024-07-23 08:40:35 +01:00

507 lines
20 KiB
Python

import json
import random
import uuid
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService
class LevelService(ILevelService):
def __init__(
self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService
):
self._llm = llm
self._document_store = document_store
self._reading_service = reading_service
async def get_level_exam(self):
number_of_exercises = 25
exercises = await self._gen_multiple_choice_level(number_of_exercises)
return {
"exercises": [exercises],
"isDiagnostic": False,
"minTimer": 25,
"module": "level"
}
async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
gen_multiple_choice_for_text = (
f'Generate {str(quantity)} multiple choice questions of 4 options for an english level exam, some easy '
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
'punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": '
'[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
'{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}'
)
},
{
"role": "user",
"content": gen_multiple_choice_for_text
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self._gen_multiple_choice_level(quantity, start_id)
else:
all_exams = await self._document_store.get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
"type": "multipleChoice",
}
async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
)
else:
seen_keys.add(key)
for exam in all_exams:
exam_dict = exam.to_dict()
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam_dict.get("exercises", [])[0]["questions"]
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
)
return current_exercise, seen_keys
async def _generate_single_mc_level_question(self):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
'{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}'
)
},
{
"role": "user",
"content": (
'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
'intermediate or advanced.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
async def get_level_utas(self):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
mc_exercises1 = await self._gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self._gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self._gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self._gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": False,
"minTimer": 25,
"module": "level"
}
async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
gen_multiple_choice_for_text = (
f'Generate {str(quantity)} multiple choice blank space questions of 4 options for an english '
'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure '
'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
"content": gen_multiple_choice_for_text
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self._gen_multiple_choice_level(quantity, start_id)
else:
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists_utas(
all_exams,
question["questions"][i],
question,
seen_keys
)
return ExercisesHelper.fix_exercise_ids(question, start_id)
async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return self._replace_exercise_if_exists_utas(
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
)
else:
seen_keys.add(key)
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return self._replace_exercise_if_exists_utas(
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
)
return current_exercise, seen_keys
async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
json_format = {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
gen_multiple_choice_for_text = (
f'Generate {str(quantity)} multiple choice questions of 4 options for an english '
'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that '
'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": gen_multiple_choice_for_text
},
{
"role": "user",
"content": (
'The type of multiple choice is the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self._gen_multiple_choice_level(quantity, start_id)
else:
return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
async def _gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
json_format = {
"question": {
"words": [
{
"id": "1",
"text": "a"
},
{
"id": "2",
"text": "b"
},
{
"id": "3",
"text": "c"
},
{
"id": "4",
"text": "d"
}
],
"text": "text"
}
}
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": f'Generate a text of at least {str(size)} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. Put '
'the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]
async def _gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
"content": passage["text"],
"title": passage["title"]
}
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": (
'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
'maximum 3 words per answer, about this text:\n"' + text + '"')
},
{
"role": "user",
"content": 'The id starts at ' + str(start_id) + '.'
}
]
return (
await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
)["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_format = {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": 'Generate ' + str(
mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_multiple_choice_level(mc_quantity, start_id)
else:
return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]