# NOTE: extraction metadata ("507 lines, 20 KiB, Python") removed — not part of the source.
import json
|
|
import random
|
|
import uuid
|
|
|
|
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
|
|
from app.helpers import ExercisesHelper
|
|
from app.repositories.abc import IDocumentStore
|
|
from app.services.abc import ILevelService, ILLMService, IReadingService
|
|
|
|
|
|
class LevelService(ILevelService):
|
|
|
|
def __init__(
|
|
self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService
|
|
):
|
|
self._llm = llm
|
|
self._document_store = document_store
|
|
self._reading_service = reading_service
|
|
|
|
async def get_level_exam(self):
|
|
number_of_exercises = 25
|
|
exercises = await self._gen_multiple_choice_level(number_of_exercises)
|
|
return {
|
|
"exercises": [exercises],
|
|
"isDiagnostic": False,
|
|
"minTimer": 25,
|
|
"module": "level"
|
|
}
|
|
|
|
async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
|
|
gen_multiple_choice_for_text = (
|
|
f'Generate {str(quantity)} multiple choice questions of 4 options for an english level exam, some easy '
|
|
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
|
|
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
|
|
'punctuation. Make sure every question only has 1 correct answer.'
|
|
)
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": (
|
|
'You are a helpful assistant designed to output JSON on this format: '
|
|
'{"questions": [{"id": "9", "options": '
|
|
'[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
|
|
'{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
|
|
'"prompt": "Which of the following is a conjunction?", '
|
|
'"solution": "A", "variant": "text"}]}'
|
|
)
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": gen_multiple_choice_for_text
|
|
}
|
|
]
|
|
|
|
question = await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
|
|
if len(question["questions"]) != quantity:
|
|
return await self._gen_multiple_choice_level(quantity, start_id)
|
|
else:
|
|
all_exams = await self._document_store.get_all("level")
|
|
seen_keys = set()
|
|
for i in range(len(question["questions"])):
|
|
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
|
|
all_exams, question["questions"][i], question, seen_keys
|
|
)
|
|
return {
|
|
"id": str(uuid.uuid4()),
|
|
"prompt": "Select the appropriate option.",
|
|
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
|
|
"type": "multipleChoice",
|
|
}
|
|
|
|
    async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
        """Ensure `current_exercise` is not a duplicate, regenerating it if needed.

        A question counts as a duplicate when (a) its identity key (prompt plus
        sorted option texts) was already seen in this batch, or (b) a stored exam
        contains a question with the same prompt whose first option's text
        matches any of this question's options. Duplicates are replaced with a
        freshly generated question and re-checked recursively.

        Returns the (possibly regenerated) exercise and the updated `seen_keys`.
        NOTE(review): recursion is unbounded — repeated LLM duplicates could
        recurse indefinitely.
        """
        # Extracting relevant fields for comparison
        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
        # Check if the key is in the set
        if key in seen_keys:
            # Already produced in this batch: generate a replacement and retry.
            return await self._replace_exercise_if_exists(
                all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
            )
        else:
            seen_keys.add(key)

        for exam in all_exams:
            # `exam` is a document-store record; assumes to_dict() yields the
            # exam payload — TODO confirm against IDocumentStore implementation.
            exam_dict = exam.to_dict()
            if any(
                exercise["prompt"] == current_exercise["prompt"] and
                # NOTE(review): only the stored exercise's FIRST option text is
                # compared against all current options — looks intentional-ish
                # but may under-detect duplicates; confirm.
                any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                    current_exercise["options"])
                # Assumes exercises list is non-empty and exercises[0] holds the
                # questions — TODO confirm stored exam shape.
                for exercise in exam_dict.get("exercises", [])[0]["questions"]
            ):
                # Collides with a stored exam: regenerate and re-check.
                return await self._replace_exercise_if_exists(
                    all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
                )
        return current_exercise, seen_keys
|
|
|
|
async def _generate_single_mc_level_question(self):
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": (
|
|
'You are a helpful assistant designed to output JSON on this format: '
|
|
'{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
|
|
'{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
|
|
'"prompt": "Which of the following is a conjunction?", '
|
|
'"solution": "A", "variant": "text"}'
|
|
)
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": (
|
|
'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
|
|
'intermediate or advanced.'
|
|
)
|
|
|
|
}
|
|
]
|
|
|
|
question = await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
|
|
return question
|
|
|
|
async def get_level_utas(self):
|
|
# Formats
|
|
mc = {
|
|
"id": str(uuid.uuid4()),
|
|
"prompt": "Choose the correct word or group of words that completes the sentences.",
|
|
"questions": None,
|
|
"type": "multipleChoice",
|
|
"part": 1
|
|
}
|
|
|
|
umc = {
|
|
"id": str(uuid.uuid4()),
|
|
"prompt": "Choose the underlined word or group of words that is not correct.",
|
|
"questions": None,
|
|
"type": "multipleChoice",
|
|
"part": 2
|
|
}
|
|
|
|
bs_1 = {
|
|
"id": str(uuid.uuid4()),
|
|
"prompt": "Read the text and write the correct word for each space.",
|
|
"questions": None,
|
|
"type": "blankSpaceText",
|
|
"part": 3
|
|
}
|
|
|
|
bs_2 = {
|
|
"id": str(uuid.uuid4()),
|
|
"prompt": "Read the text and write the correct word for each space.",
|
|
"questions": None,
|
|
"type": "blankSpaceText",
|
|
"part": 4
|
|
}
|
|
|
|
reading = {
|
|
"id": str(uuid.uuid4()),
|
|
"prompt": "Read the text and answer the questions below.",
|
|
"questions": None,
|
|
"type": "readingExercises",
|
|
"part": 5
|
|
}
|
|
|
|
all_mc_questions = []
|
|
|
|
# PART 1
|
|
mc_exercises1 = await self._gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions)
|
|
print(json.dumps(mc_exercises1, indent=4))
|
|
all_mc_questions.append(mc_exercises1)
|
|
|
|
# PART 2
|
|
mc_exercises2 = await self._gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions)
|
|
print(json.dumps(mc_exercises2, indent=4))
|
|
all_mc_questions.append(mc_exercises2)
|
|
|
|
# PART 3
|
|
mc_exercises3 = await self._gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions)
|
|
print(json.dumps(mc_exercises3, indent=4))
|
|
all_mc_questions.append(mc_exercises3)
|
|
|
|
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
|
|
print(json.dumps(mc_exercises, indent=4))
|
|
mc["questions"] = mc_exercises
|
|
|
|
# Underlined mc
|
|
underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
|
|
print(json.dumps(underlined_mc, indent=4))
|
|
umc["questions"] = underlined_mc
|
|
|
|
# Blank Space text 1
|
|
blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
|
|
print(json.dumps(blank_space_text_1, indent=4))
|
|
bs_1["questions"] = blank_space_text_1
|
|
|
|
# Blank Space text 2
|
|
blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
|
|
print(json.dumps(blank_space_text_2, indent=4))
|
|
bs_2["questions"] = blank_space_text_2
|
|
|
|
# Reading text
|
|
reading_text = await self._gen_reading_passage_utas(87, 10, 4)
|
|
print(json.dumps(reading_text, indent=4))
|
|
reading["questions"] = reading_text
|
|
|
|
return {
|
|
"exercises": {
|
|
"blankSpaceMultipleChoice": mc,
|
|
"underlinedMultipleChoice": umc,
|
|
"blankSpaceText1": bs_1,
|
|
"blankSpaceText2": bs_2,
|
|
"readingExercises": reading,
|
|
},
|
|
"isDiagnostic": False,
|
|
"minTimer": 25,
|
|
"module": "level"
|
|
}
|
|
|
|
async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
|
|
gen_multiple_choice_for_text = (
|
|
f'Generate {str(quantity)} multiple choice blank space questions of 4 options for an english '
|
|
'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure '
|
|
'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
|
|
'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
|
|
)
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": (
|
|
'You are a helpful assistant designed to output JSON on this format: '
|
|
'{"questions": [{"id": "9", "options": [{"id": "A", "text": '
|
|
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
|
|
'"Happy"}, {"id": "D", "text": "Jump"}], '
|
|
'"prompt": "Which of the following is a conjunction?", '
|
|
'"solution": "A", "variant": "text"}]}')
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": gen_multiple_choice_for_text
|
|
}
|
|
]
|
|
|
|
question = await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
|
|
if len(question["questions"]) != quantity:
|
|
return await self._gen_multiple_choice_level(quantity, start_id)
|
|
else:
|
|
seen_keys = set()
|
|
for i in range(len(question["questions"])):
|
|
question["questions"][i], seen_keys = await self._replace_exercise_if_exists_utas(
|
|
all_exams,
|
|
question["questions"][i],
|
|
question,
|
|
seen_keys
|
|
)
|
|
return ExercisesHelper.fix_exercise_ids(question, start_id)
|
|
|
|
async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
|
|
# Extracting relevant fields for comparison
|
|
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
|
|
# Check if the key is in the set
|
|
if key in seen_keys:
|
|
return self._replace_exercise_if_exists_utas(
|
|
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
|
|
)
|
|
else:
|
|
seen_keys.add(key)
|
|
|
|
for exam in all_exams:
|
|
if any(
|
|
exercise["prompt"] == current_exercise["prompt"] and
|
|
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
|
|
current_exercise["options"])
|
|
for exercise in exam.get("questions", [])
|
|
):
|
|
return self._replace_exercise_if_exists_utas(
|
|
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
|
|
)
|
|
return current_exercise, seen_keys
|
|
|
|
|
|
async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
|
|
json_format = {
|
|
"questions": [
|
|
{
|
|
"id": "9",
|
|
"options": [
|
|
{
|
|
"id": "A",
|
|
"text": "a"
|
|
},
|
|
{
|
|
"id": "B",
|
|
"text": "b"
|
|
},
|
|
{
|
|
"id": "C",
|
|
"text": "c"
|
|
},
|
|
{
|
|
"id": "D",
|
|
"text": "d"
|
|
}
|
|
],
|
|
"prompt": "prompt",
|
|
"solution": "A",
|
|
"variant": "text"
|
|
}
|
|
]
|
|
}
|
|
|
|
gen_multiple_choice_for_text = (
|
|
f'Generate {str(quantity)} multiple choice questions of 4 options for an english '
|
|
'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that '
|
|
'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
|
|
'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
|
|
)
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": gen_multiple_choice_for_text
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": (
|
|
'The type of multiple choice is the prompt has wrong words or group of words and the options '
|
|
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
|
|
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
|
|
'the boss <u>is</u> nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
|
|
)
|
|
}
|
|
]
|
|
|
|
question = await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
|
|
if len(question["questions"]) != quantity:
|
|
return await self._gen_multiple_choice_level(quantity, start_id)
|
|
else:
|
|
return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
|
|
|
|
async def _gen_blank_space_text_utas(
|
|
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
|
|
):
|
|
json_format = {
|
|
"question": {
|
|
"words": [
|
|
{
|
|
"id": "1",
|
|
"text": "a"
|
|
},
|
|
{
|
|
"id": "2",
|
|
"text": "b"
|
|
},
|
|
{
|
|
"id": "3",
|
|
"text": "c"
|
|
},
|
|
{
|
|
"id": "4",
|
|
"text": "d"
|
|
}
|
|
],
|
|
"text": "text"
|
|
}
|
|
}
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": f'Generate a text of at least {str(size)} words about the topic {topic}.'
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": (
|
|
f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace '
|
|
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
|
|
'The ids must be ordered throughout the text and the words must be replaced only once. Put '
|
|
'the removed words and respective ids on the words array of the json in the correct order.'
|
|
)
|
|
}
|
|
]
|
|
|
|
question = await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
|
|
return question["question"]
|
|
|
|
async def _gen_reading_passage_utas(
|
|
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
|
|
):
|
|
|
|
passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
|
|
short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
|
|
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
|
|
return {
|
|
"exercises": {
|
|
"shortAnswer": short_answer,
|
|
"multipleChoice": mc_exercises,
|
|
},
|
|
"text": {
|
|
"content": passage["text"],
|
|
"title": passage["title"]
|
|
}
|
|
}
|
|
|
|
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
|
|
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": (
|
|
'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
|
|
'maximum 3 words per answer, about this text:\n"' + text + '"')
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": 'The id starts at ' + str(start_id) + '.'
|
|
}
|
|
]
|
|
|
|
return (
|
|
await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
)["questions"]
|
|
|
|
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
|
|
json_format = {
|
|
"questions": [
|
|
{
|
|
"id": "9",
|
|
"options": [
|
|
{
|
|
"id": "A",
|
|
"text": "a"
|
|
},
|
|
{
|
|
"id": "B",
|
|
"text": "b"
|
|
},
|
|
{
|
|
"id": "C",
|
|
"text": "c"
|
|
},
|
|
{
|
|
"id": "D",
|
|
"text": "d"
|
|
}
|
|
],
|
|
"prompt": "prompt",
|
|
"solution": "A",
|
|
"variant": "text"
|
|
}
|
|
]
|
|
}
|
|
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": 'Generate ' + str(
|
|
mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": 'Make sure every question only has 1 correct answer.'
|
|
}
|
|
]
|
|
|
|
question = await self._llm.prediction(
|
|
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
|
)
|
|
|
|
if len(question["questions"]) != mc_quantity:
|
|
return await self._gen_multiple_choice_level(mc_quantity, start_id)
|
|
else:
|
|
return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
|