import json
import random
import uuid

from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService


class LevelService(ILevelService):
    """Builds English level exams by prompting an LLM for exercises.

    Two exam shapes are produced:

    * ``get_level_exam`` - a single multiple-choice exercise of 25 questions,
      de-duplicated against the exams returned by the document store.
    * ``get_level_utas`` - a five-part exam (blank-space multiple choice,
      underlined multiple choice, two blank-space texts and a reading part).
    """

    # Prefix shared by every system prompt that is followed by a response template.
    _JSON_SYSTEM_PREFIX = 'You are a helpful assistant designed to output JSON on this format: '

    # Response template used by the multiple-choice generators that build
    # their system prompt from a dict instead of a hand-written JSON string.
    _MC_JSON_FORMAT = {
        "questions": [
            {
                "id": "9",
                "options": [
                    {"id": "A", "text": "a"},
                    {"id": "B", "text": "b"},
                    {"id": "C", "text": "c"},
                    {"id": "D", "text": "d"},
                ],
                "prompt": "prompt",
                "solution": "A",
                "variant": "text",
            }
        ]
    }

    def __init__(
        self,
        llm: ILLMService,
        document_store: IDocumentStore,
        reading_service: IReadingService
    ):
        """Store the collaborators used to generate and look up exams."""
        self._llm = llm
        self._document_store = document_store
        self._reading_service = reading_service

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @classmethod
    def _json_system_message(cls, json_format) -> dict:
        """Return the system message describing the expected JSON response.

        The template is serialised with ``json.dumps`` so that the model is
        shown real JSON (double-quoted) rather than a Python ``repr``.
        """
        return {"role": "system", "content": cls._JSON_SYSTEM_PREFIX + json.dumps(json_format)}

    @staticmethod
    def _exercise_key(exercise) -> tuple:
        """Return a hashable identity for a question: its prompt plus its sorted option texts."""
        return exercise['prompt'], tuple(sorted(option['text'] for option in exercise['options']))

    @staticmethod
    def _matches_any(stored_questions, candidate) -> bool:
        """Tell whether ``candidate`` collides with one of ``stored_questions``.

        A collision is an identical prompt where the stored question's first
        option text equals any option text of the candidate.
        """
        return any(
            stored["prompt"] == candidate["prompt"]
            and any(stored["options"][0]["text"] == option["text"] for option in candidate["options"])
            for stored in stored_questions
        )

    # ------------------------------------------------------------------
    # Plain level exam
    # ------------------------------------------------------------------

    async def get_level_exam(self):
        """Generate a level exam made of one 25-question multiple-choice exercise."""
        number_of_exercises = 25
        exercises = await self._gen_multiple_choice_level(number_of_exercises)
        return {
            "exercises": [exercises],
            "isDiagnostic": False,
            "minTimer": 25,
            "module": "level"
        }

    async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
        """Generate ``quantity`` multiple-choice questions as one exercise dict.

        The request is repeated until the model returns exactly ``quantity``
        questions. Each question is then checked against the exams returned by
        ``self._document_store.get_all("level")`` and against the other
        questions of this batch, and replaced when it is a duplicate.

        :param quantity: number of questions to request.
        :param start_id: id passed to ``ExercisesHelper.fix_exercise_ids``
            (presumably the id given to the first question - confirm in the helper).
        :return: dict with ``id``, ``prompt``, ``questions`` and ``type`` keys.
        """
        gen_multiple_choice_for_text = (
            f'Generate {quantity} multiple choice questions of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )

        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"id": "9", "options": '
                    '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
                    '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}]}'
                )
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        # The model did not honour the requested amount: ask again.
        if len(question["questions"]) != quantity:
            return await self._gen_multiple_choice_level(quantity, start_id)

        all_exams = await self._document_store.get_all("level")
        seen_keys = set()
        questions = question["questions"]
        for index, exercise in enumerate(questions):
            questions[index], seen_keys = await self._replace_exercise_if_exists(
                all_exams, exercise, question, seen_keys
            )

        return {
            "id": str(uuid.uuid4()),
            "prompt": "Select the appropriate option.",
            "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
            "type": "multipleChoice",
        }

    async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
        """Return a question that is new to both this batch and the stored exams.

        ``current_exercise`` is kept when its key is not in ``seen_keys`` and it
        does not collide with the first exercise of any exam in ``all_exams``;
        otherwise single questions are generated until one passes both checks.

        :param all_exams: stored exams; each item must expose ``to_dict()``.
        :param current_exercise: candidate question dict (``prompt`` and ``options``).
        :param current_exam: unused; kept for interface compatibility.
        :param seen_keys: set of keys already accepted in this batch (mutated in place).
        :return: tuple ``(accepted_exercise, seen_keys)``.
        """
        while True:
            key = self._exercise_key(current_exercise)
            if key not in seen_keys:
                seen_keys.add(key)
                if not self._exists_in_stored_exams(all_exams, current_exercise):
                    return current_exercise, seen_keys
            current_exercise = await self._generate_single_mc_level_question()

    def _exists_in_stored_exams(self, all_exams, candidate) -> bool:
        """Tell whether ``candidate`` collides with the first exercise of any stored exam."""
        for exam in all_exams:
            exercises = exam.to_dict().get("exercises", [])
            # An exam without exercises has nothing to compare against; indexing it would raise.
            if not exercises:
                continue
            if self._matches_any(exercises[0]["questions"], candidate):
                return True
        return False

    async def _generate_single_mc_level_question(self):
        """Ask the model for one replacement multiple-choice question and return its dict."""
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
                    '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}'
                )
            },
            {
                "role": "user",
                "content": (
                    'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
                    'intermediate or advanced.'
                )
            }
        ]

        return await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    # ------------------------------------------------------------------
    # UTAS level exam
    # ------------------------------------------------------------------

    async def get_level_utas(self):
        """Generate the five-part UTAS level exam.

        Question ids are laid out as: part 1 uses three batches of 15 starting
        at 1, 16 and 31; part 2 starts at 46; the blank-space texts start at 61
        (12 words) and 73 (14 words); the reading part starts at 87 with 10
        short-answer and 4 multiple-choice questions.
        """
        # Formats
        mc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the correct word or group of words that completes the sentences.",
            "questions": None,
            "type": "multipleChoice",
            "part": 1
        }

        umc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the underlined word or group of words that is not correct.",
            "questions": None,
            "type": "multipleChoice",
            "part": 2
        }

        bs_1 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 3
        }

        bs_2 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 4
        }

        reading = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and answer the questions below.",
            "questions": None,
            "type": "readingExercises",
            "part": 5
        }

        # Part 1 is generated in three batches; every batch is de-duplicated
        # against the batches produced before it.
        all_mc_questions = []
        mc_exercises = []
        for batch_start_id in (1, 16, 31):
            batch = await self._gen_multiple_choice_blank_space_utas(15, batch_start_id, all_mc_questions)
            print(json.dumps(batch, indent=4))
            all_mc_questions.append(batch)
            mc_exercises = mc_exercises + batch['questions']

        print(json.dumps(mc_exercises, indent=4))
        mc["questions"] = mc_exercises

        # Underlined mc
        underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
        print(json.dumps(underlined_mc, indent=4))
        umc["questions"] = underlined_mc

        # Blank Space text 1
        blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
        print(json.dumps(blank_space_text_1, indent=4))
        bs_1["questions"] = blank_space_text_1

        # Blank Space text 2
        blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
        print(json.dumps(blank_space_text_2, indent=4))
        bs_2["questions"] = blank_space_text_2

        # Reading text
        reading_text = await self._gen_reading_passage_utas(87, 10, 4)
        print(json.dumps(reading_text, indent=4))
        reading["questions"] = reading_text

        return {
            "exercises": {
                "blankSpaceMultipleChoice": mc,
                "underlinedMultipleChoice": umc,
                "blankSpaceText1": bs_1,
                "blankSpaceText2": bs_2,
                "readingExercises": reading,
            },
            "isDiagnostic": False,
            "minTimer": 25,
            "module": "level"
        }

    async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
        """Generate ``quantity`` blank-space multiple-choice questions.

        The request is repeated (with the same arguments) until the model
        returns exactly ``quantity`` questions. Duplicates within the batch or
        against ``all_exams`` are replaced.

        :param quantity: number of questions to request.
        :param start_id: id passed to ``ExercisesHelper.fix_exercise_ids``.
        :param all_exams: previously generated batches (dicts with a ``questions`` list).
        :return: the value of ``ExercisesHelper.fix_exercise_ids`` for the batch
            (callers read its ``questions`` key).
        """
        gen_multiple_choice_for_text = (
            f'Generate {quantity} multiple choice blank space questions of 4 options for an english '
            'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure '
            'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
            'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
        )

        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"id": "9", "options": [{"id": "A", "text": '
                    '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                    '"Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}]}')
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        # Retry this same generator so the blank-space prompt and the
        # cross-batch de-duplication are kept.
        if len(question["questions"]) != quantity:
            return await self._gen_multiple_choice_blank_space_utas(quantity, start_id, all_exams)

        seen_keys = set()
        questions = question["questions"]
        for index, exercise in enumerate(questions):
            questions[index], seen_keys = await self._replace_exercise_if_exists_utas(
                all_exams, exercise, question, seen_keys
            )
        return ExercisesHelper.fix_exercise_ids(question, start_id)

    async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
        """Return a question that is new to both this batch and earlier batches.

        Same contract as ``_replace_exercise_if_exists`` except that
        ``all_exams`` holds plain dicts whose ``questions`` list is compared
        directly.

        :param all_exams: earlier batches, each a dict with an optional ``questions`` list.
        :param current_exercise: candidate question dict (``prompt`` and ``options``).
        :param current_exam: unused; kept for interface compatibility.
        :param seen_keys: set of keys already accepted in this batch (mutated in place).
        :return: tuple ``(accepted_exercise, seen_keys)``.
        """
        while True:
            key = self._exercise_key(current_exercise)
            if key not in seen_keys:
                seen_keys.add(key)
                collides = any(
                    self._matches_any(exam.get("questions", []), current_exercise)
                    for exam in all_exams
                )
                if not collides:
                    return current_exercise, seen_keys
            current_exercise = await self._generate_single_mc_level_question()

    async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
        """Generate ``quantity`` "find the wrong underlined word" questions.

        The request is repeated until the model returns exactly ``quantity``
        questions.

        :return: list of question dicts with ids fixed by ``ExercisesHelper.fix_exercise_ids``.
        """
        gen_multiple_choice_for_text = (
            f'Generate {quantity} multiple choice questions of 4 options for an english '
            'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that '
            'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
            'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
        )

        messages = [
            self._json_system_message(self._MC_JSON_FORMAT),
            {
                "role": "user",
                "content": gen_multiple_choice_for_text
            },
            {
                "role": "user",
                "content": (
                    'The type of multiple choice is the prompt has wrong words or group of words and the options '
                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                    'Prompt: "I complain about my boss all the time, but my colleagues thinks '
                    'the boss is nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
                )
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        # Retry this same generator: the caller expects a list of underlined questions.
        if len(question["questions"]) != quantity:
            return await self._gen_multiple_choice_underlined_utas(quantity, start_id)
        return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]

    async def _gen_blank_space_text_utas(self, quantity: int, start_id: int, size: int, topic=None):
        """Generate a text with ``quantity`` words replaced by numbered placeholders.

        :param quantity: number of words to blank out.
        :param start_id: id of the first placeholder.
        :param size: minimum number of words requested for the text.
        :param topic: subject of the text; when ``None`` a topic is drawn at
            random from ``EducationalContent.MTI_TOPICS`` on every call.
        :return: the ``question`` object of the model response.
        """
        # Drawn here, not in the signature: a default value is evaluated only
        # once, which would pin the same topic for the whole process lifetime.
        if topic is None:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        json_format = {
            "question": {
                "words": [
                    {"id": "1", "text": "a"},
                    {"id": "2", "text": "b"},
                    {"id": "3", "text": "c"},
                    {"id": "4", "text": "d"},
                ],
                "text": "text"
            }
        }

        messages = [
            self._json_system_message(json_format),
            {
                "role": "user",
                "content": f'Generate a text of at least {size} words about the topic {topic}.'
            },
            {
                "role": "user",
                # NOTE(review): only the first fragment is an f-string, so "{{id}}"
                # reaches the model with both pairs of braces - confirm this is the
                # placeholder syntax the consumer expects.
                "content": (
                    f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                    'The ids must be ordered throughout the text and the words must be replaced only once. Put '
                    'the removed words and respective ids on the words array of the json in the correct order.'
                )
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question["question"]

    async def _gen_reading_passage_utas(self, start_id, sa_quantity: int, mc_quantity: int, topic=None):
        """Generate a reading passage with short-answer and multiple-choice questions.

        Short-answer questions take the ids starting at ``start_id``; the
        multiple-choice questions continue at ``start_id + sa_quantity``.

        :param start_id: id of the first short-answer question.
        :param sa_quantity: number of short-answer questions.
        :param mc_quantity: number of multiple-choice questions.
        :param topic: subject of the passage; when ``None`` a topic is drawn at
            random from ``EducationalContent.MTI_TOPICS`` on every call.
        :return: dict with the ``exercises`` and the passage ``text`` (content and title).
        """
        # See _gen_blank_space_text_utas: the random draw must happen per call.
        if topic is None:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
        short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
        return {
            "exercises": {
                "shortAnswer": short_answer,
                "multipleChoice": mc_exercises,
            },
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }

    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
        """Generate ``sa_quantity`` short-answer questions about ``text``.

        :return: the ``questions`` list of the model response (ids are assigned
            by the model, which is told to start at ``start_id``).
        """
        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

        messages = [
            self._json_system_message(json_format),
            {
                "role": "user",
                "content": (
                    'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
                    'maximum 3 words per answer, about this text:\n"' + text + '"')
            },
            {
                "role": "user",
                "content": 'The id starts at ' + str(start_id) + '.'
            }
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return response["questions"]

    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
        """Generate ``mc_quantity`` multiple-choice questions about ``text``.

        The request is repeated until the model returns exactly
        ``mc_quantity`` questions.

        :return: list of question dicts with ids fixed by ``ExercisesHelper.fix_exercise_ids``.
        """
        messages = [
            self._json_system_message(self._MC_JSON_FORMAT),
            {
                "role": "user",
                "content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
            },
            {
                "role": "user",
                "content": 'Make sure every question only has 1 correct answer.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        # Retry this same generator so the questions stay tied to the passage
        # and the return value remains a list.
        if len(question["questions"]) != mc_quantity:
            return await self._gen_text_multiple_choice_utas(text, start_id, mc_quantity)
        return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]