import json
import random
import uuid
from typing import Dict

from fastapi import UploadFile

from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
    IListeningService
from .custom import CustomLevelModule
from .upload import UploadLevelModule


class LevelService(ILevelService):
    """Build English level exams by prompting an LLM service.

    Multiple-choice generation is driven by the JSON templates in
    ``mc_variants``; file uploads and custom levels are delegated to
    ``UploadLevelModule`` and ``CustomLevelModule`` respectively.
    """

    # Extra user message appended to the prompt for the "underline" variant.
    # Shared by the batch and the single-question generators.
    _UNDERLINE_INSTRUCTION = (
        'The type of multiple choice in the prompt has wrong words or group of words and the options '
        'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
        'Prompt: "I complain about my boss all the time, but my colleagues thinks '
        'the boss is nice."\n'
        'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
    )

    def __init__(
            self,
            llm: ILLMService,
            document_store: IDocumentStore,
            mc_variants: Dict,
            reading_service: IReadingService,
            writing_service: IWritingService,
            speaking_service: ISpeakingService,
            listening_service: IListeningService
    ):
        """Store the collaborators and build the custom/upload sub-modules.

        The writing, speaking and listening services are not kept on the
        instance; they are only forwarded to ``CustomLevelModule``.
        """
        self._llm = llm
        self._document_store = document_store
        self._reading_service = reading_service
        self._custom_module = CustomLevelModule(
            llm, self, reading_service, listening_service, writing_service, speaking_service
        )
        self._upload_module = UploadLevelModule(llm)

        # TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
        # mc_variants are stored in ./mc_variants.json
        self._mc_variants = mc_variants

    async def upload_level(self, upload: UploadFile) -> Dict:
        """Delegate level generation from an uploaded file to the upload module."""
        return await self._upload_module.generate_level_from_file(upload)

    async def get_custom_level(self, data: Dict):
        """Delegate custom level generation to the custom module."""
        return await self._custom_module.get_custom_level(data)

    async def get_level_exam(
            self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
    ) -> Dict:
        """Return a level exam made of a single "normal" multiple-choice exercise.

        Args:
            number_of_exercises: How many questions to request.
            min_timer: Value echoed back under ``minTimer``.
            diagnostic: Value echoed back under ``isDiagnostic``.
        """
        exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
        return {
            "exercises": [exercises],
            "isDiagnostic": diagnostic,
            "minTimer": min_timer,
            "module": "level"
        }

    async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
        """Return the five-part UTAS level exam.

        Parts: blank-space multiple choice (3 batches of 15, ids 1-45),
        underlined multiple choice (15, ids from 46), two blank-space texts
        (12 words from id 61, 14 words from id 73) and a reading passage
        (ids from 87).
        """
        # Formats
        mc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the correct word or group of words that completes the sentences.",
            "questions": None,
            "type": "multipleChoice",
            "part": 1
        }
        umc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the underlined word or group of words that is not correct.",
            "questions": None,
            "type": "multipleChoice",
            "part": 2
        }
        bs_1 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 3
        }
        bs_2 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 4
        }
        reading = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and answer the questions below.",
            "questions": None,
            "type": "readingExercises",
            "part": 5
        }

        # Blank-space multiple choice, generated in three batches. Each batch
        # is checked against the batches produced before it, so the list is
        # grown only after the batch has been generated.
        all_mc_questions = []
        for first_id in (1, 16, 31):
            batch = await self.gen_multiple_choice(
                "blank_space", 15, first_id, utas=True, all_exams=all_mc_questions
            )
            print(json.dumps(batch, indent=4))
            all_mc_questions.append(batch)

        mc_exercises = (
            all_mc_questions[0]['questions']
            + all_mc_questions[1]['questions']
            + all_mc_questions[2]['questions']
        )
        print(json.dumps(mc_exercises, indent=4))
        mc["questions"] = mc_exercises

        # Underlined mc
        underlined_mc = await self.gen_multiple_choice(
            "underline", 15, 46, utas=True, all_exams=all_mc_questions
        )
        print(json.dumps(underlined_mc, indent=4))
        # NOTE(review): this stores the whole response dict, whereas part 1
        # stores a plain list of questions - confirm which shape consumers expect.
        umc["questions"] = underlined_mc

        # Blank Space text 1
        blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
        print(json.dumps(blank_space_text_1, indent=4))
        bs_1["questions"] = blank_space_text_1

        # Blank Space text 2
        blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
        print(json.dumps(blank_space_text_2, indent=4))
        bs_2["questions"] = blank_space_text_2

        # Reading text
        reading_text = await self.gen_reading_passage_utas(87, 10, 4)
        print(json.dumps(reading_text, indent=4))
        reading["questions"] = reading_text

        return {
            "exercises": {
                "blankSpaceMultipleChoice": mc,
                "underlinedMultipleChoice": umc,
                "blankSpaceText1": bs_1,
                "blankSpaceText2": bs_2,
                "readingExercises": reading,
            },
            "isDiagnostic": diagnostic,
            "minTimer": min_timer,
            "module": "level"
        }

    async def gen_multiple_choice(
            self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
    ):
        """Generate ``quantity`` multiple-choice questions of the given variant.

        Args:
            mc_variant: Key into ``mc_variants``; "blank_space" and
                "underline" additionally tweak the prompt.
            quantity: Number of questions the LLM must return.
            start_id: Id passed to ``ExercisesHelper.fix_exercise_ids``.
            utas: When false, duplicates are checked against the "level"
                documents in the store and a wrapped exercise dict is
                returned. When true, duplicates are checked against
                ``all_exams`` (if given) and the LLM response itself is
                returned with its options order randomized.
            all_exams: Previously generated exercises (UTAS flow only).

        NOTE: a response with the wrong number of questions triggers a
        recursive retry with no upper bound.
        """
        mc_template = self._mc_variants[mc_variant]
        blank_mod = " blank space " if mc_variant == "blank_space" else " "

        gen_multiple_choice_for_text: str = (
            'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )

        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
                )
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
            }
        ]

        if mc_variant == "underline":
            messages.append({"role": "user", "content": self._UNDERLINE_INSTRUCTION})

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if len(question["questions"]) != quantity:
            return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)

        if not utas:
            all_exams = await self._document_store.get_all("level")
            await self._deduplicate_questions(question, all_exams, mc_variant, utas)
            return {
                "id": str(uuid.uuid4()),
                "prompt": "Select the appropriate option.",
                "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
                "type": "multipleChoice",
            }

        if all_exams is not None:
            await self._deduplicate_questions(question, all_exams, mc_variant, utas)
        response = ExercisesHelper.fix_exercise_ids(question, start_id)
        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
        return response

    async def _deduplicate_questions(self, question, all_exams, mc_variant: str, utas: bool):
        """Replace, in place, every question that repeats an earlier or stored one."""
        seen_keys = set()
        for index, exercise in enumerate(question["questions"]):
            question["questions"][index], seen_keys = await self._replace_exercise_if_exists(
                all_exams, exercise, question, seen_keys, mc_variant, utas
            )

    async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
        """Ask the LLM for one replacement question of the given variant."""
        mc_template = self._mc_variants[mc_variant]["questions"][0]
        blank_mod = " blank space " if mc_variant == "blank_space" else " "
        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
                    f'it can be easy, intermediate or advanced.'
                )
            }
        ]

        if mc_variant == "underline":
            messages.append({"role": "user", "content": self._UNDERLINE_INSTRUCTION})

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question

    @staticmethod
    def _iter_stored_exercises(all_exams, utas: bool):
        """Lazily yield the stored questions a candidate is compared against.

        UTAS exams are plain dicts with a ``questions`` list. Otherwise each
        exam exposes ``to_dict()`` and only the questions of the first
        exercise of the first part are inspected.
        """
        for exam in all_exams:
            if utas:
                yield from exam.get("questions", [])
                continue
            exam_dict = exam.to_dict()
            parts = exam_dict.get("parts", [])
            if len(parts) > 0:
                exercises = parts[0].get("exercises", [])
                if len(exercises) > 0:
                    yield from exercises[0]["questions"]

    @staticmethod
    def _is_same_exercise(stored, candidate) -> bool:
        """Tell whether ``stored`` has the candidate's prompt and its first option among the candidate's options."""
        return stored["prompt"] == candidate["prompt"] and any(
            stored["options"][0]["text"] == option["text"] for option in candidate["options"]
        )

    async def _replace_exercise_if_exists(
            self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
    ):
        """Return ``current_exercise`` or a freshly generated, unseen replacement.

        An exercise is replaced when its (prompt, sorted option texts) key was
        already seen in this batch, or when it matches a stored exercise.
        Replacement recurses until a non-duplicate is produced.

        Returns:
            A ``(exercise, seen_keys)`` tuple; ``seen_keys`` is the same set
            that was passed in, updated in place.
        """
        # Extracting relevant fields for comparison
        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))

        if key not in seen_keys:
            seen_keys.add(key)
            already_stored = any(
                self._is_same_exercise(stored, current_exercise)
                for stored in self._iter_stored_exercises(all_exams, utas)
            )
            if not already_stored:
                return current_exercise, seen_keys

        # Duplicate within the batch or against stored exams: regenerate.
        replacement = await self._generate_single_multiple_choice(mc_variant)
        return await self._replace_exercise_if_exists(
            all_exams, replacement, current_exam, seen_keys, mc_variant, utas
        )

    async def gen_blank_space_text_utas(
            self, quantity: int, start_id: int, size: int, topic=None
    ):
        """Generate a text with ``quantity`` words replaced by id placeholders.

        Args:
            quantity: Number of words to blank out.
            start_id: Id of the first blank.
            size: Minimum number of words requested for the text.
            topic: Topic of the text; when omitted, one is drawn at random
                from ``EducationalContent.MTI_TOPICS`` on every call.
        """
        if topic is None:
            # Picked here rather than in the signature: a default argument is
            # evaluated only once, which would pin one topic per process.
            topic = random.choice(EducationalContent.MTI_TOPICS)

        json_template = self._mc_variants["blank_space_text"]
        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
            },
            {
                "role": "user",
                "content": f'Generate a text of at least {size} words about the topic {topic}.'
            },
            {
                "role": "user",
                "content": (
                    f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                    'The ids must be ordered throughout the text and the words must be replaced only once. '
                    'Put the removed words and respective ids on the words array of the json in the correct order.'
                )
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question["question"]

    async def gen_reading_passage_utas(
            self, start_id, sa_quantity: int, mc_quantity: int, topic=None
    ):
        """Generate a reading passage with short-answer and multiple-choice questions.

        Short-answer ids start at ``start_id``; multiple-choice ids start at
        ``start_id + sa_quantity``. When ``topic`` is omitted, one is drawn at
        random from ``EducationalContent.MTI_TOPICS`` on every call.
        """
        if topic is None:
            # See gen_blank_space_text_utas: avoid a definition-time default.
            topic = random.choice(EducationalContent.MTI_TOPICS)

        passage = await self._reading_service.generate_reading_passage(1, topic)
        short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
        return {
            "exercises": {
                "shortAnswer": short_answer,
                "multipleChoice": mc_exercises,
            },
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }

    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
        """Ask the LLM for ``sa_quantity`` short-answer questions about ``text``."""
        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
            },
            {
                "role": "user",
                "content": (
                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
                    f'maximum 3 words per answer, about this text:\n"{text}"'
                )
            },
            {
                "role": "user",
                "content": f'The id starts at {start_id}.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        return question["questions"]

    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
        """Ask the LLM for ``mc_quantity`` multiple-choice questions about ``text``.

        Ids are fixed up from ``start_id`` and the options order is randomized.

        NOTE: a response with the wrong number of questions triggers a
        recursive retry with no upper bound.
        """
        json_template = self._mc_variants["text_mc_utas"]

        messages = [
            {
                "role": "system",
                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
            },
            {
                "role": "user",
                "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
            },
            {
                "role": "user",
                "content": 'Make sure every question only has 1 correct answer.'
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

        if len(question["questions"]) != mc_quantity:
            # Retry with the arguments in signature order (text, start_id, mc_quantity).
            return await self._gen_text_multiple_choice_utas(text, start_id, mc_quantity)

        response = ExercisesHelper.fix_exercise_ids(question, start_id)
        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
        return response