Fastapi refactor update

2024-10-01 19:31:01 +01:00
parent f92a803d96
commit 2a032c5aba
132 changed files with 22856 additions and 10309 deletions
--- a/app/services/impl/exam/level/level.py
+++ b/app/services/impl/exam/level/level.py
@@ -0,0 +1,417 @@
+import json
+import logging
+import random
+import uuid
+
+from typing import Dict
+
+from fastapi import UploadFile
+
+from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
+from app.helpers import ExercisesHelper
+from app.repositories.abc import IDocumentStore
+from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
+    IListeningService
+from .custom import CustomLevelModule
+from .upload import UploadLevelModule
+
+
+class LevelService(ILevelService):
+
+    def __init__(
+        self,
+        llm: ILLMService,
+        document_store: IDocumentStore,
+        mc_variants: Dict,
+        reading_service: IReadingService,
+        writing_service: IWritingService,
+        speaking_service: ISpeakingService,
+        listening_service: IListeningService
+    ):
+        """Store the collaborators and build the custom/upload sub-modules.
+
+        :param llm: LLM service used for every generation request in this class.
+        :param document_store: store queried for previously saved "level" exams.
+        :param mc_variants: JSON templates looked up by variant name; this class reads the
+            "normal", "blank_space", "underline", "blank_space_text" and "text_mc_utas" keys.
+        :param reading_service: kept on the instance and also forwarded to the custom module.
+        :param writing_service: only forwarded to the custom module, not kept on the instance.
+        :param speaking_service: only forwarded to the custom module, not kept on the instance.
+        :param listening_service: only forwarded to the custom module, not kept on the instance.
+        """
+        self._llm = llm
+        self._document_store = document_store
+        self._reading_service = reading_service
+        # NOTE: ``self`` is handed over before ``_mc_variants`` is assigned below, so the
+        # statement order here matters if CustomLevelModule touches this instance on construction.
+        self._custom_module = CustomLevelModule(
+            llm, self, reading_service, listening_service, writing_service, speaking_service
+        )
+        self._upload_module = UploadLevelModule(llm)
+
+        # TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
+        # mc_variants are stored in ./mc_variants.json
+        self._mc_variants = mc_variants
+
+    async def upload_level(self, upload: UploadFile) -> Dict:
+        return await self._upload_module.generate_level_from_file(upload)
+
+    async def get_custom_level(self, data: Dict):
+        return await self._custom_module.get_custom_level(data)
+
+    async def get_level_exam(
+            self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
+    ) -> Dict:
+        exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
+        return {
+            "exercises": [exercises],
+            "isDiagnostic": diagnostic,
+            "minTimer": min_timer,
+            "module": "level"
+        }
+
+    async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
+        # Formats
+        mc = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Choose the correct word or group of words that completes the sentences.",
+            "questions": None,
+            "type": "multipleChoice",
+            "part": 1
+        }
+
+        umc = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Choose the underlined word or group of words that is not correct.",
+            "questions": None,
+            "type": "multipleChoice",
+            "part": 2
+        }
+
+        bs_1 = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Read the text and write the correct word for each space.",
+            "questions": None,
+            "type": "blankSpaceText",
+            "part": 3
+        }
+
+        bs_2 = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Read the text and write the correct word for each space.",
+            "questions": None,
+            "type": "blankSpaceText",
+            "part": 4
+        }
+
+        reading = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Read the text and answer the questions below.",
+            "questions": None,
+            "type": "readingExercises",
+            "part": 5
+        }
+
+        all_mc_questions = []
+
+        # PART 1
+        # await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
+        mc_exercises1 = await self.gen_multiple_choice(
+            "blank_space", 15, 1, utas=True, all_exams=all_mc_questions
+        )
+        print(json.dumps(mc_exercises1, indent=4))
+        all_mc_questions.append(mc_exercises1)
+
+        # PART 2
+        mc_exercises2 = await self.gen_multiple_choice(
+            "blank_space", 15, 16, utas=True, all_exams=all_mc_questions
+        )
+        print(json.dumps(mc_exercises2, indent=4))
+        all_mc_questions.append(mc_exercises2)
+
+        # PART 3
+        mc_exercises3 = await self.gen_multiple_choice(
+            "blank_space", 15, 31, utas=True, all_exams=all_mc_questions
+        )
+        print(json.dumps(mc_exercises3, indent=4))
+        all_mc_questions.append(mc_exercises3)
+
+        mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
+        print(json.dumps(mc_exercises, indent=4))
+        mc["questions"] = mc_exercises
+
+        # Underlined mc
+        underlined_mc = await self.gen_multiple_choice(
+            "underline", 15, 46, utas=True, all_exams=all_mc_questions
+        )
+        print(json.dumps(underlined_mc, indent=4))
+        umc["questions"] = underlined_mc
+
+        # Blank Space text 1
+        blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
+        print(json.dumps(blank_space_text_1, indent=4))
+        bs_1["questions"] = blank_space_text_1
+
+        # Blank Space text 2
+        blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
+        print(json.dumps(blank_space_text_2, indent=4))
+        bs_2["questions"] = blank_space_text_2
+
+        # Reading text
+        reading_text = await self.gen_reading_passage_utas(87, 10, 4)
+        print(json.dumps(reading_text, indent=4))
+        reading["questions"] = reading_text
+
+        return {
+            "exercises": {
+                "blankSpaceMultipleChoice": mc,
+                "underlinedMultipleChoice": umc,
+                "blankSpaceText1": bs_1,
+                "blankSpaceText2": bs_2,
+                "readingExercises": reading,
+            },
+            "isDiagnostic": diagnostic,
+            "minTimer": min_timer,
+            "module": "level"
+        }
+
+    async def gen_multiple_choice(
+            self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
+    ):
+        mc_template = self._mc_variants[mc_variant]
+        blank_mod = " blank space " if mc_variant == "blank_space" else " "
+
+        gen_multiple_choice_for_text: str = (
+            'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
+            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
+            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
+            'punctuation. Make sure every question only has 1 correct answer.'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
+                )
+            },
+            {
+                "role": "user",
+                "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
+            }
+        ]
+
+        if mc_variant == "underline":
+            messages.append({
+                "role": "user",
+                "content": (
+                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
+                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
+                    'the boss <u>is</u> nice."\n'
+                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+                )
+            })
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != quantity:
+            return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
+        else:
+            if not utas:
+                all_exams = await self._document_store.get_all("level")
+                seen_keys = set()
+                for i in range(len(question["questions"])):
+                    question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                        all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+                    )
+                return {
+                    "id": str(uuid.uuid4()),
+                    "prompt": "Select the appropriate option.",
+                    "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
+                    "type": "multipleChoice",
+                }
+            else:
+                if all_exams is not None:
+                    seen_keys = set()
+                    for i in range(len(question["questions"])):
+                        question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                            all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+                        )
+                response = ExercisesHelper.fix_exercise_ids(question, start_id)
+                response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+                return response
+
+    async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
+        mc_template = self._mc_variants[mc_variant]["questions"][0]
+        blank_mod = " blank space " if mc_variant == "blank_space" else " "
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
+                    f'it can be easy, intermediate or advanced.'
+                )
+
+            }
+        ]
+
+        if mc_variant == "underline":
+            messages.append({
+                "role": "user",
+                "content": (
+                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
+                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
+                    'the boss <u>is</u> nice."\n'
+                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+                )
+            })
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question
+
+    async def _replace_exercise_if_exists(
+            self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
+    ):
+        # Extracting relevant fields for comparison
+        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+        # Check if the key is in the set
+        if key in seen_keys:
+            return await self._replace_exercise_if_exists(
+                all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
+                mc_variant, utas
+            )
+        else:
+            seen_keys.add(key)
+
+        if not utas:
+            for exam in all_exams:
+                exam_dict = exam.to_dict()
+                if len(exam_dict.get("parts", [])) > 0:
+                    exercise_dict = exam_dict.get("parts", [])[0]
+                    if len(exercise_dict.get("exercises", [])) > 0:
+                        if any(
+                                exercise["prompt"] == current_exercise["prompt"] and
+                                any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+                                    current_exercise["options"])
+                                for exercise in exercise_dict.get("exercises", [])[0]["questions"]
+                        ):
+                            return await self._replace_exercise_if_exists(
+                                all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
+                                seen_keys, mc_variant, utas
+                            )
+        else:
+            for exam in all_exams:
+                if any(
+                        exercise["prompt"] == current_exercise["prompt"] and
+                        any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+                            current_exercise["options"])
+                        for exercise in exam.get("questions", [])
+                ):
+                    return await self._replace_exercise_if_exists(
+                        all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
+                        seen_keys, mc_variant, utas
+                    )
+        return current_exercise, seen_keys
+
+    async def gen_blank_space_text_utas(
+            self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
+    ):
+        json_template = self._mc_variants["blank_space_text"]
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+            },
+            {
+                "role": "user",
+                "content": f'Generate a text of at least {size} words about the topic {topic}.'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
+                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
+                    'The ids must be ordered throughout the text and the words must be replaced only once. '
+                    'Put the removed words and respective ids on the words array of the json in the correct order.'
+                )
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages,  ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question["question"]
+
+    async def gen_reading_passage_utas(
+            self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
+    ):
+        passage = await self._reading_service.generate_reading_passage(1, topic)
+        short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
+        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
+        return {
+            "exercises": {
+                "shortAnswer": short_answer,
+                "multipleChoice": mc_exercises,
+            },
+            "text": {
+                "content": passage["text"],
+                "title": passage["title"]
+            }
+        }
+
+    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
+        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
+
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
+                    f'maximum 3 words per answer, about this text:\n"{text}"'
+                )
+            },
+            {
+                "role": "user",
+                "content": f'The id starts at {start_id}.'
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question["questions"]
+
+    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
+        json_template = self._mc_variants["text_mc_utas"]
+
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+            },
+            {
+                "role": "user",
+                "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
+            },
+            {
+                "role": "user",
+                "content": 'Make sure every question only has 1 correct answer.'
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != mc_quantity:
+            return await self._gen_text_multiple_choice_utas(text, mc_quantity, start_id)
+        else:
+            response = ExercisesHelper.fix_exercise_ids(question, start_id)
+            response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+            return response