Async release

2024-07-23 08:40:35 +01:00
parent a4caecdb4f
commit 3cf9fa5cba
116 changed files with 5609 additions and 30630 deletions
--- a/app/services/impl/level.py
+++ b/app/services/impl/level.py
@@ -0,0 +1,506 @@
+import json
+import random
+import uuid
+
+from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
+from app.helpers import ExercisesHelper
+from app.repositories.abc import IDocumentStore
+from app.services.abc import ILevelService, ILLMService, IReadingService
+
+
+class LevelService(ILevelService):
+
+    def __init__(
+        self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService
+    ):
+        self._llm = llm
+        self._document_store = document_store
+        self._reading_service = reading_service
+
+    async def get_level_exam(self):
+        number_of_exercises = 25
+        exercises = await self._gen_multiple_choice_level(number_of_exercises)
+        return {
+            "exercises": [exercises],
+            "isDiagnostic": False,
+            "minTimer": 25,
+            "module": "level"
+        }
+
+    async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
+        gen_multiple_choice_for_text = (
+            f'Generate {str(quantity)} multiple choice questions of 4 options for an english level exam, some easy '
+            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
+            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
+            'punctuation. Make sure every question only has 1 correct answer.'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"questions": [{"id": "9", "options": '
+                    '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
+                    '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
+                    '"prompt": "Which of the following is a conjunction?", '
+                    '"solution": "A", "variant": "text"}]}'
+                )
+            },
+            {
+                "role": "user",
+                "content": gen_multiple_choice_for_text
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != quantity:
+            return await self._gen_multiple_choice_level(quantity, start_id)
+        else:
+            all_exams = await self._document_store.get_all("level")
+            seen_keys = set()
+            for i in range(len(question["questions"])):
+                question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                    all_exams, question["questions"][i], question, seen_keys
+                )
+            return {
+                "id": str(uuid.uuid4()),
+                "prompt": "Select the appropriate option.",
+                "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
+                "type": "multipleChoice",
+            }
+
+    async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
+        # Extracting relevant fields for comparison
+        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+        # Check if the key is in the set
+        if key in seen_keys:
+            return await self._replace_exercise_if_exists(
+                all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
+            )
+        else:
+            seen_keys.add(key)
+
+        for exam in all_exams:
+            exam_dict = exam.to_dict()
+            if any(
+                    exercise["prompt"] == current_exercise["prompt"] and
+                    any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+                        current_exercise["options"])
+                    for exercise in exam_dict.get("exercises", [])[0]["questions"]
+            ):
+                return await self._replace_exercise_if_exists(
+                    all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
+                )
+        return current_exercise, seen_keys
+
+    async def _generate_single_mc_level_question(self):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
+                    '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
+                    '"prompt": "Which of the following is a conjunction?", '
+                    '"solution": "A", "variant": "text"}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
+                    'intermediate or advanced.'
+                )
+
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question
+
+    async def get_level_utas(self):
+        # Formats
+        mc = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Choose the correct word or group of words that completes the sentences.",
+            "questions": None,
+            "type": "multipleChoice",
+            "part": 1
+        }
+
+        umc = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Choose the underlined word or group of words that is not correct.",
+            "questions": None,
+            "type": "multipleChoice",
+            "part": 2
+        }
+
+        bs_1 = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Read the text and write the correct word for each space.",
+            "questions": None,
+            "type": "blankSpaceText",
+            "part": 3
+        }
+
+        bs_2 = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Read the text and write the correct word for each space.",
+            "questions": None,
+            "type": "blankSpaceText",
+            "part": 4
+        }
+
+        reading = {
+            "id": str(uuid.uuid4()),
+            "prompt": "Read the text and answer the questions below.",
+            "questions": None,
+            "type": "readingExercises",
+            "part": 5
+        }
+
+        all_mc_questions = []
+
+        # PART 1
+        mc_exercises1 = await self._gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions)
+        print(json.dumps(mc_exercises1, indent=4))
+        all_mc_questions.append(mc_exercises1)
+
+        # PART 2
+        mc_exercises2 = await self._gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions)
+        print(json.dumps(mc_exercises2, indent=4))
+        all_mc_questions.append(mc_exercises2)
+
+        # PART 3
+        mc_exercises3 = await self._gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions)
+        print(json.dumps(mc_exercises3, indent=4))
+        all_mc_questions.append(mc_exercises3)
+
+        mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
+        print(json.dumps(mc_exercises, indent=4))
+        mc["questions"] = mc_exercises
+
+        # Underlined mc
+        underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
+        print(json.dumps(underlined_mc, indent=4))
+        umc["questions"] = underlined_mc
+
+        # Blank Space text 1
+        blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
+        print(json.dumps(blank_space_text_1, indent=4))
+        bs_1["questions"] = blank_space_text_1
+
+        # Blank Space text 2
+        blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
+        print(json.dumps(blank_space_text_2, indent=4))
+        bs_2["questions"] = blank_space_text_2
+
+        # Reading text
+        reading_text = await self._gen_reading_passage_utas(87, 10, 4)
+        print(json.dumps(reading_text, indent=4))
+        reading["questions"] = reading_text
+
+        return {
+            "exercises": {
+                "blankSpaceMultipleChoice": mc,
+                "underlinedMultipleChoice": umc,
+                "blankSpaceText1": bs_1,
+                "blankSpaceText2": bs_2,
+                "readingExercises": reading,
+            },
+            "isDiagnostic": False,
+            "minTimer": 25,
+            "module": "level"
+        }
+
+    async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
+        gen_multiple_choice_for_text = (
+            f'Generate {str(quantity)} multiple choice blank space questions of 4 options for an english '
+            'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure '
+            'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
+            'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"questions": [{"id": "9", "options": [{"id": "A", "text": '
+                    '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+                    '"Happy"}, {"id": "D", "text": "Jump"}], '
+                    '"prompt": "Which of the following is a conjunction?", '
+                    '"solution": "A", "variant": "text"}]}')
+            },
+            {
+                "role": "user",
+                "content": gen_multiple_choice_for_text
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != quantity:
+            return await self._gen_multiple_choice_level(quantity, start_id)
+        else:
+            seen_keys = set()
+            for i in range(len(question["questions"])):
+                question["questions"][i], seen_keys = await self._replace_exercise_if_exists_utas(
+                    all_exams,
+                    question["questions"][i],
+                    question,
+                    seen_keys
+                )
+            return ExercisesHelper.fix_exercise_ids(question, start_id)
+
+    async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
+        # Extracting relevant fields for comparison
+        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+        # Check if the key is in the set
+        if key in seen_keys:
+            return self._replace_exercise_if_exists_utas(
+                all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
+            )
+        else:
+            seen_keys.add(key)
+
+        for exam in all_exams:
+            if any(
+                    exercise["prompt"] == current_exercise["prompt"] and
+                    any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+                        current_exercise["options"])
+                    for exercise in exam.get("questions", [])
+            ):
+                return self._replace_exercise_if_exists_utas(
+                    all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
+                )
+        return current_exercise, seen_keys
+
+
+    async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
+        json_format = {
+            "questions": [
+                {
+                    "id": "9",
+                    "options": [
+                        {
+                            "id": "A",
+                            "text": "a"
+                        },
+                        {
+                            "id": "B",
+                            "text": "b"
+                        },
+                        {
+                            "id": "C",
+                            "text": "c"
+                        },
+                        {
+                            "id": "D",
+                            "text": "d"
+                        }
+                    ],
+                    "prompt": "prompt",
+                    "solution": "A",
+                    "variant": "text"
+                }
+            ]
+        }
+
+        gen_multiple_choice_for_text = (
+            f'Generate {str(quantity)} multiple choice questions of 4 options for an english '
+            'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that '
+            'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
+            'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+            },
+            {
+                "role": "user",
+                "content": gen_multiple_choice_for_text
+            },
+            {
+                "role": "user",
+                "content": (
+                    'The type of multiple choice is the prompt has wrong words or group of words and the options '
+                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
+                    'the boss <u>is</u> nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+                )
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != quantity:
+            return await self._gen_multiple_choice_level(quantity, start_id)
+        else:
+            return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
+
+    async def _gen_blank_space_text_utas(
+            self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
+    ):
+        json_format = {
+            "question": {
+                "words": [
+                    {
+                        "id": "1",
+                        "text": "a"
+                    },
+                    {
+                        "id": "2",
+                        "text": "b"
+                    },
+                    {
+                        "id": "3",
+                        "text": "c"
+                    },
+                    {
+                        "id": "4",
+                        "text": "d"
+                    }
+                ],
+                "text": "text"
+            }
+        }
+
+        messages = [
+            {
+                "role": "system",
+                "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+            },
+            {
+                "role": "user",
+                "content": f'Generate a text of at least {str(size)} words about the topic {topic}.'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace '
+                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
+                    'The ids must be ordered throughout the text and the words must be replaced only once. Put '
+                    'the removed words and respective ids on the words array of the json in the correct order.'
+                )
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question["question"]
+
+    async def _gen_reading_passage_utas(
+            self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
+    ):
+
+        passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
+        short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
+        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
+        return {
+            "exercises": {
+                "shortAnswer": short_answer,
+                "multipleChoice": mc_exercises,
+            },
+            "text": {
+                "content": passage["text"],
+                "title": passage["title"]
+            }
+        }
+
+    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
+        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
+
+        messages = [
+            {
+                "role": "system",
+                "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+            },
+            {
+                "role": "user",
+                "content": (
+                        'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
+                                                         'maximum 3 words per answer, about this text:\n"' + text + '"')
+            },
+            {
+                "role": "user",
+                "content": 'The id starts at ' + str(start_id) + '.'
+            }
+        ]
+
+        return (
+            await self._llm.prediction(
+                GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+            )
+        )["questions"]
+
+    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
+        json_format = {
+            "questions": [
+                {
+                    "id": "9",
+                    "options": [
+                        {
+                            "id": "A",
+                            "text": "a"
+                        },
+                        {
+                            "id": "B",
+                            "text": "b"
+                        },
+                        {
+                            "id": "C",
+                            "text": "c"
+                        },
+                        {
+                            "id": "D",
+                            "text": "d"
+                        }
+                    ],
+                    "prompt": "prompt",
+                    "solution": "A",
+                    "variant": "text"
+                }
+            ]
+        }
+
+        messages = [
+            {
+                "role": "system",
+                "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+            },
+            {
+                "role": "user",
+                "content": 'Generate ' + str(
+                    mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
+            },
+            {
+                "role": "user",
+                "content": 'Make sure every question only has 1 correct answer.'
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != mc_quantity:
+            return await self._gen_multiple_choice_level(mc_quantity, start_id)
+        else:
+            return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]