Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274

2024-12-10 22:24:40 +00:00
parent 68cab80851
commit 6982068864
167 changed files with 1411 additions and 1229 deletions
--- a/ielts_be/services/impl/exam/level/exercises/__init__.py
+++ b/ielts_be/services/impl/exam/level/exercises/__init__.py
@@ -0,0 +1,11 @@
+from .multiple_choice import MultipleChoice
+from .blank_space import BlankSpace
+from .passage_utas import PassageUtas
+from .fill_blanks import FillBlanks
+
+# Names exported by a star-import of this module: the four exercise
+# generator classes imported above, listed in import order.
+__all__ = [
+    "MultipleChoice", "BlankSpace",
+    "PassageUtas", "FillBlanks",
+]
--- a/ielts_be/services/impl/exam/level/exercises/blank_space.py
+++ b/ielts_be/services/impl/exam/level/exercises/blank_space.py
@@ -0,0 +1,44 @@
+import random
+
+from ielts_be.configs.constants import EducationalContent, GPTModels, TemperatureSettings
+from ielts_be.services import ILLMService
+
+
+class BlankSpace:
+    """Generates, via the LLM, a text in which chosen words become id markers."""
+
+    def __init__(self, llm: ILLMService, mc_variants: dict):
+        self._llm = llm  # LLM client used for the prediction call
+        self._mc_variants = mc_variants  # JSON templates keyed by variant name
+
+    async def gen_blank_space_text_utas(
+            self, quantity: int, start_id: int, size: int, topic=None
+    ):
+        """Ask the LLM for a text with `quantity` words replaced by `{{id}}` markers.
+
+        A falsy `topic` is replaced by a random `EducationalContent.MTI_TOPICS`
+        entry. Returns the "question" entry of the LLM response.
+        """
+        topic = topic or random.choice(EducationalContent.MTI_TOPICS)
+        output_format = self._mc_variants["blank_space_text"]
+
+        system_msg = (
+            f'You are a helpful assistant designed to output JSON on this format: {output_format}'
+        )
+        text_request = f'Generate a text of at least {size} words about the topic {topic}.'
+        blanks_request = (
+            f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
+            'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
+            'The ids must be ordered throughout the text and the words must be replaced only once. '
+            'Put the removed words and respective ids on the words array of the json in the correct order.'
+        )
+        conversation = [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": text_request},
+            {"role": "user", "content": blanks_request},
+        ]
+
+        reply = await self._llm.prediction(
+            GPTModels.GPT_4_O, conversation, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return reply["question"]
--- a/ielts_be/services/impl/exam/level/exercises/fill_blanks.py
+++ b/ielts_be/services/impl/exam/level/exercises/fill_blanks.py
@@ -0,0 +1,73 @@
+import random
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings, EducationalContent
+from ielts_be.services import ILLMService
+
+
+class FillBlanks:
+    """Generates multiple choice fill-in-the-blanks exercises through the LLM."""
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_fill_blanks(
+            self, start_id: int, quantity: int, size: int = 300, topic=None
+    ):
+        """Generate a text with `quantity` blanks and four options per blank.
+
+        Args:
+            start_id: id requested for the first blank; later ids increment.
+            quantity: number of words the LLM is asked to replace with blanks.
+            size: minimum word count requested for the generated text.
+            topic: subject of the text; when falsy, a random entry of
+                `EducationalContent.MTI_TOPICS` is used.
+
+        Returns:
+            The LLM response dict extended with fixed "type", "variant" and
+            "prompt" entries, which override same-named response keys.
+        """
+        if not topic:
+            topic = random.choice(EducationalContent.MTI_TOPICS)
+
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {self._fill_blanks_mc_template()}'
+            },
+            {
+                "role": "user",
+                "content": f'Generate a text of at least {size} words about the topic {topic}.'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'From the generated text choose exactly {quantity} words (cannot be sequential words) replace '
+                    'each with {{id}} (starting from ' + str(start_id) + ' and incrementing), then generate a '
+                    'JSON object containing: the modified text, a solutions array with each word\'s correct '
+                    'letter (A-D), and a words array containing each id with four options where one is '
+                    'the original word (matching the solution) and three are plausible but incorrect '
+                    'alternatives that maintain grammatical consistency. '
+                    'You cannot use repeated words!' #TODO: Solve this after
+                )
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, [], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        # Fixed metadata comes after the spread so it wins over same-named LLM keys.
+        return {
+            **question,
+            "type": "fillBlanks",
+            "variant": "mc",
+            "prompt": "Click a blank to select the appropriate word for it.",
+        }
+
+    @staticmethod
+    def _fill_blanks_mc_template():
+        """Return the JSON shape embedded in the system prompt."""
+        return {
+            "text": "",
+            "solutions": [{"id": "", "solution": "<A,B,C or D>"}],
+            "words": [{"id": "", "options": {"A": "", "B": "", "C": "", "D": ""}}]
+        }
--- a/ielts_be/services/impl/exam/level/exercises/multiple_choice.py
+++ b/ielts_be/services/impl/exam/level/exercises/multiple_choice.py
@@ -0,0 +1,84 @@
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class MultipleChoice:
+    """Generates English-level multiple choice questions through the LLM."""
+
+    def __init__(self, llm: ILLMService, mc_variants: dict):
+        self._llm = llm
+        self._mc_variants = mc_variants
+
+    async def gen_multiple_choice(
+            self, mc_variant: str, quantity: int, start_id: int = 1
+    ):
+        """Request `quantity` questions of the `mc_variant` kind from the LLM.
+
+        `mc_variant` picks the JSON template out of `mc_variants`; the values
+        "blank_space" and "underline" also adjust the prompt. The LLM response
+        is returned after `ExercisesHelper.fix_exercise_ids(response, start_id)`.
+        """
+        output_format = self._mc_variants[mc_variant]
+        if mc_variant == "blank_space":
+            kind = " blank space "
+        else:
+            kind = " "
+
+        system_msg = f'You are a helpful assistant designed to output JSON on this format: {output_format}'
+        request = (
+            f'Generate {quantity!s} multiple choice{kind}questions of 4 options for an english level exam, some easy '
+            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
+            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
+            'punctuation. Make sure every question only has 1 correct answer.'
+        )
+        conversation = [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": request},
+        ]
+
+        if mc_variant == "underline":
+            # This variant gets an extra instruction with a worked example.
+            conversation.append({"role": "user", "content": (
+                'The type of multiple choice in the prompt has wrong words or group of words and the options '
+                'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
+                'the boss <u>is</u> nice."\n'
+                'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+            )})
+
+        llm_reply = await self._llm.prediction(
+            GPTModels.GPT_4_O, conversation, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return ExercisesHelper.fix_exercise_ids(llm_reply, start_id)
+
+# NOTE(review): the string literal below is inert, disabled code, not a docstring. It uses names
+# (question, utas, all_exams, uuid, self._document_store) that the visible module does not define.
+"""
+        if len(question["questions"]) != quantity:
+            return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
+        else:
+            if not utas:
+                all_exams = await self._document_store.get_all("level")
+                seen_keys = set()
+                for i in range(len(question["questions"])):
+                    question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                        all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+                    )
+                return {
+                    "id": str(uuid.uuid4()),
+                    "prompt": "Select the appropriate option.",
+                    "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
+                    "type": "multipleChoice",
+                }
+            else:
+                if all_exams is not None:
+                    seen_keys = set()
+                    for i in range(len(question["questions"])):
+                        question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                            all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+                        )
+                response = ExercisesHelper.fix_exercise_ids(question, start_id)
+                response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+                return response
+    """
--- a/ielts_be/services/impl/exam/level/exercises/passage_utas.py
+++ b/ielts_be/services/impl/exam/level/exercises/passage_utas.py
@@ -0,0 +1,91 @@
+from typing import Optional
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService, IReadingService
+
+
+class PassageUtas:
+    """Generates a reading passage plus multiple choice questions about it."""
+
+    def __init__(self, llm: ILLMService, reading_service: IReadingService, mc_variants: dict):
+        self._llm = llm
+        self._reading_service = reading_service
+        self._mc_variants = mc_variants
+
+    async def gen_reading_passage_utas(
+            self, start_id, mc_quantity: int, topic: Optional[str], word_size: Optional[int] # sa_quantity: int,
+    ):
+        """Generate a passage and `mc_quantity` multiple choice questions on it.
+
+        `topic` and `word_size` are forwarded to the reading service. Returns the
+        multiple choice exercise dict extended with "type", "passage" (content
+        and title) and "mcVariant" entries.
+        """
+        passage = await self._reading_service.generate_reading_passage(1, topic, word_size)
+        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id, mc_quantity)
+        mc_exercises["type"] = "multipleChoice"
+        # Short answer questions (see the commented-out sa_quantity parameter) are
+        # not generated here, so only the multiple choice exercise is returned.
+        return {
+            **mc_exercises,
+            "passage": {
+                "content": passage["text"],
+                "title": passage["title"]
+            },
+            "mcVariant": "passageUtas"
+        }
+
+    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
+        """Ask the LLM for `sa_quantity` short answer questions about `text`.
+
+        The prompt tells the LLM that ids start at `start_id`. Returns the
+        "questions" entry of the response. Not called in the visible class code.
+        """
+        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
+                    f'maximum 3 words per answer, about this text:\n"{text}"'
+                )
+            },
+            {"role": "user", "content": f'The id starts at {start_id}.'},
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return question["questions"]
+
+    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
+        """Ask the LLM for `mc_quantity` 4-option questions about `text`.
+
+        Repeats the request until exactly `mc_quantity` questions come back, then
+        applies `fix_exercise_ids` (with `start_id`) and `randomize_mc_options_order`.
+        """
+        json_template = self._mc_variants["text_mc_utas"]
+        system_msg = f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+        user_msg = f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
+        messages = [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": user_msg},
+            {"role": "user", "content": 'Make sure every question only has 1 correct answer.'},
+        ]
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != mc_quantity:
+            # Wrong question count: ask again. Keyword arguments keep `start_id` and
+            # `mc_quantity` in their own slots. NOTE(review): retries are unbounded.
+            return await self._gen_text_multiple_choice_utas(text, start_id=start_id, mc_quantity=mc_quantity)
+
+        response = ExercisesHelper.fix_exercise_ids(question, start_id)
+        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+        return response