Changes to endpoints so they allow fetching only the context first and then the exercises, as well as some general tidying up

This commit is contained in:
Carlos-Mesquita
2024-11-04 23:31:48 +00:00
parent 2a032c5aba
commit 84ed2f2f6a
83 changed files with 4229 additions and 1843 deletions

View File

@@ -0,0 +1,131 @@
from logging import getLogger
from fastapi import UploadFile
from app.configs.constants import GPTModels, FieldsAndExercises, TemperatureSettings
from app.dtos.reading import ReadingDTO
from app.helpers import ExercisesHelper
from app.services.abc import IReadingService, ILLMService
from .fill_blanks import FillBlanks
from .idea_match import IdeaMatch
from .paragraph_match import ParagraphMatch
from .true_false import TrueFalse
from .import_reading import ImportReadingModule
from .write_blanks import WriteBlanks
class ReadingService(IReadingService):
    """Facade for the reading section: generates passages and exercises via the
    LLM, and imports existing exams from uploaded files."""

    def __init__(self, llm: ILLMService):
        self._llm = llm
        # One generator per supported exercise type, all sharing the same LLM client.
        self._fill_blanks = FillBlanks(llm)
        self._idea_match = IdeaMatch(llm)
        self._paragraph_match = ParagraphMatch(llm)
        self._true_false = TrueFalse(llm)
        self._write_blanks = WriteBlanks(llm)
        self._logger = getLogger(__name__)
        self._import = ImportReadingModule(llm)

    async def import_exam(self, exercises: UploadFile, solutions: UploadFile | None = None):
        """Parse an uploaded exam sheet (and optional solutions sheet) into a structured exam."""
        return await self._import.import_from_file(exercises, solutions)

    async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
        """Generate an IELTS reading passage for *part* on *topic*.

        Returns the LLM's JSON with the fields declared in GEN_TEXT_FIELDS
        (title/text). NOTE: valid parts are 1-3; any other value raises
        KeyError on the difficulty-guidance lookup below.
        """
        # Per-part difficulty guidance; keys are strings because the lookup is
        # done on str(part).
        part_system_message = {
            "1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
            "2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
            "3": (
                'The generated text should be very hard to understand and include different points, theories, '
                'subtle differences of opinions from people, correctly sourced to the person who said it, '
                'over the specified topic and have multiple paragraphs.'
            )
        }
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"title": "title of the text", "text": "generated text"}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
                    f'on the topic of "{topic}". The passage should offer a substantial amount of '
                    'information, analysis, or narrative relevant to the chosen subject matter. This text '
                    'passage aims to serve as the primary reading section of an IELTS test, providing an '
                    'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
                    'does not contain forbidden subjects in muslim countries.'
                )
            },
            {
                "role": "system",
                "content": part_system_message[str(part)]
            }
        ]
        if part == 3:
            # Part 3 passages additionally cite real excerpts and sources.
            messages.append({
                "role": "user",
                "content": "Use real text excerpts on your generated passage and cite the sources."
            })
        return await self._llm.prediction(
            GPTModels.GPT_4_O,
            messages,
            FieldsAndExercises.GEN_TEXT_FIELDS,
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    async def generate_reading_exercises(self, dto: ReadingDTO):
        """Generate every exercise requested in *dto* against dto.text.

        Question ids are numbered consecutively across exercises: each request
        advances start_id by its quantity, whether or not an exercise was
        actually added (so numbering stays aligned with the request).
        Returns {"exercises": [...]} in request order.
        """
        exercises = []
        start_id = 1
        for req_exercise in dto.exercises:
            if req_exercise.type == "fillBlanks":
                question = await self._fill_blanks.gen_summary_fill_blanks_exercise(
                    dto.text, req_exercise.quantity, start_id, dto.difficulty, req_exercise.num_random_words
                )
                exercises.append(question)
                self._logger.info("Added fill blanks: %s", question)
            elif req_exercise.type == "trueFalse":
                question = await self._true_false.gen_true_false_not_given_exercise(
                    dto.text, req_exercise.quantity, start_id, dto.difficulty
                )
                exercises.append(question)
                self._logger.info("Added trueFalse: %s", question)
            elif req_exercise.type == "writeBlanks":
                question = await self._write_blanks.gen_write_blanks_exercise(
                    dto.text, req_exercise.quantity, start_id, dto.difficulty, req_exercise.max_words
                )
                # Only keep the exercise when every answer respects the word
                # limit; otherwise emit a placeholder so positions are kept.
                if ExercisesHelper.answer_word_limit_ok(question):
                    exercises.append(question)
                    self._logger.info("Added write blanks: %s", question)
                else:
                    exercises.append({})
                    self._logger.info("Did not add write blanks because it did not respect word limit")
            elif req_exercise.type == "paragraphMatch":
                question = await self._paragraph_match.gen_paragraph_match_exercise(
                    dto.text, req_exercise.quantity, start_id
                )
                exercises.append(question)
                self._logger.info("Added paragraph match: %s", question)
            elif req_exercise.type == "ideaMatch":
                question = await self._idea_match.gen_idea_match_exercise(
                    dto.text, req_exercise.quantity, start_id
                )
                # Both paragraph- and idea-match share the matchSentences type;
                # the variant flag lets the frontend tell them apart.
                question["variant"] = "ideaMatch"
                exercises.append(question)
                self._logger.info("Added idea match: %s", question)
            # Advance the numbering even for unknown/skipped types so ids stay
            # aligned with the requested quantities.
            start_id += req_exercise.quantity
        return {
            "exercises": exercises
        }

View File

@@ -0,0 +1,73 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class FillBlanks:
    """Builds summary fill-in-the-blanks exercises: summarize the passage,
    blank out LLM-selected words, and offer a padded, shuffled word bank."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_summary_fill_blanks_exercise(
        self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
    ):
        """Return a fillBlanks exercise dict built from a summary of *text*.

        start_id seeds the blank numbering; num_random_words is how many decoy
        words get mixed into the word bank.
        """
        # Step 1: summarize the passage.
        summary_messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
                )
            },
            {
                "role": "user",
                "content": f'Summarize this text: "{text}"'
            }
        ]
        summary_reply = await self._llm.prediction(
            GPTModels.GPT_4_O, summary_messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        summary = summary_reply["summary"]
        # Step 2: pick the words that will become blanks.
        word_messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"words": ["word_1", "word_2"] }'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
                    f'from this:\n{summary}'
                )
            }
        ]
        word_reply = await self._llm.prediction(
            GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        chosen_words = word_reply["words"]
        # Step 3: assemble the exercise. The helper-call order matters because
        # add_random_words_and_shuffle may mutate the word list in place.
        blanked_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
            summary, chosen_words, start_id
        )
        word_bank = ExercisesHelper.add_random_words_and_shuffle(chosen_words, num_random_words)
        answer_key = ExercisesHelper.fillblanks_build_solutions_array(chosen_words, start_id)
        return {
            "allowRepetition": True,
            "id": str(uuid.uuid4()),
            "prompt": (
                "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
                "more words than spaces so you will not use them all. You may use any of the words more than once."
            ),
            "solutions": answer_key,
            "text": blanked_summary,
            "type": "fillBlanks",
            "words": word_bank
        }

View File

@@ -0,0 +1,46 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class IdeaMatch:
    """Builds author/idea matching exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_idea_match_exercise(self, text: str, quantity: int, start_id: int):
        """Extract attributed ideas/opinions from *text* and shape them into a
        matchSentences exercise numbered from start_id."""
        system_message = {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}'
            )
        }
        user_message = {
            "role": "user",
            "content": (
                f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
                f'The text: {text}'
            )
        }
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, [system_message, user_message], ["ideas"],
            TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        extracted_ideas = reply["ideas"]
        return {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": ExercisesHelper.build_options(extracted_ideas),
            "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
            "sentences": ExercisesHelper.build_sentences(extracted_ideas, start_id),
            "type": "matchSentences"
        }

View File

@@ -0,0 +1,190 @@
from logging import getLogger
from typing import Dict, Any
from uuid import uuid4
import aiofiles
from fastapi import UploadFile
from app.helpers import FileHelper
from app.mappers.reading import ReadingMapper
from app.services.abc import ILLMService
from app.dtos.exams.reading import Exam
class ImportReadingModule:
    """Imports an IELTS reading exam from uploaded files: converts them to
    HTML, then asks the LLM to structure the content into the exam schema."""

    def __init__(self, openai: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = openai

    async def import_from_file(
        self, exercises: UploadFile, solutions: UploadFile = None
    ) -> Dict[str, Any] | None:
        """Save the uploads under a unique tmp directory, convert them to HTML,
        extract the structured exam via the LLM, then clean up.

        Returns the exam as a dict (None-valued fields dropped), or None when
        extraction produced nothing.
        """
        path_id = str(uuid4())
        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
        FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
        if solutions:
            ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
            FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
        response = await self._get_reading_parts(path_id, solutions is not None)
        # Always clean up the temporary conversion artifacts.
        FileHelper.remove_directory(f'./tmp/{path_id}')
        if response:
            return response.model_dump(exclude_none=True)
        return None

    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
        """Read the converted HTML file(s) and ask the LLM for the structured exam."""
        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
            exercises_html = await f.read()
        messages = [
            # BUG FIX: previously called self._instructions() without the flag,
            # so the prompt always claimed no solutions were supplied even when
            # a solutions sheet was attached below.
            self._instructions(solutions),
            {
                "role": "user",
                "content": f"Exam question sheet:\n\n{exercises_html}"
            }
        ]
        if solutions:
            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
                solutions_html = await f.read()
            messages.append({
                "role": "user",
                "content": f"Solutions:\n\n{solutions_html}"
            })
        return await self._llm.pydantic_prediction(
            messages,
            ReadingMapper.map_to_exam_model,
            str(self._reading_json_schema())
        )

    def _reading_json_schema(self):
        """Full exam template with one example of each exercise schema filled in."""
        schema = self._reading_exam_template()
        schema["parts"][0]["exercises"] = [
            self._write_blanks(),
            self._fill_blanks(),
            self._match_sentences(),
            self._true_false()
        ]
        # BUG FIX: the template was built but never returned, so callers got
        # None and the LLM prompt embedded the literal string "None".
        return schema

    @staticmethod
    def _reading_exam_template():
        """Skeleton of the whole exam document (parts filled in elsewhere)."""
        return {
            "minTimer": "<number of minutes as int not string>",
            "parts": [
                {
                    "text": {
                        "title": "<title of the passage>",
                        "content": "<the text of the passage>",
                    },
                    "exercises": []
                }
            ]
        }

    @staticmethod
    def _write_blanks():
        """Schema template for short-answer (writeBlanks) exercises."""
        return {
            "maxWords": "<number of max words return the int value not string>",
            "solutions": [
                {
                    "id": "<number of the question as string>",
                    "solution": [
                        "<at least one solution can have alternative solutions (that dont exceed maxWords)>"
                    ]
                },
            ],
            "text": "<all the questions formatted in this way: <question>{{<id>}}\\n<question2>{{<id2>}}\\n >",
            "type": "writeBlanks"
        }

    @staticmethod
    def _match_sentences():
        """Schema template for heading/idea matching (matchSentences) exercises."""
        return {
            "options": [
                {
                    "id": "<uppercase letter that identifies a paragraph>",
                    "sentence": "<either a heading or an idea>"
                }
            ],
            "sentences": [
                {
                    "id": "<the question id not the option id>",
                    "solution": "<id in options>",
                    "sentence": "<heading or an idea>",
                }
            ],
            "type": "matchSentences",
            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>"
        }

    @staticmethod
    def _true_false():
        """Schema template for true/false/not-given exercises."""
        return {
            "questions": [
                {
                    "prompt": "<question>",
                    "solution": "<can only be one of these [\"true\", \"false\", \"not_given\"]>",
                    "id": "<the question id>"
                }
            ],
            "type": "trueFalse"
        }

    @staticmethod
    def _fill_blanks():
        """Schema template for fill-in-the-blanks exercises with a word bank."""
        return {
            "solutions": [
                {
                    "id": "<blank id>",
                    "solution": "<word>"
                }
            ],
            "text": "<section of text with blanks denoted by {{<blank id>}}>",
            "type": "fillBlanks",
            "words": [
                {
                    "letter": "<uppercase letter that ids the words (may not be included and if not start at A)>",
                    "word": "<word>"
                }
            ]
        }

    def _instructions(self, solutions: bool = False):
        """System prompt telling the LLM how to structure the exam JSON.

        *solutions* indicates whether a solutions sheet is also supplied.
        """
        solutions_str = " and its solutions" if solutions else ""
        # BUG FIX: the two concatenated literals below used to join as
        # "...information andall the solutions..." (missing space).
        tail = (
            "The solutions were not supplied so you will have to solve them. Do your utmost to get all the information and "
            "all the solutions right!"
            if not solutions else
            "Do your utmost to correctly identify the sections, its exercises and respective solutions"
        )
        return {
            "role": "system",
            "content": (
                f"You will receive html pertaining to an english exam question sheet{solutions_str}. Your job is to "
                f"structure the data into a single json with this template: {self._reading_exam_template()}\n"
                "You will need find out how many parts the exam has a correctly place its exercises. You will "
                "encounter 4 types of exercises:\n"
                " - \"writeBlanks\": short answer questions that have a answer word limit, generally two or three\n"
                " - \"matchSentences\": a sentence needs to be matched with a paragraph\n"
                " - \"trueFalse\": questions that its answers can only be true false or not given\n"
                " - \"fillBlanks\": a text that has blank spaces on a section of text and a word bank which "
                "contains the solutions and sometimes random words to throw off the students\n"
                "These 4 types of exercises will need to be placed in the correct json template inside each part, "
                "the templates are as follows:\n "
                f"writeBlanks: {self._write_blanks()}\n"
                f"matchSentences: {self._match_sentences()}\n"
                f"trueFalse: {self._true_false()}\n"
                f"fillBlanks: {self._fill_blanks()}\n\n"
                f"{tail}"
            )
        }

View File

@@ -0,0 +1,63 @@
import random
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class ParagraphMatch:
    """Builds heading-to-paragraph matching exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_paragraph_match_exercise(self, text: str, quantity: int, start_id: int):
        """Letter the paragraphs of *text*, have the LLM write one heading per
        paragraph, then pair shuffled headings against their paragraph letters."""
        paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
        conversation = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
                )
            },
            {
                "role": "user",
                "content": (
                    'For every paragraph of the list generate a minimum 5 word heading for it. '
                    f'The paragraphs are these: {str(paragraphs)}'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        generated_headings = reply["headings"]
        options = []
        for idx, para in enumerate(paragraphs):
            # Attach each generated heading to its paragraph, in order.
            para["heading"] = generated_headings[idx]["heading"]
            options.append({
                "id": para["letter"],
                "sentence": para["paragraph"]
            })
        # Shuffle so the heading order doesn't simply mirror the paragraph order.
        random.shuffle(paragraphs)
        sentences = [
            {
                "id": number,
                "sentence": para["heading"],
                "solution": para["letter"]
            }
            for number, para in enumerate(paragraphs, start=start_id)
        ]
        return {
            "id": str(uuid.uuid4()),
            "allowRepetition": False,
            "options": options,
            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
            "sentences": sentences[:quantity],
            "type": "matchSentences"
        }

View File

@@ -0,0 +1,49 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class TrueFalse:
    """Builds true/false/not-given exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id: int, difficulty: str):
        """Ask the LLM for *quantity* statements about *text*, trim any excess,
        and number the survivors consecutively from start_id."""
        conversation = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
                    '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
            },
            {
                "role": "user",
                "content": (
                    f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
                    'Ensure that your statements accurately represent information or inferences from the text, and '
                    'provide a variety of responses, including, at least one of each True, False, and Not Given, '
                    f'as appropriate.\n\nReference text:\n\n {text}'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        statements = reply["prompts"]
        surplus = len(statements) - quantity
        if surplus > 0:
            # The LLM sometimes over-delivers; drop the extras.
            statements = ExercisesHelper.remove_excess_questions(statements, surplus)
        for number, statement in enumerate(statements, start=start_id):
            statement["id"] = str(number)
        return {
            "id": str(uuid.uuid4()),
            "prompt": "Do the following statements agree with the information given in the Reading Passage?",
            "questions": statements,
            "type": "trueFalse"
        }

View File

@@ -0,0 +1,44 @@
import uuid
from app.configs.constants import GPTModels, TemperatureSettings
from app.helpers import ExercisesHelper
from app.services.abc import ILLMService
class WriteBlanks:
    """Builds short-answer ("write blanks") exercises from a passage."""

    def __init__(self, llm: ILLMService):
        self._llm = llm

    async def gen_write_blanks_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, max_words: int = 3):
        """Ask the LLM for short-answer questions whose answers fit within
        *max_words* words, and shape them into a writeBlanks exercise."""
        conversation = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
                )
            },
            {
                "role": "user",
                "content": (
                    f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
                    f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
                )
            }
        ]
        reply = await self._llm.prediction(
            GPTModels.GPT_4_O, conversation, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        # Keep at most the requested number of questions.
        selected_questions = reply["questions"][:quantity]
        return {
            "id": str(uuid.uuid4()),
            "maxWords": max_words,
            "prompt": f"Choose no more than {max_words} words and/or a number from the passage for each answer.",
            "solutions": ExercisesHelper.build_write_blanks_solutions(selected_questions, start_id),
            "text": ExercisesHelper.build_write_blanks_text(selected_questions, start_id),
            "type": "writeBlanks"
        }