Async release

2024-07-23 08:40:35 +01:00
parent a4caecdb4f
commit 3cf9fa5cba
116 changed files with 5609 additions and 30630 deletions
--- a/app/services/impl/reading.py
+++ b/app/services/impl/reading.py
@@ -0,0 +1,287 @@
+import random
+import uuid
+from queue import Queue
+from typing import List
+
+from app.services.abc import IReadingService, ILLMService
+from app.configs.constants import QuestionType, TemperatureSettings, FieldsAndExercises, GPTModels
+from app.helpers import ExercisesHelper
+
+
+class ReadingService(IReadingService):
+
+    def __init__(self, llm: ILLMService):
+        """Keep the LLM client and build the per-passage lookup table.
+
+        Every ``passage_<n>`` entry pairs the question type used when asking
+        for the passage text with the id given to that passage's first
+        question.
+        """
+        self._llm = llm
+
+        # (lookup key, question type, id of the passage's first question)
+        layout = (
+            ("passage_1", QuestionType.READING_PASSAGE_1, 1),
+            ("passage_2", QuestionType.READING_PASSAGE_2, 14),
+            ("passage_3", QuestionType.READING_PASSAGE_3, 27),
+        )
+        self._passages = {
+            key: {"question_type": question_type, "start_id": first_id}
+            for key, question_type, first_id in layout
+        }
+
+    async def gen_reading_passage(
+            self,
+            passage_id: int,
+            topic: str,
+            req_exercises: List[str],
+            number_of_exercises_q: Queue,
+            difficulty: str
+    ):
+        """Generate one reading passage together with its exercises.
+
+        The passage text and its exercises are regenerated from scratch until
+        the passage is non-empty and no exercise slot is an empty dict.
+
+        Args:
+            passage_id: Passage number; ``passage_<passage_id>`` must be a key
+                of ``self._passages``.
+            topic: Topic the passage is written about.
+            req_exercises: Exercise type names, in the order they are built.
+            number_of_exercises_q: Queue holding one question count per entry
+                of ``req_exercises``. Exactly ``len(req_exercises)`` items are
+                taken from it, once, however many attempts are needed.
+            difficulty: Difficulty label forwarded to the exercise generators
+                and echoed in the result.
+
+        Returns:
+            A dict with ``exercises``, ``text`` (``content`` and ``title``)
+            and ``difficulty``.
+
+        Raises:
+            KeyError: If ``passage_id`` has no entry in ``self._passages``.
+        """
+        _passage = self._passages[f'passage_{passage_id}']
+        start_id = _passage["start_id"]
+
+        # Read this passage's counts exactly once. The exercise generator pops
+        # one count per requested exercise, so handing the caller's queue to a
+        # retry would find it already drained: the blocking ``get()`` would
+        # then stall the event loop or steal counts meant for someone else.
+        counts = [number_of_exercises_q.get() for _ in req_exercises]
+
+        # A loop instead of recursion, so repeated failures cannot exhaust the
+        # interpreter's recursion limit.
+        # NOTE(review): retries are still unbounded, as before; consider a cap.
+        while True:
+            passage = await self.generate_reading_passage(_passage["question_type"], topic)
+            if passage == "":
+                continue
+
+            # Each attempt gets its own queue preloaded with the same counts.
+            attempt_q = Queue()
+            for count in counts:
+                attempt_q.put(count)
+
+            exercises = await self._generate_reading_exercises(
+                passage["text"], req_exercises, attempt_q, start_id, difficulty
+            )
+            if ExercisesHelper.contains_empty_dict(exercises):
+                continue
+
+            return {
+                "exercises": exercises,
+                "text": {
+                    "content": passage["text"],
+                    "title": passage["title"]
+                },
+                "difficulty": difficulty
+            }
+
+    async def generate_reading_passage(self, q_type: QuestionType, topic: str):
+        """Ask the LLM for a long IELTS reading text on ``topic``.
+
+        The system prompt requests JSON with a ``title`` and a ``text`` key;
+        ``q_type.value`` is interpolated into the user prompt.
+
+        Returns:
+            Whatever ``self._llm.prediction`` returns for this request.
+        """
+        system_prompt = (
+            'You are a helpful assistant designed to output JSON on this format: '
+            '{"title": "title of the text", "text": "generated text"}'
+        )
+        user_prompt = (
+            f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, '
+            f'on the topic of "{topic}". The passage should offer a substantial amount of '
+            'information, analysis, or narrative relevant to the chosen subject matter. This text '
+            'passage aims to serve as the primary reading section of an IELTS test, providing an '
+            'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
+            'does not contain forbidden subjects in muslim countries.'
+        )
+        conversation = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        result = await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            conversation,
+            FieldsAndExercises.GEN_TEXT_FIELDS,
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return result
+
+    async def _generate_reading_exercises(
+            self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
+    ):
+        """Build the requested exercises for ``passage``, one after another.
+
+        One count is taken from ``number_of_exercises_q`` for every entry of
+        ``req_exercises`` (recognised or not), and the running question id
+        advances by that count after each entry.
+
+        A write-blanks exercise that fails the answer word-limit check is
+        recorded as an empty dict instead of being dropped.
+
+        Returns:
+            The list of exercise dicts, in request order.
+        """
+        built = []
+        next_id = start_id
+        for kind in req_exercises:
+            # NOTE(review): ``get()`` is called without a timeout; an empty
+            # queue would block here.
+            count = number_of_exercises_q.get()
+
+            if kind == "fillBlanks":
+                question = await self._gen_summary_fill_blanks_exercise(passage, count, next_id, difficulty)
+                built.append(question)
+                print("Added fill blanks: " + str(question))
+            elif kind == "trueFalse":
+                question = await self._gen_true_false_not_given_exercise(passage, count, next_id, difficulty)
+                built.append(question)
+                print("Added trueFalse: " + str(question))
+            elif kind == "writeBlanks":
+                question = await self._gen_write_blanks_exercise(passage, count, next_id, difficulty)
+                if not ExercisesHelper.answer_word_limit_ok(question):
+                    # Placeholder so the failed slot stays visible in the result.
+                    built.append({})
+                    print("Did not add write blanks because it did not respect word limit")
+                else:
+                    built.append(question)
+                    print("Added write blanks: " + str(question))
+            elif kind == "paragraphMatch":
+                question = await self._gen_paragraph_match_exercise(passage, count, next_id)
+                built.append(question)
+                print("Added paragraph match: " + str(question))
+
+            next_id += count
+
+        return built
+
+    async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
+        """Build a summary-completion (fill the blanks) exercise from ``text``.
+
+        The LLM is asked for a summary of ``text`` and for ``quantity`` single
+        words taken from that summary; the words are then turned into blanks,
+        options and solutions through ``ExercisesHelper``.
+
+        Args:
+            text: Passage the summary is written from.
+            quantity: Number of words the LLM is asked to select.
+            start_id: Id handed to the helpers for the first blank.
+            difficulty: Difficulty label inserted into the word-selection prompt.
+
+        Returns:
+            A ``fillBlanks`` exercise dict.
+        """
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{ "summary": "summary", "words": ["word_1", "word_2"] }')
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Summarize this text: "{text}"'
+                )
+
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Select {str(quantity)} {difficulty} difficulty words, it must be words and not '
+                    'expressions, from the summary.'
+                )
+
+            }
+        ]
+
+        # Both keys are read below, so both are listed. The sibling generators
+        # pass exactly the keys they go on to read (presumably the fields the
+        # reply is checked for); "words" was missing here.
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["summary", "words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        summary = response["summary"]
+        words = response["words"]
+
+        replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(summary, words, start_id)
+        options_words = ExercisesHelper.add_random_words_and_shuffle(words, 5)
+        solutions = ExercisesHelper.fillblanks_build_solutions_array(words, start_id)
+
+        return {
+            "allowRepetition": True,
+            "id": str(uuid.uuid4()),
+            "prompt": (
+                "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
+                "more words than spaces so you will not use them all. You may use any of the words more than once."
+            ),
+            "solutions": solutions,
+            "text": replaced_summary,
+            "type": "fillBlanks",
+            "words": options_words
+
+        }
+
+    async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
+        """Build a True / False / Not Given exercise about ``text``.
+
+        The LLM is asked for ``quantity`` statements; any surplus is removed
+        through ``ExercisesHelper.remove_excess_questions`` and the remaining
+        statements get consecutive string ids starting at ``start_id``.
+
+        Returns:
+            A ``trueFalse`` exercise dict.
+        """
+        system_prompt = (
+            'You are a helpful assistant designed to output JSON on this format: '
+            '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
+            '{"prompt": "statement_2", "solution": "true/false/not_given"}]}'
+        )
+        user_prompt = (
+            f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
+            'Ensure that your statements accurately represent information or inferences from the text, and '
+            'provide a variety of responses, including, at least one of each True, False, and Not Given, '
+            f'as appropriate.\n\nReference text:\n\n {text}'
+        )
+        conversation = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        reply = await self._llm.prediction(
+            GPTModels.GPT_4_O, conversation, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        statements = reply["prompts"]
+
+        # Trim only when the model returned more statements than requested.
+        surplus = len(statements) - quantity
+        if surplus > 0:
+            statements = ExercisesHelper.remove_excess_questions(statements, surplus)
+
+        next_id = start_id
+        for statement in statements:
+            statement["id"] = str(next_id)
+            next_id += 1
+
+        return {
+            "id": str(uuid.uuid4()),
+            "prompt": "Do the following statements agree with the information given in the Reading Passage?",
+            "questions": statements,
+            "type": "trueFalse"
+        }
+
+    async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
+        """Build a short-answer (write the blanks) exercise about ``text``.
+
+        The LLM is asked for ``quantity`` questions with possible answers of at
+        most three words; at most ``quantity`` of the returned questions are
+        kept, and ``ExercisesHelper`` derives the solutions and the text.
+
+        Returns:
+            A ``writeBlanks`` exercise dict with ``maxWords`` set to 3.
+        """
+        system_prompt = (
+            'You are a helpful assistant designed to output JSON on this format: '
+            '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
+        )
+        user_prompt = (
+            f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
+            f'possible answers, must have maximum 3 words per answer, about this text:\n"{text}"'
+        )
+        conversation = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        reply = await self._llm.prediction(
+            GPTModels.GPT_4_O, conversation, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        kept = reply["questions"][:quantity]
+
+        exercise_id = str(uuid.uuid4())
+        solutions = ExercisesHelper.build_write_blanks_solutions(kept, start_id)
+        blanks_text = ExercisesHelper.build_write_blanks_text(kept, start_id)
+
+        return {
+            "id": exercise_id,
+            "maxWords": 3,
+            "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
+            "solutions": solutions,
+            "text": blanks_text,
+            "type": "writeBlanks"
+        }
+
+    async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
+        """Build a heading-matching exercise for the paragraphs of ``text``.
+
+        The paragraphs come from ``ExercisesHelper.assign_letters_to_paragraphs``
+        and the LLM is asked for one heading per paragraph. Every paragraph
+        becomes an option; the headings are shuffled and at most ``quantity``
+        of them are returned as sentences to match.
+
+        Args:
+            text: Passage to split into lettered paragraphs.
+            quantity: Maximum number of headings to ask about.
+            start_id: Id of the first sentence; later ones count up from it.
+
+        Returns:
+            A ``matchSentences`` exercise dict.
+        """
+        paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
+            },
+            {
+                "role": "user",
+                "content": (
+                    'For every paragraph of the list generate a minimum 5 word heading for it. '
+                    f'The paragraphs are these: {str(paragraphs)}'
+                )
+
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        headings = response["headings"]
+
+        options = []
+        headed = []
+        for index, paragraph in enumerate(paragraphs):
+            options.append({
+                "id": paragraph["letter"],
+                "sentence": paragraph["paragraph"]
+            })
+
+            # The model may return fewer headings than there are paragraphs;
+            # such paragraphs stay available as options but are not asked.
+            if index >= len(headings):
+                continue
+
+            heading = headings[index]
+            # The requested format wraps each heading as {"heading": "..."};
+            # unwrap it so the sentence is the heading text, not a dict.
+            if isinstance(heading, dict):
+                heading = heading.get("heading")
+            if not heading:
+                continue
+
+            paragraph["heading"] = heading
+            headed.append(paragraph)
+
+        random.shuffle(headed)
+        sentences = [
+            {
+                "id": sentence_id,
+                "sentence": paragraph["heading"],
+                "solution": paragraph["letter"]
+            }
+            for sentence_id, paragraph in enumerate(headed, start=start_id)
+        ]
+
+        return {
+            "id": str(uuid.uuid4()),
+            "allowRepetition": False,
+            "options": options,
+            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
+            "sentences": sentences[:quantity],
+            "type": "matchSentences"
+        }