Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274

2024-12-10 22:24:40 +00:00
parent 68cab80851
commit 6982068864
167 changed files with 1411 additions and 1229 deletions
--- a/ielts_be/services/impl/init.py
+++ b/ielts_be/services/impl/init.py
@@ -0,0 +1,11 @@
+from .user import UserService
+from .training import *
+from .third_parties import *
+from .exam import *
+
+# Public names of this package: UserService plus whatever each star-imported
+# sub-package lists in its own __all__.
+__all__ = [
+    "UserService"
+]
+# NOTE(review): `third_parties`, `training` and `exam` are never imported by name here;
+# presumably they resolve because the relative star-imports above bind the sub-packages
+# on this package's namespace — confirm before reordering or removing those imports.
+__all__.extend(third_parties.__all__)
+__all__.extend(training.__all__)
+__all__.extend(exam.__all__)
--- a/ielts_be/services/impl/exam/init.py
+++ b/ielts_be/services/impl/exam/init.py
@@ -0,0 +1,18 @@
+from .level import LevelService
+from .listening import ListeningService
+from .reading import ReadingService
+from .speaking import SpeakingService
+from .writing import WritingService
+from .grade import GradeService
+from .evaluation import EvaluationService
+
+
+# Names exported by `from <this package> import *`; one entry per service class
+# imported above.
+__all__ = [
+    "LevelService",
+    "ListeningService",
+    "ReadingService",
+    "SpeakingService",
+    "WritingService",
+    "GradeService",
+    "EvaluationService"
+]
--- a/ielts_be/services/impl/exam/evaluation.py
+++ b/ielts_be/services/impl/exam/evaluation.py
@@ -0,0 +1,104 @@
+import logging
+from typing import Union, List
+
+from fastapi import BackgroundTasks
+
+from ielts_be.dtos.evaluation import EvaluationType
+from ielts_be.dtos.speaking import GradeSpeakingItem
+from ielts_be.dtos.writing import WritingGradeTaskDTO
+from ielts_be.repositories import IDocumentStore
+from ielts_be.services import IWritingService, ISpeakingService, IEvaluationService
+
+
+class EvaluationService(IEvaluationService):
+
+    def __init__(self, db: IDocumentStore, writing_service: IWritingService, speaking_service: ISpeakingService):
+        self._db = db
+        self._writing_service = writing_service
+        self._speaking_service = speaking_service
+        self._logger = logging.getLogger(__name__)
+
+    async def create_evaluation(
+            self,
+            user_id: str,
+            session_id: str,
+            exercise_id: str,
+            eval_type: EvaluationType,
+            task: int
+    ):
+        await self._db.save_to_db(
+        "evaluation",
+        {
+                "user": user_id,
+                "session_id": session_id,
+                "exercise_id": exercise_id,
+                "type": eval_type,
+                "task": task,
+                "status": "pending"
+            }
+        )
+
+    async def begin_evaluation(
+            self,
+            user_id: str, session_id: str, task: int,
+            exercise_id: str, exercise_type: str,
+            solution: Union[WritingGradeTaskDTO, List[GradeSpeakingItem]],
+            background_tasks: BackgroundTasks
+    ):
+        background_tasks.add_task(
+            self._begin_evaluation,
+            user_id, session_id, task,
+            exercise_id, exercise_type,
+            solution
+        )
+
+    async def _begin_evaluation(
+        self, user_id: str, session_id: str, task: int,
+        exercise_id: str, exercise_type: str,
+        solution: Union[WritingGradeTaskDTO, List[GradeSpeakingItem]]
+    ):
+        try:
+            if exercise_type == EvaluationType.WRITING:
+                result = await self._writing_service.grade_writing_task(
+                    task,
+                    solution.question,
+                    solution.answer,
+                    solution.attachment
+                )
+            else:
+                result = await self._speaking_service.grade_speaking_task(
+                    task,
+                    solution
+                )
+
+            await self._db.update(
+                "evaluation",
+                {
+                    "user": user_id,
+                    "exercise_id": exercise_id,
+                    "session_id": session_id,
+                },
+                {
+                    "$set": {
+                        "status": "completed",
+                        "result": result,
+                    }
+                }
+            )
+
+        except Exception as e:
+            self._logger.error(f"Error processing evaluation {session_id} - {exercise_id}: {str(e)}")
+            await self._db.update(
+                "evaluation",
+                {
+                    "user": user_id,
+                    "exercise_id": exercise_id,
+                    "session_id": session_id
+                },
+                {
+                    "$set": {
+                        "status": "error",
+                        "error": str(e),
+                    }
+                }
+            )
--- a/ielts_be/services/impl/exam/grade.py
+++ b/ielts_be/services/impl/exam/grade.py
@@ -0,0 +1,200 @@
+import json
+from typing import List, Dict
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.services import ILLMService, IGradeService
+
+
+class GradeService(IGradeService):
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def grade_short_answers(self, data: Dict):
+        json_format = {
+            "exercises": [
+                {
+                    "id": 1,
+                    "correct": True,
+                    "correct_answer": " correct answer if wrong"
+                }
+            ]
+        }
+
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
+            },
+            {
+                "role": "user",
+                "content": (
+                    'Grade these answers according to the text content and write a correct answer if they are '
+                    f'wrong. Text, questions and answers:\n {data}'
+                )
+            }
+        ]
+
+        return await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            messages,
+            ["exercises"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+    async def calculate_grading_summary(self, extracted_sections: List):
+        ret = []
+
+        for section in extracted_sections:
+            openai_response_dict = await self._calculate_section_grade_summary(section)
+            ret.append(
+                {
+                    'code': section['code'],
+                    'name': section['name'],
+                    'grade': section['grade'],
+                    'evaluation': openai_response_dict['evaluation'],
+                    'suggestions': openai_response_dict['suggestions'],
+                    'bullet_points': self._parse_bullet_points(openai_response_dict['bullet_points'], section['grade'])
+                }
+            )
+
+        return {'sections': ret}
+
+    async def _calculate_section_grade_summary(self, section):
+        section_name = section['name']
+        section_grade = section['grade']
+        messages = [
+            {
+                "role": "user",
+                "content": (
+                    'You are a IELTS test section grade evaluator. You will receive a IELTS test section name and the '
+                    'grade obtained in the section. You should offer a evaluation comment on this grade and separately '
+                    'suggestions on how to possibly get a better grade.'
+                )
+            },
+            {
+                "role": "user",
+                "content": f'Section: {str(section_name)} Grade: {str(section_grade)}',
+            },
+            {
+                "role": "user",
+                "content": "Speak in third person."
+            },
+            {
+                "role": "user",
+                "content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."
+            },
+            {
+                "role": "user",
+                "content": (
+                    "Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, "
+                    "be one paragraph long."
+                )
+            },
+            {
+                "role": "user",
+                "content": "Please save the evaluation comment and suggestions generated."
+            },
+            {
+                "role": "user",
+                "content": f"Offer bullet points to improve the english {str(section_name)} ability."
+            },
+        ]
+
+        if section['code'] == "level":
+            messages[2:2] = [{
+                "role": "user",
+                "content": (
+                    "This section is comprised of multiple choice questions that measure the user's overall english "
+                    "level. These multiple choice questions are about knowledge on vocabulary, syntax, grammar rules, "
+                    "and contextual usage. The grade obtained measures the ability in these areas and english language "
+                    "overall."
+                )
+            }]
+        elif section['code'] == "speaking":
+            messages[2:2] = [{
+                "role": "user",
+                "content": (
+                    "This section is s designed to assess the English language proficiency of individuals who want to "
+                    "study or work in English-speaking countries. The speaking section evaluates a candidate's ability "
+                    "to communicate effectively in spoken English."
+                )
+            }]
+
+        chat_config = {'max_tokens': 1000, 'temperature': 0.2}
+        tools = self.get_tools()
+
+        res = await self._llm.prediction_override(
+            model="gpt-3.5-turbo",
+            max_tokens=chat_config['max_tokens'],
+            temperature=chat_config['temperature'],
+            tools=tools,
+            messages=messages
+        )
+
+        return self._parse_openai_response(res)
+
+    @staticmethod
+    def _parse_openai_response(response):
+        if 'choices' in response and len(response['choices']) > 0 and 'message' in response['choices'][
+            0] and 'tool_calls' in response['choices'][0]['message'] and isinstance(
+            response['choices'][0]['message']['tool_calls'], list) and len(
+            response['choices'][0]['message']['tool_calls']) > 0 and \
+                response['choices'][0]['message']['tool_calls'][0]['function']['arguments']:
+            return json.loads(response['choices'][0]['message']['tool_calls'][0]['function']['arguments'])
+        else:
+            return {'evaluation': "", 'suggestions': "", 'bullet_points': []}
+
+    @staticmethod
+    def _parse_bullet_points(bullet_points_str, grade):
+        max_grade_for_suggestions = 9
+        if isinstance(bullet_points_str, str) and grade < max_grade_for_suggestions:
+            # Split the string by '\n'
+            lines = bullet_points_str.split('\n')
+
+            # Remove '-' and trim whitespace from each line
+            cleaned_lines = [line.replace('-', '').strip() for line in lines]
+
+            # Add '.' to lines that don't end with it
+            return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
+        else:
+            return []
+
+    @staticmethod
+    def get_tools():
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": "save_evaluation_and_suggestions",
+                    "description": "Saves the evaluation and suggestions requested by input.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "evaluation": {
+                                "type": "string",
+                                "description": (
+                                    "A comment on the IELTS section grade obtained in the specific section and what "
+                                    "it could mean without suggestions."
+                                ),
+                            },
+                            "suggestions": {
+                                "type": "string",
+                                "description": (
+                                    "A small paragraph text with suggestions on how to possibly get a better grade "
+                                    "than the one obtained."
+                                ),
+                            },
+                            "bullet_points": {
+                                "type": "string",
+                                "description": (
+                                    "Text with four bullet points to improve the english speaking ability. Only "
+                                    "include text for the bullet points separated by a paragraph."
+                                ),
+                            },
+                        },
+                        "required": ["evaluation", "suggestions"],
+                    },
+                }
+            }
+        ]
--- a/ielts_be/services/impl/exam/level/init.py
+++ b/ielts_be/services/impl/exam/level/init.py
@@ -0,0 +1,210 @@
+from asyncio import gather
+from typing import Dict, Optional
+from uuid import uuid4
+
+from fastapi import UploadFile
+
+import random
+
+from ielts_be.configs.constants import EducationalContent
+from ielts_be.dtos.level import LevelExercisesDTO
+from ielts_be.repositories import IDocumentStore
+from ielts_be.services import (
+    ILevelService, ILLMService, IReadingService,
+    IWritingService, IListeningService, ISpeakingService
+)
+from .exercises import MultipleChoice, BlankSpace, PassageUtas, FillBlanks
+from .full_exams import CustomLevelModule, LevelUtas
+from .upload import UploadLevelModule
+
+
+class LevelService(ILevelService):
+
+    def __init__(
+        self,
+        llm: ILLMService,
+        document_store: IDocumentStore,
+        mc_variants: Dict,
+        reading_service: IReadingService,
+        writing_service: IWritingService,
+        speaking_service: ISpeakingService,
+        listening_service: IListeningService
+    ):
+        self._llm = llm
+        self._document_store = document_store
+        self._reading_service = reading_service
+        self._upload_module = UploadLevelModule(llm)
+        self._mc_variants = mc_variants
+
+        self._mc = MultipleChoice(llm, mc_variants)
+        self._blank_space = BlankSpace(llm, mc_variants)
+        self._passage_utas = PassageUtas(llm, reading_service, mc_variants)
+        self._fill_blanks = FillBlanks(llm)
+
+        self._level_utas = LevelUtas(llm, self, mc_variants)
+        self._custom = CustomLevelModule(
+            llm, self, reading_service, listening_service, writing_service, speaking_service
+        )
+
+
+    async def upload_level(self, upload: UploadFile, solutions: Optional[UploadFile] = None) -> Dict:
+        return await self._upload_module.generate_level_from_file(upload, solutions)
+
+    async def _generate_exercise(self, req_exercise, start_id):
+        if req_exercise.type == "mcBlank":
+            questions = await self._mc.gen_multiple_choice("blank_space", req_exercise.quantity, start_id)
+            questions["variant"] = "mcBlank"
+            questions["type"] = "multipleChoice"
+            questions["prompt"] = "Choose the correct word or group of words that completes the sentences."
+            return questions
+
+        elif req_exercise.type == "mcUnderline":
+            questions = await self._mc.gen_multiple_choice("underline", req_exercise.quantity, start_id)
+            questions["variant"] = "mcUnderline"
+            questions["type"] = "multipleChoice"
+            questions["prompt"] = "Choose the underlined word or group of words that is not correct."
+            return questions
+
+        elif req_exercise.type == "passageUtas":
+            topic = req_exercise.topic if req_exercise.topic else random.choice(EducationalContent.TOPICS)
+            exercise = await self._passage_utas.gen_reading_passage_utas(
+                start_id,
+                req_exercise.quantity,
+                topic,
+                req_exercise.text_size
+            )
+            exercise["prompt"] = "Read the text and answer the questions below."
+
+            return exercise
+
+        elif req_exercise.type == "fillBlanksMC":
+            exercise = await self._fill_blanks.gen_fill_blanks(
+                start_id,
+                req_exercise.quantity,
+                req_exercise.text_size,
+                req_exercise.topic
+            )
+            exercise["prompt"] = "Read the text below and choose the correct word for each space."
+            return exercise
+
+    async def generate_exercises(self, dto: LevelExercisesDTO):
+        start_ids = []
+        current_id = 1
+        for req_exercise in dto.exercises:
+            start_ids.append(current_id)
+            current_id += req_exercise.quantity
+
+        tasks = [
+            self._generate_exercise(req_exercise, start_id)
+            for req_exercise, start_id in zip(dto.exercises, start_ids)
+        ]
+        questions = await gather(*tasks)
+        questions = [{'id': str(uuid4()), **exercise} for exercise in questions]
+
+        return {"exercises": questions}
+
+    # Just here to support other modules that I don't know if they are supposed to still be used
+    async def gen_multiple_choice(self, mc_variant: str, quantity: int, start_id: int = 1):
+        return await self._mc.gen_multiple_choice(mc_variant, quantity, start_id)
+
+    async def gen_reading_passage_utas(self, start_id, mc_quantity: int, topic=Optional[str]): # sa_quantity: int,
+        return await self._passage_utas.gen_reading_passage_utas(start_id, mc_quantity, topic)
+
+    async def gen_blank_space_text_utas(self, quantity: int, start_id: int, size: int, topic: str):
+        return await self._blank_space.gen_blank_space_text_utas(quantity, start_id, size, topic)
+
+    async def get_level_exam(
+            self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
+    ) -> Dict:
+        pass
+
+    async def get_level_utas(self):
+        return await self._level_utas.get_level_utas()
+
+    async def get_custom_level(self, data: Dict):
+        return await self._custom.get_custom_level(data)
+"""
+    async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
+        mc_template = self._mc_variants[mc_variant]["questions"][0]
+        blank_mod = " blank space " if mc_variant == "blank_space" else " "
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
+                    f'it can be easy, intermediate or advanced.'
+                )
+
+            }
+        ]
+
+        if mc_variant == "underline":
+            messages.append({
+                "role": "user",
+                "content": (
+                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
+                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
+                    'the boss <u>is</u> nice."\n'
+                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+                )
+            })
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question
+"""
+"""
+    async def _replace_exercise_if_exists(
+            self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
+    ):
+        # Extracting relevant fields for comparison
+        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+        # Check if the key is in the set
+        if key in seen_keys:
+            return await self._replace_exercise_if_exists(
+                all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
+                mc_variant, utas
+            )
+        else:
+            seen_keys.add(key)
+
+        if not utas:
+            for exam in all_exams:
+                exam_dict = exam.to_dict()
+                if len(exam_dict.get("parts", [])) > 0:
+                    exercise_dict = exam_dict.get("parts", [])[0]
+                    if len(exercise_dict.get("exercises", [])) > 0:
+                        if any(
+                                exercise["prompt"] == current_exercise["prompt"] and
+                                any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+                                    current_exercise["options"])
+                                for exercise in exercise_dict.get("exercises", [])[0]["questions"]
+                        ):
+                            return await self._replace_exercise_if_exists(
+                                all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
+                                seen_keys, mc_variant, utas
+                            )
+        else:
+            for exam in all_exams:
+                if any(
+                        exercise["prompt"] == current_exercise["prompt"] and
+                        any(exercise["options"][0]["text"] == current_option["text"] for current_option in
+                            current_exercise["options"])
+                        for exercise in exam.get("questions", [])
+                ):
+                    return await self._replace_exercise_if_exists(
+                        all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
+                        seen_keys, mc_variant, utas
+                    )
+        return current_exercise, seen_keys
+"""
--- a/ielts_be/services/impl/exam/level/exercises/init.py
+++ b/ielts_be/services/impl/exam/level/exercises/init.py
@@ -0,0 +1,11 @@
+from .multiple_choice import MultipleChoice
+from .blank_space import BlankSpace
+from .passage_utas import PassageUtas
+from .fill_blanks import FillBlanks
+
+# Exercise generators exported by this package; one entry per class imported above.
+__all__ = [
+    "MultipleChoice",
+    "BlankSpace",
+    "PassageUtas",
+    "FillBlanks"
+]
--- a/ielts_be/services/impl/exam/level/exercises/blank_space.py
+++ b/ielts_be/services/impl/exam/level/exercises/blank_space.py
@@ -0,0 +1,44 @@
+import random
+
+from ielts_be.configs.constants import EducationalContent, GPTModels, TemperatureSettings
+from ielts_be.services import ILLMService
+
+
+class BlankSpace:
+
+    def __init__(self, llm: ILLMService, mc_variants: dict):
+        self._llm = llm
+        self._mc_variants = mc_variants
+
+    async def gen_blank_space_text_utas(
+            self, quantity: int, start_id: int, size: int, topic=None
+    ):
+        if not topic:
+            topic = random.choice(EducationalContent.MTI_TOPICS)
+
+        json_template = self._mc_variants["blank_space_text"]
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+            },
+            {
+                "role": "user",
+                "content": f'Generate a text of at least {size} words about the topic {topic}.'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
+                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
+                    'The ids must be ordered throughout the text and the words must be replaced only once. '
+                    'Put the removed words and respective ids on the words array of the json in the correct order.'
+                )
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages,  ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question["question"]
--- a/ielts_be/services/impl/exam/level/exercises/fill_blanks.py
+++ b/ielts_be/services/impl/exam/level/exercises/fill_blanks.py
@@ -0,0 +1,73 @@
+import random
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings, EducationalContent
+from ielts_be.services import ILLMService
+
+
+class FillBlanks:
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+
+    async def gen_fill_blanks(
+            self, start_id: int, quantity: int, size: int = 300, topic=None
+    ):
+        if not topic:
+            topic = random.choice(EducationalContent.MTI_TOPICS)
+        print(quantity)
+        print(start_id)
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {self._fill_blanks_mc_template()}'
+            },
+            {
+                "role": "user",
+                "content": f'Generate a text of at least {size} words about the topic {topic}.'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'From the generated text choose exactly {quantity} words (cannot be sequential words) replace '
+                    'each with {{id}} (starting from ' + str(start_id) + ' and incrementing), then generate a '
+                    'JSON object containing: the modified text, a solutions array with each word\'s correct '
+                    'letter (A-D), and a words array containing each id with four options where one is '
+                    'the original word (matching the solution) and three are plausible but incorrect '
+                    'alternatives that maintain grammatical consistency. '
+                    'You cannot use repeated words!' #TODO: Solve this after
+                )
+            }
+        ]
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages,  [], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return {
+            **question,
+            "type": "fillBlanks",
+            "variant": "mc",
+            "prompt": "Click a blank to select the appropriate word for it.",
+        }
+
+    @staticmethod
+    def _fill_blanks_mc_template():
+        return {
+            "text": "",
+            "solutions": [
+                {
+                    "id": "",
+                    "solution": "<A,B,C or D>"
+                }
+            ],
+            "words": [
+                {
+                    "id": "",
+                    "options": {
+                        "A": "",
+                        "B": "",
+                        "C": "",
+                        "D": ""
+                    }
+                }
+            ]
+        }
--- a/ielts_be/services/impl/exam/level/exercises/multiple_choice.py
+++ b/ielts_be/services/impl/exam/level/exercises/multiple_choice.py
@@ -0,0 +1,84 @@
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class MultipleChoice:
+
+    def __init__(self, llm: ILLMService, mc_variants: dict):
+        self._llm = llm
+        self._mc_variants = mc_variants
+
+    async def gen_multiple_choice(
+            self, mc_variant: str, quantity: int, start_id: int = 1
+    ):
+        mc_template = self._mc_variants[mc_variant]
+        blank_mod = " blank space " if mc_variant == "blank_space" else " "
+
+        gen_multiple_choice_for_text: str = (
+            'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
+            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
+            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
+            'punctuation. Make sure every question only has 1 correct answer.'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
+                )
+            },
+            {
+                "role": "user",
+                "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
+            }
+        ]
+
+        if mc_variant == "underline":
+            messages.append({
+                "role": "user",
+                "content": (
+                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
+                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
+                    'the boss <u>is</u> nice."\n'
+                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
+                )
+            })
+
+        questions = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return ExercisesHelper.fix_exercise_ids(questions, start_id)
+
+"""
+        if len(question["questions"]) != quantity:
+            return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
+        else:
+            if not utas:
+                all_exams = await self._document_store.get_all("level")
+                seen_keys = set()
+                for i in range(len(question["questions"])):
+                    question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                        all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+                    )
+                return {
+                    "id": str(uuid.uuid4()),
+                    "prompt": "Select the appropriate option.",
+                    "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
+                    "type": "multipleChoice",
+                }
+            else:
+                if all_exams is not None:
+                    seen_keys = set()
+                    for i in range(len(question["questions"])):
+                        question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
+                            all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
+                        )
+                response = ExercisesHelper.fix_exercise_ids(question, start_id)
+                response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+                return response
+    """
+
+
--- a/ielts_be/services/impl/exam/level/exercises/passage_utas.py
+++ b/ielts_be/services/impl/exam/level/exercises/passage_utas.py
@@ -0,0 +1,91 @@
+from typing import Optional
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService, IReadingService
+
+
+class PassageUtas:
+    """Generates the UTAS reading-passage exercise: a passage plus LLM-written questions about it."""
+
+    def __init__(self, llm: ILLMService, reading_service: IReadingService, mc_variants: dict):
+        """Store the collaborators used to build the exercise.
+
+        :param llm: service whose ``prediction`` coroutine produces the questions.
+        :param reading_service: service whose ``generate_reading_passage`` produces the passage.
+        :param mc_variants: JSON templates keyed by variant name; ``"text_mc_utas"`` is read here.
+        """
+        self._llm = llm
+        self._reading_service = reading_service
+        self._mc_variants = mc_variants
+
+    async def gen_reading_passage_utas(
+            self, start_id, mc_quantity: int, topic: Optional[str], word_size: Optional[int] # sa_quantity: int,
+    ):
+        """Generate a reading passage and ``mc_quantity`` multiple choice questions about it.
+
+        :param start_id: id handed to ``ExercisesHelper.fix_exercise_ids`` for renumbering the questions.
+        :param mc_quantity: number of multiple choice questions to generate.
+        :param topic: topic forwarded to the reading service.
+        :param word_size: word count forwarded to the reading service.
+        :return: the multiple choice payload (``type`` forced to ``"multipleChoice"``) extended with
+            ``passage`` (``content`` and ``title``) and ``mcVariant`` set to ``"passageUtas"``.
+        """
+        passage = await self._reading_service.generate_reading_passage(1, topic, word_size)
+        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id, mc_quantity)
+        mc_exercises["type"] = "multipleChoice"
+        # Short answer questions are currently not part of the payload. The disabled shape was:
+        #     exercises: {
+        #         "shortAnswer": short_answer,
+        #         "multipleChoice": mc_exercises,
+        #     },
+        return {
+            **mc_exercises,
+            "passage": {
+                "content": passage["text"],
+                "title": passage["title"]
+            },
+            "mcVariant": "passageUtas"
+        }
+
+    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
+        """Ask the LLM for ``sa_quantity`` short answer questions about ``text``.
+
+        :param text: passage the questions must be about.
+        :param start_id: first question id, communicated to the model in the prompt.
+        :param sa_quantity: number of questions requested.
+        :return: the ``"questions"`` entry of the LLM response.
+        """
+        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
+
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
+                    f'maximum 3 words per answer, about this text:\n"{text}"'
+                )
+            },
+            {
+                "role": "user",
+                "content": f'The id starts at {start_id}.'
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        return question["questions"]
+
+    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
+        """Ask the LLM for ``mc_quantity`` four-option multiple choice questions about ``text``.
+
+        The request is repeated until the response holds exactly ``mc_quantity`` questions.
+        NOTE(review): the retry is unbounded; consider capping the number of attempts.
+
+        :param text: passage the questions must be about.
+        :param start_id: id handed to ``ExercisesHelper.fix_exercise_ids`` for renumbering.
+        :param mc_quantity: number of questions requested.
+        :return: the response after id fixing, with ``"questions"`` passed through
+            ``ExercisesHelper.randomize_mc_options_order``.
+        """
+        json_template = self._mc_variants["text_mc_utas"]
+
+        messages = [
+            {
+                "role": "system",
+                "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
+            },
+            {
+                "role": "user",
+                "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
+            },
+            {
+                "role": "user",
+                "content": 'Make sure every question only has 1 correct answer.'
+            }
+        ]
+
+        question = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if len(question["questions"]) != mc_quantity:
+            # Keyword arguments keep start_id and mc_quantity from being swapped on the retry.
+            return await self._gen_text_multiple_choice_utas(
+                text, start_id=start_id, mc_quantity=mc_quantity
+            )
+
+        response = ExercisesHelper.fix_exercise_ids(question, start_id)
+        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
+        return response
--- a/ielts_be/services/impl/exam/level/full_exams/init.py
+++ b/ielts_be/services/impl/exam/level/full_exams/init.py
@@ -0,0 +1,7 @@
+from .custom import CustomLevelModule
+from .level_utas import LevelUtas
+
+# Names re-exported as the public API of the full_exams package.
+__all__ = [
+    "CustomLevelModule",
+    "LevelUtas"
+]
--- a/ielts_be/services/impl/exam/level/full_exams/custom.py
+++ b/ielts_be/services/impl/exam/level/full_exams/custom.py
@@ -0,0 +1,335 @@
+import queue
+import random
+
+from typing import Dict
+
+from ielts_be.configs.constants import CustomLevelExerciseTypes, EducationalContent
+from ielts_be.services import (
+    ILLMService, ILevelService, IReadingService,
+    IWritingService, IListeningService, ISpeakingService
+)
+
+
+class CustomLevelModule:
+    """Assembles a custom "level" exam from a flat, per-exercise description.
+
+    Every requested exercise is delegated to the level, reading, listening, writing or
+    speaking service. A running exercise id is threaded through the calls and advanced
+    by the number of questions requested for each exercise.
+    """
+
+    # Largest number of multiple choice questions requested from the level service per call.
+    _MC_BATCH_SIZE = 15
+
+    # Suffixes of the per-exercise ``exercise_<i>_<name>_qty`` fields; -1 means "not requested".
+    _QTY_FIELDS = (
+        'sa', 'mc', 'mc3', 'fillblanks', 'writeblanks', 'writeblanksquestions',
+        'writeblanksfill', 'writeblanksform', 'truefalse', 'paragraphmatch', 'ideamatch',
+    )
+
+    def __init__(
+            self,
+            llm: ILLMService,
+            level: ILevelService,
+            reading: IReadingService,
+            listening: IListeningService,
+            writing: IWritingService,
+            speaking: ISpeakingService
+    ):
+        """Store the services each exercise type is delegated to."""
+        self._llm = llm
+        self._level = level
+        self._reading = reading
+        self._listening = listening
+        self._writing = writing
+        self._speaking = speaking
+
+    # TODO: I've changed this to retrieve the args from the body request and not request query args
+    async def get_custom_level(self, data: Dict):
+        """Build the custom level exam described by ``data``.
+
+        ``data`` must hold ``nr_exercises`` and, for each ``i`` in ``1..nr_exercises``,
+        ``exercise_<i>_type`` plus optional ``exercise_<i>_difficulty``, ``_qty``, ``_topic``,
+        ``_topic_2``, ``_text_size`` and the ``_<name>_qty`` fields listed in ``_QTY_FIELDS``.
+        Missing difficulty/topics are picked at random, a missing text size is 700 and a
+        missing quantity is -1. Exercises whose type matches no known value are skipped.
+
+        :param data: request body with the exam description.
+        :return: ``{"exercises": {"exercise_<i>": ...}, "module": "level"}``.
+        :raises TypeError: when ``nr_exercises`` is missing (``int(None)``).
+        :raises ValueError: when a numeric field cannot be converted with ``int``.
+        """
+        nr_exercises = int(data.get('nr_exercises'))
+        types = CustomLevelExerciseTypes
+
+        exercise_id = 1
+        generated = {}
+        for i in range(1, nr_exercises + 1):
+            key = f"exercise_{i}"
+            exercise_type = data.get(f'{key}_type')
+            exercise_difficulty = data.get(f'{key}_difficulty', random.choice(['easy', 'medium', 'hard']))
+            exercise_qty = int(data.get(f'{key}_qty', -1))
+            exercise_topic = data.get(f'{key}_topic', random.choice(EducationalContent.TOPICS))
+            exercise_topic_2 = data.get(f'{key}_topic_2', random.choice(EducationalContent.TOPICS))
+            exercise_text_size = int(data.get(f'{key}_text_size', 700))
+            # All quantities are parsed up front so a malformed value fails regardless of exercise type.
+            qty = {name: int(data.get(f'{key}_{name}_qty', -1)) for name in self._QTY_FIELDS}
+
+            # (exercise name, requested quantity) pairs, in the order they are queued for the services.
+            reading_kinds = [
+                ('fillBlanks', qty['fillblanks']),
+                ('writeBlanks', qty['writeblanks']),
+                ('trueFalse', qty['truefalse']),
+                ('paragraphMatch', qty['paragraphmatch']),
+            ]
+            listening_short = [
+                ('multipleChoice', qty['mc']),
+                ('writeBlanksQuestions', qty['writeblanksquestions']),
+            ]
+            listening_full = listening_short + [
+                ('writeBlanksFill', qty['writeblanksfill']),
+                ('writeBlanksForm', qty['writeblanksform']),
+            ]
+
+            if exercise_type == types.MULTIPLE_CHOICE_4.value:
+                generated[key], exercise_id = await self._gen_multiple_choice(
+                    "normal", exercise_qty, exercise_id
+                )
+            elif exercise_type == types.MULTIPLE_CHOICE_BLANK_SPACE.value:
+                generated[key], exercise_id = await self._gen_multiple_choice(
+                    "blank_space", exercise_qty, exercise_id
+                )
+            elif exercise_type == types.MULTIPLE_CHOICE_UNDERLINED.value:
+                generated[key], exercise_id = await self._gen_multiple_choice(
+                    "underline", exercise_qty, exercise_id
+                )
+            elif exercise_type == types.BLANK_SPACE_TEXT.value:
+                generated[key] = await self._level.gen_blank_space_text_utas(
+                    exercise_qty, exercise_id, exercise_text_size
+                )
+                generated[key]["type"] = "blankSpaceText"
+                exercise_id = exercise_id + exercise_qty
+            elif exercise_type == types.READING_PASSAGE_UTAS.value:
+                generated[key] = await self._level.gen_reading_passage_utas(
+                    exercise_id, qty['sa'], qty['mc'], exercise_topic
+                )
+                generated[key]["type"] = "readingExercises"
+                # NOTE(review): the id advances by exercise_qty although the request is sized by the
+                # sa/mc quantities (and exercise_qty defaults to -1) - confirm this is intended.
+                exercise_id = exercise_id + exercise_qty
+            elif exercise_type == types.WRITING_LETTER.value:
+                generated[key] = await self._writing.get_writing_task_general_question(
+                    1, exercise_topic, exercise_difficulty
+                )
+                generated[key]["type"] = "writing"
+                exercise_id = exercise_id + 1
+            elif exercise_type == types.WRITING_2.value:
+                generated[key] = await self._writing.get_writing_task_general_question(
+                    2, exercise_topic, exercise_difficulty
+                )
+                generated[key]["type"] = "writing"
+                exercise_id = exercise_id + 1
+            elif exercise_type == types.SPEAKING_1.value:
+                generated[key] = await self._speaking.get_speaking_part(
+                    1, exercise_topic, exercise_difficulty, exercise_topic_2
+                )
+                generated[key]["type"] = "interactiveSpeaking"
+                exercise_id = exercise_id + 1
+            elif exercise_type == types.SPEAKING_2.value:
+                generated[key] = await self._speaking.get_speaking_part(
+                    2, exercise_topic, exercise_difficulty
+                )
+                generated[key]["type"] = "speaking"
+                exercise_id = exercise_id + 1
+            elif exercise_type == types.SPEAKING_3.value:
+                generated[key] = await self._speaking.get_speaking_part(
+                    3, exercise_topic, exercise_difficulty
+                )
+                generated[key]["type"] = "interactiveSpeaking"
+                exercise_id = exercise_id + 1
+            elif exercise_type == types.READING_1.value:
+                generated[key], exercise_id = await self._gen_reading(
+                    1, exercise_topic, exercise_difficulty, exercise_id, reading_kinds
+                )
+            elif exercise_type == types.READING_2.value:
+                generated[key], exercise_id = await self._gen_reading(
+                    2, exercise_topic, exercise_difficulty, exercise_id, reading_kinds
+                )
+            elif exercise_type == types.READING_3.value:
+                # Passage 3 additionally supports idea matching.
+                generated[key], exercise_id = await self._gen_reading(
+                    3, exercise_topic, exercise_difficulty, exercise_id,
+                    reading_kinds + [('ideaMatch', qty['ideamatch'])]
+                )
+            elif exercise_type == types.LISTENING_1.value:
+                generated[key], exercise_id = await self._gen_listening(
+                    1, exercise_topic, exercise_difficulty, exercise_id, listening_full
+                )
+            elif exercise_type == types.LISTENING_2.value:
+                generated[key], exercise_id = await self._gen_listening(
+                    2, exercise_topic, exercise_difficulty, exercise_id, listening_short
+                )
+            elif exercise_type == types.LISTENING_3.value:
+                # Section 3 uses the three-option multiple choice variant.
+                generated[key], exercise_id = await self._gen_listening(
+                    3, exercise_topic, exercise_difficulty, exercise_id,
+                    [
+                        ('multipleChoice3Options', qty['mc3']),
+                        ('writeBlanksQuestions', qty['writeblanksquestions']),
+                    ]
+                )
+            elif exercise_type == types.LISTENING_4.value:
+                generated[key], exercise_id = await self._gen_listening(
+                    4, exercise_topic, exercise_difficulty, exercise_id, listening_full
+                )
+
+        return {
+            "exercises": generated,
+            "module": "level"
+        }
+
+    async def _gen_multiple_choice(self, variant: str, quantity: int, start_id: int):
+        """Generate ``quantity`` multiple choice questions in batches of at most ``_MC_BATCH_SIZE``.
+
+        The questions gathered so far are passed to the level service as ``all_exams`` on every call.
+
+        :return: ``(exercise, next_id)`` where ``exercise`` is the ``multipleChoice`` payload and
+            ``next_id`` is ``start_id`` advanced by the number of questions requested.
+        """
+        exercise = {"questions": [], "type": "multipleChoice"}
+        next_id = start_id
+        remaining = quantity
+        while remaining > 0:
+            batch = min(remaining, self._MC_BATCH_SIZE)
+            mc_response = await self._level.gen_multiple_choice(
+                variant, batch, next_id, utas=True,
+                all_exams=exercise["questions"]
+            )
+            exercise["questions"].extend(mc_response["questions"])
+            next_id = next_id + batch
+            remaining = remaining - batch
+        return exercise, next_id
+
+    async def _gen_reading(self, passage: int, topic, difficulty, start_id: int, candidates):
+        """Generate reading passage ``passage`` with the requested exercises; return ``(exercise, next_id)``."""
+        kinds, qty_q, total_qty = self._collect_requested(candidates)
+        exercise = await self._reading.gen_reading_passage(
+            passage, topic, kinds, qty_q, difficulty, start_id
+        )
+        exercise["type"] = "reading"
+        return exercise, start_id + total_qty
+
+    async def _gen_listening(self, section: int, topic, difficulty, start_id: int, candidates):
+        """Generate listening section ``section`` with the requested exercises; return ``(exercise, next_id)``."""
+        kinds, qty_q, total_qty = self._collect_requested(candidates)
+        exercise = await self._listening.get_listening_question(
+            section, topic, kinds, difficulty, qty_q, start_id
+        )
+        exercise["type"] = "listening"
+        return exercise, start_id + total_qty
+
+    @staticmethod
+    def _collect_requested(candidates):
+        """Keep the ``(name, quantity)`` pairs whose quantity is not -1.
+
+        :return: ``(names, quantities, total)`` - the kept names, a ``queue.Queue`` holding their
+            quantities in the same order, and the sum of those quantities.
+        """
+        names = []
+        quantities = queue.Queue()
+        total = 0
+        for name, quantity in candidates:
+            if quantity != -1:
+                names.append(name)
+                quantities.put(quantity)
+                total = total + quantity
+        return names, quantities, total
--- a/ielts_be/services/impl/exam/level/full_exams/level_utas.py
+++ b/ielts_be/services/impl/exam/level/full_exams/level_utas.py
@@ -0,0 +1,119 @@
+import json
+import uuid
+
+from ielts_be.services import ILLMService
+
+
+class LevelUtas:
+    """Builds the fixed-layout UTAS level exam by calling the level service part by part."""
+
+    def __init__(self, llm: ILLMService, level_service, mc_variants: dict):
+        """Keep references to the LLM service, the level service and the MC variant templates."""
+        self._level_service = level_service
+        self._mc_variants = mc_variants
+        self._llm = llm
+
+    async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
+        """Generate the five-part UTAS level exam.
+
+        :param diagnostic: copied into the result as ``isDiagnostic``.
+        :param min_timer: copied into the result as ``minTimer``.
+        :return: dict with the five exercise sections under ``exercises`` plus
+            ``isDiagnostic``, ``minTimer`` and ``module`` (``"level"``).
+        """
+
+        def section(prompt: str, kind: str, part: int) -> dict:
+            # Empty section template; "questions" is filled in once generated.
+            return {
+                "id": str(uuid.uuid4()),
+                "prompt": prompt,
+                "questions": None,
+                "type": kind,
+                "part": part
+            }
+
+        mc = section(
+            "Choose the correct word or group of words that completes the sentences.", "multipleChoice", 1
+        )
+        umc = section(
+            "Choose the underlined word or group of words that is not correct.", "multipleChoice", 2
+        )
+        bs_1 = section("Read the text and write the correct word for each space.", "blankSpaceText", 3)
+        bs_2 = section("Read the text and write the correct word for each space.", "blankSpaceText", 4)
+        reading = section("Read the text and answer the questions below.", "readingExercises", 5)
+
+        all_mc_questions = []
+
+        # Blank-space multiple choice: three batches of 15 questions, ids starting at 1, 16 and 31.
+        # Each batch is dumped to stdout and handed to the following calls through all_exams.
+        batches = []
+        for first_id in (1, 16, 31):
+            batch = await self._level_service.gen_multiple_choice(
+                "blank_space", 15, first_id, utas=True, all_exams=all_mc_questions
+            )
+            print(json.dumps(batch, indent=4))
+            all_mc_questions.append(batch)
+            batches.append(batch)
+
+        mc_exercises = batches[0]['questions'] + batches[1]['questions'] + batches[2]['questions']
+        print(json.dumps(mc_exercises, indent=4))
+        mc["questions"] = mc_exercises
+
+        # Underlined multiple choice, ids from 46.
+        underlined_mc = await self._level_service.gen_multiple_choice(
+            "underline", 15, 46, utas=True, all_exams=all_mc_questions
+        )
+        print(json.dumps(underlined_mc, indent=4))
+        umc["questions"] = underlined_mc
+
+        # Two blank-space texts, called with (12, 61, 250) and (14, 73, 350).
+        blank_space_text_1 = await self._level_service.gen_blank_space_text_utas(12, 61, 250)
+        print(json.dumps(blank_space_text_1, indent=4))
+        bs_1["questions"] = blank_space_text_1
+
+        blank_space_text_2 = await self._level_service.gen_blank_space_text_utas(14, 73, 350)
+        print(json.dumps(blank_space_text_2, indent=4))
+        bs_2["questions"] = blank_space_text_2
+
+        # Reading passage with its questions.
+        reading_text = await self._level_service.gen_reading_passage_utas(87, 10, 4)
+        print(json.dumps(reading_text, indent=4))
+        reading["questions"] = reading_text
+
+        exercises = {
+            "blankSpaceMultipleChoice": mc,
+            "underlinedMultipleChoice": umc,
+            "blankSpaceText1": bs_1,
+            "blankSpaceText2": bs_2,
+            "readingExercises": reading,
+        }
+        return {
+            "exercises": exercises,
+            "isDiagnostic": diagnostic,
+            "minTimer": min_timer,
+            "module": "level"
+        }
--- a/ielts_be/services/impl/exam/level/mc_variants.json
+++ b/ielts_be/services/impl/exam/level/mc_variants.json
@@ -0,0 +1,137 @@
+{
+    "normal": {
+        "questions": [
+            {
+                "id": "9",
+                "options": [
+                    {
+                        "id": "A",
+                        "text": "And"
+                    },
+                    {
+                        "id": "B",
+                        "text": "Cat"
+                    },
+                    {
+                        "id": "C",
+                        "text": "Happy"
+                    },
+                    {
+                        "id": "D",
+                        "text": "Jump"
+                    }
+                ],
+                "prompt": "Which of the following is a conjunction?",
+                "solution": "A",
+                "variant": "text"
+            }
+        ]
+    },
+    "blank_space": {
+        "questions": [
+            {
+                "id": "9",
+                "options": [
+                    {
+                        "id": "A",
+                        "text": "This"
+                    },
+                    {
+                        "id": "B",
+                        "text": "Those"
+                    },
+                    {
+                        "id": "C",
+                        "text": "These"
+                    },
+                    {
+                        "id": "D",
+                        "text": "That"
+                    }
+                ],
+                "prompt": "_____ man there is very kind.",
+                "solution": "A",
+                "variant": "text"
+            }
+        ]
+    },
+    "underline": {
+        "questions": [
+            {
+                "id": "9",
+                "options": [
+                    {
+                        "id": "A",
+                        "text": "was"
+                    },
+                    {
+                        "id": "B",
+                        "text": "for work"
+                    },
+                    {
+                        "id": "C",
+                        "text": "because"
+                    },
+                    {
+                        "id": "D",
+                        "text": "could"
+                    }
+                ],
+                "prompt": "I <u>was</u> late <u>for work</u> yesterday <u>because</u> I <u>could</u> start my car.",
+                "solution": "D",
+                "variant": "text"
+            }
+        ]
+    },
+    "blank_space_text": {
+        "question": {
+            "words": [
+                {
+                    "id": "1",
+                    "text": "a"
+                },
+                {
+                    "id": "2",
+                    "text": "b"
+                },
+                {
+                    "id": "3",
+                    "text": "c"
+                },
+                {
+                    "id": "4",
+                    "text": "d"
+                }
+            ],
+            "text": "text"
+        }
+    },
+    "text_mc_utas": {
+        "questions": [
+            {
+                "id": "9",
+                "options": [
+                    {
+                        "id": "A",
+                        "text": "a"
+                    },
+                    {
+                        "id": "B",
+                        "text": "b"
+                    },
+                    {
+                        "id": "C",
+                        "text": "c"
+                    },
+                    {
+                        "id": "D",
+                        "text": "d"
+                    }
+                ],
+                "prompt": "prompt",
+                "solution": "A",
+                "variant": "text"
+            }
+        ]
+    }
+}
--- a/ielts_be/services/impl/exam/level/upload.py
+++ b/ielts_be/services/impl/exam/level/upload.py
@@ -0,0 +1,338 @@
+from uuid import uuid4
+
+import aiofiles
+import os
+from logging import getLogger
+
+from typing import Dict, Any, Optional
+
+import pdfplumber
+from fastapi import UploadFile
+
+from ielts_be.services import ILLMService
+from ielts_be.helpers import FileHelper
+from ielts_be.mappers import LevelMapper
+
+from ielts_be.dtos.exams.level import Exam
+from ielts_be.dtos.sheet import Sheet
+from ielts_be.utils import suppress_loggers
+
+
+class UploadLevelModule:
+    def __init__(self, openai: ILLMService):
+        self._logger = getLogger(__name__)
+        self._llm = openai
+
+    async def generate_level_from_file(self, exercises: UploadFile, solutions: Optional[UploadFile]) -> Dict[str, Any] | None:
+        path_id = str(uuid4())
+        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
+        FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
+
+        if solutions:
+            ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
+            FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
+
+        #completion: Coroutine[Any, Any, Exam] = (
+        #    self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
+        #)
+        response = await self._html_completion(path_id, solutions is not None)
+
+        FileHelper.remove_directory(f'./tmp/{path_id}')
+
+        if response:
+            return self.fix_ids(response.model_dump(exclude_none=True))
+        return None
+
+
+    @staticmethod
+    @suppress_loggers()
+    def _check_pdf_for_images(pdf_path: str) -> bool:
+        with pdfplumber.open(pdf_path) as pdf:
+            for page in pdf.pages:
+                if page.images:
+                    return True
+        return False
+
+    def _level_json_schema(self):
+        return {
+            "parts": [
+                {
+                    "text": {
+                        "content": "<this attribute is mandatory if there is a text passage else this 'text' field is omitted>",
+                        "title": "<this attribute is optional you may exclude it if not required>",
+                    },
+                    "exercises": [
+                        self._multiple_choice_html(),
+                        self._passage_blank_space_html()
+                    ]
+                }
+            ]
+        }
+
+    async def _html_completion(self, path_id: str, solutions_provided: bool) -> Exam:
+        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
+            html = await f.read()
+
+        solutions = []
+        if solutions_provided:
+            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as f:
+                solutions_html = await f.read()
+                solutions.append({
+                    "role": "user",
+                    "content": f'The solutions to the question sheet are the following:\n\n{solutions_html}'
+                })
+
+        return await self._llm.pydantic_prediction(
+            [self._gpt_instructions_html(),
+             {
+                 "role": "user",
+                 "content": html
+             },
+             *solutions
+             ],
+            LevelMapper.map_to_exam_model,
+            str(self._level_json_schema())
+        )
+
+    def _gpt_instructions_html(self):
+        return {
+            "role": "system",
+            "content": (
+                'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
+                'Your current task is to scrape html english questions sheets and structure them into parts NOT sections.\n\n'
+
+                'In the question sheet you will only see 4 types of question:\n'
+                '- blank space multiple choice\n'
+                '- underline multiple choice\n'
+                '- reading passage blank space multiple choice\n'
+                '- reading passage multiple choice\n\n'
+
+                'For the first two types of questions the template is the same but the question prompts differ, '
+                'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
+                'multiple "_", in the underline you must include in the prompt the <u></u> to '
+                'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
+
+                'For the reading passage exercise you must handle the formatting of the passages. If it is a '
+                'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
+                'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
+                'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
+                'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
+                'between paragraphs.\n\n'
+
+                'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
+                'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
+                'out the best paragraph separation possible.'
+
+                'You will place all the information in a single JSON: '
+                '{"parts": [{"exercises": [{...}], "text": {"title": "", "content": ""} ]}\n '
+                'Where {...} are the exercises templates for each part of a question sheet and the optional field '
+                'text, which contains the reading passages that are required in order to solve the part questions, '
+                '(if there are passages) place them in text.content and if there is a title place it in text.title '
+                'else omit the title field.\n'
+                
+                'IMPORTANT: As stated earlier your job is to structure the questions into PARTS not SECTION, this means '
+                'that if there is for example: Section 1, Part 1 and Part 2, Section 2, Part 1 and Part 2, you MUST '
+                'place in the parts array 4 parts NOT 2 parts with the exercises of both parts! If there are no sections '
+                'and only Parts then group them by parts, and when I say parts I mean it in the fucking literal sense of the'
+                ' word Part x which is in the html. '
+                'You must strictly adhere to this instruction, do not mistake sections for parts!\n'
+
+                'The templates for the exercises are the following:\n'
+                '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
+                f'{self._multiple_choice_html()}\n'
+                f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
+
+                'IMPORTANT: The text.content field must be set with the reading passages of a part (if there is one)'
+                'without paragraphs or line numbers, with 2 newlines between paragraphs.'
+            )
+        }
+
+    @staticmethod
+    def _multiple_choice_html():
+        return {
+            "type": "multipleChoice",
+            "prompt": "<general instructions for this section>",
+            "questions": [
+                {
+                    "id": "<question number as string>",
+                    "prompt": "<question text>",
+                    "options": [
+                        {
+                            "id": "<A/B/C/D>",
+                            "text": "<option text>"
+                        }
+                    ],
+                    "solution": "<correct option letter>",
+                    "variant": "text"
+                }
+            ]
+        }
+
+    @staticmethod
+    def _passage_blank_space_html():
+        return {
+            "type": "fillBlanks",
+            "variant": "mc",
+            "prompt": "Click a blank to select the appropriate word for it.",
+            "text": (
+                "<The whole text for the exercise with replacements for blank spaces and their "
+                "ids with {{<question id/number>}} with 2 newlines between paragraphs>"
+            ),
+            "solutions": [
+                {
+                    "id": "<question number>",
+                    "solution": "<the option that holds the solution>"
+                }
+            ],
+            "words": [
+                {
+                    "id": "<question number>",
+                    "options": {
+                        "A": "<a option>",
+                        "B": "<b option>",
+                        "C": "<c option>",
+                        "D": "<d option>"
+                    }
+                }
+            ]
+        }
+
+    async def _png_completion(self, path_id: str) -> Exam:
+        FileHelper.pdf_to_png(path_id)
+
+        tmp_files = os.listdir(f'./tmp/{path_id}')
+        pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
+        pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
+
+        json_schema = {
+            "components": [
+                {"type": "part", "part": "<name or number of the part>"},
+                self._multiple_choice_png(),
+                {"type": "blanksPassage", "text": (
+                    "<The whole text for the exercise with replacements for blank spaces and their "
+                    "ids with {{<question number>}} with 2 newlines between paragraphs>"
+                )},
+                {"type": "passage", "context": (
+                    "<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
+                )},
+                self._passage_blank_space_png()
+            ]
+        }
+
+        components = []
+
+        for i in range(len(pages)):
+            current_page = pages[i]
+            next_page = pages[i + 1] if i + 1 < len(pages) else None
+            batch = [current_page, next_page] if next_page else [current_page]
+
+            sheet = await self._png_batch(path_id, batch, json_schema)
+            sheet.batch = i + 1
+            components.append(sheet.model_dump())
+
+        batches = {"batches": components}
+
+        return await self._batches_to_exam_completion(batches)
+
+    async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
+        return await self._llm.pydantic_prediction(
+            [self._gpt_instructions_png(),
+             {
+                 "role": "user",
+                 "content": [
+                     *FileHelper.b64_pngs(path_id, files)
+                 ]
+             }
+             ],
+            LevelMapper.map_to_sheet,
+            str(json_schema)
+        )
+
+    def _gpt_instructions_png(self):
+        return {
+            "role": "system",
+            "content": (
+                'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
+                'Your current task is to scan english questions sheets.\n\n'
+
+                'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
+                'sheet components you will retrieve from the images, the components and their corresponding JSON '
+                'templates are as follows:\n'
+
+                '- Part, a standalone part or part of a section of the question sheet: '
+                '{"type": "part", "part": "<name or number of the part>"}\n'
+
+                '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
+                'the prompt field of the template: blanks, underlines and normal. '
+
+                'In the blanks prompt you must leave 5 underscores to represent the blank space. '
+                'In the underlines questions the objective is to pick the words that are incorrect in the given '
+                'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
+                'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
+                'in the order they appear in the question for the options A to D, disreguard options that might be '
+                'included underneath the underlines question and use the ones you wrapped in <u></u>.'
+                'In normal you just leave the question as is. '
+
+                f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
+
+                '- Reading Passages, there are two types of reading passages. Reading passages where you will see '
+                'blanks represented by a (question id) followed by a line, you must format these types of reading '
+                'passages to be only the text with the brackets that have the question id and line replaced with '
+                '"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
+                'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
+                'and place 2 newlines between paragraphs. '
+
+                'For the reading passages with blanks the template is: {"type": "blanksPassage", '
+                '"text": "<The whole text for the exercise with replacements for blank spaces and their '
+                'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '
+
+                'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
+                'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'
+
+                '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
+                'options with the question id and the options from a to d. The template is: '
+                f'{self._passage_blank_space_png()}\n'
+
+                'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
+                'reading passages are cut off don\'t include them in the JSON.'
+            )
+        }
+
+    def _multiple_choice_png(self):
+        multiple_choice = self._multiple_choice_html()["questions"][0]
+        multiple_choice["type"] = "multipleChoice"
+        multiple_choice.pop("solution")
+        return multiple_choice
+
+    def _passage_blank_space_png(self):
+        passage_blank_space = self._passage_blank_space_html()["words"][0]
+        passage_blank_space["type"] = "fillBlanks"
+        return passage_blank_space
+
+    async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
+        return await self._llm.pydantic_prediction(
+            [self._gpt_instructions_html(),
+             {
+                 "role": "user",
+                 "content": str(batches)
+             }
+             ],
+            LevelMapper.map_to_exam_model,
+            str(self._level_json_schema())
+        )
+
+    @staticmethod
+    def fix_ids(response):
+        counter = 1
+        for part in response["parts"]:
+            for exercise in part["exercises"]:
+                if exercise["type"] == "multipleChoice":
+                    for question in exercise["questions"]:
+                        question["id"] = counter
+                        counter += 1
+                if exercise["type"] == "fillBlanks":
+                    for i in range(len(exercise["words"])):
+                        exercise["words"][i]["id"] = counter
+                        exercise["solutions"][i]["id"] = counter
+                        counter += 1
+        return response
--- a/ielts_be/services/impl/exam/listening/init.py
+++ b/ielts_be/services/impl/exam/listening/init.py
@@ -0,0 +1,290 @@
+import asyncio
+from logging import getLogger
+import random
+from typing import Dict, Any
+
+from starlette.datastructures import UploadFile
+
+from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises
+from ielts_be.repositories import IFileStorage, IDocumentStore
+from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService
+from ielts_be.configs.constants import (
+    NeuralVoices, GPTModels, TemperatureSettings, EducationalContent,
+    FieldsAndExercises
+)
+from ielts_be.helpers import FileHelper
+from .import_listening import ImportListeningModule
+from .write_blank_forms import WriteBlankForms
+from .write_blanks import WriteBlanks
+from .write_blank_notes import WriteBlankNotes
+from ..shared import TrueFalse, MultipleChoice
+
+
+class ListeningService(IListeningService):
+
+    # Closing instructions appended to the user prompt built by
+    # _generate_listening_conversation.
+    CONVERSATION_TAIL = (
+        "Please include random names and genders for the characters in your dialogue. "
+        "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
+    )
+
+    # Closing instructions appended to the user prompt built by
+    # _generate_listening_monologue.
+    MONOLOGUE_TAIL = (
+        "Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
+    )
+
+    def __init__(
+            self, llm: ILLMService,
+            tts: ITextToSpeechService,
+            stt: ISpeechToTextService,
+            file_storage: IFileStorage,
+            document_store: IDocumentStore
+    ):
+        self._llm = llm
+        self._tts = tts
+        self._stt = stt
+        self._file_storage = file_storage
+        self._document_store = document_store
+        self._logger = getLogger(__name__)
+        self._multiple_choice = MultipleChoice(llm)
+        self._write_blanks = WriteBlanks(llm)
+        self._write_blanks_forms = WriteBlankForms(llm)
+        self._write_blanks_notes = WriteBlankNotes(llm)
+        self._import = ImportListeningModule(llm)
+        self._true_false = TrueFalse(llm)
+        self._sections = {
+            "section_1": {
+                "topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
+                "exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
+                "exercise_sample_size": 1,
+                "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
+                "generate_dialogue": self._generate_listening_conversation,
+                "type": "conversation",
+            },
+            "section_2": {
+                "topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
+                "exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
+                "exercise_sample_size": 2,
+                "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
+                "generate_dialogue": self._generate_listening_monologue,
+                "type": "monologue",
+            },
+            "section_3": {
+                "topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
+                "exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
+                "exercise_sample_size": 1,
+                "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
+                "generate_dialogue": self._generate_listening_conversation,
+                "type": "conversation",
+            },
+            "section_4": {
+                "topic": EducationalContent.ACADEMIC_SUBJECTS,
+                "exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
+                "exercise_sample_size": 2,
+                "total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
+                "generate_dialogue": self._generate_listening_monologue,
+                "type": "monologue"
+            }
+        }
+
+    async def import_exam(
+            self, exercises: UploadFile, solutions: UploadFile = None
+    ) -> Dict[str, Any] | None:
+        return await self._import.import_from_file(exercises, solutions)
+
+
+    async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
+        return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
+
+    # TODO: When mp3 editor
+    async def get_dialog_from_audio(self, upload: UploadFile):
+        ext, path_id = await FileHelper.save_upload(upload)
+        dialog = await self._stt.speech_to_text(f'./tmp/{path_id}/upload.{ext}')
+        FileHelper.remove_directory(f'./tmp/{path_id}')
+
+    async def generate_mp3(self, dto: Dialog) -> bytes:
+        return await self._tts.text_to_speech(dto)
+
+    async def get_listening_question(self, dto: GenerateListeningExercises):
+        start_id = 1
+        exercise_tasks = []
+
+        for req_exercise in dto.exercises:
+            exercise_tasks.append(
+                self._generate_exercise(
+                    req_exercise,
+                    "dialog or monologue",
+                    dto.text,
+                    start_id,
+                    dto.difficulty
+                )
+            )
+            start_id += req_exercise.quantity
+
+        return {"exercises": await asyncio.gather(*exercise_tasks) }
+
+    async def _generate_exercise(
+            self, req_exercise: ListeningExercises, dialog_type: str, text: str, start_id: int, difficulty: str
+    ):
+        if req_exercise.type == "multipleChoice" or req_exercise.type == "multipleChoice3Options":
+            n_options = 4 if req_exercise.type == "multipleChoice" else 3
+            question = await self._multiple_choice.gen_multiple_choice(
+                text, req_exercise.quantity, start_id, difficulty, n_options
+            )
+            self._logger.info(f"Added multiple choice: {question}")
+            return question
+
+        elif req_exercise.type == "writeBlanksQuestions":
+            question = await self._write_blanks.gen_write_blanks_questions(
+                dialog_type, text, req_exercise.quantity, start_id, difficulty
+            )
+            question["variant"] = "questions"
+            self._logger.info(f"Added write blanks questions: {question}")
+            return question
+
+        elif req_exercise.type == "writeBlanksFill":
+            question = await self._write_blanks_notes.gen_write_blanks_notes(
+                dialog_type, text, req_exercise.quantity, start_id, difficulty
+            )
+            question["variant"] = "fill"
+            self._logger.info(f"Added write blanks notes: {question}")
+            return question
+
+        elif req_exercise.type == "writeBlanksForm":
+            question = await self._write_blanks_forms.gen_write_blanks_form(
+                dialog_type, text, req_exercise.quantity, start_id, difficulty
+            )
+            question["variant"] = "form"
+            self._logger.info(f"Added write blanks form: {question}")
+            return question
+        elif req_exercise.type == "trueFalse":
+            question = await self._true_false.gen_true_false_not_given_exercise(
+                text, req_exercise.quantity, start_id, difficulty, "listening"
+            )
+            self._logger.info(f"Added trueFalse: {question}")
+            return question
+
+
+    # ==================================================================================================================
+    # generate_listening_question helpers
+    # ==================================================================================================================
+
+    async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
+        head = (
+            'Compose an authentic conversation between two individuals in the everyday social context of "'
+            if section == 1 else
+            'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
+            'social context of "'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'{head}{topic}". {self.CONVERSATION_TAIL}'
+                )
+            }
+        ]
+
+        if section == 1:
+            messages.extend([
+                {
+                    "role": "user",
+                    "content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
+
+                },
+                {
+                    "role": "user",
+                    "content": 'Try to have spelling of names (cities, people, etc)'
+
+                }
+            ])
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            messages,
+            ["conversation"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        conversation = self._get_conversation_voices(response, True)
+        return {"dialog": conversation["conversation"]}
+
+
+    async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
+        head = (
+            'Generate a comprehensive monologue set in the social context of'
+            if section == 2 else
+            'Generate a comprehensive and complex monologue on the academic subject of'
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"monologue": "monologue"}')
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
+                )
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            messages,
+            ["monologue"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return {"dialog": response["monologue"]}
+
+    def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
+        chosen_voices = []
+        name_to_voice = {}
+        for segment in response['conversation']:
+            if 'voice' not in segment:
+                name = segment['name']
+                if name in name_to_voice:
+                    voice = name_to_voice[name]
+                else:
+                    voice = None
+                    # section 1
+                    if unique_voices_across_segments:
+                        while voice is None:
+                            chosen_voice = self._get_random_voice(segment['gender'])
+                            if chosen_voice not in chosen_voices:
+                                voice = chosen_voice
+                                chosen_voices.append(voice)
+                    # section 3
+                    else:
+                        voice = self._get_random_voice(segment['gender'])
+                    name_to_voice[name] = voice
+                segment['voice'] = voice
+        return response
+
+    @staticmethod
+    def _get_random_voice(gender: str):
+        if gender.lower() == 'male':
+            available_voices = NeuralVoices.MALE_NEURAL_VOICES
+        else:
+            available_voices = NeuralVoices.FEMALE_NEURAL_VOICES
+
+        return random.choice(available_voices)['Id']
+
+    @staticmethod
+    def parse_conversation(conversation_data):
+        conversation_list = conversation_data.get('conversation', [])
+        readable_text = []
+
+        for message in conversation_list:
+            name = message.get('name', 'Unknown')
+            text = message.get('text', '')
+            readable_text.append(f"{name}: {text}")
+
+        return "\n".join(readable_text)
--- a/ielts_be/services/impl/exam/listening/import_listening.py
+++ b/ielts_be/services/impl/exam/listening/import_listening.py
@@ -0,0 +1,183 @@
+import json
+from logging import getLogger
+from typing import Dict, Any
+from uuid import uuid4
+import aiofiles
+from fastapi import UploadFile
+
+from ielts_be.dtos.exams.listening import ListeningExam
+from ielts_be.helpers import FileHelper
+from ielts_be.mappers.listening import ListeningMapper
+from ielts_be.services import ILLMService
+
+
+class ImportListeningModule:
+    def __init__(self, llm_service: ILLMService):
+        self._logger = getLogger(__name__)
+        self._llm = llm_service
+
+    async def import_from_file(
+            self,
+            exercises: UploadFile,
+            solutions: UploadFile = None
+    ) -> Dict[str, Any] | None:
+        path_id = str(uuid4())
+
+        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
+        FileHelper.convert_file_to_html(
+            f'./tmp/{path_id}/exercises.{ext}',
+            f'./tmp/{path_id}/exercises.html'
+        )
+
+        if solutions:
+            ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
+            FileHelper.convert_file_to_html(
+                f'./tmp/{path_id}/solutions.{ext}',
+                f'./tmp/{path_id}/solutions.html'
+            )
+
+        response = await self._get_listening_sections(path_id, solutions is not None)
+
+        FileHelper.remove_directory(f'./tmp/{path_id}')
+        if response:
+            return response.model_dump(exclude_none=True)
+        return None
+
+    async def _get_listening_sections(
+            self,
+            path_id: str,
+            has_solutions: bool = False
+    ) -> ListeningExam:
+        async with aiofiles.open(
+                f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
+        ) as f:
+            exercises_html = await f.read()
+
+        messages = [
+            self._instructions(has_solutions),
+            {
+                "role": "user",
+                "content": f"Listening exercise sheet:\n\n{exercises_html}"
+            }
+        ]
+
+        if has_solutions:
+            async with aiofiles.open(
+                    f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8'
+            ) as f:
+                solutions_html = await f.read()
+                messages.append({
+                    "role": "user",
+                    "content": f"Solutions:\n\n{solutions_html}"
+                })
+
+        return await self._llm.pydantic_prediction(
+            messages,
+            ListeningMapper.map_to_test_model,
+            str(self._listening_json_schema())
+        )
+
+    @staticmethod
+    def _multiple_choice_template() -> dict:
+        return {
+            "type": "multipleChoice",
+            "prompt": "<general instructions for this section>",
+            "questions": [
+                {
+                    "id": "<question number as string>",
+                    "prompt": "<question text>",
+                    "options": [
+                        {
+                            "id": "<A/B/C/D>",
+                            "text": "<option text>"
+                        }
+                    ],
+                    "solution": "<correct option letter>",
+                    "variant": "text"
+                }
+            ]
+        }
+
+    @staticmethod
+    def _write_blanks_template() -> dict:
+        """Return the placeholder template for a write-blanks exercise.
+
+        The ``variant`` placeholder spells out how to choose between the
+        ``questions``, ``fill`` and ``form`` layouts.
+        """
+        variant_hint = "\n".join([
+            "<one of: questions, fill, form - chosen based on format:",
+            "- questions: for numbered questions with blank at end",
+            "- fill: for paragraph/summary with blanks, it MUST be a PARAGRAPH not separated related questions!",
+            "- form: when questions and fill dont meet the requirements>",
+        ])
+        question = {
+            "id": "<question number as string>",
+            # Plain (non f-) string: the double braces are part of the text.
+            "prompt": "<question text with blanks replaced with {{id}}>",
+            "solution": ["<acceptable answer(s)>"],
+        }
+        return {
+            "type": "writeBlanks",
+            "maxWords": "<integer max words allowed per answer>",
+            "prompt": "<instructions>",
+            "questions": [question],
+            "variant": variant_hint,
+        }
+
+    @staticmethod
+    def _true_false():
+        """Return the placeholder template for a True/False/Not Given exercise."""
+        statement = {
+            "id": "<question number>",
+            "prompt": "<statement to evaluate>",
+            "solution": "<one of: true, false, not_given>",
+        }
+        return {
+            "questions": [statement],
+            "type": "trueFalse",
+            "prompt": "<specific instructions including T/F/NG marking scheme>",
+        }
+
+    def _instructions(self, has_solutions: bool = False) -> Dict[str, str]:
+        """Build the system message that tells the model how to structure the test.
+
+        The message embeds the overall JSON schema, the three exercise
+        templates and a numbered list of formatting rules.
+
+        Args:
+            has_solutions: When true the opening sentence also mentions that
+                solutions accompany the exercise sheet.
+
+        Returns:
+            A chat message dict with ``role`` "system" and the prompt as ``content``.
+        """
+        subject = "a listening test exercise sheet"
+        if has_solutions:
+            subject += " and its solutions"
+
+        preamble = (
+            f"You are processing {subject}. "
+            "Structure the test according to this json template:\n\n"
+            f"{self._listening_json_schema()}\n\n"
+        )
+        templates = (
+            "Each exercise within a section should follow these templates:\n\n"
+            f"1. Multiple Choice Questions:\n{self._multiple_choice_template()}\n\n"
+            f"2. True/False Questions:\n{self._true_false()}\n\n"
+            f"3. Write Blanks:\n{self._write_blanks_template()}\n\n"
+        )
+        # These must stay plain strings: "{{id}}" is sent with both braces.
+        rules = (
+            "\nImportant rules:\n",
+            "1. Keep exact question numbering from the original\n",
+            "2. Include all options for multiple choice questions\n",
+            "3. Replace blanks (any number of underscores '_' or similar placeholders) with {{id}} where id is the question number\n",
+            "4. Set maxWords according to the instructions\n",
+            "5. Include all possible correct answers in solution arrays\n",
+            "6. Maintain exact spacing and formatting from templates, except for writeBlanks exercises where blanks MUST be replaced with {{id}}\n",
+            "7. For writeBlanks, choose the appropriate variant:\n",
+            "   - questions: for numbered questions with blank at end that explicitly end with a question mark '?'\n",
+            "   - fill: for paragraph/summary with blanks\n",
+            "   - form: when questions and fill dont meet the requirements\n",
+            "8. For text fields, use actual newlines between questions/sentences\n",
+            "9. Format text according to chosen variant:\n",
+            "   - questions: each line should end with {{id}}\n",
+            "   - fill: embed {{id}} naturally in the paragraph\n",
+            "   - form: place {{id}} where blank should appear in text\n",
+            "10. For True/False, use exact values: true, false, or not_given\n\n",
+            "11. All the solutions for write blanks exercises should be lowercase. If solutions were provided to ",
+            "you and they are uppercase you should placed them in lowercase.\n\n",
+        )
+        closing = (
+            "First identify all sections/parts by looking for 'SECTION n' headers or similar ones, "
+            "then for each section identify and structure its exercises according to the templates above."
+        )
+
+        return {
+            "role": "system",
+            "content": preamble + templates + "".join(rules) + closing,
+        }
+
+    def _listening_json_schema(self) -> Dict[str, Any]:
+        """Return the top-level template of a listening test.
+
+        The single example part lists one template per exercise type, in the
+        order multiple choice, write blanks, true/false.
+        """
+        exercise_templates = [
+            self._multiple_choice_template(),
+            self._write_blanks_template(),
+            self._true_false(),
+        ]
+        part = {
+            "intro": "<optional field that contains information about the section>",
+            "exercises": exercise_templates,
+        }
+        return {
+            "minTimer": "<integer representing minutes allowed for the exam as string, if there is none set it to 30>",
+            "parts": [part],
+        }
--- a/ielts_be/services/impl/exam/listening/write_blank_forms.py
+++ b/ielts_be/services/impl/exam/listening/write_blank_forms.py
@@ -0,0 +1,55 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class WriteBlankForms:
+    """Generates form-style ``writeBlanks`` listening exercises through the LLM."""
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_write_blanks_form(
+            self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
+    ):
+        """Ask the LLM for a key/value form about ``text`` and turn it into an exercise.
+
+        Args:
+            dialog_type: Kind of audio script; interpolated into the prompts.
+                The value "conversation" adds an extra clarifying instruction.
+            text: The script the form must be about.
+            quantity: Number of form entries requested; the reply is truncated
+                to at most this many.
+            start_id: Passed to the helpers that build the blanked text and
+                the solutions.
+            difficulty: Difficulty label interpolated into the prompt.
+
+        Returns:
+            A ``writeBlanks`` exercise dict with ``id``, ``maxWords``,
+            ``prompt``, ``solutions``, ``text`` and ``type``.
+        """
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"form": ["key: value", "key2: value"]}')
+            },
+            {
+                "role": "user",
+                "content": (
+                        f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
+                        f'about this {dialog_type}:\n"{text}"'
+                )
+            }
+        ]
+
+        if dialog_type == "conversation":
+            messages.append({
+                    "role": "user",
+                    "content": (
+                        'It must be a form and not questions. '
+                        # The example must be valid JSON and use the same
+                        # "key: value" string entries the system message asks for;
+                        # the previous example was neither.
+                        'Example: {"form": ["Color of car: blue", "Brand of car: toyota"]}'
+                    )
+            })
+
+        parsed_form = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        # The model may return more entries than requested.
+        parsed_form = parsed_form["form"][:quantity]
+
+        replaced_form, words = ExercisesHelper.build_write_blanks_text_form(parsed_form, start_id)
+        return {
+            "id": str(uuid.uuid4()),
+            "maxWords": 3,
+            "prompt": f"You will hear a {dialog_type}. Fill the form with words/numbers missing.",
+            "solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
+            "text": replaced_form,
+            "type": "writeBlanks"
+        }
--- a/ielts_be/services/impl/exam/listening/write_blank_notes.py
+++ b/ielts_be/services/impl/exam/listening/write_blank_notes.py
@@ -0,0 +1,68 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class WriteBlankNotes:
+    """Generates note-completion ``writeBlanks`` listening exercises through the LLM."""
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_write_blanks_notes(
+            self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
+    ):
+        """Generate notes about ``text`` and blank out one word per note.
+
+        Two LLM calls are made: the first produces the notes, the second picks
+        one word from each note. Both replies are truncated to ``quantity``.
+
+        Returns:
+            A ``writeBlanks`` exercise dict with ``id``, ``maxWords``,
+            ``prompt``, ``solutions``, ``text`` and ``type``.
+        """
+        notes_request = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"notes": ["note_1", "note_2"]}'
+                ),
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate {quantity} {difficulty} difficulty notes taken from this '
+                    f'{dialog_type}:\n"{text}"'
+                ),
+            },
+        ]
+        notes_reply = await self._llm.prediction(
+            GPTModels.GPT_4_O, notes_request, ["notes"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        notes = notes_reply["notes"][:quantity]
+
+        # Number the notes from 1 so the model can match each word to its phrase.
+        numbered_notes = "\n".join(
+            f"{position}. {note}" for position, note in enumerate(notes, start=1)
+        )
+
+        words_request = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this '
+                    'format: {"words": ["word_1", "word_2"] }'
+                ),
+            },
+            {
+                "role": "user",
+                "content": f'Select 1 word from each phrase in this list:\n"{numbered_notes}"',
+            },
+        ]
+        words_reply = await self._llm.prediction(
+            GPTModels.GPT_4_O, words_request, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        chosen_words = words_reply["words"][:quantity]
+
+        blanked_notes = ExercisesHelper.replace_first_occurrences_with_placeholders_notes(
+            notes, chosen_words, start_id
+        )
+        exercise = {
+            "id": str(uuid.uuid4()),
+            "maxWords": 3,
+            "prompt": "Fill the blank space with the word missing from the audio.",
+            "solutions": ExercisesHelper.build_write_blanks_solutions_listening(chosen_words, start_id),
+            # Joined with a literal backslash + "n", not a real newline.
+            "text": "\\n".join(blanked_notes),
+            "type": "writeBlanks",
+        }
+        return exercise
--- a/ielts_be/services/impl/exam/listening/write_blanks.py
+++ b/ielts_be/services/impl/exam/listening/write_blanks.py
@@ -0,0 +1,43 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class WriteBlanks:
+    """Generates short-answer ``writeBlanks`` listening exercises through the LLM."""
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_write_blanks_questions(
+            self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
+    ):
+        """Generate short-answer questions (with possible answers) about ``text``.
+
+        The LLM reply is truncated to ``quantity`` questions before the
+        exercise text and solutions are built by ``ExercisesHelper``.
+
+        Returns:
+            A ``writeBlanks`` exercise dict with ``id``, ``maxWords``,
+            ``prompt``, ``solutions``, ``text`` and ``type``.
+        """
+        output_format = {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
+            ),
+        }
+        request = {
+            "role": "user",
+            "content": (
+                f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
+                f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"'
+            ),
+        }
+
+        reply = await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            [output_format, request],
+            ["questions"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE,
+        )
+        selected = reply["questions"][:quantity]
+
+        exercise = {
+            "id": str(uuid.uuid4()),
+            "maxWords": 3,
+            "prompt": f"You will hear a {dialog_type}. Answer the questions below using no more than three words or a number accordingly.",
+            "solutions": ExercisesHelper.build_write_blanks_solutions(selected, start_id),
+            "text": ExercisesHelper.build_write_blanks_text(selected, start_id),
+            "type": "writeBlanks",
+        }
+        return exercise
--- a/ielts_be/services/impl/exam/reading/init.py
+++ b/ielts_be/services/impl/exam/reading/init.py
@@ -0,0 +1,147 @@
+import asyncio
+from logging import getLogger
+
+from fastapi import UploadFile
+
+from ielts_be.configs.constants import GPTModels, FieldsAndExercises, TemperatureSettings
+from ielts_be.dtos.reading import ReadingDTO
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import IReadingService, ILLMService
+from .fill_blanks import FillBlanks
+from .idea_match import IdeaMatch
+from .paragraph_match import ParagraphMatch
+from ..shared import TrueFalse, MultipleChoice
+from .import_reading import ImportReadingModule
+from .write_blanks import WriteBlanks
+
+
+class ReadingService(IReadingService):
+
+    def __init__(self, llm: ILLMService):
+        """Create the exercise generators and the importer, all sharing ``llm``."""
+        self._logger = getLogger(__name__)
+        self._llm = llm
+
+        # One generator object per supported exercise type.
+        self._multiple_choice = MultipleChoice(llm)
+        self._write_blanks = WriteBlanks(llm)
+        self._true_false = TrueFalse(llm)
+        self._paragraph_match = ParagraphMatch(llm)
+        self._idea_match = IdeaMatch(llm)
+        self._fill_blanks = FillBlanks(llm)
+
+        # Handles import of existing exams from uploaded files.
+        self._import = ImportReadingModule(llm)
+
+    async def import_exam(self, exercises: UploadFile, solutions: UploadFile = None):
+        """Delegate the import of an uploaded exam (and optional solutions) to the import module."""
+        imported = await self._import.import_from_file(exercises, solutions)
+        return imported
+
+    async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
+        """Ask the LLM for a reading passage on ``topic``.
+
+        Args:
+            part: Passage number; selects the difficulty instruction. Only
+                1, 2 and 3 have an entry, any other value raises ``KeyError``.
+            topic: Subject of the passage.
+            word_count: Minimum length requested from the model.
+
+        Returns:
+            Whatever ``self._llm.prediction`` returns for the request.
+        """
+        difficulty_hints = {
+            "1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
+            "2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
+            "3": (
+                'The generated text should be very hard to understand and include different points, theories, '
+                'subtle differences of opinions from people, correctly sourced to the person who said it, '
+                'over the specified topic and have multiple paragraphs.'
+            ),
+        }
+        part_hint = difficulty_hints[str(part)]
+
+        output_format = {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"title": "title of the text", "text": "generated text"}'
+            ),
+        }
+        request = {
+            "role": "user",
+            "content": (
+                f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
+                f'on the topic of "{topic}". The passage should offer a substantial amount of '
+                'information, analysis, or narrative relevant to the chosen subject matter. This text '
+                'passage aims to serve as the primary reading section of an IELTS test, providing an '
+                'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
+                'does not contain forbidden subjects in muslim countries.'
+            ),
+        }
+        messages = [output_format, request, {"role": "system", "content": part_hint}]
+
+        # Part 3 additionally asks for cited excerpts.
+        if part == 3:
+            messages.append({
+                "role": "user",
+                "content": "Use real text excerpts on your generated passage and cite the sources.",
+            })
+
+        passage = await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            messages,
+            FieldsAndExercises.GEN_TEXT_FIELDS,
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE,
+        )
+        return passage
+
+    async def _generate_single_exercise(self, req_exercise, text: str, start_id: int, difficulty: str) -> dict:
+        """Generate one exercise of the requested type for ``text``.
+
+        Args:
+            req_exercise: Request item exposing ``type`` and ``quantity`` and,
+                depending on the type, ``num_random_words`` (fillBlanks) or
+                ``max_words`` (writeBlanks).
+            text: Reading passage the exercise is generated from.
+            start_id: Id handed to the generator for the first question.
+            difficulty: Difficulty label forwarded to generators that accept it.
+
+        Returns:
+            The generated exercise dict. An empty dict is returned when a
+            writeBlanks exercise fails the word-limit check or when
+            ``req_exercise.type`` is not a supported type.
+        """
+        exercise_type = req_exercise.type
+
+        if exercise_type == "fillBlanks":
+            question = await self._fill_blanks.gen_summary_fill_blanks_exercise(
+                text, req_exercise.quantity, start_id, difficulty, req_exercise.num_random_words
+            )
+            self._logger.info(f"Added fill blanks: {question}")
+            return question
+
+        if exercise_type == "trueFalse":
+            question = await self._true_false.gen_true_false_not_given_exercise(
+                text, req_exercise.quantity, start_id, difficulty, "reading"
+            )
+            self._logger.info(f"Added trueFalse: {question}")
+            return question
+
+        if exercise_type == "writeBlanks":
+            question = await self._write_blanks.gen_write_blanks_exercise(
+                text, req_exercise.quantity, start_id, difficulty, req_exercise.max_words
+            )
+            if ExercisesHelper.answer_word_limit_ok(question):
+                self._logger.info(f"Added write blanks: {question}")
+                return question
+            self._logger.info("Did not add write blanks because it did not respect word limit")
+            return {}
+
+        if exercise_type == "paragraphMatch":
+            question = await self._paragraph_match.gen_paragraph_match_exercise(
+                text, req_exercise.quantity, start_id
+            )
+            self._logger.info(f"Added paragraph match: {question}")
+            return question
+
+        if exercise_type == "ideaMatch":
+            question = await self._idea_match.gen_idea_match_exercise(
+                text, req_exercise.quantity, start_id
+            )
+            # The generator emits a generic matchSentences exercise; tag the variant here.
+            question["variant"] = "ideaMatch"
+            self._logger.info(f"Added idea match: {question}")
+            return question
+
+        if exercise_type == "multipleChoice":
+            question = await self._multiple_choice.gen_multiple_choice(
+                text, req_exercise.quantity, start_id, difficulty, 4
+            )
+            self._logger.info(f"Added multiple choice: {question}")
+            return question
+
+        # Previously an unknown type fell off the end and returned None,
+        # contradicting the declared dict return type. Report it and return
+        # the same empty dict used for a rejected writeBlanks exercise.
+        self._logger.warning(f"Unsupported reading exercise type: {exercise_type}")
+        return {}
+
+    async def generate_reading_exercises(self, dto: ReadingDTO):
+        """Generate every exercise requested in ``dto`` concurrently.
+
+        Question ids are allotted up front: the first exercise starts at 1 and
+        each following one starts after the ``quantity`` of those before it.
+
+        Returns:
+            ``{"exercises": [...]}`` with results in request order.
+        """
+        pending = []
+        next_id = 1
+        for requested in dto.exercises:
+            coroutine = self._generate_single_exercise(requested, dto.text, next_id, dto.difficulty)
+            pending.append(coroutine)
+            next_id = next_id + requested.quantity
+
+        generated = await asyncio.gather(*pending)
+        return {"exercises": generated}
--- a/ielts_be/services/impl/exam/reading/fill_blanks.py
+++ b/ielts_be/services/impl/exam/reading/fill_blanks.py
@@ -0,0 +1,73 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class FillBlanks:
+    """Generates summary-completion ``fillBlanks`` reading exercises through the LLM."""
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_summary_fill_blanks_exercise(
+            self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
+    ):
+        """Summarize ``text`` and blank out words chosen by the LLM.
+
+        Two LLM calls are made: one for the summary, one for the words to
+        remove from it.
+
+        Args:
+            text: Passage to summarize.
+            quantity: Number of words requested; the reply is truncated to at
+                most this many.
+            start_id: Passed to the helpers that place the blanks and build
+                the solutions.
+            difficulty: Difficulty label interpolated into the word prompt.
+            num_random_words: Passed to the helper that adds extra words to
+                the option list.
+
+        Returns:
+            A ``fillBlanks`` exercise dict with ``allowRepetition``, ``id``,
+            ``prompt``, ``solutions``, ``text``, ``type`` and ``words``.
+        """
+        summary_messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
+                )
+            },
+            {
+                "role": "user",
+                "content": f'Summarize this text: "{text}"'
+
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, summary_messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        summary = response["summary"]
+
+        words_messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"words": ["word_1", "word_2"] }'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                        f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
+                        f'from this:\n{summary}'
+                )
+            }
+        ]
+
+        words_response = await self._llm.prediction(
+            GPTModels.GPT_4_O, words_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        # The model may return more words than requested. The caller reserves
+        # exactly `quantity` ids for this exercise, so cap the list the same
+        # way the listening generators cap their replies.
+        words = words_response["words"][:quantity]
+
+        replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
+            summary, words, start_id
+        )
+        options_words = ExercisesHelper.add_random_words_and_shuffle(words, num_random_words)
+        solutions = ExercisesHelper.fillblanks_build_solutions_array(words, start_id)
+
+        return {
+            "allowRepetition": True,
+            "id": str(uuid.uuid4()),
+            "prompt": (
+                "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
+                "more words than spaces so you will not use them all. You may use any of the words more than once."
+            ),
+            "solutions": solutions,
+            "text": replaced_summary,
+            "type": "fillBlanks",
+            "words": options_words
+        }
--- a/ielts_be/services/impl/exam/reading/idea_match.py
+++ b/ielts_be/services/impl/exam/reading/idea_match.py
@@ -0,0 +1,46 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class IdeaMatch:
+    """Generates "match the idea to its author" reading exercises through the LLM."""
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_idea_match_exercise(self, text: str, quantity: int, start_id: int):
+        """Extract ideas and their sources from ``text`` and build a matching exercise.
+
+        Args:
+            text: Passage the ideas are extracted from.
+            quantity: Number of ideas requested; the reply is truncated to at
+                most this many.
+            start_id: Passed to the helper that builds the numbered sentences.
+
+        Returns:
+            A ``matchSentences`` exercise dict with ``id``, ``allowRepetition``,
+            ``options``, ``prompt``, ``sentences`` and ``type``.
+        """
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"ideas": [ '
+                    '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
+                    '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
+                    ']}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
+                    f'The text: {text}'
+                )
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        # The model may return more ideas than requested. The caller reserves
+        # exactly `quantity` ids for this exercise, so cap the list the same
+        # way the listening generators cap their replies.
+        ideas = response["ideas"][:quantity]
+
+        return {
+            "id": str(uuid.uuid4()),
+            "allowRepetition": False,
+            "options": ExercisesHelper.build_options(ideas),
+            "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
+            "sentences": ExercisesHelper.build_sentences(ideas, start_id),
+            "type": "matchSentences"
+        }
--- a/ielts_be/services/impl/exam/reading/import_reading.py
+++ b/ielts_be/services/impl/exam/reading/import_reading.py
@@ -0,0 +1,237 @@
+from logging import getLogger
+from typing import Dict, Any
+from uuid import uuid4
+
+import aiofiles
+from fastapi import UploadFile
+
+from ielts_be.helpers import FileHelper
+from ielts_be.mappers.reading import ReadingMapper
+from ielts_be.services import ILLMService
+from ielts_be.dtos.exams.reading import Exam
+
+
+class ImportReadingModule:
+    def __init__(self, openai: ILLMService):
+        """Keep the LLM service used for parsing and set up a module logger."""
+        self._llm = openai
+        self._logger = getLogger(__name__)
+
+    async def import_from_file(
+            self, exercises: UploadFile, solutions: UploadFile = None
+    ) -> Dict[str, Any] | None:
+        """Import a reading exam from an uploaded file.
+
+        The uploads are saved under ``./tmp/<uuid>/``, converted to HTML and
+        handed to the LLM for parsing.
+
+        Args:
+            exercises: Uploaded exam sheet.
+            solutions: Optional uploaded solutions sheet.
+
+        Returns:
+            The parsed exam dumped to a dict (``None`` fields excluded), or
+            ``None`` when the parsing step yields nothing.
+        """
+        path_id = str(uuid4())
+        ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
+
+        # Once the first upload is stored the working directory exists, so
+        # everything after it runs under try/finally: a failed conversion or
+        # LLM call must not leave the uploaded files behind.
+        try:
+            FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
+
+            if solutions:
+                ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
+                FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
+
+            response = await self._get_reading_parts(path_id, solutions is not None)
+        finally:
+            FileHelper.remove_directory(f'./tmp/{path_id}')
+
+        if response:
+            return response.model_dump(exclude_none=True)
+        return None
+
+    async def _get_reading_parts(self, path_id: str, solutions: bool = False) -> Exam:
+        """Read the converted HTML files and ask the LLM to parse them into an exam.
+
+        Args:
+            path_id: Name of the working directory under ``./tmp``.
+            solutions: Whether ``solutions.html`` exists and should be sent
+                as an additional user message.
+
+        Returns:
+            The result of ``self._llm.pydantic_prediction`` for the request.
+        """
+        async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as sheet:
+            sheet_html = await sheet.read()
+
+        messages = [
+            self._instructions(solutions),
+            {"role": "user", "content": f"Exam question sheet:\n\n{sheet_html}"},
+        ]
+
+        if solutions:
+            async with aiofiles.open(f'./tmp/{path_id}/solutions.html', 'r', encoding='utf-8') as answers:
+                answers_html = await answers.read()
+            messages.append({"role": "user", "content": f"Solutions:\n\n{answers_html}"})
+
+        schema_text = str(self._reading_json_schema())
+        parsed = await self._llm.pydantic_prediction(
+            messages,
+            ReadingMapper.map_to_exam_model,
+            schema_text,
+        )
+        return parsed
+
+    def _reading_json_schema(self):
+        """Return the exam template with one template per exercise type in its single part."""
+        schema = self._reading_exam_template()
+        first_part = schema["parts"][0]
+        first_part["exercises"] = [
+            self._write_blanks(),
+            self._fill_blanks(),
+            self._match_sentences(),
+            self._true_false(),
+            self._multiple_choice(),
+        ]
+        return schema
+
+    @staticmethod
+    def _reading_exam_template():
+        """Return the top-level exam template with one part and no exercises."""
+        passage = {
+            "title": "<title of the reading passage>",
+            "content": "<full text content of the reading passage>",
+        }
+        part = {"text": passage, "exercises": []}
+        return {
+            "minTimer": "<integer representing minutes allowed for the exam>",
+            "parts": [part],
+        }
+
+    @staticmethod
+    def _write_blanks():
+        """Return the placeholder template for a write-blanks exercise."""
+        # Plain string: the double braces and the escaped backslashes are
+        # part of the text shown to the model.
+        text_hint = (
+            "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] "
+            "- notice how there the question number inside {{}} -> the text MUST always contain the question number in that format "
+            "- and notice how there is a double backslash before the n -> I want an escaped newline in your output> "
+        )
+        solution = {
+            "id": "<question number as string>",
+            "solution": ["<acceptable answer(s) within maxWords limit>"],
+        }
+        return {
+            "maxWords": "<integer max words allowed per answer>",
+            "solutions": [solution],
+            "text": text_hint,
+            "type": "writeBlanks",
+            "prompt": "<specific instructions for this exercise section>",
+        }
+
+    @staticmethod
+    def _match_sentences():
+        """Return the placeholder template for a match-sentences exercise."""
+        option = {
+            "id": "<paragraph letter A-F>",
+            "sentence": "<THIS NEEDS TO BE A PARAGRAPH OF THE SECTION TEXT>",
+        }
+        sentence = {
+            "id": "<question number as string>",
+            "solution": "<matching paragraph letter>",
+            "sentence": "<A SHORT SENTENCE THAT CONVEYS AND IDEA OR HEADING>",
+        }
+        return {
+            "options": [option],
+            "sentences": [sentence],
+            "type": "matchSentences",
+            "variant": "<heading OR ideaMatch (try to figure it out via the exercises instructions)>",
+            "prompt": "<specific instructions for this exercise section>",
+        }
+
+    @staticmethod
+    def _true_false():
+        """Return the placeholder template for a True/False/Not Given exercise."""
+        statement = {
+            "id": "<question number>",
+            "prompt": "<statement to evaluate>",
+            "solution": "<one of: true, false, not_given>",
+        }
+        return {
+            "questions": [statement],
+            "type": "trueFalse",
+            "prompt": "<specific instructions including T/F/NG marking scheme>",
+        }
+
+    @staticmethod
+    def _multiple_choice():
+        """Return the placeholder template for a multiple choice exercise."""
+        option = {"id": "<A, B, or C>", "text": "<option text>"}
+        question = {
+            "id": "<question number>",
+            "prompt": "<question text>",
+            "options": [option],
+            "solution": "<correct option letter>",
+            "variant": "text",
+        }
+        return {
+            "questions": [question],
+            "type": "multipleChoice",
+            "prompt": "<specific instructions for this exercise section>",
+        }
+
+    @staticmethod
+    def _fill_blanks():
+        """Return the placeholder template for a fill-blanks (word bank) exercise."""
+        solution = {"id": "<blank number>", "solution": "<correct word>"}
+        bank_entry = {
+            "letter": "<word identifier letter>",
+            "word": "<word from word bank>",
+        }
+        return {
+            "solutions": [solution],
+            # Plain string: the double braces are part of the text.
+            "text": "<text passage with blanks marked as {{<blank number>}}>",
+            "type": "fillBlanks",
+            "words": [bank_entry],
+            "prompt": "<specific instructions for this exercise section>",
+        }
+
+    def _instructions(self, solutions=False):
+        """Build the system message that tells the model how to parse a reading exam.
+
+        The message embeds the exam template, one template per exercise type
+        and a closing checklist whose wording depends on ``solutions``.
+
+        Args:
+            solutions: Whether a solutions sheet accompanies the exam. When
+                false the model is told to work out the answers itself.
+
+        Returns:
+            A chat message dict with ``role`` "system" and the prompt as ``content``.
+        """
+        solutions_str = " and its solutions" if solutions else ""
+
+        if solutions:
+            answers_hint = (
+                "Use the provided solutions to fill in all answer fields accurately, if word answers have all letters "
+                "uppercase convert them to lowercase before assigning them."
+            )
+        else:
+            answers_hint = "Solutions were not provided - analyze the passage carefully to determine correct answers."
+
+        tail = (
+            "Parse the exam carefully and identify:\n"
+            "1. Time limit from instructions\n"
+            "2. Reading passage title and full content\n"
+            "3. All exercise sections and their specific instructions\n"
+            "4. Question numbering and grouping\n"
+            "5. Word limits and formatting requirements\n"
+            "6. Specific marking schemes (e.g., T/F/NG)\n\n"
+            # The trailing space separates this sentence from the next one;
+            # without it the two were fused ("...answers.Pay extra...").
+            f"{answers_hint} "
+            "Pay extra attention to fillblanks exercises the solution and option wording must match in case! "
+            "There can't be options in lowercase and solutions in uppercase! "
+            "Also PAY ATTENTION TO SECTIONS, these most likely indicate parts, and in each section/part there "
+            "should be a text, if there isn't a title for it choose a reasonable one based on its contents. "
+        )
+
+        return {
+            "role": "system",
+            "content": (
+                f"You are processing an English reading comprehension exam{solutions_str}. Structure the data according "
+                f"to this json template: {self._reading_exam_template()}\n\n"
+
+                "The exam contains these exercise types:\n"
+                "1. \"writeBlanks\": Short answer questions with strict word limits\n"
+                "2. \"matchSentences\": Match headings or ideas with paragraphs, the sentences field\n"
+                "3. \"trueFalse\": Evaluate statements as True/False/Not Given\n"
+                "4. \"fillBlanks\": Complete text using provided word bank\n"
+                "5. \"multipleChoice\": Select correct option from choices\n\n"
+
+                "Exercise templates:\n"
+                f"writeBlanks: {self._write_blanks()}\n"
+                f"matchSentences: {self._match_sentences()}\n"
+                f"trueFalse: {self._true_false()}\n"
+                f"fillBlanks: {self._fill_blanks()}\n"
+                f"multipleChoice: {self._multiple_choice()}\n\n"
+
+                "Important details to capture:\n"
+                "- Exercise section instructions and constraints\n"
+                "- Question numbering and grouping\n"
+                "- Word limits and formatting requirements\n"
+                "- Marking schemes and answer formats\n\n"
+
+                f"{tail}"
+            )
+        }
--- a/ielts_be/services/impl/exam/reading/paragraph_match.py
+++ b/ielts_be/services/impl/exam/reading/paragraph_match.py
@@ -0,0 +1,63 @@
+import random
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class ParagraphMatch:
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_paragraph_match_exercise(self, text: str, quantity: int, start_id: int):
+        paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    'For every paragraph of the list generate a minimum 5 word heading for it. '
+                    f'The paragraphs are these: {str(paragraphs)}'
+                )
+
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        headings = response["headings"]
+
+        options = []
+        for i, paragraph in enumerate(paragraphs, start=0):
+            paragraph["heading"] = headings[i]["heading"]
+            options.append({
+                "id": paragraph["letter"],
+                "sentence": paragraph["paragraph"]
+            })
+
+        random.shuffle(paragraphs)
+        sentences = []
+        for i, paragraph in enumerate(paragraphs, start=start_id):
+            sentences.append({
+                "id": i,
+                "sentence": paragraph["heading"],
+                "solution": paragraph["letter"]
+            })
+
+        return {
+            "id": str(uuid.uuid4()),
+            "allowRepetition": False,
+            "options": options,
+            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
+            "sentences": sentences[:quantity],
+            "type": "matchSentences"
+        }
--- a/ielts_be/services/impl/exam/reading/write_blanks.py
+++ b/ielts_be/services/impl/exam/reading/write_blanks.py
@@ -0,0 +1,44 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class WriteBlanks:
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_write_blanks_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, max_words: int = 3):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
+                    f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
+                )
+
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        questions = response["questions"][:quantity]
+
+        return {
+            "id": str(uuid.uuid4()),
+            "maxWords": max_words,
+            "prompt": f"Choose no more than {max_words} words and/or a number from the passage for each answer.",
+            "solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
+            "text": ExercisesHelper.build_write_blanks_text(questions, start_id),
+            "type": "writeBlanks"
+        }
--- a/ielts_be/services/impl/exam/shared/__init__.py
+++ b/ielts_be/services/impl/exam/shared/__init__.py
@@ -0,0 +1,7 @@
+from .true_false import TrueFalse
+from .multiple_choice import MultipleChoice
+
+# Public API of this package: the exercise generator classes imported above.
+__all__ = [
+    "TrueFalse",
+    "MultipleChoice"
+]
--- a/ielts_be/services/impl/exam/shared/multiple_choice.py
+++ b/ielts_be/services/impl/exam/shared/multiple_choice.py
@@ -0,0 +1,46 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class MultipleChoice:
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_multiple_choice(
+            self, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
+    ):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
+                    '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
+                    '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
+                    'energy sources?", "solution": "C", "variant": "text"}]}')
+            },
+            {
+                "role": "user",
+                "content": (
+                        f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
+                        f'options for this text:\n"' + text + '"')
+
+            }
+        ]
+
+        questions = await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            messages,
+            ["questions"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        return {
+            "id": str(uuid.uuid4()),
+            "prompt": "Select the appropriate option.",
+            "questions": ExercisesHelper.fix_exercise_ids(questions, start_id)["questions"],
+            "type": "multipleChoice",
+        }
--- a/ielts_be/services/impl/exam/shared/true_false.py
+++ b/ielts_be/services/impl/exam/shared/true_false.py
@@ -0,0 +1,55 @@
+import uuid
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import ExercisesHelper
+from ielts_be.services import ILLMService
+
+
+class TrueFalse:
+
+    def __init__(self, llm: ILLMService):
+        self._llm = llm
+
+    async def gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id: int, difficulty: str, module: str):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
+                    '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
+                    'Ensure that your statements accurately represent information or inferences from the text, and '
+                    'provide a variety of responses, including, at least one of each True, False, and Not Given, '
+                    f'as appropriate.\n\nReference text:\n\n {text}'
+                )
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+        questions = response["prompts"]
+
+        if len(questions) > quantity:
+            questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)
+
+        for i, question in enumerate(questions, start=start_id):
+            question["id"] = str(i)
+
+        tail = (
+            "the information given in the Reading Passage"
+            if module == "reading" else
+            "what you've heard"
+        )
+
+        return {
+            "id": str(uuid.uuid4()),
+            "prompt": f"Do the following statements agree with {tail}?",
+            "questions": questions,
+            "type": "trueFalse"
+        }
--- a/ielts_be/services/impl/exam/speaking/__init__.py
+++ b/ielts_be/services/impl/exam/speaking/__init__.py
@@ -0,0 +1,168 @@
+import logging
+import re
+
+from typing import Dict, List
+
+from ielts_be.configs.constants import (
+    FieldsAndExercises, GPTModels, TemperatureSettings
+)
+from ielts_be.dtos.speaking import GradeSpeakingItem
+from ielts_be.repositories import IFileStorage
+from ielts_be.services import ISpeakingService, ILLMService, ISpeechToTextService
+from .grade import GradeSpeaking
+
+
+class SpeakingService(ISpeakingService):
+
+    def __init__(
+            self, llm: ILLMService,
+            file_storage: IFileStorage,
+            stt: ISpeechToTextService
+    ):
+        self._llm = llm
+        self._file_storage = file_storage
+        self._stt = stt
+        self._logger = logging.getLogger(__name__)
+        self._grade = GradeSpeaking(llm, file_storage, stt)
+
+        # TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
+        #  either the difficulty in the request or a random one yet the prompt doesn't change
+        self._tasks = {
+            "task_1": {
+                "get": {
+                    "json_template": {
+                        "first_topic": "topic 1",
+                        "second_topic": "topic 2",
+                        "questions": [
+                            (
+                                "Introductory question about the first topic, starting the topic with "
+                                "'Let's talk about x' and then the question."
+                            ),
+                            "Follow up question about the first topic",
+                            "Follow up question about the first topic",
+                            "Question about second topic",
+                            "Follow up question about the second topic",
+                        ]
+                    },
+                    "prompt": (
+                        'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
+                        'that encourages candidates to delve deeply into personal experiences, preferences, or '
+                        'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
+                        'Make sure that the generated question does not contain forbidden subjects in '
+                        'muslim countries.'
+                    )
+                }
+            },
+            "task_2": {
+                "get": {
+                    "json_template": {
+                        "topic": "topic",
+                        "question": "question",
+                        "prompts": [
+                            "prompt_1",
+                            "prompt_2",
+                            "prompt_3"
+                        ],
+                        "suffix": "And explain why..."
+                    },
+                    "prompt": (
+                        'Create a question of medium difficulty for IELTS Speaking Part 2 '
+                        'that encourages candidates to narrate a personal experience or story related to the topic '
+                        'of "{topic}". Include 3 prompts that guide the candidate to describe '
+                        'specific aspects of the experience, such as details about the situation, '
+                        'their actions, and the reasons it left a lasting impression. Make sure that the '
+                        'generated question does not contain forbidden subjects in muslim countries.'
+                    )
+                }
+            },
+            "task_3": {
+                "get": {
+                    "json_template": {
+                        "topic": "topic",
+                        "questions": [
+                            "Introductory question about the topic.",
+                            "Follow up question about the topic",
+                            "Follow up question about the topic",
+                            "Follow up question about the topic",
+                            "Follow up question about the topic"
+                        ]
+                    },
+                    "prompt": (
+                        'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3'
+                        'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
+                        'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
+                        'related to the topic. Make sure that the generated question does not contain forbidden '
+                        'subjects in muslim countries.'
+                    )
+                }
+            },
+        }
+
+    async def get_speaking_part(
+            self, part: int, topic: str, second_topic: str, difficulty: str
+    ) -> Dict:
+        task_values = self._tasks[f'task_{part}']['get']
+
+        if part == 1:
+            task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
+        else:
+            task_prompt = task_values["prompt"].format(topic=topic)
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    f'{task_values["json_template"]}'
+                )
+            },
+            {
+                "role": "user",
+                "content": task_prompt
+            }
+        ]
+
+        part_specific = {
+            "1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
+            "2": (
+                'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
+                'that start with "And explain why".'
+            )
+        }
+
+        if part in {1, 2}:
+            messages.append({
+                "role": "user",
+                "content": part_specific[str(part)]
+            })
+
+        if part in {1, 3}:
+            messages.append({
+                "role": "user",
+                "content": 'They must be 1 single question each and not be double-barreled questions.'
+            })
+
+        fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        if part == 3:
+            # Remove the numbers from the questions only if the string starts with a number
+            response["questions"] = [
+                re.sub(r"^\d+\.\s*", "", question)
+                if re.match(r"^\d+\.", question) else question
+                for question in response["questions"]
+            ]
+
+        response["type"] = part
+        response["difficulty"] = difficulty
+
+        if part in {2, 3}:
+            response["topic"] = topic
+
+        return response
+
+    async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
+        return await self._grade.grade_speaking_task(task, items)
--- a/ielts_be/services/impl/exam/speaking/grade.py
+++ b/ielts_be/services/impl/exam/speaking/grade.py
@@ -0,0 +1,316 @@
+import asyncio
+import os
+import uuid
+from logging import getLogger
+from typing import Dict, List
+
+import aiofiles
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings, FilePaths
+from ielts_be.dtos.speaking import GradeSpeakingItem
+from ielts_be.helpers import TextHelper
+from ielts_be.repositories import IFileStorage
+from ielts_be.services import ILLMService, ISpeechToTextService
+
+
+class GradeSpeaking:
+
+    def __init__(self, llm: ILLMService, file_storage: IFileStorage, stt: ISpeechToTextService):
+        self._llm = llm
+        self._file_storage = file_storage
+        self._stt = stt
+        self._logger = getLogger(__name__)
+
+    async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
+        request_id = str(uuid.uuid4())
+        self._log(task, request_id, f"Received request to grade speaking task {task}.")
+
+        if task != 2:
+            self._log(task, request_id, f'Received {len(items)} total answers.')
+
+        temp_files = []
+        try:
+            # Save all files first
+            temp_files = await asyncio.gather(*[
+                self.save_file(item) for item in items
+            ])
+
+            # Process all transcriptions concurrently (up to 4)
+            self._log(task, request_id, 'Starting batch transcription')
+            text_answers = await asyncio.gather(*[
+                self._stt.speech_to_text(file_path)
+                for file_path in temp_files
+            ])
+
+            for answer in text_answers:
+                self._log(task, request_id, f'Transcribed answer: {answer}')
+                if not TextHelper.has_x_words(answer, 20):
+                    self._log(
+                        task, request_id,
+                        f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
+                    )
+                    return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
+
+            # Get perfect answers
+            self._log(task, request_id, 'Requesting perfect answers')
+            perfect_answers = await asyncio.gather(*[
+                self._get_perfect_answer(task, item.question)
+                for item in items
+            ])
+
+            # Format the responses
+            if task in {1, 3}:
+                self._log(task, request_id, 'Formatting answers and questions for prompt.')
+
+                formatted_text = ""
+                for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
+                    formatted_text += f"**Question {i}:**\n{item.question}\n\n"
+                    formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
+
+                self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
+                questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
+            else:
+                questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
+
+            self._log(task, request_id, 'Requesting grading of the answer(s).')
+            response = await self._grade_task(task, questions_and_answers)
+            self._log(task, request_id, f'Answer(s) graded: {response}')
+
+            if task in {1, 3}:
+                self._log(task, request_id, 'Adding perfect answer(s) to response.')
+
+                # TODO: check if it is answer["answer"] instead
+                for i, answer in enumerate(perfect_answers, start=1):
+                    response['perfect_answer_' + str(i)] = answer
+
+                self._log(task, request_id, 'Getting speaking corrections in parallel')
+                # Get all corrections in parallel
+                fixed_texts = await asyncio.gather(*[
+                    self._get_speaking_corrections(answer)
+                    for answer in text_answers
+                ])
+
+                self._log(task, request_id, 'Adding transcript and fixed texts to response.')
+                for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
+                    response['transcript_' + str(i)] = answer
+                    response['fixed_text_' + str(i)] = fixed
+            else:
+                response['transcript'] = text_answers[0]
+
+                self._log(task, request_id, 'Requesting fixed text.')
+                response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
+                self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
+
+                response['perfect_answer'] = perfect_answers[0]["answer"]
+
+            solutions = []
+            for file_name in temp_files:
+                solutions.append(await self._file_storage.upload_file_firebase_get_url(f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name))
+
+            response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
+            response["solutions"] = solutions
+            if task in {1,3}:
+                response["answer"] = solutions
+            else:
+                response["fullPath"] = solutions[0]
+
+            self._log(task, request_id, f'Final response: {response}')
+            return response
+
+        finally:
+            for file_path in temp_files:
+                try:
+                    if os.path.exists(file_path):
+                        os.remove(file_path)
+                except Exception as e:
+                    self._log(task, request_id, f'Error cleaning up temp file {file_path}: {str(e)}')
+
+    def _log(self, task: int, request_id: str, message: str):
+        self._logger.info(f'POST - speaking_task_{task} - {request_id} - {message}')
+
+    async def _get_perfect_answer(self, task: int, question: str):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    'Provide a perfect answer according to ielts grading system to the following '
+                    f'Speaking Part {task} question: "{question}"'
+                )
+            }
+        ]
+
+        if task == 1:
+            messages.append({
+                "role": "user",
+                "content": 'The answer must be 2 or 3 sentences long.'
+            })
+
+        gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO
+
+        return await self._llm.prediction(
+            gpt_model, messages, ["answer"], TemperatureSettings.GRADING_TEMPERATURE
+        )
+
+    async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
+                    'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
+                    'assign a score of 0 if the response fails to address the question. Additionally, provide '
+                    'detailed commentary highlighting both strengths and weaknesses in the response.'
+                ) + questions_and_answers
+            }
+        ]
+
+        task_specific = {
+            "1": (
+                'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
+                'student that they should be.'
+            ),
+            "2": 'Address the student as "you"',
+            "3": 'Address the student as "you" and pay special attention to coherence between the answers.'
+        }
+
+        messages.append({
+            "role": "user",
+            "content": task_specific[str(task)]
+        })
+
+        if task in {1, 3}:
+            messages.extend([
+                {
+                    "role": "user",
+                    "content": (
+                        'For pronunciations act as if you heard the answers and they were transcribed '
+                        'as you heard them.'
+                    )
+                },
+                {
+                    "role": "user",
+                    "content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
+                }
+            ])
+
+        return await self._llm.prediction(
+            GPTModels.GPT_4_O, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
+        )
+
+    @staticmethod
+    def _fix_speaking_overall(overall: float, task_response: dict):
+        grades = [category["grade"] for category in task_response.values()]
+
+        if overall > max(grades) or overall < min(grades):
+            total_sum = sum(grades)
+            average = total_sum / len(grades)
+            rounded_average = round(average, 0)
+            return rounded_average
+
+        return overall
+
+    @staticmethod
+    def _zero_rating(comment: str):
+        return {
+            "comment": comment,
+            "overall": 0,
+            "task_response": {
+                "Fluency and Coherence": {
+                    "grade": 0.0,
+                    "comment": ""
+                },
+                "Lexical Resource": {
+                    "grade": 0.0,
+                    "comment": ""
+                },
+                "Grammatical Range and Accuracy": {
+                    "grade": 0.0,
+                    "comment": ""
+                },
+                "Pronunciation": {
+                    "grade": 0.0,
+                    "comment": ""
+                }
+            }
+        }
+
+    async def _get_speaking_corrections(self, text):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"fixed_text": "fixed transcription with no misspelling errors"}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    'Fix the errors in the provided transcription and put it in a JSON. '
+                    f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
+                )
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_3_5_TURBO,
+            messages,
+            ["fixed_text"],
+            0.2,
+            False
+        )
+        return response["fixed_text"]
+
+
+    @staticmethod
+    def _grade_template():
+        return {
+            "comment": "extensive comment about answer quality",
+            "overall": 0.0,
+            "task_response": {
+                "Fluency and Coherence": {
+                    "grade": 0.0,
+                    "comment": (
+                        "extensive comment about fluency and coherence, use examples to justify the grade awarded."
+                    )
+                },
+                "Lexical Resource": {
+                    "grade": 0.0,
+                    "comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
+                },
+                "Grammatical Range and Accuracy": {
+                    "grade": 0.0,
+                    "comment": (
+                        "extensive comment about grammatical range and accuracy, use examples to justify the "
+                        "grade awarded."
+                    )
+                },
+                "Pronunciation": {
+                    "grade": 0.0,
+                    "comment": (
+                        "extensive comment about pronunciation on the transcribed answer, use examples to justify the "
+                        "grade awarded."
+                    )
+                }
+            }
+        }
+
+    @staticmethod
+    async def save_file(item: GradeSpeakingItem) -> str:
+        sound_file_name = "tmp/" + str(uuid.uuid4())
+        content = await item.answer.read()
+        async with aiofiles.open(sound_file_name, 'wb') as f:
+            await f.write(content)
+        return sound_file_name
--- a/ielts_be/services/impl/exam/writing/__init__.py
+++ b/ielts_be/services/impl/exam/writing/__init__.py
@@ -0,0 +1,80 @@
+from typing import List, Dict, Optional
+
+from fastapi import UploadFile
+
+from ielts_be.repositories import IFileStorage
+from ielts_be.services import IWritingService, ILLMService, IAIDetectorService
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from .academic import get_writing_args_academic
+from .general import get_writing_args_general
+from .grade import GradeWriting
+
+
+class WritingService(IWritingService):
+
+    def __init__(self, llm: ILLMService, ai_detector: IAIDetectorService, file_storage: IFileStorage):
+        self._llm = llm
+        self._grade = GradeWriting(llm, file_storage, ai_detector)
+
+    async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
+                )
+            },
+            *get_writing_args_general(task, topic, difficulty)
+        ]
+
+        llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
+
+        response = await self._llm.prediction(
+            llm_model,
+            messages,
+            ["prompt"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        question = response["prompt"].strip()
+
+        return {
+            "question": self._add_newline_before_hyphen(question) if task == 1 else question,
+            "difficulty": difficulty,
+            "topic": topic
+        }
+
+    async def get_writing_task_academic_question(self, task: int, file: UploadFile, difficulty: str):
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
+                )
+            },
+            *(await get_writing_args_academic(task, file))
+        ]
+
+        llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
+
+        response = await self._llm.prediction(
+            llm_model,
+            messages,
+            ["prompt"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        question = response["prompt"].strip()
+
+        return {
+            "question": self._add_newline_before_hyphen(question) if task == 1 else question,
+            "difficulty": difficulty,
+        }
+
+    async def grade_writing_task(self, task: int, question: str, answer: str, attachment: Optional[str] = None):
+        return await self._grade.grade_writing_task(task, question, answer, attachment)
+
+    @staticmethod
+    def _add_newline_before_hyphen(s):
+        return s.replace(" -", "\n-")
+
--- a/ielts_be/services/impl/exam/writing/academic.py
+++ b/ielts_be/services/impl/exam/writing/academic.py
@@ -0,0 +1,48 @@
+from base64 import b64encode
+from typing import List, Dict
+
+from fastapi.datastructures import UploadFile
+
+
+async def get_writing_args_academic(task: int, attachment: UploadFile) -> List[Dict]:
+    """Build the user messages asking the LLM for an Academic writing prompt.
+
+    Only task 1 is supported: the result holds the prompt text, the formatting
+    instructions and the uploaded image encoded as a base64 data URL.
+
+    :param task: Writing task number; ``1`` is the only supported value.
+    :param attachment: Uploaded image; its bytes are read and embedded in the messages.
+    :return: List of chat messages, all with the ``user`` role.
+    :raises NotImplementedError: If ``task`` is ``2``.
+    :raises KeyError: If ``task`` has no entry in the prompt table.
+    """
+    if task == 2:
+        # NotImplementedError is the exception class; the NotImplemented constant
+        # is not callable and would surface as a TypeError instead.
+        raise NotImplementedError(
+            "Task 2 academic isn't implemented yet, current implementation still uses General Task 2 prompts."
+        )
+
+    writing_args = {
+        "1": {
+            "prompt": (
+                'Analyze the uploaded image and create a detailed IELTS Writing Task 1 Academic prompt.\n'
+                'Based on the visual data presented, craft a prompt that accurately reflects the image\'s '
+                'content, complexity, and academic nature.\n'
+            ),
+            "instructions": (
+                'The generated prompt must:\n'
+                '1. Clearly describe the type of visual representation in the image\n'
+                '2. Provide a concise context for the data shown\n'
+                '3. End with the standard IELTS Task 1 Academic instruction:\n'
+                '"Summarise the information by selecting and reporting the main features, and make comparisons where relevant."'
+            )
+        },
+    }
+
+    messages = [
+        {
+            "role": "user",
+            "content": writing_args[str(task)]["prompt"]
+        },
+        {
+            "role": "user",
+            "content": writing_args[str(task)]["instructions"]
+        }
+    ]
+
+    if task == 1:
+        attachment_bytes = await attachment.read()
+        image_url = (
+            f"data:image/{attachment.filename.split('.')[-1]};base64,"
+            f"{b64encode(attachment_bytes).decode('utf-8')}"
+        )
+        # An image is a content part and has to live inside a message that has
+        # a role; a bare {"type": "image_url"} entry is not a valid chat message.
+        messages.append({
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url
+                    }
+                }
+            ]
+        })
+
+    return messages
--- a/ielts_be/services/impl/exam/writing/general.py
+++ b/ielts_be/services/impl/exam/writing/general.py
@@ -0,0 +1,44 @@
+from typing import List, Dict
+
+
+def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dict]:
+    """Build the two user messages that ask the LLM for a General Training writing prompt.
+
+    :param task: Writing task number (``1`` for the letter, ``2`` for the essay question).
+    :param topic: Topic interpolated into the prompt text.
+    :param difficulty: Difficulty label interpolated into the prompt text.
+    :return: Two ``user`` messages: the prompt followed by its formatting instructions.
+    :raises KeyError: If ``task`` is neither ``1`` nor ``2``.
+    """
+    letter_task = {
+        "prompt": (
+            'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
+            'student to compose a letter. The prompt should present a specific scenario or situation, '
+            f'based on the topic of "{topic}", requiring the student to provide information, '
+            'advice, or instructions within the letter. Make sure that the generated prompt is '
+            f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
+        ),
+        "instructions": (
+            'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
+            'the answer should include.'
+        )
+    }
+    essay_task = {
+        # TODO: Should the muslim disclaimer be here as well?
+        "prompt": (
+            f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
+            'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
+            f'analysis of contrasting perspectives on the topic of "{topic}".'
+        ),
+        "instructions": (
+            'The question should lead to an answer with either "theories", "complicated information" or '
+            'be "very descriptive" on the topic.'
+        )
+    }
+
+    # Tasks are keyed by their number as a string; an unknown task raises KeyError.
+    selected = {"1": letter_task, "2": essay_task}[str(task)]
+
+    return [
+        {"role": "user", "content": selected[section]}
+        for section in ("prompt", "instructions")
+    ]
--- a/ielts_be/services/impl/exam/writing/grade.py
+++ b/ielts_be/services/impl/exam/writing/grade.py
@@ -0,0 +1,207 @@
+import asyncio
+from typing import Dict, Optional
+from uuid import uuid4
+
+from ielts_be.configs.constants import GPTModels, TemperatureSettings
+from ielts_be.helpers import TextHelper, ExercisesHelper, FileHelper
+from ielts_be.repositories import IFileStorage
+from ielts_be.services import ILLMService, IAIDetectorService
+
+
+class GradeWriting:
+    """Grade IELTS writing answers with an LLM and enrich the result.
+
+    Besides the graded evaluation, the result carries a model answer, a
+    corrected version of the student's text and, when available, the AI
+    detection report.
+    """
+
+    def __init__(self, llm: ILLMService, file_storage: IFileStorage, ai_detector: IAIDetectorService):
+        self._llm = llm
+        self._file_storage = file_storage
+        self._ai_detector = ai_detector
+
+    async def grade_writing_task(self, task: int, question: str, answer: str, attachment: Optional[str] = None):
+        """Grade ``answer`` against ``question`` for the given writing task.
+
+        Answers that fail the word checks get a zero rating without calling the
+        LLM. Otherwise the evaluation, the model answer, the corrected text and
+        the AI detection are requested concurrently and merged into one dict.
+
+        :param task: Writing task number; task 1 requires 100 words, others 180.
+        :param question: The writing prompt that was given to the student.
+        :param answer: The student's answer.
+        :param attachment: Storage path of the task 1 image, if the question has one.
+        :return: Evaluation dict following ``_get_writing_template`` plus
+            ``perfect_answer``, ``fixed_text`` and optionally ``ai_detection``.
+        """
+        bare_minimum = 100 if task == 1 else 180
+
+        if not TextHelper.has_words(answer):
+            return self._zero_rating("The answer does not contain enough english words.")
+        if not TextHelper.has_x_words(answer, bare_minimum):
+            return self._zero_rating("The answer is insufficient and too small to be graded.")
+
+        template = self._get_writing_template()
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    f'You are a helpful assistant designed to output JSON on this format: {template}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
+                    'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
+                    'from the task, and assign a score of 0 if the response fails to address the question. '
+                    'Additionally, provide a detailed commentary highlighting both strengths and '
+                    'weaknesses in the response. '
+                    f'\n Question: "{question}" \n Answer: "{answer}"')
+            }
+        ]
+
+        has_image = task == 1 and attachment is not None
+
+        if task == 1:
+            if attachment is None:
+                # No image means the task 1 question is a letter.
+                messages.append({
+                    "role": "user",
+                    "content": (
+                        'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
+                        '"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
+                    )
+                })
+            else:
+                download_id = str(uuid4())
+                name = attachment.split('/')[-1]
+                out_path = f'./tmp/{download_id}/{name}'
+                # NOTE(review): the downloaded file is not removed here.
+                path = await self._file_storage.download_firebase_file(attachment, out_path)
+                # An image is a content part and must be wrapped in a message
+                # that has a role; a bare {"type": "image_url"} entry is not a
+                # valid chat message.
+                messages.append({
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/{name.split('.')[-1]};base64,{FileHelper.encode_image(path)}"
+                            }
+                        }
+                    ]
+                })
+
+        # GPT-3.5 Turbo is text-only, so an image attachment needs GPT-4o.
+        llm_model = GPTModels.GPT_3_5_TURBO if task == 1 and not has_image else GPTModels.GPT_4_O
+        temperature = (
+            TemperatureSettings.GRADING_TEMPERATURE
+            if task == 1 else
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+        evaluation_promise = self._llm.prediction(
+            llm_model,
+            messages,
+            ["comment"],
+            temperature
+        )
+
+        perfect_answer_minimum = 150 if task == 1 else 250
+        perfect_answer_promise = self._get_perfect_answer(question, perfect_answer_minimum)
+        fixed_text_promise = self._get_fixed_text(answer)
+        ai_detection_promise = self._ai_detector.run_detection(answer)
+
+        # The four requests are independent, so they run concurrently.
+        prediction_result, perfect_answer_result, fixed_text_result, ai_detection_result = await asyncio.gather(
+            evaluation_promise,
+            perfect_answer_promise,
+            fixed_text_promise,
+            ai_detection_promise
+        )
+
+        response = prediction_result
+        response["perfect_answer"] = perfect_answer_result["perfect_answer"]
+        response["overall"] = ExercisesHelper.fix_writing_overall(
+            response["overall"],
+            response["task_response"]
+        )
+        response['fixed_text'] = fixed_text_result
+
+        if ai_detection_result is not None:
+            response['ai_detection'] = ai_detection_result
+
+        return response
+
+    async def _get_fixed_text(self, text):
+        """Ask the LLM for ``text`` with its errors corrected and return the corrected string."""
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"fixed_text": "fixed test with no misspelling errors"}'
+                )
+            },
+            {
+                "role": "user",
+                "content": (
+                    'Fix the errors in the given text and put it in a JSON. '
+                    f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
+                )
+            }
+        ]
+
+        response = await self._llm.prediction(
+            GPTModels.GPT_3_5_TURBO,
+            messages,
+            ["fixed_text"],
+            0.2,
+            False
+        )
+        return response["fixed_text"]
+
+    async def _get_perfect_answer(self, question: str, size: int) -> Dict:
+        """Ask the LLM for a model answer to ``question`` of at least ``size`` words.
+
+        :return: The LLM response dict, which is requested to contain ``perfect_answer``.
+        """
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    'You are a helpful assistant designed to output JSON on this format: '
+                    '{"perfect_answer": "perfect answer for the question"}'
+                )
+            },
+            {
+                "role": "user",
+                "content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
+
+            },
+            {
+                "role": "user",
+                "content": f'The answer must have at least {size} words'
+            }
+        ]
+        return await self._llm.prediction(
+            GPTModels.GPT_4_O,
+            messages,
+            ["perfect_answer"],
+            TemperatureSettings.GEN_QUESTION_TEMPERATURE
+        )
+
+    @staticmethod
+    def _zero_rating(comment: str):
+        """Return an evaluation with every grade set to zero and ``comment`` as the overall comment."""
+        return {
+            'comment': comment,
+            'overall': 0,
+            'task_response': {
+                'Task Achievement': {
+                    "grade": 0.0,
+                    "comment": ""
+                },
+                'Coherence and Cohesion': {
+                    "grade": 0.0,
+                    "comment": ""
+                },
+                'Lexical Resource': {
+                    "grade": 0.0,
+                    "comment": ""
+                },
+                'Grammatical Range and Accuracy': {
+                    "grade": 0.0,
+                    "comment": ""
+                }
+            }
+        }
+
+    @staticmethod
+    def _get_writing_template():
+        """Return the evaluation structure the LLM is instructed to fill in."""
+        return {
+            "comment": "comment about student's response quality",
+            "overall": 0.0,
+            "task_response": {
+                "Task Achievement": {
+                    "grade": 0.0,
+                    "comment": "comment about Task Achievement of the student's response"
+                },
+                "Coherence and Cohesion": {
+                    "grade": 0.0,
+                    "comment": "comment about Coherence and Cohesion of the student's response"
+                },
+                "Lexical Resource": {
+                    "grade": 0.0,
+                    "comment": "comment about Lexical Resource of the student's response"
+                },
+                "Grammatical Range and Accuracy": {
+                    "grade": 0.0,
+                    "comment": "comment about Grammatical Range and Accuracy of the student's response"
+                }
+            }
+        }
--- a/ielts_be/services/impl/third_parties/init.py
+++ b/ielts_be/services/impl/third_parties/init.py
@@ -0,0 +1,15 @@
+from .aws_polly import AWSPolly
+from .heygen import Heygen
+from .openai import OpenAI
+from .whisper import OpenAIWhisper
+from .gpt_zero import GPTZero
+from .elai import ELAI
+
+__all__ = [
+    "AWSPolly",
+    "Heygen",
+    "OpenAI",
+    "OpenAIWhisper",
+    "GPTZero",
+    "ELAI"
+]
--- a/ielts_be/services/impl/third_parties/aws_polly.py
+++ b/ielts_be/services/impl/third_parties/aws_polly.py
@@ -0,0 +1,86 @@
+import random
+
+from aiobotocore.client import BaseClient
+
+from ielts_be.dtos.listening import Dialog
+from ielts_be.services import ITextToSpeechService
+from ielts_be.configs.constants import NeuralVoices
+
+
+class AWSPolly(ITextToSpeechService):
+    """Text-to-speech service that synthesizes audio through an AWS Polly client."""
+
+    def __init__(self, client: BaseClient):
+        self._client = client
+
+    async def synthesize_speech(self, text: str, voice: str, engine: str = "neural", output_format: str = "mp3"):
+        """Synthesize ``text`` with the given voice and return the audio stream's content."""
+        polly_reply = await self._client.synthesize_speech(
+            Engine=engine,
+            Text=text,
+            OutputFormat=output_format,
+            VoiceId=voice
+        )
+        audio_stream = polly_reply['AudioStream']
+        return await audio_stream.read()
+
+    async def text_to_speech(self, dialog: Dialog) -> bytes:
+        """Turn a dialog (conversation or monologue) into one audio byte string.
+
+        A fixed closing announcement is appended after the dialog audio.
+
+        :raises ValueError: If the dialog has neither a conversation nor a monologue.
+        """
+        if not (dialog.conversation or dialog.monologue):
+            raise ValueError("Unsupported argument for text_to_speech")
+
+        # A conversation takes precedence over a monologue when both are set.
+        if dialog.conversation:
+            spoken_parts = await self._conversation_to_speech(dialog)
+        else:
+            spoken_parts = await self._text_to_speech(dialog.monologue)
+
+        closing_announcement = await self.synthesize_speech(
+            "This audio recording, for the listening exercise, has finished.",
+            "Stephen"
+        )
+
+        # Concatenate the dialog audio and the closing announcement.
+        return b"".join([*spoken_parts, closing_announcement])
+
+    async def _text_to_speech(self, text: str):
+        """Synthesize a monologue chunk by chunk with one randomly chosen neural voice."""
+        voice = random.choice(NeuralVoices.ALL_NEURAL_VOICES)['Id']
+        return [await self.synthesize_speech(chunk, voice) for chunk in self._divide_text(text)]
+
+    async def _conversation_to_speech(self, dialog: Dialog):
+        """Synthesize every conversation entry, in order, with the voice set on that entry."""
+        return [
+            await self.synthesize_speech(line.text, line.voice)
+            for line in dialog.conversation
+        ]
+
+    @staticmethod
+    def _divide_text(text, max_length=3000):
+        """Split ``text`` into chunks of at most ``max_length`` characters.
+
+        Each chunk ends right after the last period inside its window when there
+        is one; otherwise the window is cut at ``max_length``.
+        """
+        total = len(text)
+        if total <= max_length:
+            return [text]
+
+        chunks = []
+        start = 0
+        while start < total:
+            window_end = min(start + max_length, total)
+            last_period = text.rfind('.', start, window_end)
+            # rfind returns -1 when no period is found, which is never past start.
+            end = last_period + 1 if last_period > start else window_end
+            chunks.append(text[start:end])
+            start = end
+
+        return chunks
--- a/ielts_be/services/impl/third_parties/elai/init.py
+++ b/ielts_be/services/impl/third_parties/elai/init.py
@@ -0,0 +1,84 @@
+from copy import deepcopy
+from logging import getLogger
+from httpx import AsyncClient
+
+from ielts_be.dtos.video import Task, TaskStatus
+from ielts_be.services import IVideoGeneratorService
+
+
+class ELAI(IVideoGeneratorService):
+    """Video generator that creates and tracks avatar videos through the Elai API."""
+
+    _ELAI_ENDPOINT = 'https://apis.elai.io/api/v1/videos'
+
+    def __init__(self, client: AsyncClient, token: str, avatars: dict, *, conf: dict):
+        super().__init__(deepcopy(avatars))
+
+        self._http_client = client
+        # Request template; copied per request so it is never mutated.
+        self._conf = deepcopy(conf)
+        self._logger = getLogger(__name__)
+        self._GET_HEADER = {
+            "accept": "application/json",
+            "Authorization": f"Bearer {token}"
+        }
+        self._POST_HEADER = {
+            "accept": "application/json",
+            "content-type": "application/json",
+            "Authorization": f"Bearer {token}"
+        }
+
+    async def create_video(self, text: str, avatar: str):
+        """Create a video of ``avatar`` speaking ``text`` and start rendering it.
+
+        :param text: Speech for the video's single slide.
+        :param avatar: Key of the avatar in the configured avatars.
+        :return: ``Task`` with status ``STARTED`` and the video id as result, or
+            a ``Task`` with status ``ERROR`` when the API response has no id.
+        :raises KeyError: If ``avatar`` is not a configured avatar.
+        """
+        avatar_conf = self._avatars[avatar]
+
+        # Work on a copy so one request's speech and avatar never leak into the
+        # shared template.
+        payload = deepcopy(self._conf)
+        # "slides" is a list; every per-video setting belongs to its first slide.
+        slide = payload["slides"][0]
+        slide["canvas"]["objects"][0]["src"] = avatar_conf.get("avatar_url")
+        slide["avatar"] = {
+            "code": avatar_conf.get("avatar_code"),
+            "gender": avatar_conf.get("avatar_gender"),
+            "canvas": avatar_conf.get("avatar_canvas")
+        }
+        slide["speech"] = text
+        slide["voice"] = avatar_conf.get("voice_id")
+        slide["voiceProvider"] = avatar_conf.get("voice_provider")
+
+        response = await self._http_client.post(self._ELAI_ENDPOINT, headers=self._POST_HEADER, json=payload)
+        response_data = response.json()
+
+        self._logger.info(response.status_code)
+        self._logger.info(response_data)
+
+        # A response without an id is reported as an error task rather than
+        # raising KeyError.
+        video_id = response_data.get("_id")
+        if not video_id:
+            return Task(status=TaskStatus.ERROR)
+
+        await self._http_client.post(f'{self._ELAI_ENDPOINT}/render/{video_id}', headers=self._GET_HEADER)
+        return Task(
+            result=video_id,
+            status=TaskStatus.STARTED,
+        )
+
+    async def poll_status(self, video_id: str) -> Task:
+        """Fetch the current state of a video and map it to a ``Task``.
+
+        :return: ``COMPLETED`` with the video URL when the status is ``ready``,
+            ``ERROR`` when it is ``failed``, otherwise ``IN_PROGRESS`` with the
+            video id as result.
+        """
+        response = await self._http_client.get(f'{self._ELAI_ENDPOINT}/{video_id}', headers=self._GET_HEADER)
+        response_data = response.json()
+        status = response_data['status']
+
+        if status == 'ready':
+            self._logger.info(response_data)
+            return Task(
+                status=TaskStatus.COMPLETED,
+                result=response_data.get('url')
+            )
+
+        if status == 'failed':
+            self._logger.error('Video creation failed.')
+            return Task(
+                status=TaskStatus.ERROR,
+                result=response_data.get('url')
+            )
+
+        self._logger.info('Video is still processing.')
+        return Task(
+            status=TaskStatus.IN_PROGRESS,
+            result=video_id
+        )
+
+    async def pool_status(self, video_id: str) -> Task:
+        """Original (misspelled) name of ``poll_status``, kept for existing callers.
+
+        NOTE(review): the Heygen generator names this method ``poll_status``;
+        confirm which name ``IVideoGeneratorService`` declares.
+        """
+        return await self.poll_status(video_id)
--- a/ielts_be/services/impl/third_parties/elai/avatars.json
+++ b/ielts_be/services/impl/third_parties/elai/avatars.json
@@ -0,0 +1,58 @@
+{
+    "Gia": {
+        "avatar_code": "gia.business",
+        "avatar_gender": "female",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/gia/business/gia_business.png",
+        "avatar_canvas": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/gia/business/gia_business.png",
+        "voice_id": "EXAVITQu4vr4xnSDxMaL",
+        "voice_provider": "elevenlabs"
+    },
+    "Vadim": {
+        "avatar_code": "vadim.business",
+        "avatar_gender": "male",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/vadim/business/vadim_business.png",
+        "avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/vadim/business/vadim_business.png",
+        "voice_id": "flq6f7yk4E4fJM5XTYuZ",
+        "voice_provider": "elevenlabs"
+    },
+    "Orhan": {
+        "avatar_code": "orhan.business",
+        "avatar_gender": "male",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/orhan/business/orhan.png",
+        "avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/orhan/business/orhan.png",
+        "voice_id": "en-US-AndrewMultilingualNeural",
+        "voice_provider": "azure"
+    },
+    "Flora": {
+        "avatar_code": "flora.business",
+        "avatar_gender": "female",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/flora/business/flora_business.png",
+        "avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/flora/business/flora_business.png",
+        "voice_id": "en-US-JaneNeural",
+        "voice_provider": "azure"
+    },
+    "Scarlett": {
+        "avatar_code": "scarlett.business",
+        "avatar_gender": "female",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/scarlett/business/scarlett_business.png",
+        "avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/scarlett/business/scarlett_business.png",
+        "voice_id": "en-US-NancyNeural",
+        "voice_provider": "azure"
+    },
+    "Parker": {
+        "avatar_code": "parker.casual",
+        "avatar_gender": "male",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/parker/casual/parker_casual.png",
+        "avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/parker/casual/parker_casual.png",
+        "voice_id": "en-US-TonyNeural",
+        "voice_provider": "azure"
+    },
+    "Ethan": {
+        "avatar_code": "ethan.business",
+        "avatar_gender": "male",
+        "avatar_url": "https://elai-avatars.s3.us-east-2.amazonaws.com/common/ethan/business/ethan_business_low.png",
+        "avatar_canvas": "https://d3u63mhbhkevz8.cloudfront.net/common/ethan/business/ethan_business_low.png",
+        "voice_id": "en-US-JasonNeural",
+        "voice_provider": "azure"
+    }
+}
--- a/ielts_be/services/impl/third_parties/elai/conf.json
+++ b/ielts_be/services/impl/third_parties/elai/conf.json
@@ -0,0 +1,72 @@
+{
+    "name": "API test",
+    "slides": [
+        {
+            "id": 1,
+            "canvas": {
+                "objects": [
+                    {
+                        "type": "avatar",
+                        "left": 151.5,
+                        "top": 36,
+                        "fill": "#4868FF",
+                        "scaleX": 0.3,
+                        "scaleY": 0.3,
+                        "width": 1080,
+                        "height": 1080,
+                        "avatarType": "transparent",
+                        "animation": {
+                            "type": null,
+                            "exitType": null
+                        }
+                    },
+                    {
+                        "type": "image",
+                        "version": "5.3.0",
+                        "originX": "left",
+                        "originY": "top",
+                        "left": 30,
+                        "top": 30,
+                        "width": 800,
+                        "height": 600,
+                        "fill": "rgb(0,0,0)",
+                        "stroke": null,
+                        "strokeWidth": 0,
+                        "strokeDashArray": null,
+                        "strokeLineCap": "butt",
+                        "strokeDashOffset": 0,
+                        "strokeLineJoin": "miter",
+                        "strokeUniform": false,
+                        "strokeMiterLimit": 4,
+                        "scaleX": 0.18821429,
+                        "scaleY": 0.18821429,
+                        "angle": 0,
+                        "flipX": false,
+                        "flipY": false,
+                        "opacity": 1,
+                        "shadow": null,
+                        "visible": true,
+                        "backgroundColor": "",
+                        "fillRule": "nonzero",
+                        "paintFirst": "fill",
+                        "globalCompositeOperation": "source-over",
+                        "skewX": 0,
+                        "skewY": 0,
+                        "cropX": 0,
+                        "cropY": 0,
+                        "id": 676845479989,
+                        "src": "https://d3u63mhbhkevz8.cloudfront.net/production/uploads/66f5190349f943682dd776ff/en-coach-main-logo-800x600_sm1ype.jpg?Expires=1727654400&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9kM3U2M21oYmhrZXZ6OC5jbG91ZGZyb250Lm5ldC9wcm9kdWN0aW9uL3VwbG9hZHMvNjZmNTE5MDM0OWY5NDM2ODJkZDc3NmZmL2VuLWNvYWNoLW1haW4tbG9nby04MDB4NjAwX3NtMXlwZS5qcGciLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3Mjc2NTQ0MDB9fX1dfQ__&Signature=kTVzlDeS7cua2HiAE5G%7E-yFqbhu0bHraFH5SauUln7yuNXoX7vtiKIBYiL%7Eps3LCLEZS77arSZ7H%7EG8CKzabHDjAR-Y6Uc%7ELD5KQaMmk0jbAxbC3Wdoq6cfd0qIwEuodQYlC0It2WBidP8KsgOy3uUQ%7EvcBoqlb255yMFw4pHuptOBB1kPs%7EFyzDV0fnRNsKaYRcy0Fn2EFUp13axm0CZQclazuLFM622AyCydKMy0vfxV%7Etny3sskwPaUe2OANGMFg07Q1pRuy6fUON0DsbhAh1tA2H6-nnem5KbFwiZK3IIwwYGBx3H41ovzC6Ejt80Fd0%7EPSHw7GzVBnUmtP-IA__&Key-Pair-Id=K1Y7U91AR6T7E5",
+                        "crossOrigin": "anonymous",
+                        "filters": [],
+                        "_exists": true
+                    }
+                ],
+                "background": "#ffffff",
+                "version": "4.4.0"
+            },
+            "animation": "fade_in",
+            "language": "English",
+            "voiceType": "text"
+        }
+    ]
+}
--- a/ielts_be/services/impl/third_parties/gpt_zero.py
+++ b/ielts_be/services/impl/third_parties/gpt_zero.py
@@ -0,0 +1,52 @@
+from logging import getLogger
+from typing import Dict, Optional
+
+from httpx import AsyncClient
+
+from ielts_be.services import IAIDetectorService
+
+
+class GPTZero(IAIDetectorService):
+    """AI-generated-text detector backed by the GPTZero text prediction endpoint."""
+
+    _GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
+
+    def __init__(self, client: AsyncClient, gpt_zero_key: str):
+        self._header = {'x-api-key': gpt_zero_key}
+        self._http_client = client
+        self._logger = getLogger(__name__)
+
+    async def run_detection(self, text: str):
+        """Submit ``text`` for detection.
+
+        :return: The parsed detection dict, or ``None`` when the API does not
+            answer with status 200 or the response cannot be parsed.
+        """
+        payload = dict(document=text, version='', multilingual=False)
+
+        reply = await self._http_client.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=payload)
+        return self._parse_detection(reply.json()) if reply.status_code == 200 else None
+
+    def _parse_detection(self, response: Dict) -> Optional[Dict]:
+        """Reduce the API response to the fields of its first document.
+
+        Any failure while reading the response is logged and yields ``None``.
+        """
+        try:
+            scan = response["documents"][0]
+
+            # Keep only the sentence text and its AI-highlight flag.
+            sentences = []
+            for entry in scan["sentences"]:
+                sentences.append({
+                    "sentence": entry["sentence"],
+                    "highlight_sentence_for_ai": entry["highlight_sentence_for_ai"]
+                })
+
+            return {
+                "class_probabilities": scan["class_probabilities"],
+                "confidence_category": scan["confidence_category"],
+                "predicted_class": scan["predicted_class"],
+                "sentences": sentences
+            }
+        except Exception as e:
+            self._logger.error(f'Failed to parse GPT\'s Zero response: {str(e)}')
+            return None
--- a/ielts_be/services/impl/third_parties/heygen/init.py
+++ b/ielts_be/services/impl/third_parties/heygen/init.py
@@ -0,0 +1,82 @@
+import logging
+from copy import deepcopy
+
+from httpx import AsyncClient
+
+from ielts_be.dtos.video import Task, TaskStatus
+from ielts_be.services import IVideoGeneratorService
+
+
+class Heygen(IVideoGeneratorService):
+    """Video generator that creates and tracks template videos through the HeyGen API."""
+
+    _GET_VIDEO_URL = 'https://api.heygen.com/v1/video_status.get'
+
+    def __init__(self, client: AsyncClient, token: str, avatars: dict):
+        super().__init__(deepcopy(avatars))
+        self._get_header = {
+            'X-Api-Key': token
+        }
+        self._post_header = {
+            'X-Api-Key': token,
+            'Content-Type': 'application/json'
+        }
+        self._http_client = client
+        self._logger = logging.getLogger(__name__)
+
+    async def create_video(self, text: str, avatar: str):
+        """Request a video in which the avatar's template speaks ``text``.
+
+        :param text: Script placed in the template's ``script_here`` variable.
+        :param avatar: Key of the avatar in the configured avatars; its ``id`` is
+            used as the template id.
+        :return: ``Task`` with status ``STARTED`` and the video id as result.
+        :raises KeyError: If ``avatar`` is unknown or the response has no video id.
+        """
+        template_id = self._avatars[avatar]["id"]
+
+        create_video_url = f'https://api.heygen.com/v2/template/{template_id}/generate'
+        data = {
+            "test": False,
+            "caption": False,
+            "title": "video_title",
+            "variables": {
+                "script_here": {
+                    "name": "script_here",
+                    "type": "text",
+                    "properties": {
+                        "content": text
+                    }
+                }
+            }
+        }
+        response = await self._http_client.post(create_video_url, headers=self._post_header, json=data)
+        # Parse the body once and reuse it for logging and for the id lookup.
+        response_data = response.json()
+        self._logger.info(response.status_code)
+        self._logger.info(response_data)
+        video_id = response_data["data"]["video_id"]
+
+        return Task(
+            result=video_id,
+            status=TaskStatus.STARTED,
+        )
+
+    async def poll_status(self, video_id: str) -> Task:
+        """Fetch the current state of a video and map it to a ``Task``.
+
+        :return: ``ERROR`` when the API reports an error, ``COMPLETED`` with the
+            video URL when the status is ``completed``, otherwise
+            ``IN_PROGRESS`` with the video id as result.
+        """
+        response = await self._http_client.get(self._GET_VIDEO_URL, headers=self._get_header, params={
+            'video_id': video_id
+        })
+        response_data = response.json()
+        video_data = response_data["data"]
+
+        status = video_data["status"]
+        # A payload without an "error" key is treated the same as "error": null.
+        error = video_data.get("error")
+
+        if status != "completed" and error is None:
+            self._logger.info(f"Status: {status}")
+            return Task(
+                status=TaskStatus.IN_PROGRESS,
+                result=video_id
+            )
+
+        if error:
+            self._logger.error('Video creation failed.')
+            return Task(
+                status=TaskStatus.ERROR,
+                result=response_data.get('url')
+            )
+
+        url = video_data['video_url']
+        self._logger.info(f'Successfully generated video: {url}')
+        return Task(
+            status=TaskStatus.COMPLETED,
+            result=url
+        )
--- a/ielts_be/services/impl/third_parties/heygen/avatars.json
+++ b/ielts_be/services/impl/third_parties/heygen/avatars.json
@@ -0,0 +1,30 @@
+{
+  "Matthew Noah": {
+    "id": "5912afa7c77c47d3883af3d874047aaf",
+    "avatar_gender": "male"
+  },
+  "Vera Cerise": {
+    "id": "9e58d96a383e4568a7f1e49df549e0e4",
+    "avatar_gender": "female"
+  },
+  "Edward Tony": {
+    "id": "d2cdd9c0379a4d06ae2afb6e5039bd0c",
+    "avatar_gender": "male"
+  },
+  "Tanya Molly": {
+    "id": "045cb5dcd00042b3a1e4f3bc1c12176b",
+    "avatar_gender": "female"
+  },
+  "Kayla Abbi": {
+    "id": "1ae1e5396cc444bfad332155fdb7a934",
+    "avatar_gender": "female"
+  },
+  "Jerome Ryan": {
+    "id": "0ee6aa7cc1084063a630ae514fccaa31",
+    "avatar_gender": "male"
+  },
+  "Tyler Christopher": {
+    "id": "5772cff935844516ad7eeff21f839e43",
+    "avatar_gender": "male"
+  }
+}
--- a/ielts_be/services/impl/third_parties/openai.py
+++ b/ielts_be/services/impl/third_parties/openai.py
@@ -0,0 +1,153 @@
+import json
+import re
+import logging
+from typing import List, Optional, Callable, TypeVar
+
+from openai import AsyncOpenAI
+from openai.types.chat import ChatCompletionMessageParam
+
+from ielts_be.services.abc import ILLMService
+from ielts_be.helpers import count_tokens
+from ielts_be.configs.constants import BLACKLISTED_WORDS
+from pydantic import BaseModel
+
+T = TypeVar('T', bound=BaseModel)
+
+
+class OpenAI(ILLMService):
+    """LLM service backed by the OpenAI chat-completions API in JSON mode.
+
+    ``prediction`` returns the decoded JSON payload (with optional blacklist
+    and required-field checks); ``pydantic_prediction`` additionally maps the
+    payload through a caller-supplied function and asks the model to repair
+    its answer when that mapping fails.
+    """
+
+    MAX_TOKENS = 4097
+    TRY_LIMIT = 2
+
+    def __init__(self, client: AsyncOpenAI):
+        self._client = client
+        self._logger = logging.getLogger(__name__)
+        self._default_model = "gpt-4o"
+
+    async def prediction(
+            self,
+            model: str,
+            messages: List[ChatCompletionMessageParam],
+            fields_to_check: Optional[List[str]],
+            temperature: float,
+            check_blacklisted: bool = True,
+            token_count: int = -1
+    ):
+        """Run a JSON-mode completion and return the decoded payload.
+
+        Args:
+            model: Model name forwarded to the API.
+            messages: Chat messages to send.
+            fields_to_check: Keys the decoded payload must contain, or ``None``
+                to skip the check.
+            temperature: Sampling temperature.
+            check_blacklisted: Whether to reject answers containing a word
+                from ``BLACKLISTED_WORDS``.
+            token_count: Prompt token count; ``-1`` means "count it here".
+
+        Returns:
+            The decoded JSON payload, or ``""`` when the answer still contains
+            a blacklisted word after ``TRY_LIMIT`` retries.
+        """
+        if token_count == -1:
+            token_count = self._count_total_tokens(messages)
+        return await self._prediction(model, messages, token_count, fields_to_check, temperature, 0, check_blacklisted)
+
+    async def _prediction(
+            self,
+            model: str,
+            messages: List[ChatCompletionMessageParam],
+            token_count: int,
+            fields_to_check: Optional[List[str]],
+            temperature: float,
+            try_count: int,
+            check_blacklisted: bool,
+    ):
+        """Single attempt of ``prediction``; recurses with ``try_count + 1`` on retry."""
+        result = await self._client.chat.completions.create(
+            model=model,
+            max_tokens=int(self.MAX_TOKENS - token_count - 300),
+            temperature=float(temperature),
+            messages=messages,
+            response_format={"type": "json_object"}
+        )
+        result = result.choices[0].message.content
+
+        if check_blacklisted:
+            found_blacklisted_word = self._get_found_blacklisted_words(result)
+            if found_blacklisted_word is not None:
+                if try_count >= self.TRY_LIMIT:
+                    # Retries exhausted: callers receive an empty string.
+                    return ""
+                self._logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
+                return await self._prediction(
+                    model, messages, token_count, fields_to_check, temperature, (try_count + 1), check_blacklisted
+                )
+
+        if fields_to_check is None:
+            return json.loads(result)
+
+        # Check the required fields against the *decoded* payload. Testing the
+        # raw text would be a substring match and would accept a field name
+        # that merely appears inside some value.
+        try:
+            fields_ok = self._check_fields(json.loads(result), fields_to_check)
+        except (json.JSONDecodeError, TypeError):
+            # Undecodable text or a non-container payload counts as a failed check.
+            fields_ok = False
+
+        if not fields_ok and try_count < self.TRY_LIMIT:
+            return await self._prediction(
+                model, messages, token_count, fields_to_check, temperature, (try_count + 1), check_blacklisted
+            )
+        # Out of retries (or the check passed): decoding errors propagate to the caller.
+        return json.loads(result)
+
+    async def prediction_override(self, **kwargs):
+        """Forward ``kwargs`` unchanged to the chat-completions endpoint."""
+        return await self._client.chat.completions.create(
+            **kwargs
+        )
+
+    @staticmethod
+    def _get_found_blacklisted_words(text: str):
+        """Return the first blacklisted word found in ``text`` as a whole word, else ``None``."""
+        text_lower = text.lower()
+        for word in BLACKLISTED_WORDS:
+            if re.search(r'\b' + re.escape(word) + r'\b', text_lower):
+                return word
+        return None
+
+    @staticmethod
+    def _count_total_tokens(messages):
+        """Sum ``count_tokens(...)["n_tokens"]`` over every message's content."""
+        return sum(count_tokens(message["content"])["n_tokens"] for message in messages)
+
+    @staticmethod
+    def _check_fields(obj, fields):
+        """Return ``True`` when every entry of ``fields`` is contained in ``obj``."""
+        return all(field in obj for field in fields)
+
+    async def pydantic_prediction(
+            self,
+            messages: List[ChatCompletionMessageParam],
+            map_to_model: Callable,
+            json_scheme: str,
+            *,
+            model: Optional[str] = None,
+            temperature: Optional[float] = None,
+            max_retries: int = 3
+    ) -> List[T] | T | None:
+        """Run a JSON-mode completion and map the payload with ``map_to_model``.
+
+        When decoding or mapping raises, the model is sent its previous answer,
+        ``json_scheme`` and the error, and is asked to fix the output.
+
+        Args:
+            messages: Chat messages for the first attempt.
+            map_to_model: Callable applied to the decoded JSON payload.
+            json_scheme: Description of the expected JSON, echoed on retries.
+            model: Model name; defaults to the service's default model.
+            temperature: Sampling temperature; omitted from the request when ``None``.
+            max_retries: Maximum number of attempts (at least one is always made).
+
+        Returns:
+            The value returned by ``map_to_model``, or ``None`` once every
+            attempt has failed.
+        """
+        params = {
+            "messages": messages,
+            "response_format": {"type": "json_object"},
+            "model": model if model else self._default_model
+        }
+
+        # ``0`` is a legitimate temperature, so compare against None, not truthiness.
+        if temperature is not None:
+            params["temperature"] = temperature
+
+        # Honour the caller's limit instead of a hard-coded 3; always try once.
+        attempts_allowed = max(1, max_retries)
+        attempt = 0
+        while attempt < attempts_allowed:
+            result = await self._client.chat.completions.create(**params)
+            result_content = result.choices[0].message.content
+
+            try:
+                result_json = json.loads(result_content)
+                self._logger.debug(str(result_json))
+                return map_to_model(result_json)
+            except Exception as e:
+                attempt += 1
+                self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
+                # The follow-up request carries only the repair instructions.
+                params["messages"] = [
+                    {
+                        "role": "user",
+                        "content": (
+                            "Your previous response wasn't in the json format I've explicitly told you to output. "
+                            f"In your next response, you will fix it and return me just the json I've asked."
+                        )
+                    },
+                    {
+                        "role": "user",
+                        "content": (
+                            f"Previous response: {result_content}\n"
+                            f"JSON format: {json_scheme}\n"
+                            f"Validation errors: {e}"
+                        )
+                    }
+                ]
+
+        self._logger.error(f"Max retries exceeded!")
+        return None
--- a/ielts_be/services/impl/third_parties/whisper.py
+++ b/ielts_be/services/impl/third_parties/whisper.py
@@ -0,0 +1,106 @@
+import threading
+import whisper
+import asyncio
+import numpy as np
+import soundfile as sf
+import librosa
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict
+
+from logging import getLogger
+from whisper import Whisper
+
+from ielts_be.services import ISpeechToTextService
+
+"""
+    The whisper model is not thread safe, a thread pool
+    with 4 whisper models will be created so it can
+    process up to 4 transcriptions at a time. 
+    
+    The base model requires ~1GB so 4 instances is the safe bet:
+    https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages
+"""
+class OpenAIWhisper(ISpeechToTextService):
+    """Speech-to-text service backed by a pool of locally loaded Whisper models.
+
+    ``num_models`` models are loaded up front and transcriptions run on a
+    thread pool with the same number of workers. Each worker thread keeps the
+    model it was first handed, so a model is never used by two transcriptions
+    at once (provided ``get_model`` is not also called from elsewhere).
+    """
+
+    # Audio is resampled to this rate before transcription.
+    _TARGET_SAMPLE_RATE = 16000
+    # 30 seconds at 16 kHz; longer recordings are transcribed chunk by chunk.
+    _CHUNK_SAMPLES = 480000
+
+    def __init__(self, model_name: str = "base", num_models: int = 4):
+        self._model_name = model_name
+        self._num_models = num_models
+        self._models: Dict[int, 'Whisper'] = {}
+        self._lock = threading.Lock()
+        self._next_model_id = 0
+        self._is_closed = False
+        # Defined before the models are loaded so close()/__del__ still work
+        # when load_model raises.
+        self._executor = None
+        # Per-thread slot holding the model bound to each worker.
+        self._worker_state = threading.local()
+        self._logger = getLogger(__name__)
+
+        for i in range(num_models):
+            self._models[i] = whisper.load_model(self._model_name, in_memory=True)
+
+        self._executor = ThreadPoolExecutor(
+            max_workers=num_models,
+            thread_name_prefix="whisper_worker"
+        )
+
+    def get_model(self) -> 'Whisper':
+        """Return the next model in round-robin order."""
+        with self._lock:
+            model_id = self._next_model_id
+            self._next_model_id = (self._next_model_id + 1) % self._num_models
+            return self._models[model_id]
+
+    def _model_for_current_thread(self) -> 'Whisper':
+        """Return the model bound to the calling thread, binding one on first use.
+
+        Plain round-robin per task can hand out a model whose previous
+        transcription is still running; binding one model per worker avoids that.
+        """
+        model = getattr(self._worker_state, "model", None)
+        if model is None:
+            model = self.get_model()
+            self._worker_state.model = model
+        return model
+
+    async def speech_to_text(self, path: str) -> str:
+        """Transcribe the audio file at ``path`` and return the recognised text.
+
+        The blocking work runs on the service's thread pool. Errors raised
+        while reading or transcribing propagate to the caller.
+        """
+        def transcribe():
+            audio, sr = sf.read(path)
+
+            # Convert to mono first to reduce memory usage
+            if len(audio.shape) > 1:
+                audio = audio.mean(axis=1)
+
+            # Resample from the file's sample rate to 16 kHz
+            audio = librosa.resample(audio, orig_sr=sr, target_sr=self._TARGET_SAMPLE_RATE)
+
+            # Normalize to [-1, 1] range
+            audio = audio.astype(np.float32)
+            peak = np.max(np.abs(audio))
+            if peak > 0:
+                audio = audio / peak
+
+            model = self._model_for_current_thread()
+
+            # A short recording yields exactly one chunk, so one code path
+            # covers both the short and the long case.
+            texts = []
+            for start in range(0, len(audio), self._CHUNK_SAMPLES):
+                chunk = audio[start:start + self._CHUNK_SAMPLES]
+                texts.append(
+                    model.transcribe(
+                        chunk,
+                        fp16=False,
+                        language='English',
+                        verbose=False
+                    )["text"]
+                )
+            return " ".join(texts)
+
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(self._executor, transcribe)
+
+    def close(self):
+        """Shut the thread pool down; calling it more than once is harmless."""
+        with self._lock:
+            if not self._is_closed:
+                self._is_closed = True
+                if self._executor is not None:
+                    self._executor.shutdown(wait=True, cancel_futures=True)
+
+    def __del__(self):
+        self.close()
--- a/ielts_be/services/impl/training/init.py
+++ b/ielts_be/services/impl/training/init.py
@@ -0,0 +1,7 @@
+from .training import TrainingService
+from .kb import TrainingContentKnowledgeBase
+
+__all__ = [
+    "TrainingService",
+    "TrainingContentKnowledgeBase"
+]
--- a/ielts_be/services/impl/training/kb.py
+++ b/ielts_be/services/impl/training/kb.py
@@ -0,0 +1,88 @@
+import json
+import os
+from logging import getLogger
+from typing import Dict, List
+
+import faiss
+import pickle
+
+from ielts_be.services import IKnowledgeBase
+
+
+class TrainingContentKnowledgeBase(IKnowledgeBase):
+    """Per-category FAISS indices over training tips, plus their metadata.
+
+    On construction the indices and the pickled metadata are loaded from disk;
+    ``query_knowledge_base`` then returns the metadata entries closest to a
+    query inside one category.
+    """
+
+    def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
+        # The logger must exist before load_indices_and_metadata() runs,
+        # because that method logs every index it loads.
+        self._logger = getLogger(__name__)
+        self._embedding_model = embeddings
+        self._tips = None  # self._read_json(path)
+        self._category_metadata = None
+        self._indices = None
+        self.load_indices_and_metadata()
+
+    @staticmethod
+    def _read_json(path: str) -> Dict[str, any]:
+        """Read and decode the UTF-8 JSON file at ``path``."""
+        with open(path, 'r', encoding="utf-8") as json_file:
+            return json.load(json_file)
+
+    def print_category_count(self):
+        """Print how many tips each category holds.
+
+        NOTE: ``_tips`` is not loaded in ``__init__`` (the ``_read_json`` call
+        is commented out), so this fails until the tips are loaded.
+        """
+        category_tips = {}
+        for unit in self._tips['units']:
+            for page in unit['pages']:
+                for tip in page['tips']:
+                    category = tip['category'].lower().replace(" ", "_")
+                    # Count the first occurrence too (it used to be recorded as 0).
+                    category_tips[category] = category_tips.get(category, 0) + 1
+        print(category_tips)
+
+    def create_embeddings_and_save_them(self) -> None:
+        """Build one FAISS index per category and write indices and metadata to ``./faiss``.
+
+        NOTE: relies on ``_tips`` like ``print_category_count`` does.
+        """
+        category_embeddings = {}
+        category_metadata = {}
+
+        for unit in self._tips['units']:
+            for page in unit['pages']:
+                for tip in page['tips']:
+                    category = tip['category'].lower().replace(" ", "_")
+                    category_embeddings.setdefault(category, []).append(tip['embedding'])
+                    category_metadata.setdefault(category, []).append({"id": tip['id'], "text": tip['text']})
+
+        for category, embeddings in category_embeddings.items():
+            embeddings_array = self._embedding_model.encode(embeddings)
+            index = faiss.IndexFlatL2(embeddings_array.shape[1])
+            index.add(embeddings_array)
+
+            faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")
+
+        with open("./faiss/tips_metadata.pkl", "wb") as f:
+            pickle.dump(category_metadata, f)
+
+    def load_indices_and_metadata(
+            self,
+            directory: str = './faiss',
+            suffix: str = '_tips_index.faiss',
+            metadata_path: str = './faiss/tips_metadata.pkl'
+    ):
+        """Load every ``*<suffix>`` index in ``directory`` and the pickled metadata.
+
+        The category name is the file name with ``suffix`` removed.
+        """
+        self._indices = {}
+        for file in os.listdir(directory):
+            if not file.endswith(suffix):
+                continue
+            category = file[:-len(suffix)]
+            self._indices[category] = faiss.read_index(os.path.join(directory, file))
+            self._logger.info(f'Loaded embeddings for {category} category.')
+
+        # SECURITY NOTE: pickle.load executes arbitrary code from the file;
+        # only load metadata files produced by this application.
+        with open(metadata_path, 'rb') as f:
+            self._category_metadata = pickle.load(f)
+        self._logger.info("Loaded tips metadata")
+
+    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
+        """Return the metadata of up to ``top_k`` tips nearest to ``query`` in ``category``.
+
+        Raises:
+            KeyError: if no index or metadata exists for ``category``.
+        """
+        query_embedding = self._embedding_model.encode([query])
+        index = self._indices[category]
+        _, neighbours = index.search(query_embedding, top_k)
+        metadata = self._category_metadata[category]
+        # FAISS pads the result with -1 when the index holds fewer than top_k
+        # vectors; a negative index would silently pick entries from the end.
+        return [metadata[i] for i in neighbours[0] if i >= 0]
--- a/ielts_be/services/impl/training/training.py
+++ b/ielts_be/services/impl/training/training.py
@@ -0,0 +1,458 @@
+import re
+from datetime import datetime
+from functools import reduce
+from logging import getLogger
+
+from typing import Dict
+
+from ielts_be.configs.constants import TemperatureSettings, GPTModels
+from ielts_be.helpers import count_tokens
+from ielts_be.repositories import IDocumentStore
+from ielts_be.services import ILLMService, ITrainingService, IKnowledgeBase
+from ielts_be.dtos.training import *
+
+
+class TrainingService(ITrainingService):
+    TOOLS = [
+        'critical_thinking',
+        'language_for_writing',
+        'reading_skills',
+        'strategy',
+        'words',
+        'writing_skills'
+    ]
+    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
+
+    def __init__(self, llm: ILLMService, document_store: IDocumentStore, training_kb: IKnowledgeBase):
+        """Keep references to the LLM, the document store and the tips knowledge base."""
+        self._logger = getLogger(__name__)
+        self._llm, self._db, self._kb = llm, document_store, training_kb
+
+    async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
+        """Ask the LLM for a tip explaining why ``answer`` was wrong.
+
+        Returns the LLM response; when it is a string, a leading
+        ``label:`` prefix is removed first.
+        """
+        messages = self._get_question_tips(question, answer, correct_answer, context)
+
+        # Total prompt size, counted over every message that has a content field.
+        token_count = sum(
+            count_tokens(message["content"])['n_tokens']
+            for message in messages
+            if "content" in message
+        )
+
+        tips = await self._llm.prediction(
+            GPTModels.GPT_3_5_TURBO,
+            messages,
+            None,
+            TemperatureSettings.TIPS_TEMPERATURE,
+            token_count=token_count
+        )
+
+        if not isinstance(tips, str):
+            return tips
+        return re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", tips)
+
+    @staticmethod
+    def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
+        """Build the chat messages for a tip request.
+
+        The context message is only included when ``context`` is neither
+        ``None`` nor the empty string.
+        """
+        def as_user(content):
+            return {"role": "user", "content": content}
+
+        messages = [
+            as_user(
+                "You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
+                "help students understand why it was a wrong answer and gives helpful insight for the future. "
+                "The tip should refer to the context and question."
+            )
+        ]
+
+        if context is not None and context != "":
+            messages.append(as_user(f"This is the context for the question: {context}"))
+
+        messages.append(as_user(f"This is the question: {question}"))
+        messages.append(as_user(f"This is the answer: {answer}"))
+        messages.append(as_user(f"This is the correct answer: {correct_answer}"))
+
+        return messages
+
+    async def get_training_content(self, training_content: Dict) -> Dict:
+        """Generate a training document from a user's stats and store it.
+
+        Reads ``userID`` and ``stats`` from ``training_content``, has the LLM
+        summarise the exams and pick useful tips, saves the result in the
+        ``training`` collection and returns ``{"id": <saved id>}``.
+        """
+        user = training_content["userID"]
+        stats = training_content["stats"]
+
+        exam_data, exam_map = await self._sort_out_solutions(stats)
+        generated = await self._get_exam_details_and_tips(exam_data)
+        tips = self._query_kb(generated.queries)
+        usefull_tips = await self._get_usefull_tips(exam_data, tips)
+        exam_map = self._merge_exam_map_with_details(exam_map, generated.details)
+
+        weak_areas = [area.dict() for area in generated.weak_areas]
+
+        training_doc = {
+            'created_at': int(datetime.now().timestamp() * 1000),
+            **exam_map,
+            **usefull_tips.dict(),
+            "weak_areas": weak_areas,
+            "user": user
+        }
+        new_id = await self._db.save_to_db('training', training_doc)
+
+        return {"id": new_id}
+
+    @staticmethod
+    def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
+        """Combine each detail with the ``exam_map`` entry stored under its ``exam_id``.
+
+        Returns ``{"exams": [...]}`` with one entry per detail, in order.
+        """
+        return {
+            "exams": [
+                {
+                    "id": detail.exam_id,
+                    "date": detail.date,
+                    "performance_comment": detail.performance_comment,
+                    "detailed_summary": detail.detailed_summary,
+                    **exam_map[detail.exam_id]
+                }
+                for detail in details
+            ]
+        }
+
+    def _query_kb(self, queries: List[QueryDTO]):
+        """Run every query against the knowledge base and gather the tips.
+
+        The ``words`` category is searched in both ``word_link`` and
+        ``word_partners``; other known categories map to a single knowledge
+        base category. Unknown categories are logged and skipped.
+        """
+        kb_category_for = {
+            "critical_thinking": "ct_focus",
+            "language_for_writing": "language_for_writing",
+            "reading_skills": "reading_skill",
+            "strategy": "strategy",
+            "writing_skills": "writing_skill"
+        }
+
+        collected = []
+        for query in queries:
+            if query.category == "words":
+                targets = ("word_link", "word_partners")
+            elif query.category in kb_category_for:
+                targets = (kb_category_for[query.category],)
+            else:
+                self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
+                continue
+
+            for target in targets:
+                collected.extend(self._kb.query_knowledge_base(query.text, target))
+        return {"tips": collected}
+
+    async def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
+        """Have the LLM fill the details / weak areas / queries schema for ``exam_data``.
+
+        The response is mapped with ``_map_gpt_response``.
+        """
+        json_schema = (
+            '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
+            ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
+        )
+        categories = ", ".join(self.TOOLS)
+        instructions = (
+            f"I'm going to provide you with exam data, you will take the exam data and fill this json "
+            f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
+            'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
+            'summary of the student\'s performance, "weak_areas" are identified areas'
+            ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
+            ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
+            'for tips that will be displayed to the student, the category attribute is a collection of '
+            'embeddings and the text will be the text used to query the knowledge base. The categories are '
+            f'the following [{categories}]. The exam data will be a json where the key of the field '
+            '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
+            ' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
+            'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
+            'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
+            'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
+            'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
+            'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
+            'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
+            'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
+        )
+        messages = [
+            {"role": "user", "content": instructions},
+            {"role": "user", "content": f'Exam Data: {str(exam_data)}'}
+        ]
+        return await self._llm.pydantic_prediction(messages, self._map_gpt_response, json_schema)
+
+    async def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
+        """Ask the LLM which of ``tips`` are useful for ``exam_data``.
+
+        The response is mapped to a ``TipsDTO``.
+        """
+        json_schema = '{ "tip_ids": [] }'
+
+        def to_tips_dto(response):
+            return TipsDTO(**response)
+
+        request = (
+            f"I'm going to provide you with tips and I want you to return to me the tips that "
+            f"can be usefull for the student that made the exam that I'm going to send you, return "
+            f"me the tip ids in this json format {json_schema}."
+        )
+        messages = [
+            {"role": "user", "content": request},
+            {"role": "user", "content": f'Exam Data: {str(exam_data)}'},
+            {"role": "user", "content": f'Tips: {str(tips)}'}
+        ]
+        return await self._llm.pydantic_prediction(messages, to_tips_dto, json_schema)
+
+    @staticmethod
+    def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
+        """Turn the raw LLM payload into a ``TrainingContentDTO``.
+
+        Raises ``KeyError`` when ``details``, ``weak_areas`` or ``queries`` is missing.
+        """
+        details = [DetailsDTO(**item) for item in response["details"]]
+        weak_areas = [WeakAreaDTO(**item) for item in response["weak_areas"]]
+        queries = [QueryDTO(**item) for item in response["queries"]]
+        return TrainingContentDTO(details=details, weak_areas=weak_areas, queries=queries)
+
+    async def _sort_out_solutions(self, stats):
+        """Group stats by session and module and collect the exercises of each exam.
+
+        A session is identified by ``"<date>-<user>"``. For every module of a
+        session the exam document is fetched and the module-specific helper
+        extracts the exercises together with the student's answers.
+
+        Returns:
+            A tuple ``({"exams": exercises}, exam_map)`` where ``exercises`` is
+            nested as session -> module -> exam id -> ``{"date", "exercises"}``
+            and ``exam_map`` maps each session to its ``stat_ids``, ``score``
+            (percentage) and ``module``.
+        """
+        # Pass 1: bucket the stats per session and module.
+        grouped_stats = {}
+        for stat in stats:
+            session_key = f'{str(stat["date"])}-{stat["user"]}'
+            module = stat["module"]
+            exam_id = stat["exam"]
+
+            session = grouped_stats.setdefault(session_key, {})
+            bucket = session.setdefault(module, {"stats": [], "exam_id": exam_id})
+            bucket["stats"].append(stat)
+
+        solution_extractors = {
+            "listening": self._get_listening_solutions,
+            "reading": self._get_reading_solutions,
+            "writing": self._get_writing_prompts_and_answers,
+            "speaking": self._get_speaking_solutions,
+            "level": self._get_level_solutions,
+        }
+
+        # Pass 2: resolve the exercises and compute the scores.
+        exercises = {}
+        exam_map = {}
+        for session_key, modules in grouped_stats.items():
+            exercises[session_key] = {}
+            for module, module_stats in modules.items():
+                exam_id = module_stats["exam_id"]
+                exam_entry = {"date": None, "exercises": []}
+                exercises[session_key][module] = {exam_id: exam_entry}
+
+                # Every stat of this bucket refers to the same exam document,
+                # so fetch it once instead of once per stat.
+                exam = await self._db.get_doc_by_id(module, exam_id)
+                extractor = solution_extractors.get(module)
+
+                exam_total_questions = 0
+                exam_total_correct = 0
+
+                for stat in module_stats["stats"]:
+                    exam_total_questions += stat["score"]["total"]
+                    exam_total_correct += stat["score"]["correct"]
+                    exam_entry["date"] = stat["date"]
+
+                    session_summary = exam_map.setdefault(session_key, {"stat_ids": [], "score": 0})
+                    session_summary["stat_ids"].append(stat["id"])
+
+                    if extractor is not None:
+                        exam_entry["exercises"].extend(extractor(stat, exam))
+
+                # A module without any question scores 0 instead of dividing by zero.
+                # NOTE: score and module are overwritten per module, so a
+                # multi-module session keeps the values of its last module.
+                exam_map[session_key]["score"] = (
+                    round((exam_total_correct / exam_total_questions) * 100)
+                    if exam_total_questions else 0
+                )
+                exam_map[session_key]["module"] = module
+
+        return {"exams": exercises}, exam_map
+
+    def _get_writing_prompts_and_answers(self, stat, exam):
+        """Pair each submitted writing answer with the prompt of its exam exercise.
+
+        A ``KeyError`` is logged; whatever was matched before it is returned.
+        """
+        matched = []
+        try:
+            # Read all submissions first, then match them against the exam.
+            submissions = [
+                (solution['solution'], solution['id'])
+                for solution in stat['solutions']
+            ]
+            for answer, exercise_id in submissions:
+                for exam_exercise in exam["exercises"]:
+                    if exam_exercise["id"] != exercise_id:
+                        continue
+                    matched.append({
+                        "exercise": exam_exercise["prompt"],
+                        "answer": answer
+                    })
+
+        except KeyError as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+
+        return matched
+
+    @staticmethod
+    def _get_mc_question(exercise, stat):
+        """Describe a multiple-choice exercise together with the student's answers.
+
+        When the stat carries ``shuffleMaps``, each chosen option is translated
+        through the map recorded for its question; answers to questions without
+        a map are kept as submitted.
+
+        Returns:
+            ``{"question": ..., "exercise": ..., "answer": ...}``.
+
+        Raises:
+            KeyError: if a required key is missing from ``exercise`` or ``stat``.
+        """
+        shuffle_maps = stat.get("shuffleMaps", [])
+        if not shuffle_maps:
+            answer = stat["solutions"]
+        else:
+            answer = []
+            for solution in stat["solutions"]:
+                # Use the map of this particular question rather than indexing
+                # the list of all matching maps by the chosen option.
+                question_map = next(
+                    (item["map"] for item in shuffle_maps if item["questionID"] == solution["question"]),
+                    None
+                )
+                option = solution["option"]
+                # presumably the map is keyed by the option as the student saw it; verify
+                if question_map is not None:
+                    option = question_map[option]
+                answer.append({
+                    "question": solution["question"],
+                    "option": option
+                })
+        return {
+            "question": exercise["prompt"],
+            "exercise": exercise["questions"],
+            # Return the translated answers that were computed above.
+            "answer": answer
+        }
+
+    @staticmethod
+    def _swap_key_name(d, original_key, new_key):
+        """Rename ``original_key`` to ``new_key`` in ``d`` in place and return ``d``."""
+        value = d.pop(original_key)
+        d[new_key] = value
+        return d
+
+    def _get_level_solutions(self, stat, exam):
+        """Collect the level-exam exercises matching ``stat`` with the student's answers.
+
+        For ``fillBlanks`` the ``solution`` key of every submitted item is
+        renamed to ``option`` in place. A ``KeyError`` is logged and the
+        entries gathered so far are returned.
+        """
+        collected = []
+        try:
+            for part in exam["parts"]:
+                for exercise in part["exercises"]:
+                    if exercise["id"] != stat["exercise"]:
+                        continue
+
+                    if stat["type"] == "fillBlanks":
+                        entry = {
+                            "prompt": exercise["prompt"],
+                            "template": exercise["text"],
+                            "words": exercise["words"],
+                            "solutions": exercise["solutions"],
+                        }
+                        # Renaming happens last, after all exercise fields were read.
+                        entry["answer"] = [
+                            self._swap_key_name(item, 'solution', 'option')
+                            for item in stat["solutions"]
+                        ]
+                        collected.append(entry)
+                    elif stat["type"] == "multipleChoice":
+                        collected.append(self._get_mc_question(exercise, stat))
+        except KeyError as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+        return collected
+
+    def _get_listening_solutions(self, stat, exam):
+        """Collect the listening exercises matching ``stat`` with the student's answers.
+
+        Handles ``writeBlanks``, ``fillBlanks`` and ``multipleChoice``. A
+        ``KeyError`` is logged and the entries gathered so far are returned.
+        """
+        # Output key -> exercise key, in output order, per exercise type.
+        field_layouts = {
+            "writeBlanks": (
+                ("question", "prompt"),
+                ("template", "text"),
+                ("solution", "solutions"),
+            ),
+            "fillBlanks": (
+                ("question", "prompt"),
+                ("template", "text"),
+                ("words", "words"),
+                ("solutions", "solutions"),
+            ),
+        }
+
+        collected = []
+        try:
+            for part in exam["parts"]:
+                for exercise in part["exercises"]:
+                    if exercise["id"] != stat["exercise"]:
+                        continue
+
+                    kind = stat["type"]
+                    if kind == "multipleChoice":
+                        collected.append(self._get_mc_question(exercise, stat))
+                    elif kind in field_layouts:
+                        entry = {}
+                        for out_key, source_key in field_layouts[kind]:
+                            entry[out_key] = exercise[source_key]
+                        entry["answer"] = stat["solutions"]
+                        collected.append(entry)
+
+        except KeyError as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+        return collected
+
+    @staticmethod
+    def _find_shuffle_map(shuffle_maps, question_id):
+        """Return the ``map`` of the first entry whose ``questionID`` matches, else ``None``."""
+        for item in shuffle_maps:
+            if item["questionID"] == question_id:
+                return item["map"]
+        return None
+
+    def _get_speaking_solutions(self, stat, exam):
+        """Collect evaluation comments, questions and transcripts for a speaking stat.
+
+        Returns a single-element list. The element is ``{}`` when the comments
+        could not be read; a ``KeyError`` at any point is logged and whatever
+        was built so far is returned.
+        """
+        summary = {}
+        try:
+            task_response = stat['solutions'][0]['evaluation']['task_response']
+            summary = {
+                "comments": {name: entry['comment'] for name, entry in task_response.items()},
+                "exercises": {}
+            }
+            answers = summary["exercises"]
+
+            for exercise in exam["exercises"]:
+                if exercise["id"] != stat["exercise"]:
+                    continue
+
+                if stat["type"] == "interactiveSpeaking":
+                    # First register every question, then attach the transcripts.
+                    for position, prompt in enumerate(exercise["prompts"], start=1):
+                        answers[f"exercise_{position}"] = {"question": prompt["text"]}
+                    for position in range(1, len(exercise["prompts"]) + 1):
+                        transcript = stat['solutions'][0]["evaluation"].get(f'transcript_{position}', '')
+                        answers[f"exercise_{position}"]["answer"] = transcript
+                elif stat["type"] == "speaking":
+                    answers["exercise_1"] = {
+                        "question": exercise["text"],
+                        "answer": stat['solutions'][0]["evaluation"].get('transcript', '')
+                    }
+        except KeyError as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+        return [summary]
+
+    def _get_reading_solutions(self, stat, exam):
+        """Collect the reading exercises matching ``stat`` with passage text and answers.
+
+        Handles ``fillBlanks``, ``writeBlanks``, ``trueFalse`` and
+        ``matchSentences``. A ``KeyError`` is logged and the entries gathered
+        so far are returned.
+        """
+        # Output key -> exercise key, in output order, per exercise type.
+        field_layouts = {
+            "fillBlanks": (
+                ("question", "prompt"),
+                ("template", "text"),
+                ("words", "words"),
+                ("solutions", "solutions"),
+            ),
+            "writeBlanks": (
+                ("question", "prompt"),
+                ("template", "text"),
+                ("solutions", "solutions"),
+            ),
+            "trueFalse": (
+                ("questions", "questions"),
+            ),
+            "matchSentences": (
+                ("question", "prompt"),
+                ("sentences", "sentences"),
+                ("options", "options"),
+            ),
+        }
+
+        collected = []
+        try:
+            for part in exam["parts"]:
+                passage = part["text"]
+                for exercise in part["exercises"]:
+                    if exercise["id"] != stat["exercise"]:
+                        continue
+
+                    layout = field_layouts.get(stat["type"])
+                    if layout is None:
+                        continue
+
+                    entry = {"text": passage}
+                    for out_key, source_key in layout:
+                        entry[out_key] = exercise[source_key]
+                    entry["answer"] = stat["solutions"]
+                    collected.append(entry)
+        except KeyError as e:
+            self._logger.warning(f"Malformed stat object: {str(e)}")
+        return collected
--- a/ielts_be/services/impl/user.py
+++ b/ielts_be/services/impl/user.py
@@ -0,0 +1,188 @@
+import asyncio
+import os
+import subprocess
+import time
+import uuid
+
+from datetime import datetime
+from logging import getLogger
+
+import pandas as pd
+
+import shortuuid
+
+from ielts_be.dtos.user_batch import BatchUsersDTO, UserDTO
+from ielts_be.helpers import FileHelper
+from ielts_be.repositories import IDocumentStore
+from ielts_be.services import IUserService
+
+
+class UserService(IUserService):
+    _DEFAULT_DESIRED_LEVELS = {
+        "reading": 9,
+        "listening": 9,
+        "writing": 9,
+        "speaking": 9,
+    }
+
+    _DEFAULT_LEVELS = {
+        "reading": 0,
+        "listening": 0,
+        "writing": 0,
+        "speaking": 0,
+    }
+
+    def __init__(self, document_store: IDocumentStore):
+        self._db = document_store
+        self._logger = getLogger(__name__)
+
+    def batch_users(self, batch_dto: BatchUsersDTO):
+        file_name = f'{uuid.uuid4()}.csv'
+        path = f'./tmp/{file_name}'
+        self._generate_firebase_auth_csv(batch_dto, path)
+
+        result = self._upload_users('./tmp', file_name)
+        if result.returncode != 0:
+            error_msg = f"Couldn't upload users. Failed to run command firebase auth import -> ```cmd {result.stdout}```"
+            self._logger.error(error_msg)
+            return error_msg
+
+        self._init_users(batch_dto)
+
+        FileHelper.remove_file(path)
+        return {"ok": True}
+
+    @staticmethod
+    def _generate_firebase_auth_csv(batch_dto: BatchUsersDTO, path: str):
+        # https://firebase.google.com/docs/cli/auth#file_format
+        columns = [
+            'UID', 'Email', 'Email Verified', 'Password Hash', 'Password Salt', 'Name',
+            'Photo URL', 'Google ID', 'Google Email', 'Google Display Name', 'Google Photo URL',
+            'Facebook ID', 'Facebook Email', 'Facebook Display Name', 'Facebook Photo URL',
+            'Twitter ID', 'Twitter Email', 'Twitter Display Name', 'Twitter Photo URL',
+            'GitHub ID', 'GitHub Email', 'GitHub Display Name', 'GitHub Photo URL',
+            'User Creation Time', 'Last Sign-In Time', 'Phone Number'
+        ]
+        users_data = []
+
+        current_time = int(time.time() * 1000)
+
+        for user in batch_dto.users:
+            user_data = {
+                'UID': str(user.id),
+                'Email': user.email,
+                'Email Verified': False,
+                'Password Hash': user.passwordHash,
+                'Password Salt': user.passwordSalt,
+                'Name': '',
+                'Photo URL': '',
+                'Google ID': '',
+                'Google Email': '',
+                'Google Display Name': '',
+                'Google Photo URL': '',
+                'Facebook ID': '',
+                'Facebook Email': '',
+                'Facebook Display Name': '',
+                'Facebook Photo URL': '',
+                'Twitter ID': '',
+                'Twitter Email': '',
+                'Twitter Display Name': '',
+                'Twitter Photo URL': '',
+                'GitHub ID': '',
+                'GitHub Email': '',
+                'GitHub Display Name': '',
+                'GitHub Photo URL': '',
+                'User Creation Time': current_time,
+                'Last Sign-In Time': '',
+                'Phone Number': ''
+            }
+            users_data.append(user_data)
+
+        df = pd.DataFrame(users_data, columns=columns)
+        df.to_csv(path, index=False, header=False)
+
+    @staticmethod
+    def _upload_users(directory: str, file_name: str):
+        command = (
+            f'firebase auth:import {file_name} '
+            f'--hash-algo=SCRYPT '
+            f'--hash-key={os.getenv("FIREBASE_SCRYPT_B64_SIGNER_KEY")} '
+            f'--salt-separator={os.getenv("FIREBASE_SCRYPT_B64_SALT_SEPARATOR")} '
+            f'--rounds={os.getenv("FIREBASE_SCRYPT_ROUNDS")} '
+            f'--mem-cost={os.getenv("FIREBASE_SCRYPT_MEM_COST")} '
+            f'--project={os.getenv("FIREBASE_PROJECT_ID")} '
+        )
+
+        result = subprocess.run(command, shell=True, cwd=directory, capture_output=True, text=True)
+        return result
+
+    async def _init_users(self, batch_users: BatchUsersDTO):
+        maker_id = batch_users.makerID
+        for user in batch_users.users:
+            await self._insert_new_user(user)
+            await self._create_code(user, maker_id)
+
+            if user.groupName and len(user.groupName.strip()) > 0:
+                await self._assign_user_to_group_by_name(user, maker_id)
+
+    async def _insert_new_user(self, user: UserDTO):
+        new_user = {
+            **user.dict(exclude={
+                'passport_id', 'groupName', 'expiryDate',
+                'corporate', 'passwordHash', 'passwordSalt'
+            }),
+            'bio': "",
+            'focus': "academic",
+            'status': "active",
+            'desiredLevels': self._DEFAULT_DESIRED_LEVELS,
+            'profilePicture': "/defaultAvatar.png",
+            'levels': self._DEFAULT_LEVELS,
+            'isFirstLogin': False,
+            'isVerified': True,
+            'registrationDate': datetime.now(),
+            'subscriptionExpirationDate': user.expiryDate,
+            'entities': user.entities
+        }
+        await self._db.save_to_db("users", new_user, str(user.id))
+
+    async def _create_code(self, user: UserDTO, maker_id: str) -> str:
+        code = shortuuid.ShortUUID().random(length=6)
+        await self._db.save_to_db("codes", {
+            'id': code,
+            'code': code,
+            'creator': maker_id,
+            'expiryDate': user.expiryDate,
+            'type': user.type,
+            'creationDate': datetime.now(),
+            'userId': str(user.id),
+            'email': user.email,
+            'name': user.name,
+            'passport_id': user.passport_id
+        }, code)
+        return code
+
+    async def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
+        user_id = str(user.id)
+
+        groups = await self._db.find("groups", {
+                "admin": maker_id,
+                "name": user.groupName.strip()
+        })
+
+        if len(groups) == 0:
+            new_group = {
+                'admin': maker_id,
+                'name': user.groupName.strip(),
+                'participants': [user_id],
+                'disableEditing': False,
+            }
+            await self._db.save_to_db("groups", new_group, str(uuid.uuid4()))
+        else:
+            group = groups[0]
+            participants = group["participants"]
+            if user_id not in participants:
+                participants.append(user_id)
+                await self._db.update(
+                    "groups",
+                    {"id": group["id"]},
+                    {"$set": {"participants": participants}}
+                )