78 Commits

Author SHA1 Message Date
Tiago Ribeiro
a88d6bb568 Updated the code to create the Students/Teachers group if it does not exist yet 2024-09-05 10:56:58 +01:00
carlos.mesquita
f0f904f2e4 Merged in feature/level-file-upload (pull request #20)
Feature/level file upload

Approved-by: Tiago Ribeiro
2024-09-04 16:14:20 +00:00
Carlos Mesquita
a23bbe581a Merge branch 'feature/level-file-upload' of https://bitbucket.org/ecropdev/ielts-be into feature/level-file-upload 2024-09-04 17:10:16 +01:00
Carlos Mesquita
bb26282d25 Forgot to change this, should not affect, but still 2024-09-04 17:09:51 +01:00
carlos.mesquita
73c29cda25 Merged master into feature/level-file-upload 2024-09-04 16:07:48 +00:00
carlos.mesquita
aaa3361575 Merged master into feature/level-file-upload 2024-09-04 16:01:12 +00:00
Carlos Mesquita
94a16b636d Merge branch 'feature/level-file-upload' of https://bitbucket.org/ecropdev/ielts-be into feature/level-file-upload 2024-09-04 17:00:03 +01:00
Carlos Mesquita
cffec795a7 Swapped .env vars 2024-09-04 16:59:47 +01:00
carlos.mesquita
2716f52a0a Merged develop into feature/level-file-upload 2024-09-04 10:57:11 +00:00
Carlos Mesquita
4099d99f80 Merge branch 'feature/level-file-upload' of https://bitbucket.org/ecropdev/ielts-be into feature/level-file-upload 2024-09-04 11:56:18 +01:00
Carlos Mesquita
ab4db36445 Switched cli token to GOOGLE_APPLICATION_CREDENTIALS 2024-09-04 11:55:58 +01:00
Tiago Ribeiro
59f047afba Merge branch 'develop' 2024-09-03 22:12:23 +01:00
carlos.mesquita
09b57cb346 Merged in feature/level-file-upload (pull request #17)
Upload batches of users onto firebase

Approved-by: Tiago Ribeiro
2024-09-03 20:43:40 +00:00
carlos.mesquita
bfc3e3f083 Merged develop into feature/level-file-upload 2024-09-03 19:27:52 +00:00
Carlos Mesquita
7b5e10fd79 Upload batches of users onto firebase 2024-09-03 20:09:19 +01:00
Tiago Ribeiro
a2a160f61b Merged in develop (pull request #16)
Develop
2024-09-02 13:12:04 +00:00
carlos.mesquita
5d5cd21e1e Merged in feature/level-file-upload (pull request #15)
ENCOA-94: Added user to training content docs, added support for shuffles, tweaked training prompt

Approved-by: Tiago Ribeiro
2024-08-27 21:43:26 +00:00
Carlos Mesquita
06a8384f42 Forgot to remove comment, already tested it in a container 2024-08-26 20:15:03 +01:00
Carlos Mesquita
dd74a3d259 Removed unused LaTeX packages, texlive already includes the needed packages for level upload 2024-08-26 20:14:22 +01:00
Carlos Mesquita
efff0b904e ENCOA-94: Added user to training content docs, added support for shuffles, tweaked training prompt 2024-08-26 18:14:57 +01:00
carlos.mesquita
cf7a966141 Merged in feature/training-content (pull request #14)
Feature/training content
2024-08-19 15:57:09 +00:00
Carlos Mesquita
03f5b7d72c Upload level exam without hooking up to firestore and running in thread, will do this when I have the edit view done 2024-08-17 09:29:58 +01:00
Cristiano Ferreira
d68617f33b Add regular ielts modules to custom level. 2024-08-15 13:58:07 +01:00
Carlos Mesquita
eeaa04f856 Added support for speaking exercises in training content 2024-08-07 10:19:56 +01:00
Cristiano Ferreira
beccf8b501 Change model on speaking 2 grading to 4o. 2024-08-06 20:28:56 +01:00
Cristiano Ferreira
470f4cc83b Minor speaking improvements. 2024-08-05 21:57:42 +01:00
Carlos Mesquita
3ad411ed71 Forgot to remove some debugging lines 2024-08-05 21:47:17 +01:00
Carlos Mesquita
7144a3f3ca Now supports one exam with multiple exercises, and level exercises 2024-08-05 21:41:49 +01:00
carlos.mesquita
b795a3fb79 Merged in feature/training-content (pull request #13)
Feature/training content

Approved-by: Tiago Ribeiro
2024-08-03 09:49:22 +00:00
Carlos Mesquita
034be25e8e Added created_at and score to training docs 2024-08-01 20:49:22 +01:00
Carlos Mesquita
a931f06c47 Forgot to add __name__ in getLogger(); not sure whether grabbing the root logger is harmless, so added __name__ just to be safe 2024-07-31 15:03:00 +01:00
Carlos Mesquita
8e56a3228b Finished training content backend 2024-07-31 14:56:33 +01:00
Cristiano Ferreira
14c5914420 Add default text size blank space custom level. 2024-07-30 22:40:26 +01:00
Tiago Ribeiro
6878e0a276 Added the ability to send the ID for the listening 2024-07-30 22:34:31 +01:00
Cristiano Ferreira
1f29ac6ee5 Fix id on custom level. 2024-07-30 19:53:17 +01:00
Cristiano Ferreira
a1ee7e47da Can now generate lots of mc in level custom. 2024-07-28 14:33:08 +01:00
Cristiano Ferreira
adfc027458 Add excerpts to reading 3. 2024-07-26 23:46:46 +01:00
Cristiano Ferreira
3a7bb7764f Writing improvements. 2024-07-26 23:33:42 +01:00
Cristiano Ferreira
19f204d74d Add default for topic on custom level and random reorder for multiple choice options. 2024-07-26 15:59:11 +01:00
carlos.mesquita
88ba9ab561 Merged in feature/ai-detection (pull request #12)
Feature/ai detection

Approved-by: Tiago Ribeiro
2024-07-25 21:02:57 +00:00
Carlos Mesquita
34afb5d1e8 Logging when GPT's Zero response != 200 2024-07-25 17:11:14 +01:00
Carlos Mesquita
eb904f836a Forgot to change the .env 2024-07-25 17:01:09 +01:00
Carlos Mesquita
ca12ad1161 Used main as the base branch last time 2024-07-25 16:55:42 +01:00
Cristiano Ferreira
8b8460517c Merged in level-utas-custom-tests (pull request #11)
Add endpoint for custom level exams.
2024-07-24 19:00:13 +00:00
Cristiano Ferreira
9be9bfce0e Add endpoint for custom level exams. 2024-07-24 19:58:53 +01:00
Cristiano Ferreira
4776f24229 Fix speaking grading overall. 2024-07-23 13:22:52 +01:00
Cristiano Ferreira
bf9251eebb Fix array index out of bounds. 2024-07-22 15:29:01 +01:00
Cristiano Ferreira
1ecda04c6b Fix array index out of bounds. 2024-07-22 14:54:01 +01:00
Cristiano Ferreira
d5621c1793 Added new ideaMatch exercise type. 2024-07-18 23:22:23 +01:00
Cristiano Ferreira
4c41942dfe Added new ideaMatch exercise type. 2024-07-18 23:21:24 +01:00
Cristiano Ferreira
bef606fe14 Added new ideaMatch exercise type. 2024-07-18 23:20:06 +01:00
Cristiano Ferreira
358f240d16 Update reading fill the blanks. 2024-07-18 19:07:38 +01:00
Cristiano Ferreira
e7d84b9704 Fix paragraph match bug. 2024-07-16 23:38:35 +01:00
Cristiano Ferreira
b4dc6be927 Add comment to grading of writing. 2024-07-16 21:35:36 +01:00
Cristiano Ferreira
afca610c09 Fix level test generation. 2024-07-15 18:21:06 +01:00
Tiago Ribeiro
495502bc93 Merge branch 'develop' of bitbucket.org:ecropdev/ielts-be into develop 2024-07-09 12:11:46 +01:00
Cristiano Ferreira
565874ad41 Minor improvements to speaking. 2024-06-28 18:33:42 +01:00
Cristiano Ferreira
e693f5ee2a Make speaking 1 questions simple. 2024-06-27 22:48:42 +01:00
Cristiano Ferreira
a8b46160d4 Minor fixes to speaking. 2024-06-27 22:31:57 +01:00
Cristiano Ferreira
640039d372 Merged in listening-revamp (pull request #10)
Listening revamp
2024-06-27 21:13:29 +00:00
Cristiano Ferreira
a3cd1cdf59 Listening part 3 and 4. 2024-06-27 22:03:59 +01:00
Cristiano Ferreira
9a696bbeb5 Listening part 2. 2024-06-27 21:29:22 +01:00
Cristiano Ferreira
2adb7d1847 Listening part 1. 2024-06-25 20:49:27 +01:00
Cristiano Ferreira
b93ead3a7b Update speaking generation endpoints. 2024-06-25 20:47:49 +01:00
Cristiano Ferreira
ad3a32ce45 Merged in speaking-improvements (pull request #9)
Speaking improvements
2024-06-17 13:06:15 +00:00
Cristiano Ferreira
ee5f23b3d7 Update speaking 3 to have 5 questions. 2024-06-17 14:03:21 +01:00
Cristiano Ferreira
545aee1a19 Improve prompts and add suffix to speaking 2. 2024-06-17 14:03:21 +01:00
Cristiano Ferreira
3f749f1ff5 Update speaking 1 to be like interactive with 5 questions and 2 topics. 2024-06-17 14:03:21 +01:00
Cristiano Ferreira
32ac2149f5 Improve comments for each criteria in speaking grading. 2024-06-17 14:03:21 +01:00
Cristiano Ferreira
64cc207fe8 Add comment for each criteria in speaking grading. 2024-06-17 14:03:21 +01:00
Cristiano Ferreira
a4caecdb4f Merged in utas-stuff (pull request #8)
Utas stuff
2024-06-13 17:32:48 +00:00
Cristiano Ferreira
20dfd5be78 Add exercises for utas level. 2024-06-13 18:30:58 +01:00
Cristiano Ferreira
1d110d5fa9 Add exercises for utas level. 2024-06-13 18:24:42 +01:00
Cristiano Ferreira
7633822916 Add exercises for utas level. 2024-06-12 23:10:55 +01:00
Cristiano Ferreira
9bc06d8340 Start on level exam for utas. 2024-06-11 22:07:09 +01:00
Cristiano Ferreira
4ff3b02a1d Double check for english words in writing grading. 2024-06-11 21:49:27 +01:00
Cristiano Ferreira
7637322239 Double check for english words in writing grading. 2024-06-11 21:45:56 +01:00
Cristiano Ferreira
3676d7ad39 Fix check for blacklisted on free form answers. 2024-06-10 19:39:08 +01:00
43 changed files with 3860 additions and 538 deletions

@@ -5,3 +5,4 @@ README.md
 *.pyd
 __pycache__
 .pytest_cache
+/scripts

.env (7 changed lines)

@@ -3,3 +3,10 @@ JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
 GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
 HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
+GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
+FIREBASE_SCRYPT_B64_SIGNER_KEY="vbO3Xii2lajSeSkCstq3s/dCwpXP7J2YN9rP/KRreU2vGOT1fg+wzSuy1kIhBECqJHG82tmwAilSxLFFtNKVMA=="
+FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
+FIREBASE_SCRYPT_ROUNDS=8
+FIREBASE_SCRYPT_MEM_COST=14
+FIREBASE_PROJECT_ID=storied-phalanx-349916

.gitignore (vendored; 2 changed lines)

@@ -2,3 +2,5 @@ __pycache__
 .idea
 .env
 .DS_Store
+/firebase-configs/test_firebase.json
+/scripts

.idea/.gitignore (generated, vendored; 8 changed lines)

@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml

.idea/ielts-be.iml (generated; 17 changed lines)

@@ -1,24 +1,17 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
-  <component name="Flask">
-    <option name="enabled" value="true" />
-  </component>
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
-      <excludeFolder url="file://$MODULE_DIR$/venv" />
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.11 (ielts-be)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PackageRequirementsSettings">
     <option name="versionSpecifier" value="Don't specify version" />
   </component>
-  <component name="TemplatesService">
-    <option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
-    <option name="TEMPLATE_FOLDERS">
-      <list>
-        <option value="$MODULE_DIR$/../flaskProject\templates" />
-      </list>
-    </option>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="GOOGLE" />
+    <option name="myDocStringFormat" value="Google" />
   </component>
 </module>

.idea/misc.xml (generated; 8 changed lines)

@@ -1,4 +1,10 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+  <component name="Black">
+    <option name="sdkName" value="Python 3.11 (ielts-be)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (ielts-be)" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
 </project>

.idea/vcs.xml (generated; 2 changed lines)

@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+    <mapping directory="" vcs="Git" />
   </component>
 </project>

@@ -6,12 +6,30 @@ FROM python:3.11-slim

 # Allow statements and log messages to immediately appear in the logs
 ENV PYTHONUNBUFFERED True
+ENV GOOGLE_APPLICATION_CREDENTIALS=/app/firebase-configs/storied-phalanx-349916.json

 # Copy local code to the container image.
 ENV APP_HOME /app
 WORKDIR $APP_HOME
 COPY . ./

-RUN apt update && apt install -y ffmpeg
+RUN apt update && apt install -y \
+    ffmpeg \
+    poppler-utils \
+    texlive-latex-base \
+    texlive-fonts-recommended \
+    texlive-latex-extra \
+    texlive-xetex \
+    pandoc \
+    librsvg2-bin \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN curl -sL https://deb.nodesource.com/setup_20.x | bash - \
+    && apt-get install -y nodejs
+RUN npm install -g firebase-tools

 # Install production dependencies.
 RUN pip install --no-cache-dir -r requirements.txt

app.py (1441 changed lines)

File diff suppressed because it is too large

BIN faiss/tips_metadata.pkl (new file): binary file not shown
(7 other changed binary files not shown)

@@ -18,7 +18,13 @@ GEN_FIELDS = ['topic']
 GEN_TEXT_FIELDS = ['title']
 LISTENING_GEN_FIELDS = ['transcript', 'exercise']
 READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
+READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
 LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
+LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
+                              'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
+LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions']
+LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions']
+LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
 TOTAL_READING_PASSAGE_1_EXERCISES = 13
 TOTAL_READING_PASSAGE_2_EXERCISES = 13
@@ -35,7 +41,7 @@ SPEAKING_MIN_TIMER_DEFAULT = 14
 BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
                      "cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
-                     "discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
+                     "discrimination", "politic", "christianity", "islam", "christian", "christians",
                      "jews", "jew", "discrimination", "discriminatory"]
 EN_US_VOICES = [
@@ -141,7 +147,6 @@ mti_topics = [
     "Poverty Alleviation",
     "Cybersecurity and Privacy",
     "Human Rights",
-    "Social Justice",
     "Food and Agriculture",
     "Cyberbullying and Online Safety",
     "Linguistic Diversity",
@@ -169,7 +174,6 @@ topics = [
     "Space Exploration",
     "Artificial Intelligence",
     "Climate Change",
-    "World Religions",
     "The Human Brain",
     "Renewable Energy",
     "Cultural Diversity",
@@ -232,7 +236,6 @@ topics = [
     "Meditation Practices",
     "Literary Symbolism",
     "Marine Conservation",
-    "Social Justice Movements",
     "Sustainable Tourism",
     "Ancient Philosophy",
     "Cold War Era",

File diff suppressed because it is too large

helper/gpt_zero.py (new file, 50 lines)

@@ -0,0 +1,50 @@
from logging import getLogger
from typing import Dict, Optional
import requests
class GPTZero:
_GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
def __init__(self, gpt_zero_key: str):
self._logger = getLogger(__name__)
if gpt_zero_key is None:
self._logger.warning('GPT Zero key was not included! Skipping ai detection when grading.')
self._gpt_zero_key = gpt_zero_key
self._header = {
'x-api-key': gpt_zero_key
}
def run_detection(self, text: str):
if self._gpt_zero_key is None:
return None
data = {
'document': text,
'version': '',
'multilingual': False
}
response = requests.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=data)
if response.status_code != 200:
self._logger.error(f'GPT\'s Zero Endpoint returned with {response.status_code}: {response.json()}')
return None
return self._parse_detection(response.json())
def _parse_detection(self, response: Dict) -> Optional[Dict]:
try:
text_scan = response["documents"][0]
filtered_sentences = [
{
"sentence": item["sentence"],
"highlight_sentence_for_ai": item["highlight_sentence_for_ai"]
}
for item in text_scan["sentences"]
]
return {
"class_probabilities": text_scan["class_probabilities"],
"confidence_category": text_scan["confidence_category"],
"predicted_class": text_scan["predicted_class"],
"sentences": filtered_sentences
}
except Exception as e:
self._logger.error(f'Failed to parse GPT\'s Zero response: {str(e)}')
return None
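The response-parsing step in `GPTZero._parse_detection` above can be exercised offline. The sketch below mirrors that logic as a standalone function; the stubbed payload contains only the fields the parser reads and is an assumed shape, not the full GPTZero response schema.

```python
def parse_detection(response: dict):
    # Mirrors GPTZero._parse_detection: keep only the fields the app consumes,
    # returning None when the payload does not have the expected shape.
    try:
        text_scan = response["documents"][0]
        sentences = [
            {
                "sentence": item["sentence"],
                "highlight_sentence_for_ai": item["highlight_sentence_for_ai"],
            }
            for item in text_scan["sentences"]
        ]
        return {
            "class_probabilities": text_scan["class_probabilities"],
            "confidence_category": text_scan["confidence_category"],
            "predicted_class": text_scan["predicted_class"],
            "sentences": sentences,
        }
    except (KeyError, IndexError, TypeError):
        return None


# Stubbed payload with just the fields the parser reads (assumed shape)
stub = {
    "documents": [{
        "class_probabilities": {"ai": 0.91, "human": 0.09},
        "confidence_category": "high",
        "predicted_class": "ai",
        "sentences": [
            {"sentence": "Example.", "highlight_sentence_for_ai": True},
        ],
    }]
}

result = parse_detection(stub)
print(result["predicted_class"])  # ai
```

A malformed payload (for example, an empty dict) falls through to the `except` branch and yields `None`, matching the class's error path.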

@@ -1,17 +1,19 @@
 import os
 import random
 import time
+from logging import getLogger

 import requests
 from dotenv import load_dotenv

-import app
 from helper.constants import *
 from helper.firebase_helper import upload_file_firebase_get_url, save_to_db_with_id
 from heygen.AvatarEnum import AvatarEnum

 load_dotenv()
+logger = getLogger(__name__)

 # Get HeyGen token
 TOKEN = os.getenv("HEY_GEN_TOKEN")
 FIREBASE_BUCKET = os.getenv('FIREBASE_BUCKET')
@@ -29,26 +31,32 @@ GET_HEADER = {
 def create_videos_and_save_to_db(exercises, template, id):
+    avatar = random.choice(list(AvatarEnum))
     # Speaking 1
     # Using list comprehension to find the element with the desired value in the 'type' field
     found_exercises_1 = [element for element in exercises if element.get('type') == 1]

     # Check if any elements were found
     if found_exercises_1:
         exercise_1 = found_exercises_1[0]
-        app.app.logger.info('Creating video for speaking part 1')
-        sp1_result = create_video(exercise_1["question"], random.choice(list(AvatarEnum)))
-        if sp1_result is not None:
-            sound_file_path = VIDEO_FILES_PATH + sp1_result
-            firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
-            url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
-            sp1_video_path = firebase_file_path
-            sp1_video_url = url
-            template["exercises"][0]["text"] = exercise_1["question"]
-            template["exercises"][0]["title"] = exercise_1["topic"]
-            template["exercises"][0]["video_url"] = sp1_video_url
-            template["exercises"][0]["video_path"] = sp1_video_path
-        else:
-            app.app.logger.error("Failed to create video for part 1 question: " + exercise_1["question"])
+        sp1_questions = []
+        logger.info('Creating video for speaking part 1')
+        for question in exercise_1["questions"]:
+            sp1_result = create_video(question, avatar)
+            if sp1_result is not None:
+                sound_file_path = VIDEO_FILES_PATH + sp1_result
+                firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
+                url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
+                video = {
+                    "text": question,
+                    "video_path": firebase_file_path,
+                    "video_url": url
+                }
+                sp1_questions.append(video)
+            else:
+                logger.error("Failed to create video for part 1 question: " + exercise_1["question"])
+        template["exercises"][0]["prompts"] = sp1_questions
+        template["exercises"][0]["first_title"] = exercise_1["first_topic"]
+        template["exercises"][0]["second_title"] = exercise_1["second_topic"]

     # Speaking 2
     # Using list comprehension to find the element with the desired value in the 'type' field
@@ -56,8 +64,8 @@ def create_videos_and_save_to_db(exercises, template, id):
     # Check if any elements were found
     if found_exercises_2:
         exercise_2 = found_exercises_2[0]
-        app.app.logger.info('Creating video for speaking part 2')
-        sp2_result = create_video(exercise_2["question"], random.choice(list(AvatarEnum)))
+        logger.info('Creating video for speaking part 2')
+        sp2_result = create_video(exercise_2["question"], avatar)
         if sp2_result is not None:
             sound_file_path = VIDEO_FILES_PATH + sp2_result
             firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp2_result
@@ -70,7 +78,7 @@ def create_videos_and_save_to_db(exercises, template, id):
             template["exercises"][1]["video_url"] = sp2_video_url
             template["exercises"][1]["video_path"] = sp2_video_path
         else:
-            app.app.logger.error("Failed to create video for part 2 question: " + exercise_2["question"])
+            logger.error("Failed to create video for part 2 question: " + exercise_2["question"])

     # Speaking 3
     # Using list comprehension to find the element with the desired value in the 'type' field
@@ -79,8 +87,7 @@ def create_videos_and_save_to_db(exercises, template, id):
     if found_exercises_3:
         exercise_3 = found_exercises_3[0]
         sp3_questions = []
-        avatar = random.choice(list(AvatarEnum))
-        app.app.logger.info('Creating videos for speaking part 3')
+        logger.info('Creating videos for speaking part 3')
         for question in exercise_3["questions"]:
             result = create_video(question, avatar)
             if result is not None:
@@ -94,7 +101,7 @@ def create_videos_and_save_to_db(exercises, template, id):
                 }
                 sp3_questions.append(video)
             else:
-                app.app.logger.error("Failed to create video for part 3 question: " + question)
+                logger.error("Failed to create video for part 3 question: " + question)

         template["exercises"][2]["prompts"] = sp3_questions
         template["exercises"][2]["title"] = exercise_3["topic"]
@@ -106,7 +113,7 @@ def create_videos_and_save_to_db(exercises, template, id):
         template["exercises"].pop(0)

     save_to_db_with_id("speaking", template, id)
-    app.app.logger.info('Saved speaking to DB with id ' + id + " : " + str(template))
+    logger.info('Saved speaking to DB with id ' + id + " : " + str(template))


 def create_video(text, avatar):
@@ -127,8 +134,8 @@ def create_video(text, avatar):
         }
     }
     response = requests.post(create_video_url, headers=POST_HEADER, json=data)
-    app.app.logger.info(response.status_code)
-    app.app.logger.info(response.json())
+    logger.info(response.status_code)
+    logger.info(response.json())

     # GET TO CHECK STATUS AND GET VIDEO WHEN READY
     video_id = response.json()["data"]["video_id"]
@@ -147,11 +154,11 @@ def create_video(text, avatar):
         error = response_data["data"]["error"]

         if status != "completed" and error is None:
-            app.app.logger.info(f"Status: {status}")
+            logger.info(f"Status: {status}")
            time.sleep(10)  # Wait for 10 second before the next request

-    app.app.logger.info(response.status_code)
-    app.app.logger.info(response.json())
+    logger.info(response.status_code)
+    logger.info(response.json())

     # DOWNLOAD VIDEO
     download_url = response.json()['data']['video_url']
@@ -165,8 +172,8 @@ def create_video(text, avatar):
         output_path = os.path.join(output_directory, output_filename)
         with open(output_path, 'wb') as f:
             f.write(response.content)
-        app.app.logger.info(f"File '{output_filename}' downloaded successfully.")
+        logger.info(f"File '{output_filename}' downloaded successfully.")
         return output_filename
     else:
-        app.app.logger.error(f"Failed to download file. Status code: {response.status_code}")
+        logger.error(f"Failed to download file. Status code: {response.status_code}")
         return None

@@ -2,8 +2,8 @@ import json
 import os
 import re

-from openai import OpenAI
 from dotenv import load_dotenv
+from openai import OpenAI

 from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
 from helper.token_counter import count_tokens
@@ -54,7 +54,7 @@ def check_fields(obj, fields):
     return all(field in obj for field in fields)


-def make_openai_call(model, messages, token_count, fields_to_check, temperature):
+def make_openai_call(model, messages, token_count, fields_to_check, temperature, check_blacklisted=True):
     global try_count
     result = client.chat.completions.create(
         model=model,
@@ -65,15 +65,16 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
     )
     result = result.choices[0].message.content

-    found_blacklisted_word = get_found_blacklisted_words(result)
-    if found_blacklisted_word is not None and try_count < TRY_LIMIT:
-        from app import app
-        app.logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
-        try_count = try_count + 1
-        return make_openai_call(model, messages, token_count, fields_to_check, temperature)
-    elif found_blacklisted_word is not None and try_count >= TRY_LIMIT:
-        return ""
+    if check_blacklisted:
+        found_blacklisted_word = get_found_blacklisted_words(result)
+        if found_blacklisted_word is not None and try_count < TRY_LIMIT:
+            from app import app
+            app.logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
+            try_count = try_count + 1
+            return make_openai_call(model, messages, token_count, fields_to_check, temperature)
+        elif found_blacklisted_word is not None and try_count >= TRY_LIMIT:
+            return ""

     if fields_to_check is None:
         return json.loads(result)
@@ -188,7 +189,7 @@ def get_fixed_text(text):
         }
     ]
     token_count = count_total_tokens(messages)
-    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
+    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2, False)
     return response["fixed_text"]
@@ -203,7 +204,7 @@ def get_speaking_corrections(text):
         }
     ]
     token_count = count_total_tokens(messages)
-    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
+    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2, False)
     return response["fixed_text"]
@@ -211,6 +212,7 @@ def has_blacklisted_words(text: str):
     text_lower = text.lower()
     return any(word in text_lower for word in BLACKLISTED_WORDS)

+
 def get_found_blacklisted_words(text: str):
     text_lower = text.lower()
     for word in BLACKLISTED_WORDS:
@@ -218,6 +220,7 @@ def get_found_blacklisted_words(text: str):
         return word
     return None

+
 def remove_special_characters_from_beginning(string):
     cleaned_string = string.lstrip('\n')
     if string.startswith("'") or string.startswith('"'):
@@ -239,6 +242,7 @@ def replace_expression_in_object(obj, expression, replacement):
             obj[key] = replace_expression_in_object(obj[key], expression, replacement)
     return obj

+
 def count_total_tokens(messages):
     total_tokens = 0
     for message in messages:

@@ -1136,12 +1136,11 @@ def getSpeakingTemplate():
     "exercises": [
         {
             "id": str(uuid.uuid4()),
-            "prompts": [],
-            "text": "text",
-            "title": "topic",
-            "video_url": "sp1_video_url",
-            "video_path": "sp1_video_path",
-            "type": "speaking"
+            "prompts": ["questions"],
+            "text": "Listen carefully and respond.",
+            "first_title": "first_topic",
+            "second_title": "second_topic",
+            "type": "interactiveSpeaking"
         },
         {
             "id": str(uuid.uuid4()),

View File

@@ -95,17 +95,26 @@ def conversation_text_to_speech(conversation: list, file_name: str):
 def has_words(text: str):
+    if not has_common_words(text):
+        return False
     english_words = set(words.words())
     words_in_input = text.split()
     return any(word.lower() in english_words for word in words_in_input)

 def has_x_words(text: str, quantity):
+    if not has_common_words(text):
+        return False
     english_words = set(words.words())
     words_in_input = text.split()
     english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
     return english_word_count >= quantity

+def has_common_words(text: str):
+    english_words = {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}
+    words_in_input = text.split()
+    english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
+    return english_word_count >= 10

 def divide_text(text, max_length=3000):
     if len(text) <= max_length:
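The new `has_common_words` gate can be exercised on its own; here is a minimal stand-alone sketch of the same heuristic (constants copied from the diff above, `threshold` parameterised for illustration):

```python
# Heuristic gate from the diff above: require at least `threshold`
# occurrences of very common English words before running the more
# expensive NLTK-based word checks.
COMMON_WORDS = {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}

def has_common_words(text: str, threshold: int = 10) -> bool:
    words_in_input = text.split()
    common_count = sum(1 for word in words_in_input if word.lower() in COMMON_WORDS)
    return common_count >= threshold
```

Note the bar is high: a short sentence like "the cat sat in the hat" only matches three of its words against the set, so it is rejected at the default threshold.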

modules/__init__.py Normal file

@@ -0,0 +1,5 @@
from .gpt import GPT
__all__ = [
"GPT"
]


@@ -0,0 +1,5 @@
from .service import BatchUsers
__all__ = [
"BatchUsers"
]


@@ -0,0 +1,31 @@
import uuid
from typing import Optional
from pydantic import BaseModel, Field
from datetime import datetime
class DemographicInfo(BaseModel):
phone: str
passport_id: Optional[str] = None
country: Optional[str] = None
class UserDTO(BaseModel):
id: uuid.UUID = Field(default_factory=uuid.uuid4)
email: str
name: str
type: str
passport_id: str
passwordHash: str
passwordSalt: str
groupName: Optional[str] = None
corporate: Optional[str] = None
studentID: Optional[str] = None
expiryDate: Optional[str] = None
demographicInformation: Optional[DemographicInfo] = None
class BatchUsersDTO(BaseModel):
makerID: str
users: list[UserDTO]
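For readers without the surrounding service code, a hedged sketch of how a request payload maps onto these DTOs. Stdlib dataclasses stand in for the pydantic models so the sketch runs without pydantic installed; field names mirror the listing above, validation is omitted:

```python
import uuid
from dataclasses import dataclass, field
from typing import Optional

# Stand-in for UserDTO; same field names, no validation.
@dataclass
class UserSketch:
    email: str
    name: str
    type: str
    passport_id: str
    passwordHash: str
    passwordSalt: str
    groupName: Optional[str] = None
    id: uuid.UUID = field(default_factory=uuid.uuid4)

def map_to_batch(request_data: dict) -> dict:
    # Mirrors BatchUsers._map_to_batch: one DTO per entry in "users".
    users = [UserSketch(**user) for user in request_data["users"]]
    return {"makerID": request_data["makerID"], "users": users}

payload = {
    "makerID": "maker-1",
    "users": [{
        "email": "a@example.com", "name": "Ana", "type": "student",
        "passport_id": "P1", "passwordHash": "h", "passwordSalt": "s",
    }],
}
batch = map_to_batch(payload)
```

Each user gets a generated UUID when the payload omits one, matching the `Field(default_factory=uuid.uuid4)` default above.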


@@ -0,0 +1,261 @@
import os
import subprocess
import time
import uuid
from datetime import datetime
from logging import getLogger
import pandas as pd
from typing import Dict
import shortuuid
from google.cloud.firestore_v1 import Client
from google.cloud.firestore_v1.base_query import FieldFilter
from modules.batch_users.batch_users import BatchUsersDTO, UserDTO
from modules.helper.file_helper import FileHelper
class BatchUsers:
_DEFAULT_DESIRED_LEVELS = {
"reading": 9,
"listening": 9,
"writing": 9,
"speaking": 9,
}
_DEFAULT_LEVELS = {
"reading": 0,
"listening": 0,
"writing": 0,
"speaking": 0,
}
def __init__(self, firestore: Client):
self._db = firestore
self._logger = getLogger(__name__)
def batch_users(self, request_data: Dict):
batch_dto = self._map_to_batch(request_data)
file_name = f'{uuid.uuid4()}.csv'
path = f'./tmp/{file_name}'
self._generate_firebase_auth_csv(batch_dto, path)
result = self._upload_users('./tmp', file_name)
if result.returncode != 0:
error_msg = f"Couldn't upload users. Failed to run command 'firebase auth:import' -> ```cmd {result.stderr}```"
self._logger.error(error_msg)
return error_msg
self._init_users(batch_dto)
FileHelper.remove_file(path)
return {"ok": True}
@staticmethod
def _map_to_batch(request_data: Dict) -> BatchUsersDTO:
users: list[UserDTO] = [UserDTO(**user) for user in request_data["users"]]
return BatchUsersDTO(makerID=request_data["makerID"], users=users)
@staticmethod
def _generate_firebase_auth_csv(batch_dto: BatchUsersDTO, path: str):
# https://firebase.google.com/docs/cli/auth#file_format
columns = [
'UID', 'Email', 'Email Verified', 'Password Hash', 'Password Salt', 'Name',
'Photo URL', 'Google ID', 'Google Email', 'Google Display Name', 'Google Photo URL',
'Facebook ID', 'Facebook Email', 'Facebook Display Name', 'Facebook Photo URL',
'Twitter ID', 'Twitter Email', 'Twitter Display Name', 'Twitter Photo URL',
'GitHub ID', 'GitHub Email', 'GitHub Display Name', 'GitHub Photo URL',
'User Creation Time', 'Last Sign-In Time', 'Phone Number'
]
users_data = []
current_time = int(time.time() * 1000)
for user in batch_dto.users:
user_data = {
'UID': str(user.id),
'Email': user.email,
'Email Verified': False,
'Password Hash': user.passwordHash,
'Password Salt': user.passwordSalt,
'Name': '',
'Photo URL': '',
'Google ID': '',
'Google Email': '',
'Google Display Name': '',
'Google Photo URL': '',
'Facebook ID': '',
'Facebook Email': '',
'Facebook Display Name': '',
'Facebook Photo URL': '',
'Twitter ID': '',
'Twitter Email': '',
'Twitter Display Name': '',
'Twitter Photo URL': '',
'GitHub ID': '',
'GitHub Email': '',
'GitHub Display Name': '',
'GitHub Photo URL': '',
'User Creation Time': current_time,
'Last Sign-In Time': '',
'Phone Number': ''
}
users_data.append(user_data)
df = pd.DataFrame(users_data, columns=columns)
df.to_csv(path, index=False, header=False)
@staticmethod
def _upload_users(directory: str, file_name: str):
command = (
f'firebase auth:import {file_name} '
f'--hash-algo=SCRYPT '
f'--hash-key={os.getenv("FIREBASE_SCRYPT_B64_SIGNER_KEY")} '
f'--salt-separator={os.getenv("FIREBASE_SCRYPT_B64_SALT_SEPARATOR")} '
f'--rounds={os.getenv("FIREBASE_SCRYPT_ROUNDS")} '
f'--mem-cost={os.getenv("FIREBASE_SCRYPT_MEM_COST")} '
f'--project={os.getenv("FIREBASE_PROJECT_ID")} '
)
result = subprocess.run(command, shell=True, cwd=directory, capture_output=True, text=True)
return result
def _init_users(self, batch_users: BatchUsersDTO):
maker_id = batch_users.makerID
for user in batch_users.users:
self._insert_new_user(user)
code = self._create_code(user, maker_id)
if user.type == "corporate":
self._set_corporate_default_groups(user)
if user.corporate:
self._assign_corporate_to_user(user, code)
if user.groupName and len(user.groupName.strip()) > 0:
self._assign_user_to_group_by_name(user, maker_id)
def _insert_new_user(self, user: UserDTO):
new_user = {
**user.dict(exclude={
'id', 'passport_id', 'groupName', 'expiryDate',
'corporate', 'passwordHash', 'passwordSalt'
}),
'bio': "",
'focus': "academic",
'status': "active",
'desiredLevels': self._DEFAULT_DESIRED_LEVELS,
'profilePicture': "/defaultAvatar.png",
'levels': self._DEFAULT_LEVELS,
'isFirstLogin': False,
'isVerified': True,
'registrationDate': datetime.now(),
'subscriptionExpirationDate': user.expiryDate
}
self._db.collection('users').document(str(user.id)).set(new_user)
def _create_code(self, user: UserDTO, maker_id: str) -> str:
code = shortuuid.ShortUUID().random(length=6)
self._db.collection('codes').document(code).set({
'code': code,
'creator': maker_id,
'expiryDate': user.expiryDate,
'type': user.type,
'creationDate': datetime.now(),
'userId': str(user.id),
'email': user.email,
'name': user.name,
'passport_id': user.passport_id
})
return code
def _set_corporate_default_groups(self, user: UserDTO):
user_id = str(user.id)
default_groups = [
{
'admin': user_id,
'id': str(uuid.uuid4()),
'name': "Teachers",
'participants': [],
'disableEditing': True,
},
{
'admin': user_id,
'id': str(uuid.uuid4()),
'name': "Students",
'participants': [],
'disableEditing': True,
},
{
'admin': user_id,
'id': str(uuid.uuid4()),
'name': "Corporate",
'participants': [],
'disableEditing': True,
}
]
for group in default_groups:
self._db.collection('groups').document(group['id']).set(group)
def _assign_corporate_to_user(self, user: UserDTO, code: str):
user_id = str(user.id)
corporate_users = self._db.collection('users').where(
filter=FieldFilter('email', '==', user.corporate)
).limit(1).get()
if len(corporate_users) > 0:
corporate_user = corporate_users[0]
self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True)
group_type = "Students" if user.type == "student" else "Teachers"
groups = self._db.collection('groups').where(
filter=FieldFilter('admin', '==', corporate_user.id)
).where(
filter=FieldFilter('name', '==', group_type)
).limit(1).get()
if len(groups) > 0:
group = groups[0]
participants = group.get('participants')
if user_id not in participants:
participants.append(user_id)
group.reference.update({'participants': participants})
else:
group = {
'admin': corporate_user.id,
'id': str(uuid.uuid4()),
'name': group_type,
'participants': [user_id],
'disableEditing': True,
}
self._db.collection('groups').document(group['id']).set(group)
def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
user_id = str(user.id)
groups = self._db.collection('groups').where(
filter=FieldFilter('admin', '==', maker_id)
).where(
filter=FieldFilter('name', '==', user.groupName.strip())
).limit(1).get()
if len(groups) == 0:
new_group = {
'id': str(uuid.uuid4()),
'admin': maker_id,
'name': user.groupName.strip(),
'participants': [user_id],
'disableEditing': False,
}
self._db.collection('groups').document(new_group['id']).set(new_group)
else:
group = groups[0]
participants = group.get('participants')
if user_id not in participants:
participants.append(user_id)
group.reference.update({'participants': participants})
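The service builds the auth-import CSV with pandas; the same header-less row can be produced with the stdlib `csv` module. A minimal sketch, assuming the 26-column order from the Firebase CLI docs referenced in the listing (only the populated columns shown; the rest stay empty):

```python
import csv
import io
import time

# 26 columns, in the order expected by `firebase auth:import`
# (UID, Email, Email Verified, Password Hash, Password Salt, ...).
NUM_COLUMNS = 26

def auth_import_row(uid: str, email: str, password_hash: str, password_salt: str) -> list:
    row = [""] * NUM_COLUMNS
    row[0] = uid
    row[1] = email
    row[2] = "False"                         # Email Verified
    row[3] = password_hash
    row[4] = password_salt
    row[23] = str(int(time.time() * 1000))   # User Creation Time, epoch ms
    return row

buf = io.StringIO()
csv.writer(buf).writerow(auth_import_row("uid-1", "a@example.com", "hash", "salt"))
line = buf.getvalue().strip()
```

Like `df.to_csv(path, index=False, header=False)` above, no header row is written; the CLI infers columns by position.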

modules/gpt.py Normal file

@@ -0,0 +1,66 @@
import json
from logging import getLogger
from typing import List, Optional, Callable, TypeVar
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
T = TypeVar('T', bound=BaseModel)
class GPT:
def __init__(self, openai_client):
self._client = openai_client
self._default_model = "gpt-4o-2024-08-06"
self._logger = getLogger(__name__)
def prediction(
self,
messages: List[ChatCompletionMessageParam],
map_to_model: Callable,
json_scheme: str,
*,
model: Optional[str] = None,
temperature: Optional[float] = None,
max_retries: int = 3
) -> List[T] | T | None:
params = {
"messages": messages,
"response_format": {"type": "json_object"},
"model": model if model else self._default_model
}
if temperature is not None:  # a plain `if temperature:` would silently drop an explicit temperature of 0
params["temperature"] = temperature
attempt = 0
while attempt < max_retries:
result = self._client.chat.completions.create(**params)
result_content = result.choices[0].message.content
try:
result_json = json.loads(result_content)
return map_to_model(result_json)
except Exception as e:
attempt += 1
self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
params["messages"] = [
{
"role": "user",
"content": (
"Your previous response wasn't in the json format I explicitly told you to output. "
"In your next response, fix it and return only the json I asked for."
)
},
{
"role": "user",
"content": (
f"Previous response: {result_content}\n"
f"JSON format: {json_scheme}"
)
}
]
if attempt >= max_retries:
self._logger.error("Max retries exceeded!")
return None
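The retry loop above can be exercised without the OpenAI SDK. A sketch with a hypothetical stub client (the `StubClient` class and its response shapes are stand-ins mimicking the SDK objects, not real SDK types) that fails once with malformed JSON and then succeeds:

```python
import json
from types import SimpleNamespace

class StubClient:
    """Returns malformed JSON on the first call, valid JSON afterwards."""
    def __init__(self, responses):
        self._responses = iter(responses)
        self.chat = SimpleNamespace(
            completions=SimpleNamespace(create=self._create))

    def _create(self, **params):
        # Mimic the shape of a chat completion result.
        message = SimpleNamespace(content=next(self._responses))
        return SimpleNamespace(choices=[SimpleNamespace(message=message)])

def prediction(client, messages, map_to_model, max_retries=3):
    # Same control flow as GPT.prediction: parse, map, retry on failure.
    attempt = 0
    while attempt < max_retries:
        result = client.chat.completions.create(messages=messages)
        content = result.choices[0].message.content
        try:
            return map_to_model(json.loads(content))
        except Exception:
            attempt += 1
    return None

client = StubClient(['not json', '{"tip_ids": ["t1"]}'])
out = prediction(client, [], lambda d: d["tip_ids"])
```

The first response raises in `json.loads`, the loop retries, and the second response parses cleanly, so `out` is `["t1"]`.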


@@ -0,0 +1,5 @@
from .logger import LoggerHelper
__all__ = [
"LoggerHelper"
]


@@ -0,0 +1,97 @@
import base64
import io
import os
import shutil
import subprocess
import uuid
from typing import Optional, Tuple
import numpy as np
import pypandoc
from PIL import Image
class FileHelper:
# Pandoc supposedly covers a wide range of file extensions; this has only been tested with .docx
@staticmethod
def convert_file_to_pdf(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
'-V', 'geometry:paperwidth=5.5in',
'-V', 'geometry:paperheight=8.5in',
'-V', 'geometry:margin=0.5in',
'-V', 'pagestyle=empty'
])
@staticmethod
def convert_file_to_html(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'html', outputfile=output_path)
@staticmethod
def pdf_to_png(path_id: str):
to_png = "pdftoppm -png exercises.pdf page"
result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
if result.returncode != 0:
raise Exception(
f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
@staticmethod
def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
with Image.open(io.BytesIO(image_bytes)) as img:
img_gray = img.convert('L')
img_array = np.array(img_gray)
non_white_pixels = np.sum(img_array < 255)
return non_white_pixels <= image_threshold
@classmethod
def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
with open(image_path, "rb") as image_file:
image_bytes = image_file.read()
if cls.is_page_blank(image_bytes, image_threshold):
return None
return base64.b64encode(image_bytes).decode('utf-8')
@classmethod
def b64_pngs(cls, path_id: str, files: list[str]):
png_messages = []
for filename in files:
b64_string = cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
if b64_string:
png_messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{b64_string}"
}
})
return png_messages
@staticmethod
def remove_directory(path):
try:
if os.path.exists(path):
if os.path.isdir(path):
shutil.rmtree(path)
except Exception as e:
print(f"An error occurred while trying to remove {path}: {str(e)}")
@staticmethod
def remove_file(file_path):
try:
if os.path.exists(file_path):
if os.path.isfile(file_path):
os.remove(file_path)
except Exception as e:
print(f"An error occurred while trying to remove the file {file_path}: {str(e)}")
@staticmethod
def save_upload(file) -> Tuple[str, str]:
ext = file.filename.split('.')[-1]
path_id = str(uuid.uuid4())
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
file.save(tmp_filename)
return ext, path_id
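`_encode_image`/`b64_pngs` boil down to base64-encoding PNG bytes into an OpenAI-style `image_url` message; a dependency-free sketch of just that step (the dict shape mirrors the listing above, the blank-page check is omitted):

```python
import base64

def b64_png_message(image_bytes: bytes) -> dict:
    # Mirrors FileHelper.b64_pngs: embed the image as a base64 data URL.
    b64_string = base64.b64encode(image_bytes).decode("utf-8")
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{b64_string}"},
    }

msg = b64_png_message(b"\x89PNG-fake-bytes")
```

Decoding the data-URL payload back with `base64.b64decode` recovers the original bytes, which is an easy round-trip check when debugging.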

modules/helper/logger.py Normal file

@@ -0,0 +1,23 @@
import logging
from functools import wraps
class LoggerHelper:
@staticmethod
def suppress_loggers():
def decorator(f):
@wraps(f)
def wrapped(*args, **kwargs):
root_logger = logging.getLogger()
original_level = root_logger.level
root_logger.setLevel(logging.ERROR)
try:
return f(*args, **kwargs)
finally:
root_logger.setLevel(original_level)
return wrapped
return decorator
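A quick usage check for the decorator: inside the wrapped call the root logger sits at ERROR, and the original level is restored afterwards thanks to the `finally` block (the decorator is reproduced here so the example is self-contained):

```python
import logging
from functools import wraps

# Same decorator as LoggerHelper.suppress_loggers, reproduced verbatim.
def suppress_loggers():
    def decorator(f):
        @wraps(f)
        def wrapped(*args, **kwargs):
            root_logger = logging.getLogger()
            original_level = root_logger.level
            root_logger.setLevel(logging.ERROR)
            try:
                return f(*args, **kwargs)
            finally:
                root_logger.setLevel(original_level)
        return wrapped
    return decorator

logging.getLogger().setLevel(logging.INFO)

@suppress_loggers()
def level_inside():
    # Observe the root logger's level while suppression is active.
    return logging.getLogger().level

inside = level_inside()
outside = logging.getLogger().level
```

Because restoration happens in `finally`, the original level comes back even if the wrapped function raises.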


@@ -0,0 +1,7 @@
from .kb import TrainingContentKnowledgeBase
from .service import TrainingContentService
__all__ = [
"TrainingContentService",
"TrainingContentKnowledgeBase"
]


@@ -0,0 +1,29 @@
from pydantic import BaseModel
from typing import List
class QueryDTO(BaseModel):
category: str
text: str
class DetailsDTO(BaseModel):
exam_id: str
date: int
performance_comment: str
detailed_summary: str
class WeakAreaDTO(BaseModel):
area: str
comment: str
class TrainingContentDTO(BaseModel):
details: List[DetailsDTO]
weak_areas: List[WeakAreaDTO]
queries: List[QueryDTO]
class TipsDTO(BaseModel):
tip_ids: List[str]


@@ -0,0 +1,85 @@
import json
import os
from logging import getLogger
from typing import Dict, List
import faiss
import pickle
class TrainingContentKnowledgeBase:
def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
self._embedding_model = embeddings
self._tips = None # self._read_json(path)
self._category_metadata = None
self._indices = None
self._logger = getLogger(__name__)
@staticmethod
def _read_json(path: str) -> Dict[str, any]:
with open(path, 'r', encoding="utf-8") as json_file:
return json.loads(json_file.read())
def print_category_count(self):
category_tips = {}
for unit in self._tips['units']:
for page in unit['pages']:
for tip in page['tips']:
category = tip['category'].lower().replace(" ", "_")
if category not in category_tips:
category_tips[category] = 0
# Always increment; the original only incremented on repeat hits,
# undercounting every category by one
category_tips[category] += 1
print(category_tips)
def create_embeddings_and_save_them(self) -> None:
category_embeddings = {}
category_metadata = {}
for unit in self._tips['units']:
for page in unit['pages']:
for tip in page['tips']:
category = tip['category'].lower().replace(" ", "_")
if category not in category_embeddings:
category_embeddings[category] = []
category_metadata[category] = []
category_embeddings[category].append(tip['embedding'])
category_metadata[category].append({"id": tip['id'], "text": tip['text']})
category_indices = {}
for category, embeddings in category_embeddings.items():
embeddings_array = self._embedding_model.encode(embeddings)
index = faiss.IndexFlatL2(embeddings_array.shape[1])
index.add(embeddings_array)
category_indices[category] = index
faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")
with open("./faiss/tips_metadata.pkl", "wb") as f:
pickle.dump(category_metadata, f)
def load_indices_and_metadata(
self,
directory: str = './faiss',
suffix: str = '_tips_index.faiss',
metadata_path: str = './faiss/tips_metadata.pkl'
):
files = os.listdir(directory)
self._indices = {}
for file in files:
if file.endswith(suffix):
self._indices[file[:-len(suffix)]] = faiss.read_index(f'{directory}/{file}')
self._logger.info(f'Loaded embeddings for {file[:-len(suffix)]} category.')
with open(metadata_path, 'rb') as f:
self._category_metadata = pickle.load(f)
self._logger.info("Loaded tips metadata")
def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
query_embedding = self._embedding_model.encode([query])
index = self._indices[category]
D, I = index.search(query_embedding, top_k)
results = [self._category_metadata[category][i] for i in I[0]]
return results
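FAISS's `IndexFlatL2` is an exact L2 nearest-neighbour search. For intuition, a tiny pure-python equivalent of `query_knowledge_base` over a handful of 2-d "embeddings" (the vectors and metadata below are made up for illustration; real embeddings are high-dimensional):

```python
import math

def l2(a, b):
    # Euclidean distance, what IndexFlatL2 ranks by.
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))

def query_knowledge_base(query_vec, vectors, metadata, top_k=2):
    # Brute-force version of index.search followed by the metadata lookup.
    ranked = sorted(range(len(vectors)), key=lambda i: l2(query_vec, vectors[i]))
    return [metadata[i] for i in ranked[:top_k]]

vectors = [(0.0, 0.0), (1.0, 0.0), (5.0, 5.0)]
metadata = [{"id": "t1"}, {"id": "t2"}, {"id": "t3"}]
hits = query_knowledge_base((0.1, 0.0), vectors, metadata)
```

FAISS does the same ranking in optimized batched form and returns distances `D` and indices `I`, which the listing above maps back to `self._category_metadata[category]`.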


@@ -0,0 +1,409 @@
import json
from datetime import datetime
from logging import getLogger
from typing import Dict, List
from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
class TrainingContentService:
TOOLS = [
'critical_thinking',
'language_for_writing',
'reading_skills',
'strategy',
'words',
'writing_skills'
]
# strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
def __init__(self, kb, openai, firestore):
self._training_content_module = kb
self._db = firestore
self._logger = getLogger(__name__)
self._llm = openai
def get_tips(self, training_content):
user, stats = training_content["userID"], training_content["stats"]
exam_data, exam_map = self._sort_out_solutions(stats)
training_content = self._get_exam_details_and_tips(exam_data)
tips = self._query_kb(training_content.queries)
usefull_tips = self._get_usefull_tips(exam_data, tips)
exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)
weak_areas = {"weak_areas": []}
for area in training_content.weak_areas:
weak_areas["weak_areas"].append(area.dict())
training_doc = {
'created_at': int(datetime.now().timestamp() * 1000),
**exam_map,
**usefull_tips.dict(),
**weak_areas,
"user": user
}
doc_ref = self._db.collection('training').add(training_doc)
return {
"id": doc_ref[1].id
}
@staticmethod
def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
new_exam_map = {"exams": []}
for detail in details:
new_exam_map["exams"].append({
"id": detail.exam_id,
"date": detail.date,
"performance_comment": detail.performance_comment,
"detailed_summary": detail.detailed_summary,
**exam_map[detail.exam_id]
})
return new_exam_map
def _query_kb(self, queries: List[QueryDTO]):
map_categories = {
"critical_thinking": "ct_focus",
"language_for_writing": "language_for_writing",
"reading_skills": "reading_skill",
"strategy": "strategy",
"writing_skills": "writing_skill"
}
tips = {"tips": []}
for query in queries:
if query.category == "words":
tips["tips"].extend(
self._training_content_module.query_knowledge_base(query.text, "word_link")
)
tips["tips"].extend(
self._training_content_module.query_knowledge_base(query.text, "word_partners")
)
else:
if query.category in map_categories:
tips["tips"].extend(
self._training_content_module.query_knowledge_base(query.text, map_categories[query.category])
)
else:
self._logger.info(f"GPT tried to query knowledge base for {query.category} and it doesn't exist.")
return tips
def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
json_schema = (
'{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
)
messages = [
{
"role": "user",
"content": (
f"I'm going to provide you with exam data; you will take the exam data and fill this json "
f'schema: {json_schema}. "performance_comment" is a short sentence that describes the '
'student\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
'summary of the student\'s performance, and "weak_areas" are areas identified'
' across all exams that need to be improved upon, for example, area "Grammar and Syntax" with comment "Issues'
' with sentence structure and punctuation.". The "queries" field is where you will write queries '
'for tips that will be displayed to the student; the category attribute names a collection of '
'embeddings, and the text will be the text used to query the knowledge base. The categories are '
f'the following: [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
'"exams" is the exam id; an exam can be composed of one or more modules. The student'
' will see your response, so refrain from using phrasing like "The student did x, y and z". If the '
'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question, '
'and you must address that in your response. Also, questions aren\'t modules; the only modules are: '
'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
'exam attempt; even if you receive the same exam more than once, you must treat each attempt as a different exam by its id. '
'Don\'t refer to an exam by its id; the GUI will let the student know '
'which exam your comments and summary are referencing. Even if the student '
'submitted no answers for an exam, you must still fill in the details structure addressing that fact.'
)
},
{
"role": "user",
"content": f'Exam Data: {str(exam_data)}'
}
]
return self._llm.prediction(messages, self._map_gpt_response, json_schema)
def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
json_schema = (
'{ "tip_ids": [] }'
)
messages = [
{
"role": "user",
"content": (
f"I'm going to provide you with tips, and I want you to return the tips that "
f"could be useful for the student who took the exam I'm going to send you; return "
f"the tip ids in this json format: {json_schema}."
)
},
{
"role": "user",
"content": f'Exam Data: {str(exam_data)}'
},
{
"role": "user",
"content": f'Tips: {str(tips)}'
}
]
return self._llm.prediction(messages, lambda response: TipsDTO(**response), json_schema)
@staticmethod
def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
parsed_response = {
"details": [DetailsDTO(**detail) for detail in response["details"]],
"weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
"queries": [QueryDTO(**query) for query in response["queries"]]
}
return TrainingContentDTO(**parsed_response)
def _sort_out_solutions(self, stats):
grouped_stats = {}
for stat in stats:
session_key = f'{str(stat["date"])}-{stat["user"]}'
module = stat["module"]
exam_id = stat["exam"]
if session_key not in grouped_stats:
grouped_stats[session_key] = {}
if module not in grouped_stats[session_key]:
grouped_stats[session_key][module] = {
"stats": [],
"exam_id": exam_id
}
grouped_stats[session_key][module]["stats"].append(stat)
exercises = {}
exam_map = {}
for session_key, modules in grouped_stats.items():
exercises[session_key] = {}
for module, module_stats in modules.items():
exercises[session_key][module] = {}
exam_id = module_stats["exam_id"]
if exam_id not in exercises[session_key][module]:
exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
exam_total_questions = 0
exam_total_correct = 0
for stat in module_stats["stats"]:
exam_total_questions += stat["score"]["total"]
exam_total_correct += stat["score"]["correct"]
exercises[session_key][module][exam_id]["date"] = stat["date"]
if session_key not in exam_map:
exam_map[session_key] = {"stat_ids": [], "score": 0}
exam_map[session_key]["stat_ids"].append(stat["id"])
exam = self._get_doc_by_id(module, exam_id)
if module == "listening":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_listening_solutions(stat, exam))
elif module == "reading":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_reading_solutions(stat, exam))
elif module == "writing":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_writing_prompts_and_answers(stat, exam)
)
elif module == "speaking":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_speaking_solutions(stat, exam)
)
elif module == "level":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_level_solutions(stat, exam)
)
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
exam_map[session_key]["module"] = module
with open('exam_result.json', 'w') as file:
json.dump({"exams": exercises}, file, indent=4)
return {"exams": exercises}, exam_map
def _get_writing_prompts_and_answers(self, stat, exam):
result = []
try:
exercises = []
for solution in stat['solutions']:
answer = solution['solution']
exercise_id = solution['id']
exercises.append({
"exercise_id": exercise_id,
"answer": answer
})
for exercise in exercises:
for exam_exercise in exam["exercises"]:
if exam_exercise["id"] == exercise["exercise_id"]:
result.append({
"exercise": exam_exercise["prompt"],
"answer": exercise["answer"]
})
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
@staticmethod
def _get_mc_question(exercise, stat):
shuffle_maps = stat.get("shuffleMaps", [])
answer = stat["solutions"] if len(shuffle_maps) == 0 else []
if len(shuffle_maps) != 0:
for solution in stat["solutions"]:
# Pick the single map for this question; the original built a list
# and then indexed it by the option index, which is wrong
shuffle_map = next(
(item["map"] for item in shuffle_maps
if item["questionID"] == solution["question"]),
None
)
answer.append({
"question": solution["question"],
"option": shuffle_map[solution["option"]] if shuffle_map else solution["option"]
})
return {
"question": exercise["prompt"],
"exercise": exercise["questions"],
# Return the de-shuffled answers; previously stat["solutions"] was
# returned, discarding the mapping computed above
"answer": answer
}
@staticmethod
def _swap_key_name(d, original_key, new_key):
d[new_key] = d.pop(original_key)
return d
def _get_level_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "fillBlanks":
result.append({
"prompt": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": [
self._swap_key_name(item, 'solution', 'option')
for item in stat["solutions"]
]
})
elif stat["type"] == "multipleChoice":
result.append(self._get_mc_question(exercise, stat))
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
def _get_listening_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "writeBlanks":
result.append({
"question": exercise["prompt"],
"template": exercise["text"],
"solution": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "fillBlanks":
result.append({
"question": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "multipleChoice":
result.append(self._get_mc_question(exercise, stat))
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
@staticmethod
def _find_shuffle_map(shuffle_maps, question_id):
return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
def _get_speaking_solutions(self, stat, exam):
result = {}
try:
result = {
"comments": {
key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
,
"exercises": {}
}
for exercise in exam["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "interactiveSpeaking":
for i in range(len(exercise["prompts"])):
result["exercises"][f"exercise_{i+1}"] = {
"question": exercise["prompts"][i]["text"]
}
for i in range(len(exercise["prompts"])):
answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
result["exercises"][f"exercise_{i+1}"]["answer"] = answer
elif stat["type"] == "speaking":
result["exercises"]["exercise_1"] = {
"question": exercise["text"],
"answer": stat['solutions'][0]["evaluation"].get('transcript', '')
}
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return [result]
def _get_reading_solutions(self, stat, exam):
result = []
try:
for part in exam["parts"]:
text = part["text"]
for exercise in part["exercises"]:
if exercise["id"] == stat["exercise"]:
if stat["type"] == "fillBlanks":
result.append({
"text": text,
"question": exercise["prompt"],
"template": exercise["text"],
"words": exercise["words"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "writeBlanks":
result.append({
"text": text,
"question": exercise["prompt"],
"template": exercise["text"],
"solutions": exercise["solutions"],
"answer": stat["solutions"]
})
elif stat["type"] == "trueFalse":
result.append({
"text": text,
"questions": exercise["questions"],
"answer": stat["solutions"]
})
elif stat["type"] == "matchSentences":
result.append({
"text": text,
"question": exercise["prompt"],
"sentences": exercise["sentences"],
"options": exercise["options"],
"answer": stat["solutions"]
})
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
def _get_doc_by_id(self, collection: str, doc_id: str):
collection_ref = self._db.collection(collection)
doc_ref = collection_ref.document(doc_id)
doc = doc_ref.get()
if doc.exists:
return doc.to_dict()
return None
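`_sort_out_solutions` keys stats by `f'{date}-{user}'` so that all module stats from one sitting land in the same session bucket. A stripped-down, runnable sketch of just that grouping step (stat dicts below are made up for illustration):

```python
def group_by_session(stats):
    # Mirrors the first loop of _sort_out_solutions: session -> module -> stats.
    grouped = {}
    for stat in stats:
        session_key = f'{stat["date"]}-{stat["user"]}'
        grouped.setdefault(session_key, {}).setdefault(
            stat["module"], {"stats": [], "exam_id": stat["exam"]}
        )["stats"].append(stat)
    return grouped

stats = [
    {"date": 1, "user": "u1", "module": "reading", "exam": "e1"},
    {"date": 1, "user": "u1", "module": "listening", "exam": "e2"},
    {"date": 2, "user": "u1", "module": "reading", "exam": "e1"},
]
grouped = group_by_session(stats)
```

Two stats with the same date and user share a session, so the reading and listening attempts above group together while the later reading attempt becomes its own session.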


@@ -0,0 +1,5 @@
from .service import UploadLevelService
__all__ = [
"UploadLevelService"
]


@@ -0,0 +1,57 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Union, Optional, Any
from uuid import uuid4, UUID
class Option(BaseModel):
id: str
text: str
class MultipleChoiceQuestion(BaseModel):
id: str
prompt: str
variant: str = "text"
solution: str
options: List[Option]
class MultipleChoiceExercise(BaseModel):
id: UUID = Field(default_factory=uuid4)
type: str = "multipleChoice"
prompt: str = "Select the appropriate option."
questions: List[MultipleChoiceQuestion]
userSolutions: List = Field(default_factory=list)
class FillBlanksWord(BaseModel):
id: str
options: Dict[str, str]
class FillBlanksSolution(BaseModel):
id: str
solution: str
class FillBlanksExercise(BaseModel):
id: UUID = Field(default_factory=uuid4)
type: str = "fillBlanks"
variant: str = "mc"
prompt: str = "Click a blank to select the appropriate word for it."
text: str
solutions: List[FillBlanksSolution]
words: List[FillBlanksWord]
userSolutions: List = Field(default_factory=list)
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
class Part(BaseModel):
exercises: List[Exercise]
context: Optional[str] = Field(default=None)
class Exam(BaseModel):
parts: List[Part]


@@ -0,0 +1,66 @@
from typing import Dict, Any
from pydantic import ValidationError
from modules.upload_level.exam_dtos import (
MultipleChoiceExercise,
FillBlanksExercise,
Part, Exam
)
from modules.upload_level.sheet_dtos import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
class ExamMapper:
@staticmethod
def map_to_exam_model(response: Dict[str, Any]) -> Exam:
parts = []
for part in response['parts']:
part_exercises = part['exercises']
context = part.get('context', None)
exercises = []
for exercise in part_exercises:
exercise_type = exercise['type']
if exercise_type == 'multipleChoice':
exercise_model = MultipleChoiceExercise(**exercise)
elif exercise_type == 'fillBlanks':
exercise_model = FillBlanksExercise(**exercise)
else:
raise ValueError(f"Unknown exercise type: {exercise_type}")  # pydantic's ValidationError isn't constructible from a plain message string
exercises.append(exercise_model)
part_kwargs = {"exercises": exercises}
if context is not None:
part_kwargs["context"] = context
part_model = Part(**part_kwargs)
parts.append(part_model)
return Exam(parts=parts)
@staticmethod
def map_to_sheet(response: Dict[str, Any]) -> Sheet:
components = []
for item in response["components"]:
component_type = item["type"]
if component_type == "multipleChoice":
options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
components.append(MultipleChoiceQuestion(
id=item["id"],
prompt=item["prompt"],
variant=item.get("variant", "text"),
options=options
))
elif component_type == "fillBlanks":
components.append(FillBlanksWord(
id=item["id"],
options=item["options"]
))
else:
components.append(item)
return Sheet(components=components)
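The mapper's dispatch-on-`'type'` pattern, reduced to plain dicts so it runs without pydantic (the `"kind"` tags are illustrative, and the error type here is `ValueError` rather than the listing's `ValidationError`):

```python
def map_exercise(exercise: dict) -> dict:
    # Dispatch on the 'type' discriminator, as in ExamMapper.map_to_exam_model.
    handlers = {
        "multipleChoice": lambda e: {"kind": "mc", **e},
        "fillBlanks": lambda e: {"kind": "fb", **e},
    }
    try:
        return handlers[exercise["type"]](exercise)
    except KeyError:
        raise ValueError(f"Unknown exercise type: {exercise.get('type')}")

mc = map_exercise({"type": "multipleChoice", "id": "x1"})
```

Unknown (or missing) discriminators surface as an explicit error instead of silently passing through, which is the behaviour the exam mapper wants; note `map_to_sheet` above deliberately differs and passes unknown components through unchanged.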


@@ -0,0 +1,385 @@
import json
import os
import uuid
from logging import getLogger
from typing import Dict, Any, Tuple, Callable
import pdfplumber
from modules import GPT
from modules.helper.file_helper import FileHelper
from modules.helper import LoggerHelper
from modules.upload_level.exam_dtos import Exam
from modules.upload_level.mapper import ExamMapper
from modules.upload_level.sheet_dtos import Sheet
class UploadLevelService:
def __init__(self, openai: GPT):
self._logger = getLogger(__name__)
self._llm = openai
def generate_level_from_file(self, file) -> Dict[str, Any] | None:
ext, path_id = FileHelper.save_upload(file)
FileHelper.convert_file_to_pdf(
f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
)
file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
if not file_has_images:
FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
completion: Callable[[str], Exam] = self._png_completion if file_has_images else self._html_completion
response = completion(path_id)
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return self.fix_ids(response.dict(exclude_none=True))
return None
@staticmethod
@LoggerHelper.suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool:
with pdfplumber.open(pdf_path) as pdf:
for page in pdf.pages:
if page.images:
return True
return False
def _level_json_schema(self):
return {
"parts": [
{
"context": "<this attribute is optional you may exclude it if not required>",
"exercises": [
self._multiple_choice_html(),
self._passage_blank_space_html()
]
}
]
}
def _html_completion(self, path_id: str) -> Exam:
with open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
html = f.read()
return self._llm.prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": html
}
],
ExamMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_html(self):
return {
"role": "system",
"content": (
'You are GPT Scraper and your job is to turn dirty html into clean, usable JSON-formatted data. '
'Your current task is to scrape html English question sheets.\n\n'
'In the question sheet you will only see 4 types of question:\n'
'- blank space multiple choice\n'
'- underline multiple choice\n'
'- reading passage blank space multiple choice\n'
'- reading passage multiple choice\n\n'
'For the first two question types the template is the same but the prompts differ: '
'in the blank space multiple choice you must mark the blank spaces in the prompt with '
'multiple "_", while in the underline you must include <u></u> in the prompt to '
'indicate the underline, and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
'For the reading passage exercise you must handle the formatting of the passages. If it is a '
'reading passage with blank spaces you will see blanks represented as "(question id)" followed by a '
'line; your job is to replace each bracketed question id and its line with "{{question id}}" '
'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
'between paragraphs.\n\n'
'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
'out the best paragraph separation possible.\n'
'You will place all the information in a single JSON: {"parts": [{"exercises": [{...}], "context": ""}]}\n '
'Where {...} are the exercises templates for each part of a question sheet and the optional field '
'context.\n'
'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
'so that you can group the exercises by the parts that are in the html; this is crucial since only '
'reading passage multiple choice exercises require context, and if the context is included in parts where it '
'is not required the UI will be messed up. So make sure to correctly group the exercises by parts.\n'
'The templates for the exercises are the following:\n'
'- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
f'{self._multiple_choice_html()}\n'
f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
'exercises exclude the context field.'
)
}
@staticmethod
def _multiple_choice_html():
return {
"type": "multipleChoice",
"prompt": "Select the appropriate option.",
"questions": [
{
"id": "<the question id>",
"prompt": "<the question>",
"solution": "<the option id solution>",
"options": [
{
"id": "A",
"text": "<the a option>"
},
{
"id": "B",
"text": "<the b option>"
},
{
"id": "C",
"text": "<the c option>"
},
{
"id": "D",
"text": "<the d option>"
}
]
}
]
}
@staticmethod
def _passage_blank_space_html():
return {
"type": "fillBlanks",
"variant": "mc",
"prompt": "Click a blank to select the appropriate word for it.",
"text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
),
"solutions": [
{
"id": "<question id>",
"solution": "<the option that holds the solution>"
}
],
"words": [
{
"id": "<question id>",
"options": {
"A": "<a option>",
"B": "<b option>",
"C": "<c option>",
"D": "<d option>"
}
}
]
}
def _png_completion(self, path_id: str) -> Exam:
FileHelper.pdf_to_png(path_id)
tmp_files = os.listdir(f'./tmp/{path_id}')
pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
json_schema = {
"components": [
{"type": "part", "part": "<name or number of the part>"},
self._multiple_choice_png(),
{"type": "blanksPassage", "text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
)},
{"type": "passage", "context": (
"<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
)},
self._passage_blank_space_png()
]
}
components = []
for i in range(len(pages)):
current_page = pages[i]
next_page = pages[i + 1] if i + 1 < len(pages) else None
batch = [current_page, next_page] if next_page else [current_page]
sheet = self._png_batch(path_id, batch, json_schema)
sheet.batch = i + 1
components.append(sheet.dict())
batches = {"batches": components}
with open('output.json', 'w') as json_file:
json.dump(batches, json_file, indent=4)
return self._batches_to_exam_completion(batches)
def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
return self._llm.prediction(
[self._gpt_instructions_png(),
{
"role": "user",
"content": [
*FileHelper.b64_pngs(path_id, files)
]
}
],
ExamMapper.map_to_sheet,
str(json_schema)
)
def _gpt_instructions_png(self):
return {
"role": "system",
"content": (
'You are GPT OCR and your job is to scan image text data and format it as JSON. '
'Your current task is to scan English question sheets.\n\n'
'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
'sheet components you will retrieve from the images, the components and their corresponding JSON '
'templates are as follows:\n'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In the blanks prompt you must leave 5 underscores to represent the blank space. '
'In the underlines questions the objective is to pick the words that are incorrect in the given '
'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
'in the order they appear in the question for the options A to D; disregard options that might be '
'included underneath the underlines question and use the ones you wrapped in <u></u>. '
'In a normal question you just leave the question as is. '
f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
'- Reading Passages, there are two types of reading passages. Reading passages where you will see '
'blanks represented by a (question id) followed by a line, you must format these types of reading '
'passages to be only the text with the brackets that have the question id and line replaced with '
'"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
'and place 2 newlines between paragraphs. '
'For the reading passages with blanks the template is: {"type": "blanksPassage", '
'"text": "<The whole text for the exercise with replacements for blank spaces and their '
'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '
'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n'
'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
'reading passages are cut off don\'t include them in the JSON.'
)
}
def _multiple_choice_png(self):
multiple_choice = self._multiple_choice_html()["questions"][0]
multiple_choice["type"] = "multipleChoice"
multiple_choice.pop("solution")
return multiple_choice
def _passage_blank_space_png(self):
passage_blank_space = self._passage_blank_space_html()["words"][0]
passage_blank_space["type"] = "fillBlanks"
return passage_blank_space
def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
return self._llm.prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": str(batches)
}
],
ExamMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_batches(self):
return {
"role": "system",
"content": (
'You are a helpful assistant. Your task is to merge multiple batches of English question sheet '
'components and solve the questions. Each batch may contain overlapping content with the previous '
'batch, or near-duplicate content which needs to be excluded. The components are as follows:\n'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In a blanks question, the prompt has underscores to represent the blank space; you must select the '
'appropriate option to solve it. '
'In an underlines question, the prompt has 4 underlines represented by the html tags <u></u>; you must '
'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
'the order in which the underlines appear in the prompt you will need to fix it. '
'In a normal question there are neither blanks nor underlines in the prompt; you should just '
'select the appropriate solution. '
f'The template for these questions is the same: {self._multiple_choice_png()}\n'
'- Reading Passages, there are two types of reading passages with different templates. The one with '
'type "blanksPassage" where the text field holds the passage and a blank is represented by '
'{{<some number>}} and the other one with type "passage" that has the context field with just '
'reading passages. For both of these components you will have to remove any additional data that might '
'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
' if there are any. These components are used in conjunction with other ones.\n'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n\n'
'Now that you know the possible components here\'s what I want you to do:\n'
'1. Remove duplicates. A batch will have duplicates of other batches and the components of '
'the next batch should always take precedence over the previous batch; what I mean by this is that '
'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
'you pick the next one.\n'
'2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
'exercise. For the multiple choice question follow the previous instruction to solve them and place '
f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
f'the template for this exercise: {self._passage_blank_space_html()}.\n'
f'3. Restructure the JSON to match this template: {self._level_json_schema()}. You must group the exercises by '
'the parts in the order they appear in the batches components. The context field of a part is the '
'context of a passage component that has text relevant to normal multiple choice questions.\n'
'Do your utmost to fulfill the requirements: make sure you include all non-duplicate questions '
'in your response and correctly structure the JSON.'
)
}
@staticmethod
def fix_ids(response):
counter = 1
for part in response["parts"]:
for exercise in part["exercises"]:
if exercise["type"] == "multipleChoice":
for question in exercise["questions"]:
question["id"] = counter
counter += 1
if exercise["type"] == "fillBlanks":
for i in range(len(exercise["words"])):
exercise["words"][i]["id"] = counter
exercise["solutions"][i]["id"] = counter
counter += 1
return response
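In `_png_completion` above, the page files are sorted numerically (so `page-10.png` follows `page-2.png`) and then sent in overlapping two-page batches, so an exercise cut off at a page boundary appears whole in some batch. The core of that loop, isolated as a dependency-free sketch:

```python
from typing import List

def sort_pages(files: List[str]) -> List[str]:
    # "page-10.png" must sort after "page-2.png", so key on the
    # numeric part of the filename, not the raw string
    pages = [f for f in files if f.startswith("page-") and f.endswith(".png")]
    return sorted(pages, key=lambda f: int(f.split("-")[1].split(".")[0]))

def overlapping_batches(pages: List[str]) -> List[List[str]]:
    # each batch holds the current page plus the next one (if any),
    # so every page boundary is covered by at least one batch
    batches = []
    for i, current in enumerate(pages):
        nxt = pages[i + 1] if i + 1 < len(pages) else None
        batches.append([current, nxt] if nxt else [current])
    return batches

pages = sort_pages(["page-10.png", "page-2.png", "page-1.png", "notes.txt"])
batches = overlapping_batches(pages)
```

The trade-off of the overlap is that every page is processed twice, which is why the later merge prompt has to deduplicate components across batches.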

View File

@@ -0,0 +1,29 @@
from pydantic import BaseModel
from typing import List, Dict, Union, Any, Optional
class Option(BaseModel):
id: str
text: str
class MultipleChoiceQuestion(BaseModel):
type: str = "multipleChoice"
id: str
prompt: str
variant: str = "text"
options: List[Option]
class FillBlanksWord(BaseModel):
type: str = "fillBlanks"
id: str
options: Dict[str, str]
Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]
class Sheet(BaseModel):
batch: Optional[int] = None
components: List[Component]
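`Component` includes `Dict[str, Any]` so that `map_to_sheet` can pass components with unrecognised types through untouched. A dependency-free sketch of that pass-through normalization (a dataclass stands in for the pydantic `MultipleChoiceQuestion`):

```python
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Union

@dataclass
class MCQuestion:  # hypothetical stand-in for MultipleChoiceQuestion
    id: str
    prompt: str

Component = Union[MCQuestion, Dict[str, Any]]

def to_plain(components: List[Component]) -> List[Dict[str, Any]]:
    # typed components are converted to dicts; unknown raw dicts pass
    # through unchanged, mirroring the `else: components.append(item)`
    # branch in ExamMapper.map_to_sheet
    return [asdict(c) if isinstance(c, MCQuestion) else c for c in components]

plain = to_plain([MCQuestion(id="1", prompt="Pick one."), {"type": "part", "part": "1"}])
```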

Binary file not shown.

tmp/placeholder.txt Normal file
View File

@@ -0,0 +1 @@
THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO