Merged develop into feature/training-content

UUID wasn't being converted to string, before it used the firebase id and when transitioning to mongo this bug was introduced
Merged master into feature/training-content
2024-09-22 22:27:02 +00:00 · 2024-09-22 23:25:54 +01:00 · 2024-09-08 20:47:50 +00:00 · 2024-09-08 21:47:02 +01:00 · 2024-09-08 08:46:06 +00:00 · 2024-09-08 02:29:56 +01:00
13 changed files with 7900 additions and 109 deletions
--- a/.env
+++ b/.env
@@ -1,12 +0,0 @@
 OPENAI_API_KEY=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
 JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
 GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
 HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
 GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
 FIREBASE_SCRYPT_B64_SIGNER_KEY="vbO3Xii2lajSeSkCstq3s/dCwpXP7J2YN9rP/KRreU2vGOT1fg+wzSuy1kIhBECqJHG82tmwAilSxLFFtNKVMA=="
 FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
 FIREBASE_SCRYPT_ROUNDS=8
 FIREBASE_SCRYPT_MEM_COST=14
 FIREBASE_PROJECT_ID=storied-phalanx-349916
--- a/2
+++ b/2
@@ -6,8 +6,6 @@ FROM python:3.11-slim
 # Allow statements and log messages to immediately appear in the logs
 ENV PYTHONUNBUFFERED True
 ENV GOOGLE_APPLICATION_CREDENTIALS=/app/firebase-configs/storied-phalanx-349916.json
 # Copy local code to the container image.
 ENV APP_HOME /app
 WORKDIR $APP_HOME
--- a/app.py
+++ b/app.py
@@ -5,6 +5,7 @@ import firebase_admin
 from firebase_admin import credentials
 from flask import Flask, request
 from flask_jwt_extended import JWTManager, jwt_required
 from pymongo import MongoClient
 from sentence_transformers import SentenceTransformer
 from helper.api_messages import *
@@ -44,12 +45,14 @@ embeddings = SentenceTransformer('all-MiniLM-L6-v2')
 kb = TrainingContentKnowledgeBase(embeddings)
 kb.load_indices_and_metadata()
 open_ai = GPT(OpenAI())
-firestore_client = firestore.client()
+
-tc_service = TrainingContentService(kb, open_ai, firestore_client)
+mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]
 tc_service = TrainingContentService(kb, open_ai, mongo_db)
 upload_level_service = UploadLevelService(open_ai)
-batch_users_service = BatchUsers(firestore_client)
+batch_users_service = BatchUsers(mongo_db)
 thread_event = threading.Event()
@@ -157,7 +160,7 @@ def save_listening():
        else:
            template["variant"] = ExamVariant.FULL.value
-        (result, id) = save_to_db_with_id("listening", template, id)
+        (result, id) = save_to_db_with_id(mongo_db, "listening", template, id)
        if result:
            return {**template, "id": id}
        else:
@@ -967,7 +970,7 @@ def save_speaking():
            name=("thread-save-speaking-" + id)
        )
        thread.start()
-        app.logger.info('Started thread to save speaking. Thread: ' + thread.getName())
+        app.logger.info('Started thread to save speaking. Thread: ' + thread.name)
        # Return response without waiting for create_videos_and_save_to_db to finish
        return {**template, "id": id}
@@ -1197,7 +1200,7 @@ def get_reading_passage_3_question():
 def get_level_exam():
    try:
        number_of_exercises = 25
-        exercises = gen_multiple_choice_level(number_of_exercises)
+        exercises = gen_multiple_choice_level(mongo_db, number_of_exercises)
        return {
            "exercises": [exercises],
            "isDiagnostic": False,
@@ -1290,7 +1293,7 @@ def get_level_utas():
        bs_2["questions"] = blank_space_text_2
        # Reading text
-        reading_text = gen_reading_passage_utas(87, 10, 4)
+        reading_text = gen_reading_passage_utas(mongo_db, 87, 10, 4)
        print(json.dumps(reading_text, indent=4))
        reading["questions"] = reading_text
@@ -1317,6 +1320,7 @@ class CustomLevelExerciseTypes(Enum):
    MULTIPLE_CHOICE_4 = "multiple_choice_4"
    MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
    MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
    FILL_BLANKS_MC = "fill_blanks_mc"
    BLANK_SPACE_TEXT = "blank_space_text"
    READING_PASSAGE_UTAS = "reading_passage_utas"
    WRITING_LETTER = "writing_letter"
@@ -1414,6 +1418,14 @@ def get_custom_level():
                exercise_id = exercise_id + qty
                exercise_qty = exercise_qty - qty
        elif exercise_type == CustomLevelExerciseTypes.FILL_BLANKS_MC.value:
            response["exercises"]["exercise_" + str(i)] = gen_fill_blanks_mc_utas(
                exercise_qty, exercise_id, exercise_text_size
            )
            response["exercises"]["exercise_" + str(i)]["type"] = "fillBlanks"
            response["exercises"]["exercise_" + str(i)]["variant"] = "mc"
            exercise_id = exercise_id + exercise_qty
        elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
            response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
                                                                                    exercise_text_size)
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -5,6 +5,7 @@ import string
 import uuid
 import nltk
 from pymongo.database import Database
 from wonderwords import RandomWord
 from helper.constants import *
@@ -1210,7 +1211,7 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
    }
-def gen_multiple_choice_level(quantity: int, start_id=1):
+def gen_multiple_choice_level(mongo_db: Database, quantity: int, start_id=1):
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
                    "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
@@ -1240,9 +1241,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
                                GEN_QUESTION_TEMPERATURE)
    if len(question["questions"]) != quantity:
-        return gen_multiple_choice_level(quantity, start_id)
+        return gen_multiple_choice_level(mongo_db, quantity, start_id)
    else:
-        all_exams = get_all("level")
+        all_exams = get_all(mongo_db, "level")
        seen_keys = set()
        for i in range(len(question["questions"])):
            question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
@@ -1563,6 +1564,66 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=
        return response
 def gen_fill_blanks_mc_utas(quantity: int, start_id: int, size: int, topic=None):
    """Ask the LLM for a multiple-choice fill-in-the-blanks exercise.

    The model is prompted to write a text of at least `size` words about
    `topic`, replace `quantity` words with `{{id}}` placeholders numbered from
    `start_id`, and provide 4 options plus the solution for each blank.

    :param quantity: number of words to blank out.
    :param start_id: id given to the first blank; later blanks increment it.
    :param size: minimum number of words requested for the generated text.
    :param topic: topic of the text; when omitted a random entry of
        `mti_topics` is picked on every call.
    :return: the value stored under the "question" key of the model response
        (presumably shaped like `json_format["question"]` - not validated here).
    """
    # Pick the topic per call. A `random.choice(...)` default is evaluated only
    # once, when the function is defined, so every call without an explicit
    # topic would otherwise reuse the same topic for the whole process lifetime.
    if topic is None:
        topic = random.choice(mti_topics)
    # Response schema handed to the model in the system message.
    json_format = {
        "question": {
            "solutions": [
                {
                    "id": "<question id>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
                    "id": "<question id>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
                        "C": "<c option>",
                        "D": "<d option>"
                    }
                }
            ],
            "text": "text"
        }
    }
    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": (
                f'Generate a text of at least {size} words about the topic {topic}. Make sure the text is structured '
                'in paragraphs formatted with newlines (\\n\\n) to delimit them.'
            )
        },
        {
            "role": "user",
            "content": (
                # Only the first literal is an f-string; the `{{id}}` in the next
                # literal is sent to the model verbatim, double braces included.
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'For each word choose 4 options, 1 correct and the other ones false. Make sure that only 1 is the '
                'correct one amongst the 4 options and put the solution on the solutions array. '
                'The ids must be ordered throughout the text and the words must be replaced only once. Put the '
                'removed words and respective ids on the words array of the json in the correct order. You can\'t '
                'reference multiple times the same id across the text, if for example one of the chosen words is '
                '"word1" then word1 must be placed in the text with an id once, if word1 is referenced other '
                'times in the text then replace with the actual text of word.'
            )
        }
    ]
    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["question"],
                                GEN_QUESTION_TEMPERATURE)
    return question["question"]
 def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
    json_format = {
        "question": {
@@ -1617,10 +1678,10 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
    return question["question"]
-def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
+def gen_reading_passage_utas(mongo_db: Database, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
    passage = generate_reading_passage_1_text(topic)
    short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
-    mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
+    mc_exercises = gen_text_multiple_choice_utas(mongo_db, passage["text"], start_id + sa_quantity, mc_quantity)
    return {
        "exercises": {
            "shortAnswer": short_answer,
@@ -1659,7 +1720,7 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
                            GEN_QUESTION_TEMPERATURE)["questions"]
-def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
+def gen_text_multiple_choice_utas(mongo_db: Database, text: str, start_id: int, mc_quantity: int):
    json_format = {
        "questions": [
            {
@@ -1711,7 +1772,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
                                GEN_QUESTION_TEMPERATURE)
    if len(question["questions"]) != mc_quantity:
-        return gen_multiple_choice_level(mc_quantity, start_id)
+        return gen_multiple_choice_level(mongo_db, mc_quantity, start_id)
    else:
        response = fix_exercise_ids(question, start_id)
        response["questions"] = randomize_mc_options_order(response["questions"])
--- a/helper/firebase_helper.py
+++ b/helper/firebase_helper.py
@@ -1,7 +1,7 @@
 import logging
 from firebase_admin import firestore
 from google.cloud import storage
 from pymongo.database import Database
 def download_firebase_file(bucket_name, source_blob_name, destination_file_name):
@@ -50,38 +50,16 @@ def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file
        return None
def save_to_db_with_id(mongo_db: Database, collection: str, item, id: str):
    """Insert `item` into `collection`, storing the caller-supplied `id` in its "id" field.

    :param mongo_db: database handle; indexed by collection name.
    :param collection: name of the target collection.
    :param item: mapping with the document fields (left unmodified).
    :param id: application-level identifier saved under the "id" key.
    :return: `(True, id)` when the insert produced a result, `(False, None)` otherwise.
    """
    # A fresh dict is inserted so the driver's `_id` injection never touches `item`.
    document_ref = mongo_db[collection].insert_one({"id": id, **item})
    if document_ref:
        logging.info(f"Document added with ID: {id}")
        # Return the application id, not `inserted_id`: the latter is Mongo's
        # ObjectId, which callers cannot place in a JSON response and which is
        # not the key documents are looked up by (the "id" field is).
        return (True, id)
    else:
        return (False, None)
-def save_to_db_with_id(collection: str, item, id: str):
+def get_all(mongo_db: Database, collection: str):
-    db = firestore.client()
+    return list(mongo_db[collection].find())
    collection_ref = db.collection(collection)
    # Reference to the specific document with the desired ID
    document_ref = collection_ref.document(id)
    # Set the data to the document
    document_ref.set(item)
    if document_ref:
        logging.info(f"Document added with ID: {document_ref.id}")
        return (True, document_ref.id)
    else:
        return (False, None)
 def get_all(collection: str):
    db = firestore.client()
    collection_ref = db.collection(collection)
    all_exercises = (
        collection_ref
        .get()
    )
    return all_exercises
--- a/modules/batch_users/batch_users.py
+++ b/modules/batch_users/batch_users.py
@@ -21,7 +21,7 @@ class UserDTO(BaseModel):
    passwordSalt: str
    groupName: Optional[str] = None
    corporate: Optional[str] = None
-    studentID: Optional[str] = None
+    studentID: Optional[str | int] = None
    expiryDate: Optional[str] = None
    demographicInformation: Optional[DemographicInfo] = None
--- a/modules/batch_users/service.py
+++ b/modules/batch_users/service.py
@@ -9,8 +9,7 @@ import pandas as pd
 from typing import Dict
 import shortuuid
-from google.cloud.firestore_v1 import Client
+from pymongo.database import Database
 from google.cloud.firestore_v1.base_query import FieldFilter
 from modules.batch_users.batch_users import BatchUsersDTO, UserDTO
 from modules.helper.file_helper import FileHelper
@@ -32,8 +31,8 @@ class BatchUsers:
        "speaking": 0,
    }
-    def __init__(self, firestore: Client):
+    def __init__(self, mongo: Database):
-        self._db = firestore
+        self._db: Database = mongo
        self._logger = getLogger(__name__)
    def batch_users(self, request_data: Dict):
@@ -45,7 +44,7 @@ class BatchUsers:
        result = self._upload_users('./tmp', file_name)
        if result.returncode != 0:
-            error_msg = f"Couldn't upload users. Failed to run command firebase auth import -> ```cmd {result.stderr}```"
+            error_msg = f"Couldn't upload users. Failed to run command firebase auth import -> ```cmd {result.stdout}```"
            self._logger.error(error_msg)
            return error_msg
@@ -56,7 +55,11 @@ class BatchUsers:
    @staticmethod
    def _map_to_batch(request_data: Dict) -> BatchUsersDTO:
        """Build a `BatchUsersDTO` from the raw request payload.

        Reads `request_data["users"]` (an iterable of user mappings) and
        `request_data["makerID"]`. Each user mapping is copied before being
        normalised, so the request payload itself is not modified.
        """
        users: list[UserDTO] = []
        for raw_user in request_data["users"]:
            user = {**raw_user}
            # studentID is optional: only stringify a value that is actually
            # there, otherwise a missing key raises KeyError and an explicit
            # None would be stored as the literal string "None".
            if user.get("studentID") is not None:
                user["studentID"] = str(user["studentID"])
            users.append(UserDTO(**user))
        return BatchUsersDTO(makerID=request_data["makerID"], users=users)
    @staticmethod
@@ -141,9 +144,10 @@ class BatchUsers:
    def _insert_new_user(self, user: UserDTO):
        new_user = {
            **user.dict(exclude={
-                'id', 'passport_id', 'groupName', 'expiryDate',
+                'passport_id', 'groupName', 'expiryDate',
                'corporate', 'passwordHash', 'passwordSalt'
            }),
            'id': str(user.id),
            'bio': "",
            'focus': "academic",
            'status': "active",
@@ -155,11 +159,12 @@ class BatchUsers:
            'registrationDate': datetime.now(),
            'subscriptionExpirationDate': user.expiryDate
        }
-        self._db.collection('users').document(str(user.id)).set(new_user)
+        self._db.users.insert_one(new_user)
    def _create_code(self, user: UserDTO, maker_id: str) -> str:
        code = shortuuid.ShortUUID().random(length=6)
-        self._db.collection('codes').document(code).set({
+        self._db.codes.insert_one({
            'id': code,
            'code': code,
            'creator': maker_id,
            'expiryDate': user.expiryDate,
@@ -198,51 +203,57 @@ class BatchUsers:
            }
        ]
        for group in default_groups:
-            self._db.collection('groups').document(group['id']).set(group)
+            self._db.groups.insert_one(group)
    def _assign_corporate_to_user(self, user: UserDTO, code: str):
        user_id = str(user.id)
-        corporate_users = self._db.collection('users').where(
+        corporate_user = self._db.users.find_one(
-            filter=FieldFilter('email', '==', user.corporate)
+            {"email": user.corporate}
-        ).limit(1).get()
+        )
-        if len(corporate_users) > 0:
+        if corporate_user:
-            corporate_user = corporate_users[0]
+            self._db.codes.update_one(
-            self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True)
+                {"id": code},
-
+                {"$set": {"creator": corporate_user["id"]}},
                upsert=True
            )
            group_type = "Students" if user.type == "student" else "Teachers"
-            groups = self._db.collection('groups').where(
+            group = self._db.groups.find_one(
-                filter=FieldFilter('admin', '==', corporate_user.id)
+                {
-            ).where(
+                    "admin": corporate_user["id"],
-                filter=FieldFilter('name', '==', group_type)
+                    "name": group_type
-            ).limit(1).get()
+                }
            )
-            if len(groups) > 0:
+            if group:
-                group = groups[0]
+                participants = group['participants']
                participants = group.get('participants')
                if user_id not in participants:
                    participants.append(user_id)
-                    group.reference.update({'participants': participants})
+                    self._db.groups.update_one(
                        {"id": group["id"]},
                        {"$set": {"participants": participants}}
                    )
            else:
                group = {
-                    'admin': corporate_user.id,
+                    'admin': corporate_user["id"],
                    'id': str(uuid.uuid4()),
                    'name': group_type,
                    'participants': [user_id],
                    'disableEditing': True,
                }
-                self._db.collection('groups').document(group['id']).set(group)
+                self._db.groups.insert_one(group)
    def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
        user_id = str(user.id)
-        groups = self._db.collection('groups').where(
+        groups = list(self._db.groups.find(
-            filter=FieldFilter('admin', '==', maker_id)
+            {
-        ).where(
+                "admin": maker_id,
-            filter=FieldFilter('name', '==', user.groupName.strip())
+                "name": user.groupName.strip()
-        ).limit(1).get()
+            }
        ))
        if len(groups) == 0:
            new_group = {
@@ -252,10 +263,13 @@ class BatchUsers:
                'participants': [user_id],
                'disableEditing': False,
            }
-            self._db.collection('groups').document(new_group['id']).set(new_group)
+            self._db.groups.insert_one(new_group)
        else:
            group = groups[0]
-            participants = group.get('participants')
+            participants = group["participants"]
            if user_id not in participants:
                participants.append(user_id)
-                group.reference.update({'participants': participants})
+                self._db.groups.update_one(
                    {"id": group["id"]},
                    {"$set": {"participants": participants}}
                )
--- a/modules/training_content/service.py
+++ b/modules/training_content/service.py
@@ -1,9 +1,12 @@
 import json
 import uuid
 from datetime import datetime
 from logging import getLogger
 from typing import Dict, List
 from pymongo.database import Database
 from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
@@ -19,9 +22,9 @@ class TrainingContentService:
    ]
    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
-    def __init__(self, kb, openai, firestore):
+    def __init__(self, kb, openai, mongo: Database):
        self._training_content_module = kb
-        self._db = firestore
+        self._db: Database = mongo
        self._logger = getLogger(__name__)
        self._llm = openai
@@ -37,16 +40,18 @@ class TrainingContentService:
        for area in training_content.weak_areas:
            weak_areas["weak_areas"].append(area.dict())
        new_id = str(uuid.uuid4())
        training_doc = {
            'id': new_id,
            'created_at': int(datetime.now().timestamp() * 1000),
            **exam_map,
            **usefull_tips.dict(),
            **weak_areas,
            "user": user
        }
-        doc_ref = self._db.collection('training').add(training_doc)
+        self._db.training.insert_one(training_doc)
        return {
-            "id": doc_ref[1].id
+            "id": new_id
        }
    @staticmethod
@@ -219,8 +224,6 @@ class TrainingContentService:
                exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
                exam_map[session_key]["module"] = module
                with open('exam_result.json', 'w') as file:
                    json.dump({"exams": exercises}, file, indent=4)
        return {"exams": exercises}, exam_map
@@ -400,10 +403,5 @@ class TrainingContentService:
        return result
    def _get_doc_by_id(self, collection: str, doc_id: str):
-        collection_ref = self._db.collection(collection)
+        doc = self._db[collection].find_one({"id": doc_id})
-        doc_ref = collection_ref.document(doc_id)
+        return doc
        doc = doc_ref.get()
        if doc.exists:
            return doc.to_dict()
        return None
--- a/modules/training_content/tips/instructions.MD
+++ b/modules/training_content/tips/instructions.MD
@@ -0,0 +1,67 @@
 # Adding new training content
 If you're ever tasked with the grueling task of adding more tips from manuals, my condolences.
 There are 4 components of a training content tip: the tip itself, the question, the additional and the segment.
 The tip is the actual tip. If the manual doesn't have an exercise that relates to that tip, fill this out:
 ```json
 {
    "category": "<the category of the tip that will be used to categorize the embeddings and also used in the tip header>",
    "embedding": "<the relevant part of the tip that is needed to make the embedding (clean the tip of useless info that might mislead the queries)>",
    "text": "<The text that the llm will use to assess whether the tip is relevant according to the performance of the student (most of the time just include all the text of the tip)>",
    "html": "<The html that will be rendered in the tip component>",
    "id": "<a uuid4>",
    "verified": <this is just to keep track of the tips that were manually confirmed by you>,
    "standalone": <if the tip doesn't have an exercise this is true else it's false>
 }
 ```
 If the manual does have an exercise that relates to the tip:
 ```json
 {
    // ...    
    "question": "<the exercise question(s) html>",
    "additional": "<context of the question html>",
    "segments": [
        {
            "html": "<the html of a segment, you MUST wrap the html in a single <div> >",
            "wordDelay": <the speed at which letters will be placed on the segment, 200ms is a good one>,
            "holdDelay": <the total time that the segment will be paused before moving onto the next segment, 5000ms is a good one>,
            "highlight": [
                {
                    "targets": ["<the target of the highlight can be: question, additional, segment, all>"],
                    "phrases": ["<the words/phrases/raw html you want to highlight>"]
                }
            ],
            "insertHTML": [
                {
                    "target": "<the target of the insert can be: question, additional>",
                    "targetId": "<the id of an html element>",
                    "position": "<the position of the inserted html can be: replace, prepend and append. Most of the time you will only use replace>",
                    "html": "<the html to replace the element with targetId>"
                },
            ]
        }
    ]
 }
 ```
 In order to create these structures you will have to manually screenshot the tips, exercises and context and send them to an LLM (gpt-4o or claude)
 with a prompt like "get me the html for this". You will then have to check whether the html is properly structured,
 paste it into the prompt.txt file of this directory and send it
 back to an LLM.
 Afterwards you will have to check whether the default styles in /src/components/TrainingContent/FormatTip.ts are adequate. Divs
 (except for the wrapper div of a segment) and span styles are not overridden, but you should aim to use the least amount of
 styles in the tip itself and create custom reusable html elements
 in FormatTip.ts.
 After checking all of the tips render you will have to create new embeddings in the backend, you CAN'T change ids of existing tips since there 
 might be training tips that are already stored in firebase.
 This is a very tedious task, so here's a recommendation for [background noise](https://www.youtube.com/watch?v=lDnva_3fcTc).
 GL HF 
--- a/modules/training_content/tips/pathways_2_rw.json
+++ b/modules/training_content/tips/pathways_2_rw.json
--- a/modules/training_content/tips/prompt.txt
+++ b/modules/training_content/tips/prompt.txt
@@ -0,0 +1,62 @@
 I am going to give you an exercise and a tip, explain how to solve the exercise and how the tip is beneficial, 
 your response must be with this format:
 {
  "segments": [
    {
        "html": "",
        "wordDelay": 0,
        "holdDelay": 0,
        "highlight": [
          {
              "targets": [],
              "phrases": []
          }
        ],
        "insertHTML": [
          {
              "target": "",
              "targetId": "",
              "position": "replace",
              "html": ""
          }
        ]
    }
  ]
 }
 Basically you are going to produce multiple objects and place it in data with the format above to integrate with a react component that highlights passages and inserts html, 
 these objects are segments of your explanation that will be presented to a student. 
 In the html field place a segment of your response that will be streamed to the component with a delay of "wordDelay" ms and in the end of that segment stream the phrases or words inside
 "highlight" will be highlighted for "holdDelay" ms, and the cycle repeats until the whole data array is iterated. Make it so
 that the delays are reasonable for the student to have time to process the message you're trying to send. Take note that
 "wordDelay" is the time between words to display (always 200), and "holdDelay" (no less than 5000) is the total time the highlighter will highlight what you put
 inside "highlight". 
 There are 3 target areas:
 - "question": where the question is placed
 - "additional": where additional content is placed required to answer the question (this section is optional)
 - "segment": a particular segment
 You can use these targets in highlight and insertHTML. In order for insertHTML to work, you will have to place an html element with an "id" attribute
 in the targets you will reference and provide the id via the "targetId", by this I mean if you want to use insert you will need to provide me the 
 html I've sent you with either a placeholder element with an id set or set an id in an existent element.
 If there are already id's in the html I'm giving you then you must use insertHtml.
 Each segment html will be rendered in a div that has margins, so you should condense the information; don't give me just single short phrases that occupy a whole div.
 As previously said this will be seen by a student, so show some train of thought to solve the exercise.
 All the segment's html must be wrapped in a div element, and again since this div element will be rendered with some margins make proper use of the segments html.
 Try to make bullet points.
 Don't explicitly mention the tip right away at the beginning; aim more towards the end.
 Tip:
 Target: "question"
 Target: "additional"
--- a/modules/training_content/tips/send_tips_to_firestore.py
+++ b/modules/training_content/tips/send_tips_to_firestore.py
@@ -0,0 +1,34 @@
 import json
 import os
 from dotenv import load_dotenv
 from pymongo import MongoClient
 load_dotenv()
 # staging:   encoach-staging.json
 # prod:      storied-phalanx-349916.json
 mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]
 if __name__ == "__main__":
    # Flatten every tip of the book (units -> pages -> tips) into the document
    # shape stored in the `walkthrough` collection.
    with open('pathways_2_rw.json', 'r', encoding='utf-8') as file:
        book = json.load(file)
    tips = []
    for unit in book["units"]:
        for page in unit["pages"]:
            for tip in page["tips"]:
                new_tip = {
                    "id": tip["id"],
                    "standalone": tip["standalone"],
                    "tipCategory": tip["category"],
                    "tipHtml": tip["html"]
                }
                # Only tips tied to an exercise carry the exercise payload.
                if not tip["standalone"]:
                    new_tip["exercise"] = tip["exercise"]
                tips.append(new_tip)
    # One batched write instead of one round trip per tip; insert_many rejects
    # an empty list, hence the guard.
    if tips:
        mongo_db.walkthrough.insert_many(tips)
--- a/requirements.txt
+++ b/requirements.txt
Author	SHA1	Message	Date
carlos.mesquita	895aaa1b33	Merged develop into feature/training-content	2024-09-22 22:27:02 +00:00
Carlos Mesquita	aa1433e9ea	UUID wasn't being converted to string, before it used the firebase id and when transitioning to mongo this bug was introduced	2024-09-22 23:25:54 +01:00
carlos.mesquita	8eb5fb6d5f	Merged master into feature/training-content	2024-09-08 20:47:50 +00:00
Carlos Mesquita	c004d9c83c	Pydantic was causing validation errors when passportID was an int	2024-09-08 21:47:02 +01:00
carlos.mesquita	66abc42abb	Merged in feature/training-content (pull request #29 ) And this is why llm code shouldn't be copy pasted blindly Approved-by: Tiago Ribeiro	2024-09-08 08:46:06 +00:00
Carlos Mesquita	2b59119eca	And this is why llm code shouldn't be copy pasted blindly	2024-09-08 02:29:56 +01:00
Tiago Ribeiro	b9a35281ec	Merge branch 'master' into develop	2024-09-08 00:59:33 +01:00
carlos.mesquita	2bbc1f456d	Merged in feature/training-content (pull request #28 ) Forgot to str() on a uuid Approved-by: Tiago Ribeiro	2024-09-07 23:48:39 +00:00
Carlos Mesquita	e8ec862f86	Merge remote-tracking branch 'origin/master' into feature/training-content	2024-09-08 00:39:00 +01:00
Carlos Mesquita	8d4584b8b7	Forgot to str() on a uuid	2024-09-08 00:38:35 +01:00
carlos.mesquita	7a0424aa33	Merged in feature/training-content (pull request #27 ) Feature/training content Approved-by: Tiago Ribeiro	2024-09-07 22:10:55 +00:00
Carlos Mesquita	24ce198dfd	Forgot to change the tips script to mongo	2024-09-07 23:09:00 +01:00
Carlos Mesquita	81911e635c	Merge remote-tracking branch 'origin/master' into feature/training-content	2024-09-07 23:04:20 +01:00
Carlos Mesquita	849db06760	Merge branch 'feature/training-content' of https://bitbucket.org/ecropdev/ielts-be into feature/training-content	2024-09-07 23:04:18 +01:00
Carlos Mesquita	6a38164f9b	Merge remote-tracking branch 'origin/master' into feature/training-content	2024-09-07 23:03:25 +01:00
Tiago Ribeiro	8ae9b64f1a	Merged in release/mongodb-migration (pull request #26 ) Release/mongodb migration	2024-09-07 21:54:25 +00:00
Tiago Ribeiro	676f660f3e	Merged master into release/mongodb-migration	2024-09-07 21:54:00 +00:00
carlos.mesquita	ddf050d692	Merged in feature/training-content (pull request #25 ) ENCOA-69 Pathways 2 Reading and Writing Tips Approved-by: Tiago Ribeiro	2024-09-07 21:50:21 +00:00
Carlos Mesquita	6cb7c07f57	Firestore to Mongodb	2024-09-07 19:14:40 +01:00
carlos.mesquita	8c60f4596f	Merged master into feature/training-content	2024-09-07 10:43:53 +00:00
Carlos Mesquita	cd11fa38ae	Pathways 2 Reading and Writing Tips	2024-09-07 11:42:31 +01:00
carlos.mesquita	a328f01d2e	Merged in feature/level-file-upload (pull request #24 ) Added missing fillBlanks mc variant that was in UTAS to custom level Approved-by: Tiago Ribeiro	2024-09-06 08:52:42 +00:00
Carlos Mesquita	a931c5ec2e	Added missing fillBlanks mc variant that was in UTAS to custom level	2024-09-06 09:36:24 +01:00
carlos.mesquita	bfc9565e85	Merged in develop (pull request #23 ) Develop Approved-by: Tiago Ribeiro	2024-09-05 11:29:08 +00:00
carlos.mesquita	3d70bcbfd1	Merged in feature/level-file-upload (pull request #22 ) Feature/level file upload Approved-by: Tiago Ribeiro	2024-09-05 10:51:26 +00:00
carlos.mesquita	a2cfa335d7	Merged develop into feature/level-file-upload	2024-09-05 10:48:22 +00:00
Carlos Mesquita	0427d6e1b4	Deleted google creds ENV from Dockerfile since those will be supplied by cloud run	2024-09-05 11:47:34 +01:00
Carlos Mesquita	31c6ed570a	Merge remote-tracking branch 'origin/bug/create-default-groups-if-not-already' into feature/level-file-upload	2024-09-05 11:43:11 +01:00
Carlos Mesquita	3a27c42a69	Removed .env, will add it to gitignore in next commit	2024-09-05 11:41:56 +01:00
Tiago Ribeiro	260dba1ee6	Merged in bug/create-default-groups-if-not-already (pull request #21 ) Updated the code to create the Students/Teachers group if it does not exist yet	2024-09-05 10:11:16 +00:00
carlos.mesquita	b2b4dfb74e	Merged in feature/level-file-upload (pull request #18 ) Switched cli token to GOOGLE_APPLICATION_CREDENTIALS	2024-09-04 11:00:22 +00:00