Merged master into release/mongodb-migration

Firestore to Mongodb
Merged in feature/level-file-upload (pull request #24 )
2024-09-07 21:54:00 +00:00 · 2024-09-07 19:14:40 +01:00 · 2024-09-06 08:52:42 +00:00 · 2024-09-06 09:36:24 +01:00 · 2024-09-05 11:29:08 +00:00 · 2024-09-05 10:51:26 +00:00
8 changed files with 150 additions and 105 deletions
--- a/.env
+++ b/.env
@@ -1,12 +0,0 @@
 OPENAI_API_KEY=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
 JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
 GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
 HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
 GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
 FIREBASE_SCRYPT_B64_SIGNER_KEY="vbO3Xii2lajSeSkCstq3s/dCwpXP7J2YN9rP/KRreU2vGOT1fg+wzSuy1kIhBECqJHG82tmwAilSxLFFtNKVMA=="
 FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
 FIREBASE_SCRYPT_ROUNDS=8
 FIREBASE_SCRYPT_MEM_COST=14
 FIREBASE_PROJECT_ID=storied-phalanx-349916
--- a/2
+++ b/2
@@ -6,8 +6,6 @@ FROM python:3.11-slim
 # Allow statements and log messages to immediately appear in the logs
 ENV PYTHONUNBUFFERED True
 ENV GOOGLE_APPLICATION_CREDENTIALS=/app/firebase-configs/storied-phalanx-349916.json
 # Copy local code to the container image.
 ENV APP_HOME /app
 WORKDIR $APP_HOME
--- a/app.py
+++ b/app.py
@@ -5,6 +5,7 @@ import firebase_admin
 from firebase_admin import credentials
 from flask import Flask, request
 from flask_jwt_extended import JWTManager, jwt_required
 from pymongo import MongoClient
 from sentence_transformers import SentenceTransformer
 from helper.api_messages import *
@@ -44,12 +45,14 @@ embeddings = SentenceTransformer('all-MiniLM-L6-v2')
 kb = TrainingContentKnowledgeBase(embeddings)
 kb.load_indices_and_metadata()
 open_ai = GPT(OpenAI())
-firestore_client = firestore.client()
+
-tc_service = TrainingContentService(kb, open_ai, firestore_client)
+mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]
 tc_service = TrainingContentService(kb, open_ai, mongo_db)
 upload_level_service = UploadLevelService(open_ai)
-batch_users_service = BatchUsers(firestore_client)
+batch_users_service = BatchUsers(mongo_db)
 thread_event = threading.Event()
@@ -157,7 +160,7 @@ def save_listening():
        else:
            template["variant"] = ExamVariant.FULL.value
-        (result, id) = save_to_db_with_id("listening", template, id)
+        (result, id) = save_to_db_with_id(mongo_db, "listening", template, id)
        if result:
            return {**template, "id": id}
        else:
@@ -967,7 +970,7 @@ def save_speaking():
            name=("thread-save-speaking-" + id)
        )
        thread.start()
-        app.logger.info('Started thread to save speaking. Thread: ' + thread.getName())
+        app.logger.info('Started thread to save speaking. Thread: ' + thread.name)
        # Return response without waiting for create_videos_and_save_to_db to finish
        return {**template, "id": id}
@@ -1197,7 +1200,7 @@ def get_reading_passage_3_question():
 def get_level_exam():
    try:
        number_of_exercises = 25
-        exercises = gen_multiple_choice_level(number_of_exercises)
+        exercises = gen_multiple_choice_level(mongo_db, number_of_exercises)
        return {
            "exercises": [exercises],
            "isDiagnostic": False,
@@ -1290,7 +1293,7 @@ def get_level_utas():
        bs_2["questions"] = blank_space_text_2
        # Reading text
-        reading_text = gen_reading_passage_utas(87, 10, 4)
+        reading_text = gen_reading_passage_utas(mongo_db, 87, 10, 4)
        print(json.dumps(reading_text, indent=4))
        reading["questions"] = reading_text
@@ -1317,6 +1320,7 @@ class CustomLevelExerciseTypes(Enum):
    MULTIPLE_CHOICE_4 = "multiple_choice_4"
    MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
    MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
    FILL_BLANKS_MC = "fill_blanks_mc"
    BLANK_SPACE_TEXT = "blank_space_text"
    READING_PASSAGE_UTAS = "reading_passage_utas"
    WRITING_LETTER = "writing_letter"
@@ -1414,6 +1418,14 @@ def get_custom_level():
                exercise_id = exercise_id + qty
                exercise_qty = exercise_qty - qty
        elif exercise_type == CustomLevelExerciseTypes.FILL_BLANKS_MC.value:
            response["exercises"]["exercise_" + str(i)] = gen_fill_blanks_mc_utas(
                exercise_qty, exercise_id, exercise_text_size
            )
            response["exercises"]["exercise_" + str(i)]["type"] = "fillBlanks"
            response["exercises"]["exercise_" + str(i)]["variant"] = "mc"
            exercise_id = exercise_id + exercise_qty
        elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
            response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
                                                                                    exercise_text_size)
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -5,6 +5,7 @@ import string
 import uuid
 import nltk
 from pymongo.database import Database
 from wonderwords import RandomWord
 from helper.constants import *
@@ -1210,7 +1211,7 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
    }
-def gen_multiple_choice_level(quantity: int, start_id=1):
+def gen_multiple_choice_level(mongo_db: Database, quantity: int, start_id=1):
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
                    "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
@@ -1240,9 +1241,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
                                GEN_QUESTION_TEMPERATURE)
    if len(question["questions"]) != quantity:
-        return gen_multiple_choice_level(quantity, start_id)
+        return gen_multiple_choice_level(mongo_db, quantity, start_id)
    else:
-        all_exams = get_all("level")
+        all_exams = get_all(mongo_db, "level")
        seen_keys = set()
        for i in range(len(question["questions"])):
            question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
@@ -1563,6 +1564,66 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=
        return response
 def gen_fill_blanks_mc_utas(quantity: int, start_id: int, size: int, topic=None):
    """Ask the LLM for a fill-in-the-blanks text with multiple-choice options.

    Builds a three-message prompt (system JSON format + text generation +
    blank/option selection), sends it through ``make_openai_call`` using the
    ``GPT_4_O`` model constant, and returns the ``"question"`` entry of the
    response.

    :param quantity: number of words the prompt asks the model to blank out.
    :param start_id: id the prompt tells the model to give the first blank;
        the prompt asks for ids to be incremented per word.
    :param size: minimum word count requested for the generated text.
    :param topic: topic of the generated text. When omitted (``None``) a
        topic is drawn from ``mti_topics`` on every call.
    :return: ``question["question"]`` from the ``make_openai_call`` result;
        presumably shaped like ``json_format["question"]`` below, but that
        depends on the model honouring the prompt - TODO confirm.
    """
    # A default of ``random.choice(mti_topics)`` in the signature would be
    # evaluated only once, when the function is defined, so every call
    # without an explicit topic would reuse that single topic for the
    # lifetime of the process. Resolve the random topic per call instead.
    if topic is None:
        topic = random.choice(mti_topics)

    # Response layout the model is instructed to follow.
    json_format = {
        "question": {
            "solutions": [
                {
                    "id": "<question id>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
                    "id": "<question id>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
                        "C": "<c option>",
                        "D": "<d option>"
                    }
                }
            ],
            "text": "text"
        }
    }

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": (
                f'Generate a text of at least {size} words about the topic {topic}. Make sure the text is structured '
                'in paragraphs formatted with newlines (\\n\\n) to delimit them.'
            )
        },
        {
            "role": "user",
            # NOTE: only the first literal is an f-string; the '{{id}}' in the
            # following plain literal reaches the model with both braces.
            "content": (
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'For each word choose 4 options, 1 correct and the other ones false. Make sure that only 1 is the '
                'correct one amongst the 4 options and put the solution on the solutions array. '
                'The ids must be ordered throughout the text and the words must be replaced only once. Put the '
                'removed words and respective ids on the words array of the json in the correct order. You can\'t '
                'reference multiple times the same id across the text, if for example one of the chosen words is '
                '"word1" then word1 must be placed in the text with an id once, if word1 is referenced other '
                'times in the text then replace with the actual text of word.'
            )
        }
    ]

    token_count = count_total_tokens(messages)
    # The ["question"] list is presumably the set of top-level keys the
    # helper expects in the response - verify against make_openai_call.
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["question"],
                                GEN_QUESTION_TEMPERATURE)
    return question["question"]
 def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
    json_format = {
        "question": {
@@ -1617,10 +1678,10 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
    return question["question"]
-def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
+def gen_reading_passage_utas(mongo_db: Database, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
    passage = generate_reading_passage_1_text(topic)
    short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
-    mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
+    mc_exercises = gen_text_multiple_choice_utas(mongo_db, passage["text"], start_id + sa_quantity, mc_quantity)
    return {
        "exercises": {
            "shortAnswer": short_answer,
@@ -1659,7 +1720,7 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
                            GEN_QUESTION_TEMPERATURE)["questions"]
-def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
+def gen_text_multiple_choice_utas(mongo_db: Database, text: str, start_id: int, mc_quantity: int):
    json_format = {
        "questions": [
            {
@@ -1711,7 +1772,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
                                GEN_QUESTION_TEMPERATURE)
    if len(question["questions"]) != mc_quantity:
-        return gen_multiple_choice_level(mc_quantity, start_id)
+        return gen_multiple_choice_level(mongo_db, mc_quantity, start_id)
    else:
        response = fix_exercise_ids(question, start_id)
        response["questions"] = randomize_mc_options_order(response["questions"])
--- a/helper/firebase_helper.py
+++ b/helper/firebase_helper.py
@@ -1,7 +1,7 @@
 import logging
 from firebase_admin import firestore
 from google.cloud import storage
 from pymongo.database import Database
 def download_firebase_file(bucket_name, source_blob_name, destination_file_name):
@@ -50,38 +50,16 @@ def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file
        return None
-def save_to_db(collection: str, item):
+def save_to_db_with_id(mongo_db: Database, collection: str, item, id: str):
-    db = firestore.client()
+    collection_ref = mongo_db[collection]
-    collection_ref = db.collection(collection)
+
-    (update_time, document_ref) = collection_ref.add(item)
+    document_ref = collection_ref.insert_one({"id": id, **item})
    if document_ref:
-        logging.info(f"Document added with ID: {document_ref.id}")
+        logging.info(f"Document added with ID: {document_ref.inserted_id}")
-        return (True, document_ref.id)
+        return (True, document_ref.inserted_id)
    else:
        return (False, None)
-def save_to_db_with_id(collection: str, item, id: str):
+def get_all(mongo_db: Database, collection: str):
-    db = firestore.client()
+    return list(mongo_db[collection].find())
    collection_ref = db.collection(collection)
    # Reference to the specific document with the desired ID
    document_ref = collection_ref.document(id)
    # Set the data to the document
    document_ref.set(item)
    if document_ref:
        logging.info(f"Document added with ID: {document_ref.id}")
        return (True, document_ref.id)
    else:
        return (False, None)
 def get_all(collection: str):
    db = firestore.client()
    collection_ref = db.collection(collection)
    all_exercises = (
        collection_ref
        .get()
    )
    return all_exercises
--- a/modules/batch_users/service.py
+++ b/modules/batch_users/service.py
@@ -9,8 +9,7 @@ import pandas as pd
 from typing import Dict
 import shortuuid
-from google.cloud.firestore_v1 import Client
+from pymongo.database import Database
 from google.cloud.firestore_v1.base_query import FieldFilter
 from modules.batch_users.batch_users import BatchUsersDTO, UserDTO
 from modules.helper.file_helper import FileHelper
@@ -32,8 +31,8 @@ class BatchUsers:
        "speaking": 0,
    }
-    def __init__(self, firestore: Client):
+    def __init__(self, mongo: Database):
-        self._db = firestore
+        self._db: Database = mongo
        self._logger = getLogger(__name__)
    def batch_users(self, request_data: Dict):
@@ -141,7 +140,7 @@ class BatchUsers:
    def _insert_new_user(self, user: UserDTO):
        new_user = {
            **user.dict(exclude={
-                'id', 'passport_id', 'groupName', 'expiryDate',
+                'passport_id', 'groupName', 'expiryDate',
                'corporate', 'passwordHash', 'passwordSalt'
            }),
            'bio': "",
@@ -155,11 +154,12 @@ class BatchUsers:
            'registrationDate': datetime.now(),
            'subscriptionExpirationDate': user.expiryDate
        }
-        self._db.collection('users').document(str(user.id)).set(new_user)
+        self._db.users.insert_one(new_user)
    def _create_code(self, user: UserDTO, maker_id: str) -> str:
        code = shortuuid.ShortUUID().random(length=6)
-        self._db.collection('codes').document(code).set({
+        self._db.codes.insert_one({
            'id': code,
            'code': code,
            'creator': maker_id,
            'expiryDate': user.expiryDate,
@@ -198,31 +198,36 @@ class BatchUsers:
            }
        ]
        for group in default_groups:
-            self._db.collection('groups').document(group['id']).set(group)
+            self._db.groups.insert_one(group)
    def _assign_corporate_to_user(self, user: UserDTO, code: str):
        user_id = str(user.id)
-        corporate_users = self._db.collection('users').where(
+        corporate_user = self._db.users.find_one(
-            filter=FieldFilter('email', '==', user.corporate)
+            {"email": user.corporate}
-        ).limit(1).get()
+        )
-        if len(corporate_users) > 0:
+        if corporate_user:
-            corporate_user = corporate_users[0]
+            self._db.codes.update_one(
-            self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True)
+                {"id": code},
-
+                {"$set": {"creator": corporate_user.id}},
                upsert=True
            )
            group_type = "Students" if user.type == "student" else "Teachers"
-            groups = self._db.collection('groups').where(
+            group = self._db.groups.find_one(
-                filter=FieldFilter('admin', '==', corporate_user.id)
+                {
-            ).where(
+                    "admin": corporate_user.id,
-                filter=FieldFilter('name', '==', group_type)
+                    "name": group_type
-            ).limit(1).get()
+                }
            )
-            if len(groups) > 0:
+            if group:
-                group = groups[0]
+                participants = group['participants']
                participants = group.get('participants')
                if user_id not in participants:
                    participants.append(user_id)
-                    group.reference.update({'participants': participants})
+                    self._db.groups.update_one(
                        {"id": group.id},
                        {"$set": {"participants": participants}}
                    )
            else:
                group = {
@@ -233,18 +238,19 @@ class BatchUsers:
                    'disableEditing': True,
                }
-                self._db.collection('groups').document(group['id']).set(group)
+                self._db.groups.insert_one(group)
    def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
        user_id = str(user.id)
-        groups = self._db.collection('groups').where(
+        group = self._db.groups.find_one(
-            filter=FieldFilter('admin', '==', maker_id)
+            {
-        ).where(
+                "admin": maker_id,
-            filter=FieldFilter('name', '==', user.groupName.strip())
+                "name": user.group_name.strip()
-        ).limit(1).get()
+            }
        )
-        if len(groups) == 0:
+        if group:
            new_group = {
                'id': str(uuid.uuid4()),
                'admin': maker_id,
@@ -252,10 +258,12 @@ class BatchUsers:
                'participants': [user_id],
                'disableEditing': False,
            }
-            self._db.collection('groups').document(new_group['id']).set(new_group)
+            self._db.groups.insert_one(new_group)
        else:
-            group = groups[0]
+            participants = group.participants
            participants = group.get('participants')
            if user_id not in participants:
                participants.append(user_id)
-                group.reference.update({'participants': participants})
+                self._db.groups.update_one(
                    {"id": group.id},
                    {"$set": {"participants": participants}}
                )
--- a/modules/training_content/service.py
+++ b/modules/training_content/service.py
@@ -1,9 +1,12 @@
 import json
 import uuid
 from datetime import datetime
 from logging import getLogger
 from typing import Dict, List
 from pymongo.database import Database
 from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
@@ -19,9 +22,9 @@ class TrainingContentService:
    ]
    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
-    def __init__(self, kb, openai, firestore):
+    def __init__(self, kb, openai, mongo: Database):
        self._training_content_module = kb
-        self._db = firestore
+        self._db: Database = mongo
        self._logger = getLogger(__name__)
        self._llm = openai
@@ -37,16 +40,18 @@ class TrainingContentService:
        for area in training_content.weak_areas:
            weak_areas["weak_areas"].append(area.dict())
        new_id = uuid.uuid4()
        training_doc = {
            'id': new_id,
            'created_at': int(datetime.now().timestamp() * 1000),
            **exam_map,
            **usefull_tips.dict(),
            **weak_areas,
            "user": user
        }
-        doc_ref = self._db.collection('training').add(training_doc)
+        self._db.training.insert_one(training_doc)
        return {
-            "id": doc_ref[1].id
+            "id": new_id
        }
    @staticmethod
@@ -400,10 +405,5 @@ class TrainingContentService:
        return result
    def _get_doc_by_id(self, collection: str, doc_id: str):
-        collection_ref = self._db.collection(collection)
+        doc = self._db[collection].find_one({"id": doc_id})
-        doc_ref = collection_ref.document(doc_id)
+        return doc
        doc = doc_ref.get()
        if doc.exists:
            return doc.to_dict()
        return None
--- a/requirements.txt
+++ b/requirements.txt
Author	SHA1	Message	Date
Tiago Ribeiro	676f660f3e	Merged master into release/mongodb-migration	2024-09-07 21:54:00 +00:00
Carlos Mesquita	6cb7c07f57	Firestore to Mongodb	2024-09-07 19:14:40 +01:00
carlos.mesquita	a328f01d2e	Merged in feature/level-file-upload (pull request #24 ) Added missing fillBlanks mc variant that was in UTAS to custom level Approved-by: Tiago Ribeiro	2024-09-06 08:52:42 +00:00
Carlos Mesquita	a931c5ec2e	Added missing fillBlanks mc variant that was in UTAS to custom level	2024-09-06 09:36:24 +01:00
carlos.mesquita	bfc9565e85	Merged in develop (pull request #23 ) Develop Approved-by: Tiago Ribeiro	2024-09-05 11:29:08 +00:00
carlos.mesquita	3d70bcbfd1	Merged in feature/level-file-upload (pull request #22 ) Feature/level file upload Approved-by: Tiago Ribeiro	2024-09-05 10:51:26 +00:00
carlos.mesquita	a2cfa335d7	Merged develop into feature/level-file-upload	2024-09-05 10:48:22 +00:00
Carlos Mesquita	0427d6e1b4	Deleted google creds ENV from Dockerfile since those will be supplied by cloud run	2024-09-05 11:47:34 +01:00
Carlos Mesquita	31c6ed570a	Merge remote-tracking branch 'origin/bug/create-default-groups-if-not-already' into feature/level-file-upload	2024-09-05 11:43:11 +01:00
Carlos Mesquita	3a27c42a69	Removed .env, will add it to gitignore in next commit	2024-09-05 11:41:56 +01:00
Tiago Ribeiro	260dba1ee6	Merged in bug/create-default-groups-if-not-already (pull request #21 ) Updated the code to create the Students/Teachers group if it does not exist yet	2024-09-05 10:11:16 +00:00
carlos.mesquita	b2b4dfb74e	Merged in feature/level-file-upload (pull request #18 ) Switched cli token to GOOGLE_APPLICATION_CREDENTIALS	2024-09-04 11:00:22 +00:00