From 6cb7c07f57cce1497acac032730679986e06c2cd Mon Sep 17 00:00:00 2001 From: Carlos Mesquita Date: Sat, 7 Sep 2024 19:14:40 +0100 Subject: [PATCH 1/3] Firestore to Mongodb --- app.py | 17 +++--- helper/exercises.py | 15 +++--- helper/firebase_helper.py | 40 ++++---------- modules/batch_users/service.py | 78 +++++++++++++++------------- modules/training_content/service.py | 22 ++++---- requirements.txt | Bin 898 -> 916 bytes 6 files changed, 81 insertions(+), 91 deletions(-) diff --git a/app.py b/app.py index 0dc2b45..b1d060f 100644 --- a/app.py +++ b/app.py @@ -5,6 +5,7 @@ import firebase_admin from firebase_admin import credentials from flask import Flask, request from flask_jwt_extended import JWTManager, jwt_required +from pymongo import MongoClient from sentence_transformers import SentenceTransformer from helper.api_messages import * @@ -44,12 +45,14 @@ embeddings = SentenceTransformer('all-MiniLM-L6-v2') kb = TrainingContentKnowledgeBase(embeddings) kb.load_indices_and_metadata() open_ai = GPT(OpenAI()) -firestore_client = firestore.client() -tc_service = TrainingContentService(kb, open_ai, firestore_client) + +mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')] + +tc_service = TrainingContentService(kb, open_ai, mongo_db) upload_level_service = UploadLevelService(open_ai) -batch_users_service = BatchUsers(firestore_client) +batch_users_service = BatchUsers(mongo_db) thread_event = threading.Event() @@ -157,7 +160,7 @@ def save_listening(): else: template["variant"] = ExamVariant.FULL.value - (result, id) = save_to_db_with_id("listening", template, id) + (result, id) = save_to_db_with_id(mongo_db, "listening", template, id) if result: return {**template, "id": id} else: @@ -967,7 +970,7 @@ def save_speaking(): name=("thread-save-speaking-" + id) ) thread.start() - app.logger.info('Started thread to save speaking. Thread: ' + thread.getName()) + app.logger.info('Started thread to save speaking. Thread: ' + thread.name) # Return response without waiting for create_videos_and_save_to_db to finish return {**template, "id": id} @@ -1197,7 +1200,7 @@ def get_reading_passage_3_question(): def get_level_exam(): try: number_of_exercises = 25 - exercises = gen_multiple_choice_level(number_of_exercises) + exercises = gen_multiple_choice_level(mongo_db, number_of_exercises) return { "exercises": [exercises], "isDiagnostic": False, @@ -1290,7 +1293,7 @@ def get_level_utas(): bs_2["questions"] = blank_space_text_2 # Reading text - reading_text = gen_reading_passage_utas(87, 10, 4) + reading_text = gen_reading_passage_utas(mongo_db, 87, 10, 4) print(json.dumps(reading_text, indent=4)) reading["questions"] = reading_text diff --git a/helper/exercises.py b/helper/exercises.py index d5a065e..3818ec0 100644 --- a/helper/exercises.py +++ b/helper/exercises.py @@ -5,6 +5,7 @@ import string import uuid import nltk +from pymongo.database import Database from wonderwords import RandomWord from helper.constants import * @@ -1210,7 +1211,7 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, } -def gen_multiple_choice_level(quantity: int, start_id=1): +def gen_multiple_choice_level(mongo_db: Database, quantity: int, start_id=1): gen_multiple_choice_for_text = "Generate " + str( quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \ "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \ @@ -1240,9 +1241,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1): GEN_QUESTION_TEMPERATURE) if len(question["questions"]) != quantity: - return gen_multiple_choice_level(quantity, start_id) + return gen_multiple_choice_level(mongo_db, quantity, start_id) else: - all_exams = get_all("level") + all_exams = get_all(mongo_db, "level") seen_keys = set() for i in range(len(question["questions"])): question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], @@ -1677,10 +1678,10 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran return question["question"] -def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)): +def gen_reading_passage_utas(mongo_db: Database, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)): passage = generate_reading_passage_1_text(topic) short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity) - mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity) + mc_exercises = gen_text_multiple_choice_utas(mongo_db, passage["text"], start_id + sa_quantity, mc_quantity) return { "exercises": { "shortAnswer": short_answer, @@ -1719,7 +1720,7 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int): GEN_QUESTION_TEMPERATURE)["questions"] -def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): +def gen_text_multiple_choice_utas(mongo_db: Database, text: str, start_id: int, mc_quantity: int): json_format = { "questions": [ { @@ -1771,7 +1772,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): GEN_QUESTION_TEMPERATURE) if len(question["questions"]) != mc_quantity: - return gen_multiple_choice_level(mc_quantity, start_id) + return gen_multiple_choice_level(mongo_db, mc_quantity, start_id) else: response = fix_exercise_ids(question, start_id) response["questions"] = randomize_mc_options_order(response["questions"]) diff --git a/helper/firebase_helper.py b/helper/firebase_helper.py index 48592b3..2b7773b 100644 --- a/helper/firebase_helper.py +++ b/helper/firebase_helper.py @@ -1,7 +1,7 @@ import logging -from firebase_admin import firestore from google.cloud import storage +from pymongo.database import Database def download_firebase_file(bucket_name, source_blob_name, destination_file_name): @@ -50,38 +50,16 @@ def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file return None -def save_to_db(collection: str, item): - db = firestore.client() - collection_ref = db.collection(collection) - (update_time, document_ref) = collection_ref.add(item) +def save_to_db_with_id(mongo_db: Database, collection: str, item, id: str): + collection_ref = mongo_db[collection] + + document_ref = collection_ref.insert_one({"id": id, **item}) if document_ref: - logging.info(f"Document added with ID: {document_ref.id}") - return (True, document_ref.id) + logging.info(f"Document added with ID: {document_ref.inserted_id}") + return (True, document_ref.inserted_id) else: return (False, None) -def save_to_db_with_id(collection: str, item, id: str): - db = firestore.client() - collection_ref = db.collection(collection) - # Reference to the specific document with the desired ID - document_ref = collection_ref.document(id) - # Set the data to the document - document_ref.set(item) - if document_ref: - logging.info(f"Document added with ID: {document_ref.id}") - return (True, document_ref.id) - else: - return (False, None) - - -def get_all(collection: str): - db = firestore.client() - collection_ref = db.collection(collection) - - all_exercises = ( - collection_ref - .get() - ) - - return all_exercises +def get_all(mongo_db: Database, collection: str): + return list(mongo_db[collection].find()) diff --git a/modules/batch_users/service.py b/modules/batch_users/service.py index df00d9e..ed7c016 100644 --- a/modules/batch_users/service.py +++ b/modules/batch_users/service.py @@ -9,8 +9,7 @@ import pandas as pd from typing import Dict import shortuuid -from google.cloud.firestore_v1 import Client -from google.cloud.firestore_v1.base_query import FieldFilter +from pymongo.database import Database from modules.batch_users.batch_users import BatchUsersDTO, UserDTO from modules.helper.file_helper import FileHelper @@ -32,8 +31,8 @@ class BatchUsers: "speaking": 0, } - def __init__(self, firestore: Client): - self._db = firestore + def __init__(self, mongo: Database): + self._db: Database = mongo self._logger = getLogger(__name__) def batch_users(self, request_data: Dict): @@ -141,7 +140,7 @@ class BatchUsers: def _insert_new_user(self, user: UserDTO): new_user = { **user.dict(exclude={ - 'id', 'passport_id', 'groupName', 'expiryDate', + 'passport_id', 'groupName', 'expiryDate', 'corporate', 'passwordHash', 'passwordSalt' }), 'bio': "", @@ -155,11 +154,12 @@ class BatchUsers: 'registrationDate': datetime.now(), 'subscriptionExpirationDate': user.expiryDate } - self._db.collection('users').document(str(user.id)).set(new_user) + self._db.users.insert_one(new_user) def _create_code(self, user: UserDTO, maker_id: str) -> str: code = shortuuid.ShortUUID().random(length=6) - self._db.collection('codes').document(code).set({ + self._db.codes.insert_one({ + 'id': code, 'code': code, 'creator': maker_id, 'expiryDate': user.expiryDate, @@ -198,31 +198,36 @@ class BatchUsers: } ] for group in default_groups: - self._db.collection('groups').document(group['id']).set(group) + self._db.groups.insert_one(group) def _assign_corporate_to_user(self, user: UserDTO, code: str): user_id = str(user.id) - corporate_users = self._db.collection('users').where( - filter=FieldFilter('email', '==', user.corporate) - ).limit(1).get() - if len(corporate_users) > 0: - corporate_user = corporate_users[0] - self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True) - + corporate_user = self._db.users.find_one( + {"email": user.corporate} + ) + if corporate_user: + self._db.codes.update_one( + {"id": code}, + {"$set": {"creator": corporate_user.id}}, + upsert=True + ) group_type = "Students" if user.type == "student" else "Teachers" - groups = self._db.collection('groups').where( - filter=FieldFilter('admin', '==', corporate_user.id) - ).where( - filter=FieldFilter('name', '==', group_type) - ).limit(1).get() + group = self._db.groups.find_one( + { + "admin": corporate_user.id, + "name": group_type + } + ) - if len(groups) > 0: - group = groups[0] - participants = group.get('participants') + if group: + participants = group['participants'] if user_id not in participants: participants.append(user_id) - group.reference.update({'participants': participants}) + self._db.groups.update_one( + {"id": group.id}, + {"$set": {"participants": participants}} + ) else: group = { @@ -233,18 +238,19 @@ class BatchUsers: 'disableEditing': True, } - self._db.collection('groups').document(group['id']).set(group) + self._db.groups.insert_one(group) def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str): user_id = str(user.id) - groups = self._db.collection('groups').where( - filter=FieldFilter('admin', '==', maker_id) - ).where( - filter=FieldFilter('name', '==', user.groupName.strip()) - ).limit(1).get() + group = self._db.groups.find_one( + { + "admin": maker_id, + "name": user.group_name.strip() + } + ) - if len(groups) == 0: + if group: new_group = { 'id': str(uuid.uuid4()), 'admin': maker_id, @@ -252,10 +258,12 @@ class BatchUsers: 'participants': [user_id], 'disableEditing': False, } - self._db.collection('groups').document(new_group['id']).set(new_group) + self._db.groups.insert_one(new_group) else: - group = groups[0] - participants = group.get('participants') + participants = group.participants if user_id not in participants: participants.append(user_id) - group.reference.update({'participants': participants}) + self._db.groups.update_one( + {"id": group.id}, + {"$set": {"participants": participants}} + ) diff --git a/modules/training_content/service.py b/modules/training_content/service.py index f583571..e6162d7 100644 --- a/modules/training_content/service.py +++ b/modules/training_content/service.py @@ -1,9 +1,12 @@ import json +import uuid from datetime import datetime from logging import getLogger from typing import Dict, List +from pymongo.database import Database + from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO @@ -19,9 +22,9 @@ class TrainingContentService: ] # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing - def __init__(self, kb, openai, firestore): + def __init__(self, kb, openai, mongo: Database): self._training_content_module = kb - self._db = firestore + self._db: Database = mongo self._logger = getLogger(__name__) self._llm = openai @@ -37,16 +40,18 @@ class TrainingContentService: for area in training_content.weak_areas: weak_areas["weak_areas"].append(area.dict()) + new_id = uuid.uuid4() training_doc = { + 'id': new_id, 'created_at': int(datetime.now().timestamp() * 1000), **exam_map, **usefull_tips.dict(), **weak_areas, "user": user } - doc_ref = self._db.collection('training').add(training_doc) + self._db.training.insert_one(training_doc) return { - "id": doc_ref[1].id + "id": new_id } @staticmethod @@ -400,10 +405,5 @@ class TrainingContentService: return result def _get_doc_by_id(self, collection: str, doc_id: str): - collection_ref = self._db.collection(collection) - doc_ref = collection_ref.document(doc_id) - doc = doc_ref.get() - - if doc.exists: - return doc.to_dict() - return None + doc = self._db[collection].find_one({"id": doc_id}) + return doc diff --git a/requirements.txt b/requirements.txt index aa784cd4c73c03f0503bfac294be0240df51c865..0e92c311d26246553428fa806079ad282862464b 100644 GIT binary patch delta 26 gcmZo-pTfSOiCL(Cp^_n&A)g_SAstBaGH@{f09@Jyi~s-t delta 7 OcmbQj-o(D4i5UP1`T|-2 From 24ce198dfd6908680f13c5953624117624c2981a Mon Sep 17 00:00:00 2001 From: Carlos Mesquita Date: Sat, 7 Sep 2024 23:09:00 +0100 Subject: [PATCH 2/3] Forgot to change the tips script to mongo --- .../training_content/tips/send_tips_to_firestore.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/modules/training_content/tips/send_tips_to_firestore.py b/modules/training_content/tips/send_tips_to_firestore.py index a4cfaee..714e944 100644 --- a/modules/training_content/tips/send_tips_to_firestore.py +++ b/modules/training_content/tips/send_tips_to_firestore.py @@ -1,20 +1,18 @@ import json +import os -import firebase_admin from dotenv import load_dotenv -from firebase_admin import credentials, firestore + +from pymongo import MongoClient load_dotenv() # staging: encoach-staging.json # prod: storied-phalanx-349916.json -cred = credentials.Certificate('../../../firebase-configs/encoach-staging.json') -firebase_admin.initialize_app(cred) - +mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')] if __name__ == "__main__": - db = firestore.client() with open('pathways_2_rw.json', 'r', encoding='utf-8') as file: book = json.load(file) @@ -33,5 +31,4 @@ if __name__ == "__main__": tips.append(new_tip) for tip in tips: - doc_ref = db.collection("walkthrough").document(tip["id"]) - doc_ref.set(tip) + doc_ref = mongo_db.walkthrough.insert_one(tip) From 8d4584b8b73ef9bc4a88c3daf5b8b5f0c4d7f87e Mon Sep 17 00:00:00 2001 From: Carlos Mesquita Date: Sun, 8 Sep 2024 00:38:35 +0100 Subject: [PATCH 3/3] Forgot to str() on a uuid --- modules/batch_users/service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/batch_users/service.py b/modules/batch_users/service.py index ed7c016..18155cf 100644 --- a/modules/batch_users/service.py +++ b/modules/batch_users/service.py @@ -143,6 +143,7 @@ class BatchUsers: 'passport_id', 'groupName', 'expiryDate', 'corporate', 'passwordHash', 'passwordSalt' }), + 'id': str(user.id), 'bio': "", 'focus': "academic", 'status': "active",