Merged in release/mongodb-migration (pull request #26)
Release/mongodb migration
This commit is contained in:
26
app.py
26
app.py
@@ -5,6 +5,7 @@ import firebase_admin
|
||||
from firebase_admin import credentials
|
||||
from flask import Flask, request
|
||||
from flask_jwt_extended import JWTManager, jwt_required
|
||||
from pymongo import MongoClient
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from helper.api_messages import *
|
||||
@@ -44,12 +45,14 @@ embeddings = SentenceTransformer('all-MiniLM-L6-v2')
|
||||
kb = TrainingContentKnowledgeBase(embeddings)
|
||||
kb.load_indices_and_metadata()
|
||||
open_ai = GPT(OpenAI())
|
||||
firestore_client = firestore.client()
|
||||
tc_service = TrainingContentService(kb, open_ai, firestore_client)
|
||||
|
||||
mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]
|
||||
|
||||
tc_service = TrainingContentService(kb, open_ai, mongo_db)
|
||||
|
||||
upload_level_service = UploadLevelService(open_ai)
|
||||
|
||||
batch_users_service = BatchUsers(firestore_client)
|
||||
batch_users_service = BatchUsers(mongo_db)
|
||||
|
||||
thread_event = threading.Event()
|
||||
|
||||
@@ -157,7 +160,7 @@ def save_listening():
|
||||
else:
|
||||
template["variant"] = ExamVariant.FULL.value
|
||||
|
||||
(result, id) = save_to_db_with_id("listening", template, id)
|
||||
(result, id) = save_to_db_with_id(mongo_db, "listening", template, id)
|
||||
if result:
|
||||
return {**template, "id": id}
|
||||
else:
|
||||
@@ -967,7 +970,7 @@ def save_speaking():
|
||||
name=("thread-save-speaking-" + id)
|
||||
)
|
||||
thread.start()
|
||||
app.logger.info('Started thread to save speaking. Thread: ' + thread.getName())
|
||||
app.logger.info('Started thread to save speaking. Thread: ' + thread.name)
|
||||
|
||||
# Return response without waiting for create_videos_and_save_to_db to finish
|
||||
return {**template, "id": id}
|
||||
@@ -1197,7 +1200,7 @@ def get_reading_passage_3_question():
|
||||
def get_level_exam():
|
||||
try:
|
||||
number_of_exercises = 25
|
||||
exercises = gen_multiple_choice_level(number_of_exercises)
|
||||
exercises = gen_multiple_choice_level(mongo_db, number_of_exercises)
|
||||
return {
|
||||
"exercises": [exercises],
|
||||
"isDiagnostic": False,
|
||||
@@ -1290,7 +1293,7 @@ def get_level_utas():
|
||||
bs_2["questions"] = blank_space_text_2
|
||||
|
||||
# Reading text
|
||||
reading_text = gen_reading_passage_utas(87, 10, 4)
|
||||
reading_text = gen_reading_passage_utas(mongo_db, 87, 10, 4)
|
||||
print(json.dumps(reading_text, indent=4))
|
||||
reading["questions"] = reading_text
|
||||
|
||||
@@ -1317,6 +1320,7 @@ class CustomLevelExerciseTypes(Enum):
|
||||
MULTIPLE_CHOICE_4 = "multiple_choice_4"
|
||||
MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
|
||||
MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
|
||||
FILL_BLANKS_MC = "fill_blanks_mc"
|
||||
BLANK_SPACE_TEXT = "blank_space_text"
|
||||
READING_PASSAGE_UTAS = "reading_passage_utas"
|
||||
WRITING_LETTER = "writing_letter"
|
||||
@@ -1414,6 +1418,14 @@ def get_custom_level():
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.FILL_BLANKS_MC.value:
|
||||
response["exercises"]["exercise_" + str(i)] = gen_fill_blanks_mc_utas(
|
||||
exercise_qty, exercise_id, exercise_text_size
|
||||
)
|
||||
response["exercises"]["exercise_" + str(i)]["type"] = "fillBlanks"
|
||||
response["exercises"]["exercise_" + str(i)]["variant"] = "mc"
|
||||
exercise_id = exercise_id + exercise_qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
|
||||
response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
|
||||
exercise_text_size)
|
||||
|
||||
@@ -5,6 +5,7 @@ import string
|
||||
import uuid
|
||||
|
||||
import nltk
|
||||
from pymongo.database import Database
|
||||
from wonderwords import RandomWord
|
||||
|
||||
from helper.constants import *
|
||||
@@ -1210,7 +1211,7 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
|
||||
}
|
||||
|
||||
|
||||
def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
def gen_multiple_choice_level(mongo_db: Database, quantity: int, start_id=1):
|
||||
gen_multiple_choice_for_text = "Generate " + str(
|
||||
quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
|
||||
"questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
|
||||
@@ -1240,9 +1241,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
|
||||
if len(question["questions"]) != quantity:
|
||||
return gen_multiple_choice_level(quantity, start_id)
|
||||
return gen_multiple_choice_level(mongo_db, quantity, start_id)
|
||||
else:
|
||||
all_exams = get_all("level")
|
||||
all_exams = get_all(mongo_db, "level")
|
||||
seen_keys = set()
|
||||
for i in range(len(question["questions"])):
|
||||
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
|
||||
@@ -1563,6 +1564,66 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=
|
||||
return response
|
||||
|
||||
|
||||
def gen_fill_blanks_mc_utas(quantity: int, start_id: int, size: int, topic=None):
    """Ask the LLM for a fill-in-the-blanks text with multiple-choice options.

    The prompt requests a text of at least ``size`` words about ``topic`` in
    which ``quantity`` words are replaced by ``{{id}}`` placeholders numbered
    from ``start_id``, each with four options (A-D) and one solution.

    Args:
        quantity: Number of words to blank out.
        start_id: Id of the first blank; later blanks increment from it.
        size: Minimum number of words requested for the generated text.
        topic: Topic of the text. When omitted (``None``) a topic is drawn
            from ``mti_topics`` on every call.

    Returns:
        The ``"question"`` entry of the ``make_openai_call`` result.
    """
    # A ``random.choice(...)`` default would be evaluated only once, when the
    # function is defined, pinning every call to the same topic. Resolve it
    # here so each call gets its own random topic.
    if topic is None:
        topic = random.choice(mti_topics)

    json_format = {
        "question": {
            "solutions": [
                {
                    "id": "<question id>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
                    "id": "<question id>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
                        "C": "<c option>",
                        "D": "<d option>"
                    }
                }
            ],
            "text": "text"
        }
    }

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": (
                f'Generate a text of at least {size} words about the topic {topic}. Make sure the text is structured '
                'in paragraphs formatted with newlines (\\n\\n) to delimit them.'
            )
        },
        {
            "role": "user",
            "content": (
                # Only the first literal is an f-string; the ``{{id}}`` in the
                # plain literal below is sent to the model verbatim.
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'For each word choose 4 options, 1 correct and the other ones false. Make sure that only 1 is the '
                'correct one amongst the 4 options and put the solution on the solutions array. '
                'The ids must be ordered throughout the text and the words must be replaced only once. Put the '
                'removed words and respective ids on the words array of the json in the correct order. You can\'t '
                'reference multiple times the same id across the text, if for example one of the chosen words is '
                '"word1" then word1 must be placed in the text with an id once, if word1 is referenced other '
                'times in the text then replace with the actual text of word.'
            )
        }
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["question"],
                                GEN_QUESTION_TEMPERATURE)

    return question["question"]
|
||||
|
||||
|
||||
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
|
||||
json_format = {
|
||||
"question": {
|
||||
@@ -1617,10 +1678,10 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
|
||||
return question["question"]
|
||||
|
||||
|
||||
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
|
||||
def gen_reading_passage_utas(mongo_db: Database, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
|
||||
passage = generate_reading_passage_1_text(topic)
|
||||
short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
|
||||
mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
|
||||
mc_exercises = gen_text_multiple_choice_utas(mongo_db, passage["text"], start_id + sa_quantity, mc_quantity)
|
||||
return {
|
||||
"exercises": {
|
||||
"shortAnswer": short_answer,
|
||||
@@ -1659,7 +1720,7 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
|
||||
GEN_QUESTION_TEMPERATURE)["questions"]
|
||||
|
||||
|
||||
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
|
||||
def gen_text_multiple_choice_utas(mongo_db: Database, text: str, start_id: int, mc_quantity: int):
|
||||
json_format = {
|
||||
"questions": [
|
||||
{
|
||||
@@ -1711,7 +1772,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
|
||||
if len(question["questions"]) != mc_quantity:
|
||||
return gen_multiple_choice_level(mc_quantity, start_id)
|
||||
return gen_multiple_choice_level(mongo_db, mc_quantity, start_id)
|
||||
else:
|
||||
response = fix_exercise_ids(question, start_id)
|
||||
response["questions"] = randomize_mc_options_order(response["questions"])
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import logging
|
||||
|
||||
from firebase_admin import firestore
|
||||
from google.cloud import storage
|
||||
from pymongo.database import Database
|
||||
|
||||
|
||||
def download_firebase_file(bucket_name, source_blob_name, destination_file_name):
|
||||
@@ -50,38 +50,16 @@ def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file
|
||||
return None
|
||||
|
||||
|
||||
def save_to_db(collection: str, item):
|
||||
db = firestore.client()
|
||||
collection_ref = db.collection(collection)
|
||||
(update_time, document_ref) = collection_ref.add(item)
|
||||
def save_to_db_with_id(mongo_db: Database, collection: str, item, id: str):
    """Insert ``item`` into ``collection`` under the application-level ``id``.

    The stored document is ``{"id": id, **item}``, so an ``"id"`` key already
    present in ``item`` takes precedence over the ``id`` argument.

    Args:
        mongo_db: Database handle; indexed by collection name.
        collection: Name of the target collection.
        item: Mapping with the fields to store.
        id: Application-level identifier written to the ``"id"`` field.

    Returns:
        ``(True, stored_id)`` when the insert was acknowledged, where
        ``stored_id`` is the value of the document's ``"id"`` field;
        ``(False, None)`` otherwise.
    """
    document = {"id": id, **item}
    result = mongo_db[collection].insert_one(document)

    # The result object itself is always truthy, so test the acknowledgement
    # flag instead of the object.
    if not result.acknowledged:
        return (False, None)

    # Hand back the application id, not ``inserted_id``: callers embed the
    # returned value in their response as the exam id, and ``inserted_id`` is
    # Mongo's internal ``_id`` rather than the ``"id"`` field that was stored.
    stored_id = document["id"]
    logging.info("Document added with ID: %s", stored_id)
    return (True, stored_id)
|
||||
|
||||
|
||||
def save_to_db_with_id(collection: str, item, id: str):
|
||||
db = firestore.client()
|
||||
collection_ref = db.collection(collection)
|
||||
# Reference to the specific document with the desired ID
|
||||
document_ref = collection_ref.document(id)
|
||||
# Set the data to the document
|
||||
document_ref.set(item)
|
||||
if document_ref:
|
||||
logging.info(f"Document added with ID: {document_ref.id}")
|
||||
return (True, document_ref.id)
|
||||
else:
|
||||
return (False, None)
|
||||
|
||||
|
||||
def get_all(collection: str):
|
||||
db = firestore.client()
|
||||
collection_ref = db.collection(collection)
|
||||
|
||||
all_exercises = (
|
||||
collection_ref
|
||||
.get()
|
||||
)
|
||||
|
||||
return all_exercises
|
||||
def get_all(mongo_db: Database, collection: str):
    """Return every document of ``collection`` as a list.

    Args:
        mongo_db: Database handle; indexed by collection name.
        collection: Name of the collection to read.

    Returns:
        A list holding everything yielded by an unfiltered ``find()``.
    """
    cursor = mongo_db[collection].find()
    # Materialise the cursor so callers can iterate the result repeatedly.
    return list(cursor)
|
||||
|
||||
@@ -9,8 +9,7 @@ import pandas as pd
|
||||
from typing import Dict
|
||||
|
||||
import shortuuid
|
||||
from google.cloud.firestore_v1 import Client
|
||||
from google.cloud.firestore_v1.base_query import FieldFilter
|
||||
from pymongo.database import Database
|
||||
|
||||
from modules.batch_users.batch_users import BatchUsersDTO, UserDTO
|
||||
from modules.helper.file_helper import FileHelper
|
||||
@@ -32,8 +31,8 @@ class BatchUsers:
|
||||
"speaking": 0,
|
||||
}
|
||||
|
||||
def __init__(self, firestore: Client):
|
||||
self._db = firestore
|
||||
def __init__(self, mongo: Database):
|
||||
self._db: Database = mongo
|
||||
self._logger = getLogger(__name__)
|
||||
|
||||
def batch_users(self, request_data: Dict):
|
||||
@@ -141,7 +140,7 @@ class BatchUsers:
|
||||
def _insert_new_user(self, user: UserDTO):
|
||||
new_user = {
|
||||
**user.dict(exclude={
|
||||
'id', 'passport_id', 'groupName', 'expiryDate',
|
||||
'passport_id', 'groupName', 'expiryDate',
|
||||
'corporate', 'passwordHash', 'passwordSalt'
|
||||
}),
|
||||
'bio': "",
|
||||
@@ -155,11 +154,12 @@ class BatchUsers:
|
||||
'registrationDate': datetime.now(),
|
||||
'subscriptionExpirationDate': user.expiryDate
|
||||
}
|
||||
self._db.collection('users').document(str(user.id)).set(new_user)
|
||||
self._db.users.insert_one(new_user)
|
||||
|
||||
def _create_code(self, user: UserDTO, maker_id: str) -> str:
|
||||
code = shortuuid.ShortUUID().random(length=6)
|
||||
self._db.collection('codes').document(code).set({
|
||||
self._db.codes.insert_one({
|
||||
'id': code,
|
||||
'code': code,
|
||||
'creator': maker_id,
|
||||
'expiryDate': user.expiryDate,
|
||||
@@ -198,31 +198,36 @@ class BatchUsers:
|
||||
}
|
||||
]
|
||||
for group in default_groups:
|
||||
self._db.collection('groups').document(group['id']).set(group)
|
||||
self._db.groups.insert_one(group)
|
||||
|
||||
def _assign_corporate_to_user(self, user: UserDTO, code: str):
|
||||
user_id = str(user.id)
|
||||
corporate_users = self._db.collection('users').where(
|
||||
filter=FieldFilter('email', '==', user.corporate)
|
||||
).limit(1).get()
|
||||
if len(corporate_users) > 0:
|
||||
corporate_user = corporate_users[0]
|
||||
self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True)
|
||||
|
||||
corporate_user = self._db.users.find_one(
|
||||
{"email": user.corporate}
|
||||
)
|
||||
if corporate_user:
|
||||
self._db.codes.update_one(
|
||||
{"id": code},
|
||||
{"$set": {"creator": corporate_user.id}},
|
||||
upsert=True
|
||||
)
|
||||
group_type = "Students" if user.type == "student" else "Teachers"
|
||||
|
||||
groups = self._db.collection('groups').where(
|
||||
filter=FieldFilter('admin', '==', corporate_user.id)
|
||||
).where(
|
||||
filter=FieldFilter('name', '==', group_type)
|
||||
).limit(1).get()
|
||||
group = self._db.groups.find_one(
|
||||
{
|
||||
"admin": corporate_user.id,
|
||||
"name": group_type
|
||||
}
|
||||
)
|
||||
|
||||
if len(groups) > 0:
|
||||
group = groups[0]
|
||||
participants = group.get('participants')
|
||||
if group:
|
||||
participants = group['participants']
|
||||
if user_id not in participants:
|
||||
participants.append(user_id)
|
||||
group.reference.update({'participants': participants})
|
||||
self._db.groups.update_one(
|
||||
{"id": group.id},
|
||||
{"$set": {"participants": participants}}
|
||||
)
|
||||
|
||||
else:
|
||||
group = {
|
||||
@@ -233,18 +238,19 @@ class BatchUsers:
|
||||
'disableEditing': True,
|
||||
}
|
||||
|
||||
self._db.collection('groups').document(group['id']).set(group)
|
||||
self._db.groups.insert_one(group)
|
||||
|
||||
def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
|
||||
user_id = str(user.id)
|
||||
|
||||
groups = self._db.collection('groups').where(
|
||||
filter=FieldFilter('admin', '==', maker_id)
|
||||
).where(
|
||||
filter=FieldFilter('name', '==', user.groupName.strip())
|
||||
).limit(1).get()
|
||||
group = self._db.groups.find_one(
|
||||
{
|
||||
"admin": maker_id,
|
||||
"name": user.group_name.strip()
|
||||
}
|
||||
)
|
||||
|
||||
if len(groups) == 0:
|
||||
if group:
|
||||
new_group = {
|
||||
'id': str(uuid.uuid4()),
|
||||
'admin': maker_id,
|
||||
@@ -252,10 +258,12 @@ class BatchUsers:
|
||||
'participants': [user_id],
|
||||
'disableEditing': False,
|
||||
}
|
||||
self._db.collection('groups').document(new_group['id']).set(new_group)
|
||||
self._db.groups.insert_one(new_group)
|
||||
else:
|
||||
group = groups[0]
|
||||
participants = group.get('participants')
|
||||
participants = group.participants
|
||||
if user_id not in participants:
|
||||
participants.append(user_id)
|
||||
group.reference.update({'participants': participants})
|
||||
self._db.groups.update_one(
|
||||
{"id": group.id},
|
||||
{"$set": {"participants": participants}}
|
||||
)
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from logging import getLogger
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from pymongo.database import Database
|
||||
|
||||
from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
|
||||
|
||||
|
||||
@@ -19,9 +22,9 @@ class TrainingContentService:
|
||||
]
|
||||
# strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
|
||||
|
||||
def __init__(self, kb, openai, firestore):
|
||||
def __init__(self, kb, openai, mongo: Database):
|
||||
self._training_content_module = kb
|
||||
self._db = firestore
|
||||
self._db: Database = mongo
|
||||
self._logger = getLogger(__name__)
|
||||
self._llm = openai
|
||||
|
||||
@@ -37,16 +40,18 @@ class TrainingContentService:
|
||||
for area in training_content.weak_areas:
|
||||
weak_areas["weak_areas"].append(area.dict())
|
||||
|
||||
new_id = uuid.uuid4()
|
||||
training_doc = {
|
||||
'id': new_id,
|
||||
'created_at': int(datetime.now().timestamp() * 1000),
|
||||
**exam_map,
|
||||
**usefull_tips.dict(),
|
||||
**weak_areas,
|
||||
"user": user
|
||||
}
|
||||
doc_ref = self._db.collection('training').add(training_doc)
|
||||
self._db.training.insert_one(training_doc)
|
||||
return {
|
||||
"id": doc_ref[1].id
|
||||
"id": new_id
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -400,10 +405,5 @@ class TrainingContentService:
|
||||
return result
|
||||
|
||||
def _get_doc_by_id(self, collection: str, doc_id: str):
|
||||
collection_ref = self._db.collection(collection)
|
||||
doc_ref = collection_ref.document(doc_id)
|
||||
doc = doc_ref.get()
|
||||
|
||||
if doc.exists:
|
||||
return doc.to_dict()
|
||||
return None
|
||||
doc = self._db[collection].find_one({"id": doc_id})
|
||||
return doc
|
||||
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Reference in New Issue
Block a user