12 Commits

Author SHA1 Message Date
Tiago Ribeiro
676f660f3e Merged master into release/mongodb-migration 2024-09-07 21:54:00 +00:00
Carlos Mesquita
6cb7c07f57 Firestore to Mongodb 2024-09-07 19:14:40 +01:00
carlos.mesquita
a328f01d2e Merged in feature/level-file-upload (pull request #24)
Added missing fillBlanks mc variant that was in UTAS to custom level

Approved-by: Tiago Ribeiro
2024-09-06 08:52:42 +00:00
Carlos Mesquita
a931c5ec2e Added missing fillBlanks mc variant that was in UTAS to custom level 2024-09-06 09:36:24 +01:00
carlos.mesquita
bfc9565e85 Merged in develop (pull request #23)
Develop

Approved-by: Tiago Ribeiro
2024-09-05 11:29:08 +00:00
carlos.mesquita
3d70bcbfd1 Merged in feature/level-file-upload (pull request #22)
Feature/level file upload

Approved-by: Tiago Ribeiro
2024-09-05 10:51:26 +00:00
carlos.mesquita
a2cfa335d7 Merged develop into feature/level-file-upload 2024-09-05 10:48:22 +00:00
Carlos Mesquita
0427d6e1b4 Deleted google creds ENV from Dockerfile since those will be supplied by cloud run 2024-09-05 11:47:34 +01:00
Carlos Mesquita
31c6ed570a Merge remote-tracking branch 'origin/bug/create-default-groups-if-not-already' into feature/level-file-upload 2024-09-05 11:43:11 +01:00
Carlos Mesquita
3a27c42a69 Removed .env, will add it to gitignore in next commit 2024-09-05 11:41:56 +01:00
Tiago Ribeiro
260dba1ee6 Merged in bug/create-default-groups-if-not-already (pull request #21)
Updated the code to create the Students/Teachers group if it does not exist yet
2024-09-05 10:11:16 +00:00
carlos.mesquita
b2b4dfb74e Merged in feature/level-file-upload (pull request #18)
Switched cli token to GOOGLE_APPLICATION_CREDENTIALS
2024-09-04 11:00:22 +00:00
8 changed files with 150 additions and 105 deletions

12
.env
View File

@@ -1,12 +0,0 @@
OPENAI_API_KEY=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
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
FIREBASE_SCRYPT_B64_SIGNER_KEY="vbO3Xii2lajSeSkCstq3s/dCwpXP7J2YN9rP/KRreU2vGOT1fg+wzSuy1kIhBECqJHG82tmwAilSxLFFtNKVMA=="
FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
FIREBASE_SCRYPT_ROUNDS=8
FIREBASE_SCRYPT_MEM_COST=14
FIREBASE_PROJECT_ID=storied-phalanx-349916

View File

@@ -6,8 +6,6 @@ FROM python:3.11-slim
# Allow statements and log messages to immediately appear in the logs # Allow statements and log messages to immediately appear in the logs
ENV PYTHONUNBUFFERED True ENV PYTHONUNBUFFERED True
ENV GOOGLE_APPLICATION_CREDENTIALS=/app/firebase-configs/storied-phalanx-349916.json
# Copy local code to the container image. # Copy local code to the container image.
ENV APP_HOME /app ENV APP_HOME /app
WORKDIR $APP_HOME WORKDIR $APP_HOME

26
app.py
View File

@@ -5,6 +5,7 @@ import firebase_admin
from firebase_admin import credentials from firebase_admin import credentials
from flask import Flask, request from flask import Flask, request
from flask_jwt_extended import JWTManager, jwt_required from flask_jwt_extended import JWTManager, jwt_required
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
from helper.api_messages import * from helper.api_messages import *
@@ -44,12 +45,14 @@ embeddings = SentenceTransformer('all-MiniLM-L6-v2')
kb = TrainingContentKnowledgeBase(embeddings) kb = TrainingContentKnowledgeBase(embeddings)
kb.load_indices_and_metadata() kb.load_indices_and_metadata()
open_ai = GPT(OpenAI()) open_ai = GPT(OpenAI())
firestore_client = firestore.client()
tc_service = TrainingContentService(kb, open_ai, firestore_client) mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]
tc_service = TrainingContentService(kb, open_ai, mongo_db)
upload_level_service = UploadLevelService(open_ai) upload_level_service = UploadLevelService(open_ai)
batch_users_service = BatchUsers(firestore_client) batch_users_service = BatchUsers(mongo_db)
thread_event = threading.Event() thread_event = threading.Event()
@@ -157,7 +160,7 @@ def save_listening():
else: else:
template["variant"] = ExamVariant.FULL.value template["variant"] = ExamVariant.FULL.value
(result, id) = save_to_db_with_id("listening", template, id) (result, id) = save_to_db_with_id(mongo_db, "listening", template, id)
if result: if result:
return {**template, "id": id} return {**template, "id": id}
else: else:
@@ -967,7 +970,7 @@ def save_speaking():
name=("thread-save-speaking-" + id) name=("thread-save-speaking-" + id)
) )
thread.start() thread.start()
app.logger.info('Started thread to save speaking. Thread: ' + thread.getName()) app.logger.info('Started thread to save speaking. Thread: ' + thread.name)
# Return response without waiting for create_videos_and_save_to_db to finish # Return response without waiting for create_videos_and_save_to_db to finish
return {**template, "id": id} return {**template, "id": id}
@@ -1197,7 +1200,7 @@ def get_reading_passage_3_question():
def get_level_exam(): def get_level_exam():
try: try:
number_of_exercises = 25 number_of_exercises = 25
exercises = gen_multiple_choice_level(number_of_exercises) exercises = gen_multiple_choice_level(mongo_db, number_of_exercises)
return { return {
"exercises": [exercises], "exercises": [exercises],
"isDiagnostic": False, "isDiagnostic": False,
@@ -1290,7 +1293,7 @@ def get_level_utas():
bs_2["questions"] = blank_space_text_2 bs_2["questions"] = blank_space_text_2
# Reading text # Reading text
reading_text = gen_reading_passage_utas(87, 10, 4) reading_text = gen_reading_passage_utas(mongo_db, 87, 10, 4)
print(json.dumps(reading_text, indent=4)) print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text reading["questions"] = reading_text
@@ -1317,6 +1320,7 @@ class CustomLevelExerciseTypes(Enum):
MULTIPLE_CHOICE_4 = "multiple_choice_4" MULTIPLE_CHOICE_4 = "multiple_choice_4"
MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space" MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined" MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
FILL_BLANKS_MC = "fill_blanks_mc"
BLANK_SPACE_TEXT = "blank_space_text" BLANK_SPACE_TEXT = "blank_space_text"
READING_PASSAGE_UTAS = "reading_passage_utas" READING_PASSAGE_UTAS = "reading_passage_utas"
WRITING_LETTER = "writing_letter" WRITING_LETTER = "writing_letter"
@@ -1414,6 +1418,14 @@ def get_custom_level():
exercise_id = exercise_id + qty exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.FILL_BLANKS_MC.value:
response["exercises"]["exercise_" + str(i)] = gen_fill_blanks_mc_utas(
exercise_qty, exercise_id, exercise_text_size
)
response["exercises"]["exercise_" + str(i)]["type"] = "fillBlanks"
response["exercises"]["exercise_" + str(i)]["variant"] = "mc"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value: elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id, response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
exercise_text_size) exercise_text_size)

View File

@@ -5,6 +5,7 @@ import string
import uuid import uuid
import nltk import nltk
from pymongo.database import Database
from wonderwords import RandomWord from wonderwords import RandomWord
from helper.constants import * from helper.constants import *
@@ -1210,7 +1211,7 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
} }
def gen_multiple_choice_level(quantity: int, start_id=1): def gen_multiple_choice_level(mongo_db: Database, quantity: int, start_id=1):
gen_multiple_choice_for_text = "Generate " + str( gen_multiple_choice_for_text = "Generate " + str(
quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \ quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
"questions and some advanced questions. Ensure that the questions cover a range of topics such as " \ "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
@@ -1240,9 +1241,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity: if len(question["questions"]) != quantity:
return gen_multiple_choice_level(quantity, start_id) return gen_multiple_choice_level(mongo_db, quantity, start_id)
else: else:
all_exams = get_all("level") all_exams = get_all(mongo_db, "level")
seen_keys = set() seen_keys = set()
for i in range(len(question["questions"])): for i in range(len(question["questions"])):
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
@@ -1563,6 +1564,66 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=
return response return response
def gen_fill_blanks_mc_utas(quantity: int, start_id: int, size: int, topic=None):
    """Ask the LLM for a fill-in-the-blanks text with multiple-choice options.

    Builds a three-message prompt (a system message describing the expected
    JSON layout, a request for a text about ``topic``, and a request to blank
    out ``quantity`` words with four options each) and sends it through
    ``make_openai_call``.

    Args:
        quantity: Number of words the model is asked to replace with blanks.
        start_id: Id the model is told to give the first blank; following
            blanks are asked to increment from it.
        size: Minimum number of words requested for the generated text.
        topic: Topic of the generated text. When omitted (``None``) a topic
            is drawn from ``mti_topics`` on every call.

    Returns:
        The value stored under the ``"question"`` key of the model response.
        The prompt requests the ``json_format`` layout below (``solutions``,
        ``words``, ``text``); nothing here validates that the model complied.
    """
    # A `topic=random.choice(mti_topics)` default would be evaluated only once,
    # when the function is defined, pinning every default call to the same
    # topic for the whole process lifetime. Resolve the default per call.
    if topic is None:
        topic = random.choice(mti_topics)

    json_format = {
        "question": {
            "solutions": [
                {
                    "id": "<question id>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
                    "id": "<question id>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
                        "C": "<c option>",
                        "D": "<d option>"
                    }
                }
            ],
            "text": "text"
        }
    }

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": (
                f'Generate a text of at least {size} words about the topic {topic}. Make sure the text is structured '
                'in paragraphs formatted with newlines (\\n\\n) to delimit them.'
            )
        },
        {
            "role": "user",
            # Only the first literal is an f-string; the '{{id}}' in the next
            # literal is sent to the model verbatim, double braces included.
            "content": (
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'For each word choose 4 options, 1 correct and the other ones false. Make sure that only 1 is the '
                'correct one amongst the 4 options and put the solution on the solutions array. '
                'The ids must be ordered throughout the text and the words must be replaced only once. Put the '
                'removed words and respective ids on the words array of the json in the correct order. You can\'t '
                'reference multiple times the same id across the text, if for example one of the chosen words is '
                '"word1" then word1 must be placed in the text with an id once, if word1 is referenced other '
                'times in the text then replace with the actual text of word.'
            )
        }
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["question"],
                                GEN_QUESTION_TEMPERATURE)

    return question["question"]
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)): def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
json_format = { json_format = {
"question": { "question": {
@@ -1617,10 +1678,10 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
return question["question"] return question["question"]
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)): def gen_reading_passage_utas(mongo_db: Database, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
passage = generate_reading_passage_1_text(topic) passage = generate_reading_passage_1_text(topic)
short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity) short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity) mc_exercises = gen_text_multiple_choice_utas(mongo_db, passage["text"], start_id + sa_quantity, mc_quantity)
return { return {
"exercises": { "exercises": {
"shortAnswer": short_answer, "shortAnswer": short_answer,
@@ -1659,7 +1720,7 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
GEN_QUESTION_TEMPERATURE)["questions"] GEN_QUESTION_TEMPERATURE)["questions"]
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): def gen_text_multiple_choice_utas(mongo_db: Database, text: str, start_id: int, mc_quantity: int):
json_format = { json_format = {
"questions": [ "questions": [
{ {
@@ -1711,7 +1772,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != mc_quantity: if len(question["questions"]) != mc_quantity:
return gen_multiple_choice_level(mc_quantity, start_id) return gen_multiple_choice_level(mongo_db, mc_quantity, start_id)
else: else:
response = fix_exercise_ids(question, start_id) response = fix_exercise_ids(question, start_id)
response["questions"] = randomize_mc_options_order(response["questions"]) response["questions"] = randomize_mc_options_order(response["questions"])

View File

@@ -1,7 +1,7 @@
import logging import logging
from firebase_admin import firestore
from google.cloud import storage from google.cloud import storage
from pymongo.database import Database
def download_firebase_file(bucket_name, source_blob_name, destination_file_name): def download_firebase_file(bucket_name, source_blob_name, destination_file_name):
@@ -50,38 +50,16 @@ def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file
return None return None
def save_to_db(collection: str, item): def save_to_db_with_id(mongo_db: Database, collection: str, item, id: str):
db = firestore.client() collection_ref = mongo_db[collection]
collection_ref = db.collection(collection)
(update_time, document_ref) = collection_ref.add(item) document_ref = collection_ref.insert_one({"id": id, **item})
if document_ref: if document_ref:
logging.info(f"Document added with ID: {document_ref.id}") logging.info(f"Document added with ID: {document_ref.inserted_id}")
return (True, document_ref.id) return (True, document_ref.inserted_id)
else: else:
return (False, None) return (False, None)
def save_to_db_with_id(collection: str, item, id: str): def get_all(mongo_db: Database, collection: str):
db = firestore.client() return list(mongo_db[collection].find())
collection_ref = db.collection(collection)
# Reference to the specific document with the desired ID
document_ref = collection_ref.document(id)
# Set the data to the document
document_ref.set(item)
if document_ref:
logging.info(f"Document added with ID: {document_ref.id}")
return (True, document_ref.id)
else:
return (False, None)
def get_all(collection: str):
db = firestore.client()
collection_ref = db.collection(collection)
all_exercises = (
collection_ref
.get()
)
return all_exercises

View File

@@ -9,8 +9,7 @@ import pandas as pd
from typing import Dict from typing import Dict
import shortuuid import shortuuid
from google.cloud.firestore_v1 import Client from pymongo.database import Database
from google.cloud.firestore_v1.base_query import FieldFilter
from modules.batch_users.batch_users import BatchUsersDTO, UserDTO from modules.batch_users.batch_users import BatchUsersDTO, UserDTO
from modules.helper.file_helper import FileHelper from modules.helper.file_helper import FileHelper
@@ -32,8 +31,8 @@ class BatchUsers:
"speaking": 0, "speaking": 0,
} }
def __init__(self, firestore: Client): def __init__(self, mongo: Database):
self._db = firestore self._db: Database = mongo
self._logger = getLogger(__name__) self._logger = getLogger(__name__)
def batch_users(self, request_data: Dict): def batch_users(self, request_data: Dict):
@@ -141,7 +140,7 @@ class BatchUsers:
def _insert_new_user(self, user: UserDTO): def _insert_new_user(self, user: UserDTO):
new_user = { new_user = {
**user.dict(exclude={ **user.dict(exclude={
'id', 'passport_id', 'groupName', 'expiryDate', 'passport_id', 'groupName', 'expiryDate',
'corporate', 'passwordHash', 'passwordSalt' 'corporate', 'passwordHash', 'passwordSalt'
}), }),
'bio': "", 'bio': "",
@@ -155,11 +154,12 @@ class BatchUsers:
'registrationDate': datetime.now(), 'registrationDate': datetime.now(),
'subscriptionExpirationDate': user.expiryDate 'subscriptionExpirationDate': user.expiryDate
} }
self._db.collection('users').document(str(user.id)).set(new_user) self._db.users.insert_one(new_user)
def _create_code(self, user: UserDTO, maker_id: str) -> str: def _create_code(self, user: UserDTO, maker_id: str) -> str:
code = shortuuid.ShortUUID().random(length=6) code = shortuuid.ShortUUID().random(length=6)
self._db.collection('codes').document(code).set({ self._db.codes.insert_one({
'id': code,
'code': code, 'code': code,
'creator': maker_id, 'creator': maker_id,
'expiryDate': user.expiryDate, 'expiryDate': user.expiryDate,
@@ -198,31 +198,36 @@ class BatchUsers:
} }
] ]
for group in default_groups: for group in default_groups:
self._db.collection('groups').document(group['id']).set(group) self._db.groups.insert_one(group)
def _assign_corporate_to_user(self, user: UserDTO, code: str): def _assign_corporate_to_user(self, user: UserDTO, code: str):
user_id = str(user.id) user_id = str(user.id)
corporate_users = self._db.collection('users').where( corporate_user = self._db.users.find_one(
filter=FieldFilter('email', '==', user.corporate) {"email": user.corporate}
).limit(1).get() )
if len(corporate_users) > 0: if corporate_user:
corporate_user = corporate_users[0] self._db.codes.update_one(
self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True) {"id": code},
{"$set": {"creator": corporate_user.id}},
upsert=True
)
group_type = "Students" if user.type == "student" else "Teachers" group_type = "Students" if user.type == "student" else "Teachers"
groups = self._db.collection('groups').where( group = self._db.groups.find_one(
filter=FieldFilter('admin', '==', corporate_user.id) {
).where( "admin": corporate_user.id,
filter=FieldFilter('name', '==', group_type) "name": group_type
).limit(1).get() }
)
if len(groups) > 0: if group:
group = groups[0] participants = group['participants']
participants = group.get('participants')
if user_id not in participants: if user_id not in participants:
participants.append(user_id) participants.append(user_id)
group.reference.update({'participants': participants}) self._db.groups.update_one(
{"id": group.id},
{"$set": {"participants": participants}}
)
else: else:
group = { group = {
@@ -233,18 +238,19 @@ class BatchUsers:
'disableEditing': True, 'disableEditing': True,
} }
self._db.collection('groups').document(group['id']).set(group) self._db.groups.insert_one(group)
def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str): def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
user_id = str(user.id) user_id = str(user.id)
groups = self._db.collection('groups').where( group = self._db.groups.find_one(
filter=FieldFilter('admin', '==', maker_id) {
).where( "admin": maker_id,
filter=FieldFilter('name', '==', user.groupName.strip()) "name": user.group_name.strip()
).limit(1).get() }
)
if len(groups) == 0: if group:
new_group = { new_group = {
'id': str(uuid.uuid4()), 'id': str(uuid.uuid4()),
'admin': maker_id, 'admin': maker_id,
@@ -252,10 +258,12 @@ class BatchUsers:
'participants': [user_id], 'participants': [user_id],
'disableEditing': False, 'disableEditing': False,
} }
self._db.collection('groups').document(new_group['id']).set(new_group) self._db.groups.insert_one(new_group)
else: else:
group = groups[0] participants = group.participants
participants = group.get('participants')
if user_id not in participants: if user_id not in participants:
participants.append(user_id) participants.append(user_id)
group.reference.update({'participants': participants}) self._db.groups.update_one(
{"id": group.id},
{"$set": {"participants": participants}}
)

View File

@@ -1,9 +1,12 @@
import json import json
import uuid
from datetime import datetime from datetime import datetime
from logging import getLogger from logging import getLogger
from typing import Dict, List from typing import Dict, List
from pymongo.database import Database
from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
@@ -19,9 +22,9 @@ class TrainingContentService:
] ]
# strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
def __init__(self, kb, openai, firestore): def __init__(self, kb, openai, mongo: Database):
self._training_content_module = kb self._training_content_module = kb
self._db = firestore self._db: Database = mongo
self._logger = getLogger(__name__) self._logger = getLogger(__name__)
self._llm = openai self._llm = openai
@@ -37,16 +40,18 @@ class TrainingContentService:
for area in training_content.weak_areas: for area in training_content.weak_areas:
weak_areas["weak_areas"].append(area.dict()) weak_areas["weak_areas"].append(area.dict())
new_id = uuid.uuid4()
training_doc = { training_doc = {
'id': new_id,
'created_at': int(datetime.now().timestamp() * 1000), 'created_at': int(datetime.now().timestamp() * 1000),
**exam_map, **exam_map,
**usefull_tips.dict(), **usefull_tips.dict(),
**weak_areas, **weak_areas,
"user": user "user": user
} }
doc_ref = self._db.collection('training').add(training_doc) self._db.training.insert_one(training_doc)
return { return {
"id": doc_ref[1].id "id": new_id
} }
@staticmethod @staticmethod
@@ -400,10 +405,5 @@ class TrainingContentService:
return result return result
def _get_doc_by_id(self, collection: str, doc_id: str): def _get_doc_by_id(self, collection: str, doc_id: str):
collection_ref = self._db.collection(collection) doc = self._db[collection].find_one({"id": doc_id})
doc_ref = collection_ref.document(doc_id) return doc
doc = doc_ref.get()
if doc.exists:
return doc.to_dict()
return None

Binary file not shown.