Compare commits
31 Commits
bug/create
...
feature/tr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
895aaa1b33 | ||
|
|
aa1433e9ea | ||
|
|
8eb5fb6d5f | ||
|
|
c004d9c83c | ||
|
|
66abc42abb | ||
|
|
2b59119eca | ||
|
|
b9a35281ec | ||
|
|
2bbc1f456d | ||
|
|
e8ec862f86 | ||
|
|
8d4584b8b7 | ||
|
|
7a0424aa33 | ||
|
|
24ce198dfd | ||
|
|
81911e635c | ||
|
|
849db06760 | ||
|
|
6a38164f9b | ||
|
|
8ae9b64f1a | ||
|
|
676f660f3e | ||
|
|
ddf050d692 | ||
|
|
6cb7c07f57 | ||
|
|
8c60f4596f | ||
|
|
cd11fa38ae | ||
|
|
a328f01d2e | ||
|
|
a931c5ec2e | ||
|
|
bfc9565e85 | ||
|
|
3d70bcbfd1 | ||
|
|
a2cfa335d7 | ||
|
|
0427d6e1b4 | ||
|
|
31c6ed570a | ||
|
|
3a27c42a69 | ||
|
|
260dba1ee6 | ||
|
|
b2b4dfb74e |
12
.env
12
.env
@@ -1,12 +0,0 @@
|
||||
OPENAI_API_KEY=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
|
||||
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
|
||||
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
|
||||
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
|
||||
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
|
||||
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
|
||||
|
||||
FIREBASE_SCRYPT_B64_SIGNER_KEY="vbO3Xii2lajSeSkCstq3s/dCwpXP7J2YN9rP/KRreU2vGOT1fg+wzSuy1kIhBECqJHG82tmwAilSxLFFtNKVMA=="
|
||||
FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
|
||||
FIREBASE_SCRYPT_ROUNDS=8
|
||||
FIREBASE_SCRYPT_MEM_COST=14
|
||||
FIREBASE_PROJECT_ID=storied-phalanx-349916
|
||||
@@ -6,8 +6,6 @@ FROM python:3.11-slim
|
||||
# Allow statements and log messages to immediately appear in the logs
|
||||
ENV PYTHONUNBUFFERED True
|
||||
|
||||
ENV GOOGLE_APPLICATION_CREDENTIALS=/app/firebase-configs/storied-phalanx-349916.json
|
||||
|
||||
# Copy local code to the container image.
|
||||
ENV APP_HOME /app
|
||||
WORKDIR $APP_HOME
|
||||
|
||||
26
app.py
26
app.py
@@ -5,6 +5,7 @@ import firebase_admin
|
||||
from firebase_admin import credentials
|
||||
from flask import Flask, request
|
||||
from flask_jwt_extended import JWTManager, jwt_required
|
||||
from pymongo import MongoClient
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from helper.api_messages import *
|
||||
@@ -44,12 +45,14 @@ embeddings = SentenceTransformer('all-MiniLM-L6-v2')
|
||||
kb = TrainingContentKnowledgeBase(embeddings)
|
||||
kb.load_indices_and_metadata()
|
||||
open_ai = GPT(OpenAI())
|
||||
firestore_client = firestore.client()
|
||||
tc_service = TrainingContentService(kb, open_ai, firestore_client)
|
||||
|
||||
mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]
|
||||
|
||||
tc_service = TrainingContentService(kb, open_ai, mongo_db)
|
||||
|
||||
upload_level_service = UploadLevelService(open_ai)
|
||||
|
||||
batch_users_service = BatchUsers(firestore_client)
|
||||
batch_users_service = BatchUsers(mongo_db)
|
||||
|
||||
thread_event = threading.Event()
|
||||
|
||||
@@ -157,7 +160,7 @@ def save_listening():
|
||||
else:
|
||||
template["variant"] = ExamVariant.FULL.value
|
||||
|
||||
(result, id) = save_to_db_with_id("listening", template, id)
|
||||
(result, id) = save_to_db_with_id(mongo_db, "listening", template, id)
|
||||
if result:
|
||||
return {**template, "id": id}
|
||||
else:
|
||||
@@ -967,7 +970,7 @@ def save_speaking():
|
||||
name=("thread-save-speaking-" + id)
|
||||
)
|
||||
thread.start()
|
||||
app.logger.info('Started thread to save speaking. Thread: ' + thread.getName())
|
||||
app.logger.info('Started thread to save speaking. Thread: ' + thread.name)
|
||||
|
||||
# Return response without waiting for create_videos_and_save_to_db to finish
|
||||
return {**template, "id": id}
|
||||
@@ -1197,7 +1200,7 @@ def get_reading_passage_3_question():
|
||||
def get_level_exam():
|
||||
try:
|
||||
number_of_exercises = 25
|
||||
exercises = gen_multiple_choice_level(number_of_exercises)
|
||||
exercises = gen_multiple_choice_level(mongo_db, number_of_exercises)
|
||||
return {
|
||||
"exercises": [exercises],
|
||||
"isDiagnostic": False,
|
||||
@@ -1290,7 +1293,7 @@ def get_level_utas():
|
||||
bs_2["questions"] = blank_space_text_2
|
||||
|
||||
# Reading text
|
||||
reading_text = gen_reading_passage_utas(87, 10, 4)
|
||||
reading_text = gen_reading_passage_utas(mongo_db, 87, 10, 4)
|
||||
print(json.dumps(reading_text, indent=4))
|
||||
reading["questions"] = reading_text
|
||||
|
||||
@@ -1317,6 +1320,7 @@ class CustomLevelExerciseTypes(Enum):
|
||||
MULTIPLE_CHOICE_4 = "multiple_choice_4"
|
||||
MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
|
||||
MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
|
||||
FILL_BLANKS_MC = "fill_blanks_mc"
|
||||
BLANK_SPACE_TEXT = "blank_space_text"
|
||||
READING_PASSAGE_UTAS = "reading_passage_utas"
|
||||
WRITING_LETTER = "writing_letter"
|
||||
@@ -1414,6 +1418,14 @@ def get_custom_level():
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.FILL_BLANKS_MC.value:
|
||||
response["exercises"]["exercise_" + str(i)] = gen_fill_blanks_mc_utas(
|
||||
exercise_qty, exercise_id, exercise_text_size
|
||||
)
|
||||
response["exercises"]["exercise_" + str(i)]["type"] = "fillBlanks"
|
||||
response["exercises"]["exercise_" + str(i)]["variant"] = "mc"
|
||||
exercise_id = exercise_id + exercise_qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
|
||||
response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
|
||||
exercise_text_size)
|
||||
|
||||
@@ -5,6 +5,7 @@ import string
|
||||
import uuid
|
||||
|
||||
import nltk
|
||||
from pymongo.database import Database
|
||||
from wonderwords import RandomWord
|
||||
|
||||
from helper.constants import *
|
||||
@@ -1210,7 +1211,7 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
|
||||
}
|
||||
|
||||
|
||||
def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
def gen_multiple_choice_level(mongo_db: Database, quantity: int, start_id=1):
|
||||
gen_multiple_choice_for_text = "Generate " + str(
|
||||
quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
|
||||
"questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
|
||||
@@ -1240,9 +1241,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
|
||||
if len(question["questions"]) != quantity:
|
||||
return gen_multiple_choice_level(quantity, start_id)
|
||||
return gen_multiple_choice_level(mongo_db, quantity, start_id)
|
||||
else:
|
||||
all_exams = get_all("level")
|
||||
all_exams = get_all(mongo_db, "level")
|
||||
seen_keys = set()
|
||||
for i in range(len(question["questions"])):
|
||||
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
|
||||
@@ -1563,6 +1564,66 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=
|
||||
return response
|
||||
|
||||
|
||||
def gen_fill_blanks_mc_utas(quantity: int, start_id: int, size: int, topic=None):
    """Ask the LLM for a fill-in-the-blanks text with multiple-choice options.

    Builds a three-message prompt (JSON output format, text generation
    request, blank/option selection request), sends it through
    ``make_openai_call`` and returns the ``"question"`` entry of the result.

    Args:
        quantity: Number of words the prompt asks the model to blank out.
        start_id: Id the prompt asks the model to give the first blank;
            later blanks are requested with incrementing ids.
        size: Minimum number of words requested for the generated text.
        topic: Topic of the generated text. When omitted (``None``) a topic
            is drawn from ``mti_topics`` on every call.

    Returns:
        The value stored under ``"question"`` in the ``make_openai_call``
        response (presumably shaped like ``json_format["question"]`` below;
        the model output is not validated here).
    """
    # A default of ``random.choice(mti_topics)`` in the signature would be
    # evaluated only once, when the function is defined, so every call
    # without an explicit topic would reuse the same topic for the lifetime
    # of the process. Resolve the random topic per call instead.
    if topic is None:
        topic = random.choice(mti_topics)

    json_format = {
        "question": {
            "solutions": [
                {
                    "id": "<question id>",
                    "solution": "<the option that holds the solution>"
                }
            ],
            "words": [
                {
                    "id": "<question id>",
                    "options": {
                        "A": "<a option>",
                        "B": "<b option>",
                        "C": "<c option>",
                        "D": "<d option>"
                    }
                }
            ],
            "text": "text"
        }
    }

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": (
                f'Generate a text of at least {size} words about the topic {topic}. Make sure the text is structured '
                'in paragraphs formatted with newlines (\\n\\n) to delimit them.'
            )
        },
        {
            "role": "user",
            # NOTE: only the first literal is an f-string; the '{{id}}' in the
            # second literal is sent to the model verbatim (double braces).
            "content": (
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'For each word choose 4 options, 1 correct and the other ones false. Make sure that only 1 is the '
                'correct one amongst the 4 options and put the solution on the solutions array. '
                'The ids must be ordered throughout the text and the words must be replaced only once. Put the '
                'removed words and respective ids on the words array of the json in the correct order. You can\'t '
                'reference multiple times the same id across the text, if for example one of the chosen words is '
                '"word1" then word1 must be placed in the text with an id once, if word1 is referenced other '
                'times in the text then replace with the actual text of word.'
            )
        }
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["question"],
                                GEN_QUESTION_TEMPERATURE)

    return question["question"]
|
||||
|
||||
|
||||
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
|
||||
json_format = {
|
||||
"question": {
|
||||
@@ -1617,10 +1678,10 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
|
||||
return question["question"]
|
||||
|
||||
|
||||
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
|
||||
def gen_reading_passage_utas(mongo_db: Database, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
|
||||
passage = generate_reading_passage_1_text(topic)
|
||||
short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
|
||||
mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
|
||||
mc_exercises = gen_text_multiple_choice_utas(mongo_db, passage["text"], start_id + sa_quantity, mc_quantity)
|
||||
return {
|
||||
"exercises": {
|
||||
"shortAnswer": short_answer,
|
||||
@@ -1659,7 +1720,7 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
|
||||
GEN_QUESTION_TEMPERATURE)["questions"]
|
||||
|
||||
|
||||
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
|
||||
def gen_text_multiple_choice_utas(mongo_db: Database, text: str, start_id: int, mc_quantity: int):
|
||||
json_format = {
|
||||
"questions": [
|
||||
{
|
||||
@@ -1711,7 +1772,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
|
||||
if len(question["questions"]) != mc_quantity:
|
||||
return gen_multiple_choice_level(mc_quantity, start_id)
|
||||
return gen_multiple_choice_level(mongo_db, mc_quantity, start_id)
|
||||
else:
|
||||
response = fix_exercise_ids(question, start_id)
|
||||
response["questions"] = randomize_mc_options_order(response["questions"])
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import logging
|
||||
|
||||
from firebase_admin import firestore
|
||||
from google.cloud import storage
|
||||
from pymongo.database import Database
|
||||
|
||||
|
||||
def download_firebase_file(bucket_name, source_blob_name, destination_file_name):
|
||||
@@ -50,38 +50,16 @@ def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file
|
||||
return None
|
||||
|
||||
|
||||
def save_to_db(collection: str, item):
|
||||
db = firestore.client()
|
||||
collection_ref = db.collection(collection)
|
||||
(update_time, document_ref) = collection_ref.add(item)
|
||||
def save_to_db_with_id(mongo_db: Database, collection: str, item, id: str):
|
||||
collection_ref = mongo_db[collection]
|
||||
|
||||
document_ref = collection_ref.insert_one({"id": id, **item})
|
||||
if document_ref:
|
||||
logging.info(f"Document added with ID: {document_ref.id}")
|
||||
return (True, document_ref.id)
|
||||
logging.info(f"Document added with ID: {document_ref.inserted_id}")
|
||||
return (True, document_ref.inserted_id)
|
||||
else:
|
||||
return (False, None)
|
||||
|
||||
|
||||
def save_to_db_with_id(collection: str, item, id: str):
|
||||
db = firestore.client()
|
||||
collection_ref = db.collection(collection)
|
||||
# Reference to the specific document with the desired ID
|
||||
document_ref = collection_ref.document(id)
|
||||
# Set the data to the document
|
||||
document_ref.set(item)
|
||||
if document_ref:
|
||||
logging.info(f"Document added with ID: {document_ref.id}")
|
||||
return (True, document_ref.id)
|
||||
else:
|
||||
return (False, None)
|
||||
|
||||
|
||||
def get_all(collection: str):
|
||||
db = firestore.client()
|
||||
collection_ref = db.collection(collection)
|
||||
|
||||
all_exercises = (
|
||||
collection_ref
|
||||
.get()
|
||||
)
|
||||
|
||||
return all_exercises
|
||||
def get_all(mongo_db: Database, collection: str):
|
||||
return list(mongo_db[collection].find())
|
||||
|
||||
@@ -21,7 +21,7 @@ class UserDTO(BaseModel):
|
||||
passwordSalt: str
|
||||
groupName: Optional[str] = None
|
||||
corporate: Optional[str] = None
|
||||
studentID: Optional[str] = None
|
||||
studentID: Optional[str | int] = None
|
||||
expiryDate: Optional[str] = None
|
||||
demographicInformation: Optional[DemographicInfo] = None
|
||||
|
||||
|
||||
@@ -9,8 +9,7 @@ import pandas as pd
|
||||
from typing import Dict
|
||||
|
||||
import shortuuid
|
||||
from google.cloud.firestore_v1 import Client
|
||||
from google.cloud.firestore_v1.base_query import FieldFilter
|
||||
from pymongo.database import Database
|
||||
|
||||
from modules.batch_users.batch_users import BatchUsersDTO, UserDTO
|
||||
from modules.helper.file_helper import FileHelper
|
||||
@@ -32,8 +31,8 @@ class BatchUsers:
|
||||
"speaking": 0,
|
||||
}
|
||||
|
||||
def __init__(self, firestore: Client):
|
||||
self._db = firestore
|
||||
def __init__(self, mongo: Database):
|
||||
self._db: Database = mongo
|
||||
self._logger = getLogger(__name__)
|
||||
|
||||
def batch_users(self, request_data: Dict):
|
||||
@@ -45,7 +44,7 @@ class BatchUsers:
|
||||
|
||||
result = self._upload_users('./tmp', file_name)
|
||||
if result.returncode != 0:
|
||||
error_msg = f"Couldn't upload users. Failed to run command firebase auth import -> ```cmd {result.stderr}```"
|
||||
error_msg = f"Couldn't upload users. Failed to run command firebase auth import -> ```cmd {result.stdout}```"
|
||||
self._logger.error(error_msg)
|
||||
return error_msg
|
||||
|
||||
@@ -56,7 +55,11 @@ class BatchUsers:
|
||||
|
||||
@staticmethod
|
||||
def _map_to_batch(request_data: Dict) -> BatchUsersDTO:
|
||||
users: list[UserDTO] = [UserDTO(**user) for user in request_data["users"]]
|
||||
users_list = [{**user} for user in request_data["users"]]
|
||||
for user in users_list:
|
||||
user["studentID"] = str(user["studentID"])
|
||||
|
||||
users: list[UserDTO] = [UserDTO(**user) for user in users_list]
|
||||
return BatchUsersDTO(makerID=request_data["makerID"], users=users)
|
||||
|
||||
@staticmethod
|
||||
@@ -141,9 +144,10 @@ class BatchUsers:
|
||||
def _insert_new_user(self, user: UserDTO):
|
||||
new_user = {
|
||||
**user.dict(exclude={
|
||||
'id', 'passport_id', 'groupName', 'expiryDate',
|
||||
'passport_id', 'groupName', 'expiryDate',
|
||||
'corporate', 'passwordHash', 'passwordSalt'
|
||||
}),
|
||||
'id': str(user.id),
|
||||
'bio': "",
|
||||
'focus': "academic",
|
||||
'status': "active",
|
||||
@@ -155,11 +159,12 @@ class BatchUsers:
|
||||
'registrationDate': datetime.now(),
|
||||
'subscriptionExpirationDate': user.expiryDate
|
||||
}
|
||||
self._db.collection('users').document(str(user.id)).set(new_user)
|
||||
self._db.users.insert_one(new_user)
|
||||
|
||||
def _create_code(self, user: UserDTO, maker_id: str) -> str:
|
||||
code = shortuuid.ShortUUID().random(length=6)
|
||||
self._db.collection('codes').document(code).set({
|
||||
self._db.codes.insert_one({
|
||||
'id': code,
|
||||
'code': code,
|
||||
'creator': maker_id,
|
||||
'expiryDate': user.expiryDate,
|
||||
@@ -198,51 +203,57 @@ class BatchUsers:
|
||||
}
|
||||
]
|
||||
for group in default_groups:
|
||||
self._db.collection('groups').document(group['id']).set(group)
|
||||
self._db.groups.insert_one(group)
|
||||
|
||||
def _assign_corporate_to_user(self, user: UserDTO, code: str):
|
||||
user_id = str(user.id)
|
||||
corporate_users = self._db.collection('users').where(
|
||||
filter=FieldFilter('email', '==', user.corporate)
|
||||
).limit(1).get()
|
||||
if len(corporate_users) > 0:
|
||||
corporate_user = corporate_users[0]
|
||||
self._db.collection('codes').document(code).set({'creator': corporate_user.id}, merge=True)
|
||||
|
||||
corporate_user = self._db.users.find_one(
|
||||
{"email": user.corporate}
|
||||
)
|
||||
if corporate_user:
|
||||
self._db.codes.update_one(
|
||||
{"id": code},
|
||||
{"$set": {"creator": corporate_user["id"]}},
|
||||
upsert=True
|
||||
)
|
||||
group_type = "Students" if user.type == "student" else "Teachers"
|
||||
|
||||
groups = self._db.collection('groups').where(
|
||||
filter=FieldFilter('admin', '==', corporate_user.id)
|
||||
).where(
|
||||
filter=FieldFilter('name', '==', group_type)
|
||||
).limit(1).get()
|
||||
group = self._db.groups.find_one(
|
||||
{
|
||||
"admin": corporate_user["id"],
|
||||
"name": group_type
|
||||
}
|
||||
)
|
||||
|
||||
if len(groups) > 0:
|
||||
group = groups[0]
|
||||
participants = group.get('participants')
|
||||
if group:
|
||||
participants = group['participants']
|
||||
if user_id not in participants:
|
||||
participants.append(user_id)
|
||||
group.reference.update({'participants': participants})
|
||||
self._db.groups.update_one(
|
||||
{"id": group["id"]},
|
||||
{"$set": {"participants": participants}}
|
||||
)
|
||||
|
||||
else:
|
||||
group = {
|
||||
'admin': corporate_user.id,
|
||||
'admin': corporate_user["id"],
|
||||
'id': str(uuid.uuid4()),
|
||||
'name': group_type,
|
||||
'participants': [user_id],
|
||||
'disableEditing': True,
|
||||
}
|
||||
|
||||
self._db.collection('groups').document(group['id']).set(group)
|
||||
self._db.groups.insert_one(group)
|
||||
|
||||
def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
|
||||
user_id = str(user.id)
|
||||
|
||||
groups = self._db.collection('groups').where(
|
||||
filter=FieldFilter('admin', '==', maker_id)
|
||||
).where(
|
||||
filter=FieldFilter('name', '==', user.groupName.strip())
|
||||
).limit(1).get()
|
||||
groups = list(self._db.groups.find(
|
||||
{
|
||||
"admin": maker_id,
|
||||
"name": user.groupName.strip()
|
||||
}
|
||||
))
|
||||
|
||||
if len(groups) == 0:
|
||||
new_group = {
|
||||
@@ -252,10 +263,13 @@ class BatchUsers:
|
||||
'participants': [user_id],
|
||||
'disableEditing': False,
|
||||
}
|
||||
self._db.collection('groups').document(new_group['id']).set(new_group)
|
||||
self._db.groups.insert_one(new_group)
|
||||
else:
|
||||
group = groups[0]
|
||||
participants = group.get('participants')
|
||||
participants = group["participants"]
|
||||
if user_id not in participants:
|
||||
participants.append(user_id)
|
||||
group.reference.update({'participants': participants})
|
||||
self._db.groups.update_one(
|
||||
{"id": group["id"]},
|
||||
{"$set": {"participants": participants}}
|
||||
)
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from logging import getLogger
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from pymongo.database import Database
|
||||
|
||||
from modules.training_content.dtos import TrainingContentDTO, WeakAreaDTO, QueryDTO, DetailsDTO, TipsDTO
|
||||
|
||||
|
||||
@@ -19,9 +22,9 @@ class TrainingContentService:
|
||||
]
|
||||
# strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
|
||||
|
||||
def __init__(self, kb, openai, firestore):
|
||||
def __init__(self, kb, openai, mongo: Database):
|
||||
self._training_content_module = kb
|
||||
self._db = firestore
|
||||
self._db: Database = mongo
|
||||
self._logger = getLogger(__name__)
|
||||
self._llm = openai
|
||||
|
||||
@@ -37,16 +40,18 @@ class TrainingContentService:
|
||||
for area in training_content.weak_areas:
|
||||
weak_areas["weak_areas"].append(area.dict())
|
||||
|
||||
new_id = str(uuid.uuid4())
|
||||
training_doc = {
|
||||
'id': new_id,
|
||||
'created_at': int(datetime.now().timestamp() * 1000),
|
||||
**exam_map,
|
||||
**usefull_tips.dict(),
|
||||
**weak_areas,
|
||||
"user": user
|
||||
}
|
||||
doc_ref = self._db.collection('training').add(training_doc)
|
||||
self._db.training.insert_one(training_doc)
|
||||
return {
|
||||
"id": doc_ref[1].id
|
||||
"id": new_id
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -219,8 +224,6 @@ class TrainingContentService:
|
||||
|
||||
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
|
||||
exam_map[session_key]["module"] = module
|
||||
with open('exam_result.json', 'w') as file:
|
||||
json.dump({"exams": exercises}, file, indent=4)
|
||||
|
||||
return {"exams": exercises}, exam_map
|
||||
|
||||
@@ -400,10 +403,5 @@ class TrainingContentService:
|
||||
return result
|
||||
|
||||
def _get_doc_by_id(self, collection: str, doc_id: str):
|
||||
collection_ref = self._db.collection(collection)
|
||||
doc_ref = collection_ref.document(doc_id)
|
||||
doc = doc_ref.get()
|
||||
|
||||
if doc.exists:
|
||||
return doc.to_dict()
|
||||
return None
|
||||
doc = self._db[collection].find_one({"id": doc_id})
|
||||
return doc
|
||||
|
||||
67
modules/training_content/tips/instructions.MD
Normal file
67
modules/training_content/tips/instructions.MD
Normal file
@@ -0,0 +1,67 @@
|
||||
# Adding new training content
|
||||
|
||||
If you're ever tasked with the grueling task of adding more tips from manuals, my condolences.
|
||||
|
||||
There are 4 components of a training content tip: the tip itself, the question, the additional and the segment.
|
||||
|
||||
The tip is the actual tip. If the manual doesn't have an exercise that relates to that tip, fill this out:
|
||||
|
||||
```json
|
||||
{
|
||||
"category": "<the category of the tip that will be used to categorize the embeddings and also used in the tip header>",
|
||||
"embedding": "<the relevant part of the tip that is needed to make the embedding (clean the tip of useless info that might mislead the queries)>",
|
||||
"text": "<The text that the llm will use to assess whether the tip is relevant according to the performance of the student (most of the time just include all the text of the tip)>",
|
||||
"html": "<The html that will be rendered in the tip component>",
|
||||
"id": "<a uuid4>",
|
||||
"verified": <this is just to keep track of the tips that were manually confirmed by you>,
|
||||
"standalone": <if the tip doesn't have an exercise this is true else it's false>
|
||||
}
|
||||
```
|
||||
|
||||
If the manual does have an exercise that relates to the tip:
|
||||
|
||||
```json
|
||||
{
|
||||
// ...
|
||||
"question": "<the exercise question(s) html>",
|
||||
"additional": "<context of the question html>",
|
||||
"segments": [
|
||||
{
|
||||
"html": "<the html of a segment, you MUST wrap the html in a single <div> >",
|
||||
"wordDelay": <the speed at which letters will be placed on the segment, 200ms is a good one>,
|
||||
"holdDelay": <the total time that the segment will be paused before moving onto the next segment, 5000ms is a good one>,
|
||||
"highlight": [
|
||||
{
|
||||
"targets": ["<the target of the highlight can be: question, additional, segment, all>"],
|
||||
"phrases": ["<the words/phrases/raw html you want to highlight>"]
|
||||
}
|
||||
],
|
||||
"insertHTML": [
|
||||
{
|
||||
"target": "<the target of the insert can be: question, additional>",
|
||||
"targetId": "<the id of an html element>",
|
||||
"position": "<the position of the inserted html can be: replace, prepend and append. Most of the time you will only use replace>",
|
||||
"html": "<the html to replace the element with targetId>"
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
In order to create these structures you will have to manually screenshot the tips, exercises, context and send them to an llm (gpt-4o or claude)
|
||||
with a prompt like "get me the html for this", you will have to check whether the html is properly structured and then
|
||||
paste them in the prompt.txt file of this directory and send it
|
||||
back to an llm.
|
||||
|
||||
Afterwards you will have to check whether the default styles in /src/components/TrainingContent/FormatTip.ts are adequate, divs
|
||||
(except for the wrapper div of a segment) and span styles are not overridden but you should aim to use the least amount of
|
||||
styles in the tip itself and create custom reusable html elements
|
||||
in FormatTip.ts.
|
||||
|
||||
After checking all of the tips render you will have to create new embeddings in the backend, you CAN'T change ids of existing tips since there
|
||||
might be training tips that are already stored in firebase.
|
||||
|
||||
This is a very tedious task; here's a recommendation for [background noise](https://www.youtube.com/watch?v=lDnva_3fcTc).
|
||||
|
||||
GL HF
|
||||
7579
modules/training_content/tips/pathways_2_rw.json
Normal file
7579
modules/training_content/tips/pathways_2_rw.json
Normal file
File diff suppressed because it is too large
Load Diff
62
modules/training_content/tips/prompt.txt
Normal file
62
modules/training_content/tips/prompt.txt
Normal file
@@ -0,0 +1,62 @@
|
||||
I am going to give you an exercise and a tip, explain how to solve the exercise and how the tip is beneficial,
|
||||
your response must be with this format:
|
||||
|
||||
{
|
||||
"segments": [
|
||||
{
|
||||
"html": "",
|
||||
"wordDelay": 0,
|
||||
"holdDelay": 0,
|
||||
"highlight": [
|
||||
{
|
||||
"targets": [],
|
||||
"phrases": []
|
||||
}
|
||||
],
|
||||
"insertHTML": [
|
||||
{
|
||||
"target": "",
|
||||
"targetId": "",
|
||||
"position": "replace",
|
||||
"html": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Basically you are going to produce multiple objects and place it in data with the format above to integrate with a react component that highlights passages and inserts html,
|
||||
these objects are segments of your explanation that will be presented to a student.
|
||||
|
||||
In the html field place a segment of your response that will be streamed to the component with a delay of "wordDelay" ms and in the end of that segment stream the phrases or words inside
|
||||
"highlight" will be highlighted for "holdDelay" ms, and the cycle repeats until the whole data array is iterated. Make it so
|
||||
that the delays are reasonable for the student to have time to process the message you're trying to send. Take note that
|
||||
"wordDelay" is the time between words to display (always 200), and "holdDelay" (no less than 5000) is the total time the highlighter will highlight what you put
|
||||
inside "highlight".
|
||||
|
||||
There are 3 target areas:
|
||||
- "question": where the question is placed
|
||||
- "additional": where additional content is placed required to answer the question (this section is optional)
|
||||
- "segment": a particular segment
|
||||
|
||||
You can use these targets in highlight and insertHTML. In order for insertHTML to work, you will have to place an html element with an "id" attribute
|
||||
in the targets you will reference and provide the id via the "targetId", by this I mean if you want to use insert you will need to provide me the
|
||||
html I've sent you with either a placeholder element with an id set or set an id in an existent element.
|
||||
|
||||
If there are already id's in the html I'm giving you then you must use insertHTML.
|
||||
|
||||
Each segment html will be rendered in a div that has margins, so you should condense the information; don't give me just single short phrases that occupy a whole div.
|
||||
As previously said this will be seen by a student so show some train of thought to solve the exercise.
|
||||
All the segment's html must be wrapped in a div element, and again since this div element will be rendered with some margins make proper use of the segments html.
|
||||
|
||||
Try to make bulletpoints.
|
||||
Don't explicitly mention the tip right away at the beginning, aim more towards the end.
|
||||
|
||||
|
||||
Tip:
|
||||
|
||||
|
||||
Target: "question"
|
||||
|
||||
|
||||
Target: "additional"
|
||||
34
modules/training_content/tips/send_tips_to_firestore.py
Normal file
34
modules/training_content/tips/send_tips_to_firestore.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""Load the tips from ``pathways_2_rw.json`` into the MongoDB ``walkthrough`` collection.

Run as a script from the directory that contains ``pathways_2_rw.json``.
The target database is taken from the ``MONGODB_URI`` and ``MONGODB_DB``
environment variables (a ``.env`` file is loaded first).
"""
import json
import os

from dotenv import load_dotenv

from pymongo import MongoClient

load_dotenv()

# NOTE(review): the two names below look like Firebase credential files left
# over from before the MongoDB migration - confirm whether they still apply.
# staging: encoach-staging.json
# prod: storied-phalanx-349916.json

mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]


def _build_tips(book):
    """Flatten ``book["units"][*]["pages"][*]["tips"]`` into walkthrough documents."""
    tips = []
    for unit in book["units"]:
        for page in unit["pages"]:
            for tip in page["tips"]:
                new_tip = {
                    "id": tip["id"],
                    "standalone": tip["standalone"],
                    "tipCategory": tip["category"],
                    "tipHtml": tip["html"]
                }
                # Only tips that come with an exercise carry the walkthrough data.
                if not tip["standalone"]:
                    new_tip["exercise"] = tip["exercise"]
                tips.append(new_tip)
    return tips


if __name__ == "__main__":
    with open('pathways_2_rw.json', 'r', encoding='utf-8') as file:
        book = json.load(file)

    tips = _build_tips(book)

    # One bulk write instead of one round-trip per tip. insert_many rejects
    # an empty list, so skip the call when the book has no tips.
    if tips:
        mongo_db.walkthrough.insert_many(tips)
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Reference in New Issue
Block a user