Fastapi refactor update

This commit is contained in:
Carlos-Mesquita
2024-10-01 19:31:01 +01:00
parent f92a803d96
commit 2a032c5aba
132 changed files with 22856 additions and 10309 deletions

28
.env
View File

@@ -1,8 +1,30 @@
ENV=local
OPENAI_API_KEY=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
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/encoach-staging.json
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
MONGODB_URI=mongodb+srv://user:JKpFBymv0WLv3STj@encoach.lz18a.mongodb.net/?retryWrites=true&w=majority&appName=EnCoach
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/encoach-staging.json
# Staging
ENV=staging
#
#FIREBASE_SCRYPT_B64_SIGNER_KEY="qjo/b5U5oNxA8o+PHFMZx/ZfG8ZQ7688zYmwMOcfZvVjOM6aHe4Jf270xgyrVArqLIQwFi7VkFnbysBjueMbVw=="
#FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
#FIREBASE_SCRYPT_ROUNDS=8
#FIREBASE_SCRYPT_MEM_COST=14
#FIREBASE_PROJECT_ID=encoach-staging
#MONGODB_DB=staging
# Prod
#ENV=production
#GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
#FIREBASE_SCRYPT_B64_SIGNER_KEY="vbO3Xii2lajSeSkCstq3s/dCwpXP7J2YN9rP/KRreU2vGOT1fg+wzSuy1kIhBECqJHG82tmwAilSxLFFtNKVMA=="
#FIREBASE_SCRYPT_B64_SALT_SEPARATOR="Bw=="
#FIREBASE_SCRYPT_ROUNDS=8
#FIREBASE_SCRYPT_MEM_COST=14
#FIREBASE_PROJECT_ID=storied-phalanx-349916
MONGODB_DB=staging

2
.gitignore vendored
View File

@@ -3,4 +3,4 @@ __pycache__
.env
.DS_Store
.venv
scripts
_scripts

3
.idea/ielts-be.iml generated
View File

@@ -7,8 +7,9 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
<excludeFolder url="file://$MODULE_DIR$/venv" />
<excludeFolder url="file://$MODULE_DIR$/_scripts" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.11 (ielts-be)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PackageRequirementsSettings">

View File

@@ -3,9 +3,6 @@ Latest refactor from develop's branch commit 5d5cd21 2024-08-28
# Endpoints
In ielts-ui I've added a wrapper around every backend request in '/src/utils/translate.backend.endpoints.ts'. It uses the
new endpoints when the "BACKEND_TYPE" environment variable is set to "async"; if the variable is absent or has any
other value, the wrapper returns the old endpoint.
| Method | ielts-be | This one |
|--------|--------------------------------------|---------------------------------------------|

15
app.py
View File

@@ -1,27 +1,22 @@
import os
import click
import uvicorn
from dotenv import load_dotenv
load_dotenv()
@click.command()
@click.option(
"--env",
type=click.Choice(["local", "dev", "prod"], case_sensitive=False),
default="local",
type=click.Choice(["local", "staging", "production"], case_sensitive=False),
default="staging",
)
def main(env: str):
load_dotenv()
os.environ["ENV"] = env
if env == "prod":
raise Exception("Production environment not supported yet!")
uvicorn.run(
app="app.server:app",
host="localhost",
port=8000,
reload=True if env != "prod" else False,
reload=True if env != "production" else False,
workers=1,
)

View File

@@ -7,6 +7,7 @@ from .speaking import speaking_router
from .training import training_router
from .writing import writing_router
from .grade import grade_router
from .user import user_router
router = APIRouter()
router.include_router(home_router, prefix="/api", tags=["Home"])
@@ -16,3 +17,4 @@ router.include_router(speaking_router, prefix="/api/speaking", tags=["Speaking"]
router.include_router(writing_router, prefix="/api/writing", tags=["Writing"])
router.include_router(grade_router, prefix="/api/grade", tags=["Grade"])
router.include_router(training_router, prefix="/api/training", tags=["Training"])
router.include_router(user_router, prefix="/api/user", tags=["Users"])

21
app/api/user.py Normal file
View File

@@ -0,0 +1,21 @@
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends
from app.dtos.user_batch import BatchUsersDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import IUserController
controller = "user_controller"
user_router = APIRouter()
@user_router.post(
    '/import',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def batch_import(
    batch: BatchUsersDTO,
    user_controller: IUserController = Depends(Provide[controller])
):
    """Bulk-import users from *batch*.

    Requires a valid bearer token (enforced by the route dependency). The
    controller is resolved from the DI container under the key held in the
    module-level ``controller`` variable ("user_controller").
    """
    return await user_controller.batch_import(batch)

View File

@@ -1,5 +1,5 @@
from .dependency_injection import config_di
from .dependency_injection import DependencyInjector
__all__ = [
"config_di"
"DependencyInjector"
]

View File

@@ -1,11 +1,41 @@
from enum import Enum
########################################################################################################################
# DISCLAIMER #
# #
# All the array and dict "constants" are mutable variables, if somewhere in the app you modify them in any way, shape #
# or form all the other methods that will use these "constants" will also use the modified version. If you're unsure #
# whether a method will modify it use copy's deepcopy: #
# #
# from copy import deepcopy #
# #
# new_ref = deepcopy(CONSTANT) #
# #
# Using a wrapper method that returns a "constant" won't handle nested mutables. #
########################################################################################################################
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
"discrimination", "politic", "christianity", "islam", "christian", "christians",
"jews", "jew", "discrimination", "discriminatory"]
class UserDefaults:
    """Per-skill level defaults stamped onto newly created users.

    Both dicts are class-level mutables — per the disclaimer at the top of
    this module, deepcopy them before handing them to code that may mutate
    them.
    """

    # Target level per skill for a new user (9 appears to be the maximum
    # score in this app — confirm against the grading services).
    DESIRED_LEVELS = {
        "reading": 9,
        "listening": 9,
        "writing": 9,
        "speaking": 9,
    }

    # Starting (unassessed) level per skill.
    LEVELS = {
        "reading": 0,
        "listening": 0,
        "writing": 0,
        "speaking": 0,
    }
class ExamVariant(Enum):
    """Exam coverage variant; values are the serialized wire form."""

    # Presumably the complete exam vs. a subset of it — confirm against callers.
    FULL = "full"
    PARTIAL = "partial"

View File

@@ -3,9 +3,9 @@ import os
from dependency_injector import providers, containers
from firebase_admin import credentials
from motor.motor_asyncio import AsyncIOMotorClient
from openai import AsyncOpenAI
from httpx import AsyncClient as HTTPClient
from google.cloud.firestore_v1 import AsyncClient as FirestoreClient
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
@@ -16,105 +16,125 @@ from app.controllers.impl import *
load_dotenv()
def config_di(
*, polly_client: any, http_client: HTTPClient, whisper_model: any
) -> None:
"""
Loads up all the common configs of all the environments
and then calls the specific env configs
"""
# Firebase token
class DependencyInjector:
def __init__(self, polly_client: any, http_client: HTTPClient, whisper_model: any):
self._container = containers.DynamicContainer()
self._polly_client = polly_client
self._http_client = http_client
self._whisper_model = whisper_model
def inject(self):
self._setup_clients()
self._setup_third_parties()
self._setup_repositories()
self._setup_services()
self._setup_controllers()
self._container.wire(
packages=["app"]
)
def _setup_clients(self):
self._container.openai_client = providers.Singleton(AsyncOpenAI)
self._container.polly_client = providers.Object(self._polly_client)
self._container.http_client = providers.Object(self._http_client)
self._container.whisper_model = providers.Object(self._whisper_model)
def _setup_third_parties(self):
self._container.llm = providers.Factory(OpenAI, client=self._container.openai_client)
self._container.stt = providers.Factory(OpenAIWhisper, model=self._container.whisper_model)
self._container.tts = providers.Factory(AWSPolly, client=self._container.polly_client)
self._container.vid_gen = providers.Factory(
Heygen, client=self._container.http_client, heygen_token=os.getenv("HEY_GEN_TOKEN")
)
self._container.ai_detector = providers.Factory(
GPTZero, client=self._container.http_client, gpt_zero_key=os.getenv("GPT_ZERO_API_KEY")
)
def _setup_repositories(self):
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
firebase_token = cred.get_access_token().access_token
container = containers.DynamicContainer()
openai_client = providers.Singleton(AsyncOpenAI)
polly_client = providers.Object(polly_client)
http_client = providers.Object(http_client)
firestore_client = providers.Singleton(FirestoreClient)
whisper_model = providers.Object(whisper_model)
llm = providers.Factory(OpenAI, client=openai_client)
stt = providers.Factory(OpenAIWhisper, model=whisper_model)
tts = providers.Factory(AWSPolly, client=polly_client)
vid_gen = providers.Factory(Heygen, client=http_client, heygen_token=os.getenv("HEY_GEN_TOKEN"))
ai_detector = providers.Factory(GPTZero, client=http_client, gpt_zero_key=os.getenv("GPT_ZERO_API_KEY"))
firebase_instance = providers.Factory(
FirebaseStorage, client=http_client, token=firebase_token, bucket=os.getenv("FIREBASE_BUCKET")
self._container.document_store = providers.Object(
AsyncIOMotorClient(os.getenv("MONGODB_URI"))[os.getenv("MONGODB_DB")]
)
firestore = providers.Factory(Firestore, client=firestore_client)
# Services
listening_service = providers.Factory(
ListeningService, llm=llm, tts=tts, file_storage=firebase_instance, document_store=firestore
)
reading_service = providers.Factory(ReadingService, llm=llm)
speaking_service = providers.Factory(
SpeakingService, llm=llm, vid_gen=vid_gen,
file_storage=firebase_instance, document_store=firestore,
stt=stt
self._container.firebase_instance = providers.Factory(
FirebaseStorage,
client=self._container.http_client, token=firebase_token, bucket=os.getenv("FIREBASE_BUCKET")
)
writing_service = providers.Factory(WritingService, llm=llm, ai_detector=ai_detector)
def _setup_services(self):
self._container.listening_service = providers.Factory(
ListeningService,
llm=self._container.llm,
tts=self._container.tts,
file_storage=self._container.firebase_instance,
document_store=self._container.document_store
)
self._container.reading_service = providers.Factory(ReadingService, llm=self._container.llm)
with open('app/services/impl/level/mc_variants.json', 'r') as file:
self._container.speaking_service = providers.Factory(
SpeakingService, llm=self._container.llm, vid_gen=self._container.vid_gen,
file_storage=self._container.firebase_instance, document_store=self._container.document_store,
stt=self._container.stt
)
self._container.writing_service = providers.Factory(
WritingService, llm=self._container.llm, ai_detector=self._container.ai_detector
)
with open('app/services/impl/exam/level/mc_variants.json', 'r') as file:
mc_variants = json.load(file)
level_service = providers.Factory(
LevelService, llm=llm, document_store=firestore, mc_variants=mc_variants, reading_service=reading_service,
writing_service=writing_service, speaking_service=speaking_service, listening_service=listening_service
self._container.level_service = providers.Factory(
LevelService, llm=self._container.llm, document_store=self._container.document_store,
mc_variants=mc_variants, reading_service=self._container.reading_service,
writing_service=self._container.writing_service, speaking_service=self._container.speaking_service,
listening_service=self._container.listening_service
)
grade_service = providers.Factory(
GradeService, llm=llm
self._container.grade_service = providers.Factory(
GradeService, llm=self._container.llm
)
embeddings = SentenceTransformer('all-MiniLM-L6-v2')
training_kb = providers.Factory(
self._container.training_kb = providers.Factory(
TrainingContentKnowledgeBase, embeddings=embeddings
)
training_service = providers.Factory(
TrainingService, llm=llm, firestore=firestore, training_kb=training_kb
self._container.training_service = providers.Factory(
TrainingService, llm=self._container.llm,
firestore=self._container.document_store, training_kb=self._container.training_kb
)
# Controllers
container.grade_controller = providers.Factory(
GradeController, grade_service=grade_service, speaking_service=speaking_service, writing_service=writing_service
def _setup_controllers(self):
self._container.grade_controller = providers.Factory(
GradeController, grade_service=self._container.grade_service,
speaking_service=self._container.speaking_service,
writing_service=self._container.writing_service
)
container.training_controller = providers.Factory(
TrainingController, training_service=training_service
self._container.training_controller = providers.Factory(
TrainingController, training_service=self._container.training_service
)
container.level_controller = providers.Factory(
LevelController, level_service=level_service
self._container.level_controller = providers.Factory(
LevelController, level_service=self._container.level_service
)
container.listening_controller = providers.Factory(
ListeningController, listening_service=listening_service
self._container.listening_controller = providers.Factory(
ListeningController, listening_service=self._container.listening_service
)
container.reading_controller = providers.Factory(
ReadingController, reading_service=reading_service
self._container.reading_controller = providers.Factory(
ReadingController, reading_service=self._container.reading_service
)
container.speaking_controller = providers.Factory(
SpeakingController, speaking_service=speaking_service
self._container.speaking_controller = providers.Factory(
SpeakingController, speaking_service=self._container.speaking_service
)
container.writing_controller = providers.Factory(
WritingController, writing_service=writing_service
)
container.llm = llm
container.wire(
packages=["app"]
self._container.writing_controller = providers.Factory(
WritingController, writing_service=self._container.writing_service
)

View File

@@ -5,6 +5,7 @@ from .writing import IWritingController
from .speaking import ISpeakingController
from .grade import IGradeController
from .training import ITrainingController
from .user import IUserController
__all__ = [
"IListeningController",
@@ -13,5 +14,6 @@ __all__ = [
"ISpeakingController",
"ILevelController",
"IGradeController",
"ITrainingController"
"ITrainingController",
"IUserController"
]

View File

@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from app.dtos.user_batch import BatchUsersDTO
class IUserController(ABC):
    """Controller interface for user-management endpoints."""

    @abstractmethod
    async def batch_import(self, batch: BatchUsersDTO):
        """Import every user described by *batch*."""
        pass

View File

@@ -5,6 +5,7 @@ from .speaking import SpeakingController
from .writing import WritingController
from .training import TrainingController
from .grade import GradeController
from .user import UserController
__all__ = [
"LevelController",
@@ -13,5 +14,6 @@ __all__ = [
"SpeakingController",
"WritingController",
"TrainingController",
"GradeController"
"GradeController",
"UserController"
]

View File

@@ -0,0 +1,12 @@
from app.controllers.abc import IUserController
from app.dtos.user_batch import BatchUsersDTO
from app.services.abc import IUserService
class UserController(IUserController):
    """Thin controller that forwards batch imports to the user service."""

    def __init__(self, user_service: IUserService):
        # Hold the injected service implementation privately.
        self._user_service = user_service

    async def batch_import(self, batch: BatchUsersDTO):
        # NOTE(review): the service method is named `fetch_tips` but it
        # performs the batch import — presumably a copy/paste misnomer
        # inherited from IUserService; confirm before renaming.
        return await self._user_service.fetch_tips(batch)

30
app/dtos/user_batch.py Normal file
View File

@@ -0,0 +1,30 @@
import uuid
from typing import Optional
from pydantic import BaseModel, Field
class DemographicInfo(BaseModel):
    """Optional demographic details attached to an imported user."""

    # Contact phone number (required by the payload schema).
    phone: str
    # Passport / national ID number, when provided.
    passport_id: Optional[str] = None
    country: Optional[str] = None
class UserDTO(BaseModel):
    """One user row in a batch-import payload.

    NOTE(review): field names mix camelCase and snake_case because they
    mirror the incoming JSON — do not rename without adding pydantic
    aliases, or the payload will stop validating.
    """

    # Generated server-side when the payload omits an id.
    id: uuid.UUID = Field(default_factory=uuid.uuid4)
    email: str
    name: str
    # Account kind; "student" and "corporate" are branched on in
    # UserService._init_users — confirm the full value set.
    type: str
    passport_id: str
    # Pre-hashed credentials handed straight to `firebase auth:import`
    # (SCRYPT parameters come from the environment).
    passwordHash: str
    passwordSalt: str
    # Optional group to attach the user to, matched by name.
    groupName: Optional[str] = None
    # Email of the corporate account that owns this user, if any.
    corporate: Optional[str] = None
    studentID: Optional[str | int] = None
    expiryDate: Optional[str] = None
    demographicInformation: Optional[DemographicInfo] = None
class BatchUsersDTO(BaseModel):
    """Payload for POST /api/user/import."""

    # Id of the account performing the import; becomes the default code
    # creator and group admin (see UserService._init_users).
    makerID: str
    users: list[UserDTO]

View File

@@ -1,18 +1,18 @@
import datetime
from pathlib import Path
import base64
import io
import os
import shutil
import subprocess
from typing import Optional
import uuid
import datetime
from pathlib import Path
from typing import Optional, Tuple
import aiofiles
import numpy as np
import pypandoc
from PIL import Image
import aiofiles
class FileHelper:
@@ -72,10 +72,10 @@ class FileHelper:
return base64.b64encode(image_bytes).decode('utf-8')
@classmethod
def b64_pngs(cls, path_id: str, files: list[str]):
async def b64_pngs(cls, path_id: str, files: list[str]):
png_messages = []
for filename in files:
b64_string = cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
b64_string = await cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
if b64_string:
png_messages.append({
"type": "image_url",
@@ -93,3 +93,22 @@ class FileHelper:
shutil.rmtree(path)
except Exception as e:
print(f"An error occurred while trying to remove {path}: {str(e)}")
@staticmethod
def remove_file(file_path):
try:
if os.path.exists(file_path):
if os.path.isfile(file_path):
os.remove(file_path)
except Exception as e:
print(f"An error occurred while trying to remove the file {file_path}: {str(e)}")
@staticmethod
def save_upload(file) -> Tuple[str, str]:
    """Persist an uploaded file under ./tmp/<uuid>/uploaded.<ext>.

    Returns (extension, path_id) so callers can locate the saved file.

    NOTE(review): `file.save(...)` is the Flask/werkzeug FileStorage API;
    FastAPI's UploadFile has no `.save()` method — confirm the argument
    type, otherwise this raises AttributeError under the FastAPI stack.
    """
    # Extension is taken naively from the last dot; no sanitization here.
    ext = file.filename.split('.')[-1]
    path_id = str(uuid.uuid4())
    os.makedirs(f'./tmp/{path_id}', exist_ok=True)
    tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
    file.save(tmp_filename)
    return ext, path_id

View File

@@ -1,16 +1,15 @@
from abc import ABC
from typing import Dict, Optional, List
class IDocumentStore(ABC):
async def save_to_db(self, collection: str, item):
async def save_to_db(self, collection: str, item: Dict, doc_id: Optional[str]) -> Optional[str]:
pass
async def save_to_db_with_id(self, collection: str, item, id: str):
async def get_all(self, collection: str) -> List[Dict]:
pass
async def get_all(self, collection: str):
pass
async def get_doc_by_id(self, collection: str, doc_id: str):
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
pass

View File

@@ -1,5 +1,5 @@
from .document_stores import *
from .firebase import FirebaseStorage
from app.repositories.impl.file_storage.firebase import FirebaseStorage
__all__ = [
"FirebaseStorage"

View File

@@ -1,4 +1,6 @@
import logging
from typing import Optional, List, Dict
from google.cloud.firestore_v1.async_client import AsyncClient
from google.cloud.firestore_v1.async_collection import AsyncCollectionReference
from google.cloud.firestore_v1.async_document import AsyncDocumentReference
@@ -10,34 +12,32 @@ class Firestore(IDocumentStore):
self._client = client
self._logger = logging.getLogger(__name__)
async def save_to_db(self, collection: str, item):
async def save_to_db(self, collection: str, item, doc_id: Optional[str] = None) -> Optional[str]:
collection_ref: AsyncCollectionReference = self._client.collection(collection)
update_time, document_ref = await collection_ref.add(item)
if document_ref:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
else:
return None
async def save_to_db_with_id(self, collection: str, item, id: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
document_ref: AsyncDocumentReference = collection_ref.document(id)
if doc_id:
document_ref: AsyncDocumentReference = collection_ref.document(doc_id)
await document_ref.set(item)
doc_snapshot = await document_ref.get()
if doc_snapshot.exists:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
else:
update_time, document_ref = await collection_ref.add(item)
if document_ref:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
return None
async def get_all(self, collection: str):
async def get_all(self, collection: str) -> List[Dict]:
collection_ref: AsyncCollectionReference = self._client.collection(collection)
docs = []
async for doc in collection_ref.stream():
docs.append(doc.to_dict())
return docs
async def get_doc_by_id(self, collection: str, doc_id: str):
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
collection_ref: AsyncCollectionReference = self._client.collection(collection)
doc_ref: AsyncDocumentReference = collection_ref.document(doc_id)
doc = await doc_ref.get()

View File

@@ -1,36 +1,37 @@
"""import logging
from pymongo import MongoClient
import logging
import uuid
from typing import Optional, List, Dict
from motor.motor_asyncio import AsyncIOMotorDatabase
from app.repositories.abc import IDocumentStore
class MongoDB(IDocumentStore):
def __init__(self, client: MongoClient):
self._client = client
def __init__(self, mongo_db: AsyncIOMotorDatabase):
self._mongo_db = mongo_db
self._logger = logging.getLogger(__name__)
def save_to_db(self, collection: str, item):
collection_ref = self._client[collection]
result = collection_ref.insert_one(item)
async def save_to_db(self, collection: str, item, doc_id: Optional[str] = None) -> Optional[str]:
collection_ref = self._mongo_db[collection]
if doc_id is None:
doc_id = str(uuid.uuid4())
item['id'] = doc_id
result = await collection_ref.insert_one(item)
if result.inserted_id:
self._logger.info(f"Document added with ID: {result.inserted_id}")
return True, str(result.inserted_id)
else:
return False, None
def save_to_db_with_id(self, collection: str, item, doc_id: str):
collection_ref = self._client[collection]
item['_id'] = doc_id
result = collection_ref.replace_one({'_id': id}, item, upsert=True)
if result.upserted_id or result.matched_count:
# returning id instead of _id
self._logger.info(f"Document added with ID: {doc_id}")
return True, doc_id
else:
return False, None
return doc_id
def get_all(self, collection: str):
collection_ref = self._client[collection]
all_documents = list(collection_ref.find())
return all_documents
"""
return None
async def get_all(self, collection: str) -> List[Dict]:
cursor = self._mongo_db[collection].find()
return [document async for document in cursor]
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
return await self._mongo_db[collection].find_one({"id": doc_id})

View File

@@ -0,0 +1,5 @@
from .firebase import FirebaseStorage
__all__ = [
"FirebaseStorage"
]

View File

@@ -21,16 +21,13 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import nltk
from dotenv import load_dotenv
from starlette import status
from app.api import router
from app.configs import config_di
from app.configs import DependencyInjector
from app.exceptions import CustomException
from app.middlewares import AuthenticationMiddleware, AuthBackend
load_dotenv()
@asynccontextmanager
async def lifespan(_app: FastAPI):
@@ -58,14 +55,13 @@ async def lifespan(_app: FastAPI):
)
)
# HTTP Client
http_client = httpx.AsyncClient()
config_di(
polly_client=polly_client,
http_client=http_client,
whisper_model=whisper_model
)
DependencyInjector(
polly_client,
http_client,
whisper_model
).inject()
# Setup logging
config_file = pathlib.Path("./app/configs/logging/logging_config.json")
@@ -147,8 +143,8 @@ def setup_middleware() -> List[Middleware]:
def create_app() -> FastAPI:
env = os.getenv("ENV")
_app = FastAPI(
docs_url="/docs" if env != "prod" else None,
redoc_url="/redoc" if env != "prod" else None,
docs_url="/docs" if env != "production" else None,
redoc_url="/redoc" if env != "production" else None,
middleware=setup_middleware(),
lifespan=lifespan
)

View File

@@ -1,20 +1,11 @@
from .level import ILevelService
from .listening import IListeningService
from .writing import IWritingService
from .speaking import ISpeakingService
from .reading import IReadingService
from .grade import IGradeService
from .training import ITrainingService
from .kb import IKnowledgeBase
from .third_parties import *
from .exam import *
from .training import *
from .user import IUserService
__all__ = [
"ILevelService",
"IListeningService",
"IWritingService",
"ISpeakingService",
"IReadingService",
"IGradeService",
"ITrainingService"
"IUserService"
]
__all__.extend(third_parties.__all__)
__all__.extend(exam.__all__)
__all__.extend(training.__all__)

View File

@@ -0,0 +1,15 @@
from .level import ILevelService
from .listening import IListeningService
from .writing import IWritingService
from .speaking import ISpeakingService
from .reading import IReadingService
from .grade import IGradeService
__all__ = [
"ILevelService",
"IListeningService",
"IWritingService",
"ISpeakingService",
"IReadingService",
"IGradeService",
]

View File

@@ -0,0 +1,7 @@
from .training import ITrainingService
from .kb import IKnowledgeBase
__all__ = [
"ITrainingService",
"IKnowledgeBase"
]

10
app/services/abc/user.py Normal file
View File

@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from app.dtos.user_batch import BatchUsersDTO
class IUserService(ABC):
    """Service interface for user batch operations."""

    @abstractmethod
    async def fetch_tips(self, batch: BatchUsersDTO):
        # NOTE(review): despite the name, the implementation (UserService)
        # imports the users in *batch* — the name looks like a copy/paste
        # misnomer; renaming requires touching the impl and controller too.
        pass

View File

@@ -1,19 +1,11 @@
from .level import LevelService
from .listening import ListeningService
from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
from .user import UserService
from .training import *
from .third_parties import *
from .exam import *
__all__ = [
"LevelService",
"ListeningService",
"ReadingService",
"SpeakingService",
"WritingService",
"GradeService",
"UserService"
]
__all__.extend(third_parties.__all__)
__all__.extend(training.__all__)
__all__.extend(exam.__all__)

View File

@@ -0,0 +1,16 @@
from .level import LevelService
from .listening import ListeningService
from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
__all__ = [
"LevelService",
"ListeningService",
"ReadingService",
"SpeakingService",
"WritingService",
"GradeService",
]

View File

@@ -482,11 +482,13 @@ class SpeakingService(ISpeakingService):
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
params = locals()
params.pop('self')
request_id = str(uuid.uuid4())
# TODO: request data
self._logger.info(
f'POST - generate_video_{part} - Received request to generate video {part}. '
f'Use this id to track the logs: {request_id} - Request data: " + str(request.get_json())'
f'Use this id to track the logs: {request_id} - Request data: " + {params}'
)
part_questions = self._get_part_questions(part, questions, avatar)

262
app/services/impl/user.py Normal file
View File

@@ -0,0 +1,262 @@
import os
import subprocess
import time
import uuid
import pandas as pd
import shortuuid
from datetime import datetime
from logging import getLogger
from pymongo.database import Database
from app.dtos.user_batch import BatchUsersDTO, UserDTO
from app.helpers import FileHelper
from app.services.abc import IUserService
class UserService(IUserService):
    """Batch user importer.

    Creates Firebase Auth accounts through the Firebase CLI
    (`auth:import`) and seeds the matching Mongo documents: the user
    profile, a short access code, and corporate/named groups.

    NOTE(review): the public entry point is `fetch_tips` because that is
    what IUserService declares, but it performs the import — looks like a
    copy/paste misnomer; renaming means changing the abc and
    UserController together.
    """

    # Per-skill defaults stamped onto every new user document. These are
    # shared class-level dicts, so they are copied with dict(...) before
    # being embedded in a document (see _insert_new_user).
    _DEFAULT_DESIRED_LEVELS = {
        "reading": 9,
        "listening": 9,
        "writing": 9,
        "speaking": 9,
    }
    _DEFAULT_LEVELS = {
        "reading": 0,
        "listening": 0,
        "writing": 0,
        "speaking": 0,
    }

    def __init__(self, mongo: Database):
        self._db: Database = mongo
        self._logger = getLogger(__name__)

    async def fetch_tips(self, batch: BatchUsersDTO):
        """Import *batch*: Firebase Auth accounts first, then Mongo docs.

        Returns ``{"ok": True}`` on success, or an error string when the
        Firebase CLI import fails (Mongo documents are then not created).

        Declared async to honour IUserService and the controller's
        ``await`` — the previous sync ``def`` made UserController await a
        plain dict, raising TypeError at runtime.
        """
        file_name = f'{uuid.uuid4()}.csv'
        path = f'./tmp/{file_name}'
        try:
            self._generate_firebase_auth_csv(batch, path)
            result = self._upload_users('./tmp', file_name)
            if result.returncode != 0:
                # A failing CLI reports on stderr; fall back to stdout.
                error_msg = (
                    "Couldn't upload users. Failed to run command firebase "
                    f"auth import -> ```cmd {result.stderr or result.stdout}```"
                )
                self._logger.error(error_msg)
                return error_msg
            self._init_users(batch)
            return {"ok": True}
        finally:
            # Remove the temporary CSV on every path; previously it leaked
            # whenever the CLI returned non-zero.
            FileHelper.remove_file(path)

    @staticmethod
    def _generate_firebase_auth_csv(batch_dto: BatchUsersDTO, path: str):
        """Write the headerless CSV that `firebase auth:import` expects.

        Column layout per https://firebase.google.com/docs/cli/auth#file_format;
        unused provider columns are left empty.
        """
        columns = [
            'UID', 'Email', 'Email Verified', 'Password Hash', 'Password Salt', 'Name',
            'Photo URL', 'Google ID', 'Google Email', 'Google Display Name', 'Google Photo URL',
            'Facebook ID', 'Facebook Email', 'Facebook Display Name', 'Facebook Photo URL',
            'Twitter ID', 'Twitter Email', 'Twitter Display Name', 'Twitter Photo URL',
            'GitHub ID', 'GitHub Email', 'GitHub Display Name', 'GitHub Photo URL',
            'User Creation Time', 'Last Sign-In Time', 'Phone Number'
        ]
        # Epoch milliseconds; one shared timestamp for the whole batch.
        current_time = int(time.time() * 1000)
        users_data = []
        for user in batch_dto.users:
            user_data = dict.fromkeys(columns, '')
            user_data.update({
                'UID': str(user.id),
                'Email': user.email,
                'Email Verified': False,
                'Password Hash': user.passwordHash,
                'Password Salt': user.passwordSalt,
                'User Creation Time': current_time,
            })
            users_data.append(user_data)
        df = pd.DataFrame(users_data, columns=columns)
        df.to_csv(path, index=False, header=False)

    @staticmethod
    def _upload_users(directory: str, file_name: str):
        """Run `firebase auth:import` on *file_name* from *directory*.

        Returns the CompletedProcess so the caller can inspect returncode
        and output. Arguments are passed as a list with the default
        shell=False so the env-sourced hash parameters can never be
        interpreted by a shell (previously interpolated into one shell
        string with shell=True).
        """
        command = [
            'firebase', 'auth:import', file_name,
            '--hash-algo=SCRYPT',
            f'--hash-key={os.getenv("FIREBASE_SCRYPT_B64_SIGNER_KEY")}',
            f'--salt-separator={os.getenv("FIREBASE_SCRYPT_B64_SALT_SEPARATOR")}',
            f'--rounds={os.getenv("FIREBASE_SCRYPT_ROUNDS")}',
            f'--mem-cost={os.getenv("FIREBASE_SCRYPT_MEM_COST")}',
            f'--project={os.getenv("FIREBASE_PROJECT_ID")}',
        ]
        return subprocess.run(command, cwd=directory, capture_output=True, text=True)

    def _init_users(self, batch_users: BatchUsersDTO):
        """Create the Mongo side of the import for every user in the batch."""
        maker_id = batch_users.makerID
        for user in batch_users.users:
            self._insert_new_user(user)
            code = self._create_code(user, maker_id)
            if user.type == "corporate":
                # Corporate accounts administer their own default groups.
                self._set_corporate_default_groups(user)
            if user.corporate:
                self._assign_corporate_to_user(user, code)
            if user.groupName and user.groupName.strip():
                self._assign_user_to_group_by_name(user, maker_id)

    def _insert_new_user(self, user: UserDTO):
        """Insert the user profile document with application defaults."""
        new_user = {
            **user.dict(exclude={
                'passport_id', 'groupName', 'expiryDate',
                'corporate', 'passwordHash', 'passwordSalt'
            }),
            'id': str(user.id),
            'bio': "",
            'focus': "academic",
            'status': "active",
            # dict(...) copies so later document mutation never aliases the
            # shared class-level defaults (or vice versa).
            'desiredLevels': dict(self._DEFAULT_DESIRED_LEVELS),
            'profilePicture': "/defaultAvatar.png",
            'levels': dict(self._DEFAULT_LEVELS),
            'isFirstLogin': False,
            'isVerified': True,
            # NOTE(review): naive local time — confirm whether UTC is expected.
            'registrationDate': datetime.now(),
            'subscriptionExpirationDate': user.expiryDate
        }
        self._db.users.insert_one(new_user)

    def _create_code(self, user: UserDTO, maker_id: str) -> str:
        """Create a 6-character access-code document for *user*; returns it."""
        code = shortuuid.ShortUUID().random(length=6)
        self._db.codes.insert_one({
            'id': code,
            'code': code,
            'creator': maker_id,
            'expiryDate': user.expiryDate,
            'type': user.type,
            'creationDate': datetime.now(),
            'userId': str(user.id),
            'email': user.email,
            'name': user.name,
            'passport_id': user.passport_id
        })
        return code

    def _set_corporate_default_groups(self, user: UserDTO):
        """Create the three locked default groups owned by a corporate user."""
        user_id = str(user.id)
        for name in ("Teachers", "Students", "Corporate"):
            self._db.groups.insert_one({
                'admin': user_id,
                'id': str(uuid.uuid4()),
                'name': name,
                'participants': [],
                'disableEditing': True,
            })

    def _assign_corporate_to_user(self, user: UserDTO, code: str):
        """Attach *user* to their corporate account.

        Re-points the access code's creator at the corporate user and adds
        the user to that corporate's "Students"/"Teachers" group, creating
        the group when it does not exist. No-op when the corporate email
        does not resolve to an existing user.
        """
        user_id = str(user.id)
        corporate_user = self._db.users.find_one({"email": user.corporate})
        if not corporate_user:
            return
        self._db.codes.update_one(
            {"id": code},
            {"$set": {"creator": corporate_user["id"]}},
            upsert=True
        )
        group_type = "Students" if user.type == "student" else "Teachers"
        group = self._db.groups.find_one(
            {"admin": corporate_user["id"], "name": group_type}
        )
        if group:
            participants = group['participants']
            if user_id not in participants:
                participants.append(user_id)
                self._db.groups.update_one(
                    {"id": group["id"]},
                    {"$set": {"participants": participants}}
                )
        else:
            self._db.groups.insert_one({
                'admin': corporate_user["id"],
                'id': str(uuid.uuid4()),
                'name': group_type,
                'participants': [user_id],
                'disableEditing': True,
            })

    def _assign_user_to_group_by_name(self, user: UserDTO, maker_id: str):
        """Add *user* to the maker's group named user.groupName, creating
        an editable group when none exists."""
        user_id = str(user.id)
        group_name = user.groupName.strip()
        groups = list(self._db.groups.find(
            {"admin": maker_id, "name": group_name}
        ))
        if not groups:
            self._db.groups.insert_one({
                'id': str(uuid.uuid4()),
                'admin': maker_id,
                'name': group_name,
                'participants': [user_id],
                'disableEditing': False,
            })
            return
        group = groups[0]
        participants = group["participants"]
        if user_id not in participants:
            participants.append(user_id)
            self._db.groups.update_one(
                {"id": group["id"]},
                {"$set": {"participants": participants}}
            )

4343
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -25,6 +25,9 @@ numpy = "1.26.4"
pillow = "10.4.0"
sentence-transformers = "3.0.1"
openai-whisper = "20231117"
motor = "3.6.0"
shortuuid = "1.0.13"
pandas = "2.2.3"
[build-system]

View File

@@ -0,0 +1,67 @@
# Adding new training content
If you're ever tasked with the grueling task of adding more tips from manuals, my condolences.
There are 4 components of a training content tip: the tip itself, the question, the additional and the segment.
The tip is the actual tip, if the manual doesn't have an exercise that relates to that tip fill this out:
```json
{
"category": "<the category of the tip that will be used to categorize the embeddings and also used in the tip header>",
"embedding": "<the relevant part of the tip that is needed to make the embedding (clean the tip of useless info that might mislead the queries)>",
"text": "<The text that the llm will use to assess whether the tip is relevant according to the performance of the student (most of the time just include all the text of the tip)>",
"html": "<The html that will be rendered in the tip component>",
"id": "<a uuid4>",
"verified": <this is just to keep track of the tips that were manually confirmed by you>,
"standalone": <if the tip doesn't have an exercise this is true else it's false>
}
```
If the manual does have an exercise that relates to the tip:
```json
{
// ...
"question": "<the exercise question(s) html>",
"additional": "<context of the question html>",
"segments": [
{
"html": "<the html of a segment, you MUST wrap the html in a single <div> >",
"wordDelay": <the speed at which letters will be placed on the segment, 200ms is a good one>,
"holdDelay": <the total time that the segment will be paused before moving onto the next segment, 5000ms is a good one>,
"highlight": [
{
"targets": ["<the target of the highlight can be: question, additional, segment, all>"],
"phrases": ["<the words/phrases/raw html you want to highlight>"]
}
],
"insertHTML": [
{
"target": "<the target of the insert can be: question, additional>",
"targetId": "<the id of an html element>",
"position": "<the position of the inserted html can be: replace, prepend and append. Most of the time you will only use replace>",
"html": "<the html to replace the element with targetId>"
},
]
}
]
}
```
In order to create these structures you will have to manually screenshot the tips, exercises, context and send them to an llm (gpt-4o or claude)
with a prompt like "get me the html for this", you will have to check whether the html is properly structured and then
paste them in the prompt.txt file of this directory and send it
back to an llm.
Afterwards you will have to check whether the default styles in /src/components/TrainingContent/FormatTip.ts are adequate; divs
(except for the wrapper div of a segment) and span styles are not overridden, but you should aim to use the least amount of
styles in the tip itself and create custom reusable html elements
in FormatTip.ts.
After checking all of the tips render you will have to create new embeddings in the backend, you CAN'T change ids of existing tips since there
might be training tips that are already stored in firebase.
This is a very tedious task here's a recommendation for [background noise](https://www.youtube.com/watch?v=lDnva_3fcTc).
GL HF

File diff suppressed because it is too large Load Diff

62
scripts/tips/prompt.txt Normal file
View File

@@ -0,0 +1,62 @@
I am going to give you an exercise and a tip, explain how to solve the exercise and how the tip is beneficial,
your response must be with this format:
{
"segments": [
{
"html": "",
"wordDelay": 0,
"holdDelay"; 0,
"highlight": [
{
"targets": [],
"phrases": []
}
],
"insertHTML": [
{
"target": "",
"targetId": "",
"position": "replace",
"html": ""
}
]
}
]
}
Basically you are going to produce multiple objects and place it in data with the format above to integrate with a react component that highlights passages and inserts html,
these objects are segments of your explanation that will be presented to a student.
In the html field place a segment of your response that will be streamed to the component with a delay of "wordDelay" ms and in the end of that segment stream the phrases or words inside
"highlight" will be highlighted for "holdDelay" ms, and the cycle repeats until the whole data array is iterated. Make it so
that the delays are reasonable for the student to have time to process the message you're trying to send. Take note that
"wordDelay" is the time between words to display (always 200), and "holdDelay" (no less than 5000) is the total time the highlighter will highlight what you put
inside "highlight".
There are 3 target areas:
- "question": where the question is placed
- "additional": where additional content is placed required to answer the question (this section is optional)
- "segment": a particular segment
You can use these targets in highlight and insertHTML. In order for insertHTML to work, you will have to place an html element with an "id" attribute
in the targets you will reference and provide the id via the "targetId", by this I mean if you want to use insert you will need to provide me the
html I've sent you with either a placeholder element with an id set or set an id in an existent element.
If there are already id's in the html I'm giving you then you must use insertHtml.
Each segment html will be rendered in a div that has margins, so you should condense the information; don't give me just single short phrases that occupy a whole div.
As previously said this will be seen by a student so show some train of thought to solve the exercise.
All the segment's html must be wrapped in a div element, and again since this div element will be rendered with some margins make proper use of the segments html.
Try to make bulletpoints.
Don't explicitly mention the tip right away at the beginning; aim more towards the end.
Tip:
Target: "question"
Target: "additional"

View File

@@ -0,0 +1,34 @@
# One-off content script: flatten tips from a Pathways book JSON export and
# insert them into the MongoDB `walkthrough` collection.
import json
import os
from dotenv import load_dotenv
from pymongo import MongoClient

load_dotenv()
# staging: encoach-staging.json
# prod: storied-phalanx-349916.json
# NOTE(review): assumes MONGODB_URI and MONGODB_DB are set in the environment
# (loaded from .env above) — verify before running against prod.
mongo_db = MongoClient(os.getenv('MONGODB_URI'))[os.getenv('MONGODB_DB')]

if __name__ == "__main__":
    # Load the book export; expected shape: units -> pages -> tips.
    with open('pathways_2_rw.json', 'r', encoding='utf-8') as file:
        book = json.load(file)
    tips = []
    # Flatten the nested structure into one document per tip, renaming
    # category/html to the tipCategory/tipHtml field names used by the app.
    for unit in book["units"]:
        for page in unit["pages"]:
            for tip in page["tips"]:
                new_tip = {
                    "id": tip["id"],
                    "standalone": tip["standalone"],
                    "tipCategory": tip["category"],
                    "tipHtml": tip["html"]
                }
                if not tip["standalone"]:
                    # Non-standalone tips carry their related exercise payload.
                    new_tip["exercise"] = tip["exercise"]
                tips.append(new_tip)
    # Insert one document per tip.
    for tip in tips:
        doc_ref = mongo_db.walkthrough.insert_one(tip)