Fastapi refactor update

This commit is contained in:
Carlos-Mesquita
2024-10-01 19:31:01 +01:00
parent f92a803d96
commit 2a032c5aba
132 changed files with 22856 additions and 10309 deletions

View File

@@ -1,18 +1,20 @@
from fastapi import APIRouter
from .home import home_router
from .listening import listening_router
from .reading import reading_router
from .speaking import speaking_router
from .training import training_router
from .writing import writing_router
from .grade import grade_router
router = APIRouter()
router.include_router(home_router, prefix="/api", tags=["Home"])
router.include_router(listening_router, prefix="/api/listening", tags=["Listening"])
router.include_router(reading_router, prefix="/api/reading", tags=["Reading"])
router.include_router(speaking_router, prefix="/api/speaking", tags=["Speaking"])
router.include_router(writing_router, prefix="/api/writing", tags=["Writing"])
router.include_router(grade_router, prefix="/api/grade", tags=["Grade"])
router.include_router(training_router, prefix="/api/training", tags=["Training"])
from fastapi import APIRouter
from .home import home_router
from .listening import listening_router
from .reading import reading_router
from .speaking import speaking_router
from .training import training_router
from .writing import writing_router
from .grade import grade_router
from .user import user_router
router = APIRouter()
router.include_router(home_router, prefix="/api", tags=["Home"])
router.include_router(listening_router, prefix="/api/listening", tags=["Listening"])
router.include_router(reading_router, prefix="/api/reading", tags=["Reading"])
router.include_router(speaking_router, prefix="/api/speaking", tags=["Speaking"])
router.include_router(writing_router, prefix="/api/writing", tags=["Writing"])
router.include_router(grade_router, prefix="/api/grade", tags=["Grade"])
router.include_router(training_router, prefix="/api/training", tags=["Training"])
router.include_router(user_router, prefix="/api/user", tags=["Users"])

View File

@@ -1,74 +1,74 @@
from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Depends, Path, Request
from app.controllers.abc import IGradeController
from app.dtos.writing import WritingGradeTaskDTO
from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
controller = "grade_controller"
grade_router = APIRouter()
@grade_router.post(
'/writing/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_writing_task(
data: WritingGradeTaskDTO,
task: int = Path(..., ge=1, le=2),
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_writing_task(task, data)
@grade_router.post(
'/speaking/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_2(
data: GradeSpeakingDTO,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_speaking_task(2, [data.dict()])
@grade_router.post(
'/speaking/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_1_and_3(
data: GradeSpeakingAnswersDTO,
task: int = Path(..., ge=1, le=3),
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_speaking_task(task, data.answers)
@grade_router.post(
'/summary',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grading_summary(
request: Request,
grade_controller: IGradeController = Depends(Provide[controller])
):
data = await request.json()
return await grade_controller.grading_summary(data)
@grade_router.post(
'/short_answers',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_short_answers(
request: Request,
grade_controller: IGradeController = Depends(Provide[controller])
):
data = await request.json()
return await grade_controller.grade_short_answers(data)
from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Depends, Path, Request
from app.controllers.abc import IGradeController
from app.dtos.writing import WritingGradeTaskDTO
from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
controller = "grade_controller"
grade_router = APIRouter()
@grade_router.post(
'/writing/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_writing_task(
data: WritingGradeTaskDTO,
task: int = Path(..., ge=1, le=2),
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_writing_task(task, data)
@grade_router.post(
'/speaking/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_2(
data: GradeSpeakingDTO,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_speaking_task(2, [data.dict()])
@grade_router.post(
'/speaking/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_1_and_3(
data: GradeSpeakingAnswersDTO,
task: int = Path(..., ge=1, le=3),
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_speaking_task(task, data.answers)
@grade_router.post(
'/summary',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grading_summary(
request: Request,
grade_controller: IGradeController = Depends(Provide[controller])
):
data = await request.json()
return await grade_controller.grading_summary(data)
@grade_router.post(
'/short_answers',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_short_answers(
request: Request,
grade_controller: IGradeController = Depends(Provide[controller])
):
data = await request.json()
return await grade_controller.grade_short_answers(data)

View File

@@ -1,9 +1,9 @@
from fastapi import APIRouter
home_router = APIRouter()
@home_router.get(
'/healthcheck'
)
async def healthcheck():
return {"healthy": True}
from fastapi import APIRouter
home_router = APIRouter()
@home_router.get(
'/healthcheck'
)
async def healthcheck():
return {"healthy": True}

View File

@@ -1,55 +1,55 @@
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, UploadFile, Request
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ILevelController
controller = "level_controller"
level_router = APIRouter()
@level_router.get(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_level_exam(
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.get_level_exam()
@level_router.get(
'/utas',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_level_utas(
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.get_level_utas()
@level_router.post(
'/upload',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def upload(
file: UploadFile,
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.upload_level(file)
@level_router.post(
'/custom',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def custom_level(
request: Request,
level_controller: ILevelController = Depends(Provide[controller])
):
data = await request.json()
return await level_controller.get_custom_level(data)
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, UploadFile, Request
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ILevelController
controller = "level_controller"
level_router = APIRouter()
@level_router.get(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_level_exam(
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.get_level_exam()
@level_router.get(
'/utas',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_level_utas(
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.get_level_utas()
@level_router.post(
'/upload',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def upload(
file: UploadFile,
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.upload_level(file)
@level_router.post(
'/custom',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def custom_level(
request: Request,
level_controller: ILevelController = Depends(Provide[controller])
):
data = await request.json()
return await level_controller.get_custom_level(data)

View File

@@ -1,40 +1,40 @@
import random
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, Path
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import IListeningController
from app.configs.constants import EducationalContent
from app.dtos.listening import SaveListeningDTO
controller = "listening_controller"
listening_router = APIRouter()
@listening_router.get(
'/section/{section}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_listening_question(
exercises: list[str],
section: int = Path(..., ge=1, le=4),
topic: str | None = None,
difficulty: str = random.choice(EducationalContent.DIFFICULTIES),
listening_controller: IListeningController = Depends(Provide[controller])
):
return await listening_controller.get_listening_question(section, topic, exercises, difficulty)
@listening_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def save_listening(
data: SaveListeningDTO,
listening_controller: IListeningController = Depends(Provide[controller])
):
return await listening_controller.save_listening(data)
import random
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, Path
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import IListeningController
from app.configs.constants import EducationalContent
from app.dtos.listening import SaveListeningDTO
controller = "listening_controller"
listening_router = APIRouter()
@listening_router.get(
'/section/{section}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_listening_question(
exercises: list[str],
section: int = Path(..., ge=1, le=4),
topic: str | None = None,
difficulty: str = random.choice(EducationalContent.DIFFICULTIES),
listening_controller: IListeningController = Depends(Provide[controller])
):
return await listening_controller.get_listening_question(section, topic, exercises, difficulty)
@listening_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def save_listening(
data: SaveListeningDTO,
listening_controller: IListeningController = Depends(Provide[controller])
):
return await listening_controller.save_listening(data)

View File

@@ -1,28 +1,28 @@
import random
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, Path, Query
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import IReadingController
controller = "reading_controller"
reading_router = APIRouter()
@reading_router.get(
'/passage/{passage}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_reading_passage(
passage: int = Path(..., ge=1, le=3),
topic: str = Query(default=random.choice(EducationalContent.TOPICS)),
exercises: list[str] = Query(default=[]),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
reading_controller: IReadingController = Depends(Provide[controller])
):
return await reading_controller.get_reading_passage(passage, topic, exercises, difficulty)
import random
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, Path, Query
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import IReadingController
controller = "reading_controller"
reading_router = APIRouter()
@reading_router.get(
'/passage/{passage}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_reading_passage(
passage: int = Path(..., ge=1, le=3),
topic: str = Query(default=random.choice(EducationalContent.TOPICS)),
exercises: list[str] = Query(default=[]),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
reading_controller: IReadingController = Depends(Provide[controller])
):
return await reading_controller.get_reading_passage(passage, topic, exercises, difficulty)

View File

@@ -1,97 +1,97 @@
import random
from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Path, Query, Depends, BackgroundTasks
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import ISpeakingController
from app.dtos.speaking import (
SaveSpeakingDTO, GenerateVideo1DTO, GenerateVideo2DTO, GenerateVideo3DTO
)
controller = "speaking_controller"
speaking_router = APIRouter()
@speaking_router.get(
'/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
first_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
second_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.get_speaking_part(1, first_topic, difficulty, second_topic)
@speaking_router.get(
'/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
task: int = Path(..., ge=2, le=3),
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.get_speaking_part(task, topic, difficulty)
@speaking_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def save_speaking(
data: SaveSpeakingDTO,
background_tasks: BackgroundTasks,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.save_speaking(data, background_tasks)
@speaking_router.post(
'/generate_video/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_1(
data: GenerateVideo1DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
1, data.avatar, data.first_topic, data.questions, second_topic=data.second_topic
)
@speaking_router.post(
'/generate_video/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_2(
data: GenerateVideo2DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
2, data.avatar, data.topic, [data.question], prompts=data.prompts, suffix=data.suffix
)
@speaking_router.post(
'/generate_video/3',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_3(
data: GenerateVideo3DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
3, data.avatar, data.topic, data.questions
)
import random
from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Path, Query, Depends, BackgroundTasks
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import ISpeakingController
from app.dtos.speaking import (
SaveSpeakingDTO, GenerateVideo1DTO, GenerateVideo2DTO, GenerateVideo3DTO
)
controller = "speaking_controller"
speaking_router = APIRouter()
@speaking_router.get(
'/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
first_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
second_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.get_speaking_part(1, first_topic, difficulty, second_topic)
@speaking_router.get(
'/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
task: int = Path(..., ge=2, le=3),
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.get_speaking_part(task, topic, difficulty)
@speaking_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def save_speaking(
data: SaveSpeakingDTO,
background_tasks: BackgroundTasks,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.save_speaking(data, background_tasks)
@speaking_router.post(
'/generate_video/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_1(
data: GenerateVideo1DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
1, data.avatar, data.first_topic, data.questions, second_topic=data.second_topic
)
@speaking_router.post(
'/generate_video/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_2(
data: GenerateVideo2DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
2, data.avatar, data.topic, [data.question], prompts=data.prompts, suffix=data.suffix
)
@speaking_router.post(
'/generate_video/3',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_3(
data: GenerateVideo3DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
3, data.avatar, data.topic, data.questions
)

View File

@@ -1,34 +1,34 @@
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, Request
from app.dtos.training import FetchTipsDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ITrainingController
controller = "training_controller"
training_router = APIRouter()
@training_router.post(
'/tips',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_reading_passage(
data: FetchTipsDTO,
training_controller: ITrainingController = Depends(Provide[controller])
):
return await training_controller.fetch_tips(data)
@training_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def training_content(
request: Request,
training_controller: ITrainingController = Depends(Provide[controller])
):
data = await request.json()
return await training_controller.get_training_content(data)
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends, Request
from app.dtos.training import FetchTipsDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ITrainingController
controller = "training_controller"
training_router = APIRouter()
@training_router.post(
'/tips',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_reading_passage(
data: FetchTipsDTO,
training_controller: ITrainingController = Depends(Provide[controller])
):
return await training_controller.fetch_tips(data)
@training_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def training_content(
request: Request,
training_controller: ITrainingController = Depends(Provide[controller])
):
data = await request.json()
return await training_controller.get_training_content(data)

21
app/api/user.py Normal file
View File

@@ -0,0 +1,21 @@
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends
from app.dtos.user_batch import BatchUsersDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import IUserController
controller = "user_controller"
user_router = APIRouter()
@user_router.post(
'/import',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def batch_import(
batch: BatchUsersDTO,
user_controller: IUserController = Depends(Provide[controller])
):
return await user_controller.batch_import(batch)

View File

@@ -1,25 +1,25 @@
import random
from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Path, Query, Depends
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import IWritingController
controller = "writing_controller"
writing_router = APIRouter()
@writing_router.get(
'/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_writing_task_general_question(
task: int = Path(..., ge=1, le=2),
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
writing_controller: IWritingController = Depends(Provide[controller])
):
return await writing_controller.get_writing_task_general_question(task, topic, difficulty)
import random
from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Path, Query, Depends
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import IWritingController
controller = "writing_controller"
writing_router = APIRouter()
@writing_router.get(
'/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_writing_task_general_question(
task: int = Path(..., ge=1, le=2),
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
writing_controller: IWritingController = Depends(Provide[controller])
):
return await writing_controller.get_writing_task_general_question(task, topic, difficulty)

View File

@@ -1,5 +1,5 @@
from .dependency_injection import config_di
__all__ = [
"config_di"
]
from .dependency_injection import DependencyInjector
__all__ = [
"DependencyInjector"
]

File diff suppressed because it is too large Load Diff

View File

@@ -1,120 +1,140 @@
import json
import os
from dependency_injector import providers, containers
from firebase_admin import credentials
from openai import AsyncOpenAI
from httpx import AsyncClient as HTTPClient
from google.cloud.firestore_v1 import AsyncClient as FirestoreClient
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from app.repositories.impl import *
from app.services.impl import *
from app.controllers.impl import *
load_dotenv()
def config_di(
*, polly_client: any, http_client: HTTPClient, whisper_model: any
) -> None:
"""
Loads up all the common configs of all the environments
and then calls the specific env configs
"""
# Firebase token
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
firebase_token = cred.get_access_token().access_token
container = containers.DynamicContainer()
openai_client = providers.Singleton(AsyncOpenAI)
polly_client = providers.Object(polly_client)
http_client = providers.Object(http_client)
firestore_client = providers.Singleton(FirestoreClient)
whisper_model = providers.Object(whisper_model)
llm = providers.Factory(OpenAI, client=openai_client)
stt = providers.Factory(OpenAIWhisper, model=whisper_model)
tts = providers.Factory(AWSPolly, client=polly_client)
vid_gen = providers.Factory(Heygen, client=http_client, heygen_token=os.getenv("HEY_GEN_TOKEN"))
ai_detector = providers.Factory(GPTZero, client=http_client, gpt_zero_key=os.getenv("GPT_ZERO_API_KEY"))
firebase_instance = providers.Factory(
FirebaseStorage, client=http_client, token=firebase_token, bucket=os.getenv("FIREBASE_BUCKET")
)
firestore = providers.Factory(Firestore, client=firestore_client)
# Services
listening_service = providers.Factory(
ListeningService, llm=llm, tts=tts, file_storage=firebase_instance, document_store=firestore
)
reading_service = providers.Factory(ReadingService, llm=llm)
speaking_service = providers.Factory(
SpeakingService, llm=llm, vid_gen=vid_gen,
file_storage=firebase_instance, document_store=firestore,
stt=stt
)
writing_service = providers.Factory(WritingService, llm=llm, ai_detector=ai_detector)
with open('app/services/impl/level/mc_variants.json', 'r') as file:
mc_variants = json.load(file)
level_service = providers.Factory(
LevelService, llm=llm, document_store=firestore, mc_variants=mc_variants, reading_service=reading_service,
writing_service=writing_service, speaking_service=speaking_service, listening_service=listening_service
)
grade_service = providers.Factory(
GradeService, llm=llm
)
embeddings = SentenceTransformer('all-MiniLM-L6-v2')
training_kb = providers.Factory(
TrainingContentKnowledgeBase, embeddings=embeddings
)
training_service = providers.Factory(
TrainingService, llm=llm, firestore=firestore, training_kb=training_kb
)
# Controllers
container.grade_controller = providers.Factory(
GradeController, grade_service=grade_service, speaking_service=speaking_service, writing_service=writing_service
)
container.training_controller = providers.Factory(
TrainingController, training_service=training_service
)
container.level_controller = providers.Factory(
LevelController, level_service=level_service
)
container.listening_controller = providers.Factory(
ListeningController, listening_service=listening_service
)
container.reading_controller = providers.Factory(
ReadingController, reading_service=reading_service
)
container.speaking_controller = providers.Factory(
SpeakingController, speaking_service=speaking_service
)
container.writing_controller = providers.Factory(
WritingController, writing_service=writing_service
)
container.llm = llm
container.wire(
packages=["app"]
)
import json
import os
from dependency_injector import providers, containers
from firebase_admin import credentials
from motor.motor_asyncio import AsyncIOMotorClient
from openai import AsyncOpenAI
from httpx import AsyncClient as HTTPClient
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from app.repositories.impl import *
from app.services.impl import *
from app.controllers.impl import *
load_dotenv()
class DependencyInjector:
def __init__(self, polly_client: any, http_client: HTTPClient, whisper_model: any):
self._container = containers.DynamicContainer()
self._polly_client = polly_client
self._http_client = http_client
self._whisper_model = whisper_model
def inject(self):
self._setup_clients()
self._setup_third_parties()
self._setup_repositories()
self._setup_services()
self._setup_controllers()
self._container.wire(
packages=["app"]
)
def _setup_clients(self):
self._container.openai_client = providers.Singleton(AsyncOpenAI)
self._container.polly_client = providers.Object(self._polly_client)
self._container.http_client = providers.Object(self._http_client)
self._container.whisper_model = providers.Object(self._whisper_model)
def _setup_third_parties(self):
self._container.llm = providers.Factory(OpenAI, client=self._container.openai_client)
self._container.stt = providers.Factory(OpenAIWhisper, model=self._container.whisper_model)
self._container.tts = providers.Factory(AWSPolly, client=self._container.polly_client)
self._container.vid_gen = providers.Factory(
Heygen, client=self._container.http_client, heygen_token=os.getenv("HEY_GEN_TOKEN")
)
self._container.ai_detector = providers.Factory(
GPTZero, client=self._container.http_client, gpt_zero_key=os.getenv("GPT_ZERO_API_KEY")
)
def _setup_repositories(self):
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
firebase_token = cred.get_access_token().access_token
self._container.document_store = providers.Object(
AsyncIOMotorClient(os.getenv("MONGODB_URI"))[os.getenv("MONGODB_DB")]
)
self._container.firebase_instance = providers.Factory(
FirebaseStorage,
client=self._container.http_client, token=firebase_token, bucket=os.getenv("FIREBASE_BUCKET")
)
def _setup_services(self):
self._container.listening_service = providers.Factory(
ListeningService,
llm=self._container.llm,
tts=self._container.tts,
file_storage=self._container.firebase_instance,
document_store=self._container.document_store
)
self._container.reading_service = providers.Factory(ReadingService, llm=self._container.llm)
self._container.speaking_service = providers.Factory(
SpeakingService, llm=self._container.llm, vid_gen=self._container.vid_gen,
file_storage=self._container.firebase_instance, document_store=self._container.document_store,
stt=self._container.stt
)
self._container.writing_service = providers.Factory(
WritingService, llm=self._container.llm, ai_detector=self._container.ai_detector
)
with open('app/services/impl/exam/level/mc_variants.json', 'r') as file:
mc_variants = json.load(file)
self._container.level_service = providers.Factory(
LevelService, llm=self._container.llm, document_store=self._container.document_store,
mc_variants=mc_variants, reading_service=self._container.reading_service,
writing_service=self._container.writing_service, speaking_service=self._container.speaking_service,
listening_service=self._container.listening_service
)
self._container.grade_service = providers.Factory(
GradeService, llm=self._container.llm
)
embeddings = SentenceTransformer('all-MiniLM-L6-v2')
self._container.training_kb = providers.Factory(
TrainingContentKnowledgeBase, embeddings=embeddings
)
self._container.training_service = providers.Factory(
TrainingService, llm=self._container.llm,
firestore=self._container.document_store, training_kb=self._container.training_kb
)
def _setup_controllers(self):
self._container.grade_controller = providers.Factory(
GradeController, grade_service=self._container.grade_service,
speaking_service=self._container.speaking_service,
writing_service=self._container.writing_service
)
self._container.training_controller = providers.Factory(
TrainingController, training_service=self._container.training_service
)
self._container.level_controller = providers.Factory(
LevelController, level_service=self._container.level_service
)
self._container.listening_controller = providers.Factory(
ListeningController, listening_service=self._container.listening_service
)
self._container.reading_controller = providers.Factory(
ReadingController, reading_service=self._container.reading_service
)
self._container.speaking_controller = providers.Factory(
SpeakingController, speaking_service=self._container.speaking_service
)
self._container.writing_controller = providers.Factory(
WritingController, writing_service=self._container.writing_service
)

View File

@@ -1,7 +1,7 @@
from .filters import ErrorAndAboveFilter
from .queue_handler import QueueListenerHandler
__all__ = [
"ErrorAndAboveFilter",
"QueueListenerHandler"
]
from .filters import ErrorAndAboveFilter
from .queue_handler import QueueListenerHandler
__all__ = [
"ErrorAndAboveFilter",
"QueueListenerHandler"
]

View File

@@ -1,6 +1,6 @@
import logging
class ErrorAndAboveFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool | logging.LogRecord:
return record.levelno < logging.ERROR
import logging
class ErrorAndAboveFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool | logging.LogRecord:
return record.levelno < logging.ERROR

View File

@@ -1,105 +1,105 @@
import datetime as dt
import json
import logging
LOG_RECORD_BUILTIN_ATTRS = {
"args",
"asctime",
"created",
"exc_info",
"exc_text",
"filename",
"funcName",
"levelname",
"levelno",
"lineno",
"module",
"msecs",
"message",
"msg",
"name",
"pathname",
"process",
"processName",
"relativeCreated",
"stack_info",
"thread",
"threadName",
"taskName",
}
"""
This isn't being used since the app will be run on gcloud run but this can be used for future apps.
If you want to test it:
formatters:
"json": {
"()": "json_formatter.JSONFormatter",
"fmt_keys": {
"level": "levelname",
"message": "message",
"timestamp": "timestamp",
"logger": "name",
"module": "module",
"function": "funcName",
"line": "lineno",
"thread_name": "threadName"
}
}
handlers:
"file_json": {
"class": "logging.handlers.RotatingFileHandler",
"level": "DEBUG",
"formatter": "json",
"filename": "logs/log",
"maxBytes": 1000000,
"backupCount": 3
}
and add "cfg://handlers.file_json" to queue handler
"""
# From this video https://www.youtube.com/watch?v=9L77QExPmI0
# Src here: https://github.com/mCodingLLC/VideosSampleCode/blob/master/videos/135_modern_logging/mylogger.py
class JSONFormatter(logging.Formatter):
def __init__(
self,
*,
fmt_keys: dict[str, str] | None = None,
):
super().__init__()
self.fmt_keys = fmt_keys if fmt_keys is not None else {}
def format(self, record: logging.LogRecord) -> str:
message = self._prepare_log_dict(record)
return json.dumps(message, default=str)
def _prepare_log_dict(self, record: logging.LogRecord):
always_fields = {
"message": record.getMessage(),
"timestamp": dt.datetime.fromtimestamp(
record.created, tz=dt.timezone.utc
).isoformat(),
}
if record.exc_info is not None:
always_fields["exc_info"] = self.formatException(record.exc_info)
if record.stack_info is not None:
always_fields["stack_info"] = self.formatStack(record.stack_info)
message = {
key: msg_val
if (msg_val := always_fields.pop(val, None)) is not None
else getattr(record, val)
for key, val in self.fmt_keys.items()
}
message.update(always_fields)
for key, val in record.__dict__.items():
if key not in LOG_RECORD_BUILTIN_ATTRS:
message[key] = val
return message
import datetime as dt
import json
import logging
LOG_RECORD_BUILTIN_ATTRS = {
"args",
"asctime",
"created",
"exc_info",
"exc_text",
"filename",
"funcName",
"levelname",
"levelno",
"lineno",
"module",
"msecs",
"message",
"msg",
"name",
"pathname",
"process",
"processName",
"relativeCreated",
"stack_info",
"thread",
"threadName",
"taskName",
}
"""
This isn't being used since the app will be run on gcloud run but this can be used for future apps.
If you want to test it:
formatters:
"json": {
"()": "json_formatter.JSONFormatter",
"fmt_keys": {
"level": "levelname",
"message": "message",
"timestamp": "timestamp",
"logger": "name",
"module": "module",
"function": "funcName",
"line": "lineno",
"thread_name": "threadName"
}
}
handlers:
"file_json": {
"class": "logging.handlers.RotatingFileHandler",
"level": "DEBUG",
"formatter": "json",
"filename": "logs/log",
"maxBytes": 1000000,
"backupCount": 3
}
and add "cfg://handlers.file_json" to queue handler
"""
# From this video https://www.youtube.com/watch?v=9L77QExPmI0
# Src here: https://github.com/mCodingLLC/VideosSampleCode/blob/master/videos/135_modern_logging/mylogger.py
class JSONFormatter(logging.Formatter):
def __init__(
self,
*,
fmt_keys: dict[str, str] | None = None,
):
super().__init__()
self.fmt_keys = fmt_keys if fmt_keys is not None else {}
def format(self, record: logging.LogRecord) -> str:
message = self._prepare_log_dict(record)
return json.dumps(message, default=str)
def _prepare_log_dict(self, record: logging.LogRecord):
always_fields = {
"message": record.getMessage(),
"timestamp": dt.datetime.fromtimestamp(
record.created, tz=dt.timezone.utc
).isoformat(),
}
if record.exc_info is not None:
always_fields["exc_info"] = self.formatException(record.exc_info)
if record.stack_info is not None:
always_fields["stack_info"] = self.formatStack(record.stack_info)
message = {
key: msg_val
if (msg_val := always_fields.pop(val, None)) is not None
else getattr(record, val)
for key, val in self.fmt_keys.items()
}
message.update(always_fields)
for key, val in record.__dict__.items():
if key not in LOG_RECORD_BUILTIN_ATTRS:
message[key] = val
return message

View File

@@ -1,53 +1,53 @@
{
"version": 1,
"objects": {
"queue": {
"class": "queue.Queue",
"maxsize": 1000
}
},
"disable_existing_loggers": false,
"formatters": {
"simple": {
"format": "[%(levelname)s] (%(module)s|L: %(lineno)d) %(asctime)s: %(message)s",
"datefmt": "%Y-%m-%dT%H:%M:%S%z"
}
},
"filters": {
"error_and_above": {
"()": "app.configs.logging.ErrorAndAboveFilter"
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": "INFO",
"formatter": "simple",
"stream": "ext://sys.stdout",
"filters": ["error_and_above"]
},
"error": {
"class": "logging.StreamHandler",
"level": "ERROR",
"formatter": "simple",
"stream": "ext://sys.stderr"
},
"queue_handler": {
"class": "app.configs.logging.QueueListenerHandler",
"handlers": [
"cfg://handlers.console",
"cfg://handlers.error"
],
"queue": "cfg://objects.queue",
"respect_handler_level": true
}
},
"loggers": {
"root": {
"level": "DEBUG",
"handlers": [
"queue_handler"
]
}
}
}
{
"version": 1,
"objects": {
"queue": {
"class": "queue.Queue",
"maxsize": 1000
}
},
"disable_existing_loggers": false,
"formatters": {
"simple": {
"format": "[%(levelname)s] (%(module)s|L: %(lineno)d) %(asctime)s: %(message)s",
"datefmt": "%Y-%m-%dT%H:%M:%S%z"
}
},
"filters": {
"error_and_above": {
"()": "app.configs.logging.ErrorAndAboveFilter"
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": "INFO",
"formatter": "simple",
"stream": "ext://sys.stdout",
"filters": ["error_and_above"]
},
"error": {
"class": "logging.StreamHandler",
"level": "ERROR",
"formatter": "simple",
"stream": "ext://sys.stderr"
},
"queue_handler": {
"class": "app.configs.logging.QueueListenerHandler",
"handlers": [
"cfg://handlers.console",
"cfg://handlers.error"
],
"queue": "cfg://objects.queue",
"respect_handler_level": true
}
},
"loggers": {
"root": {
"level": "DEBUG",
"handlers": [
"queue_handler"
]
}
}
}

View File

@@ -1,61 +1,61 @@
from logging.config import ConvertingList, ConvertingDict, valid_ident
from logging.handlers import QueueHandler, QueueListener
from queue import Queue
import atexit
class QueueHnadlerHelper:
@staticmethod
def resolve_handlers(l):
if not isinstance(l, ConvertingList):
return l
# Indexing the list performs the evaluation.
return [l[i] for i in range(len(l))]
@staticmethod
def resolve_queue(q):
if not isinstance(q, ConvertingDict):
return q
if '__resolved_value__' in q:
return q['__resolved_value__']
cname = q.pop('class')
klass = q.configurator.resolve(cname)
props = q.pop('.', None)
kwargs = {k: q[k] for k in q if valid_ident(k)}
result = klass(**kwargs)
if props:
for name, value in props.items():
setattr(result, name, value)
q['__resolved_value__'] = result
return result
# The guy from this video https://www.youtube.com/watch?v=9L77QExPmI0 is using logging features only available in 3.12
# This article had the class required to build the queue handler in 3.11
# https://rob-blackbourn.medium.com/how-to-use-python-logging-queuehandler-with-dictconfig-1e8b1284e27a
class QueueListenerHandler(QueueHandler):
def __init__(self, handlers, respect_handler_level=False, auto_run=True, queue=Queue(-1)):
queue = QueueHnadlerHelper.resolve_queue(queue)
super().__init__(queue)
handlers = QueueHnadlerHelper.resolve_handlers(handlers)
self._listener = QueueListener(
self.queue,
*handlers,
respect_handler_level=respect_handler_level)
if auto_run:
self.start()
atexit.register(self.stop)
def start(self):
self._listener.start()
def stop(self):
self._listener.stop()
def emit(self, record):
return super().emit(record)
from logging.config import ConvertingList, ConvertingDict, valid_ident
from logging.handlers import QueueHandler, QueueListener
from queue import Queue
import atexit
class QueueHnadlerHelper:
@staticmethod
def resolve_handlers(l):
if not isinstance(l, ConvertingList):
return l
# Indexing the list performs the evaluation.
return [l[i] for i in range(len(l))]
@staticmethod
def resolve_queue(q):
if not isinstance(q, ConvertingDict):
return q
if '__resolved_value__' in q:
return q['__resolved_value__']
cname = q.pop('class')
klass = q.configurator.resolve(cname)
props = q.pop('.', None)
kwargs = {k: q[k] for k in q if valid_ident(k)}
result = klass(**kwargs)
if props:
for name, value in props.items():
setattr(result, name, value)
q['__resolved_value__'] = result
return result
# The guy from this video https://www.youtube.com/watch?v=9L77QExPmI0 is using logging features only available in 3.12
# This article had the class required to build the queue handler in 3.11
# https://rob-blackbourn.medium.com/how-to-use-python-logging-queuehandler-with-dictconfig-1e8b1284e27a
class QueueListenerHandler(QueueHandler):
def __init__(self, handlers, respect_handler_level=False, auto_run=True, queue=Queue(-1)):
queue = QueueHnadlerHelper.resolve_queue(queue)
super().__init__(queue)
handlers = QueueHnadlerHelper.resolve_handlers(handlers)
self._listener = QueueListener(
self.queue,
*handlers,
respect_handler_level=respect_handler_level)
if auto_run:
self.start()
atexit.register(self.stop)
def start(self):
self._listener.start()
def stop(self):
self._listener.stop()
def emit(self, record):
return super().emit(record)

File diff suppressed because it is too large Load Diff

View File

@@ -1,17 +1,19 @@
from .level import ILevelController
from .listening import IListeningController
from .reading import IReadingController
from .writing import IWritingController
from .speaking import ISpeakingController
from .grade import IGradeController
from .training import ITrainingController
__all__ = [
"IListeningController",
"IReadingController",
"IWritingController",
"ISpeakingController",
"ILevelController",
"IGradeController",
"ITrainingController"
]
from .level import ILevelController
from .listening import IListeningController
from .reading import IReadingController
from .writing import IWritingController
from .speaking import ISpeakingController
from .grade import IGradeController
from .training import ITrainingController
from .user import IUserController
__all__ = [
"IListeningController",
"IReadingController",
"IWritingController",
"ISpeakingController",
"ILevelController",
"IGradeController",
"ITrainingController",
"IUserController"
]

View File

@@ -1,22 +1,22 @@
from abc import ABC, abstractmethod
from typing import Dict, List
class IGradeController(ABC):
@abstractmethod
async def grade_writing_task(self, task: int, data):
pass
@abstractmethod
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def grading_summary(self, data: Dict):
pass
from abc import ABC, abstractmethod
from typing import Dict, List
class IGradeController(ABC):
@abstractmethod
async def grade_writing_task(self, task: int, data):
pass
@abstractmethod
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def grading_summary(self, data: Dict):
pass

View File

@@ -1,23 +1,23 @@
from abc import ABC, abstractmethod
from fastapi import UploadFile
from typing import Dict
class ILevelController(ABC):
@abstractmethod
async def get_level_exam(self):
pass
@abstractmethod
async def get_level_utas(self):
pass
@abstractmethod
async def upload_level(self, file: UploadFile):
pass
@abstractmethod
async def get_custom_level(self, data: Dict):
pass
from abc import ABC, abstractmethod
from fastapi import UploadFile
from typing import Dict
class ILevelController(ABC):
@abstractmethod
async def get_level_exam(self):
pass
@abstractmethod
async def get_level_utas(self):
pass
@abstractmethod
async def upload_level(self, file: UploadFile):
pass
@abstractmethod
async def get_custom_level(self, data: Dict):
pass

View File

@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from typing import List
class IListeningController(ABC):
@abstractmethod
async def get_listening_question(self, section_id: int, topic: str, exercises: List[str], difficulty: str):
pass
@abstractmethod
async def save_listening(self, data):
pass
from abc import ABC, abstractmethod
from typing import List
class IListeningController(ABC):
@abstractmethod
async def get_listening_question(self, section_id: int, topic: str, exercises: List[str], difficulty: str):
pass
@abstractmethod
async def save_listening(self, data):
pass

View File

@@ -1,10 +1,10 @@
from abc import ABC, abstractmethod
from typing import List
class IReadingController(ABC):
@abstractmethod
async def get_reading_passage(self, passage: int, topic: str, exercises: List[str], difficulty: str):
pass
from abc import ABC, abstractmethod
from typing import List
class IReadingController(ABC):
@abstractmethod
async def get_reading_passage(self, passage: int, topic: str, exercises: List[str], difficulty: str):
pass

View File

@@ -1,25 +1,25 @@
from abc import ABC, abstractmethod
from typing import Optional
from fastapi import BackgroundTasks
class ISpeakingController(ABC):
@abstractmethod
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
pass
@abstractmethod
async def save_speaking(self, data, background_tasks: BackgroundTasks):
pass
@abstractmethod
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
pass
from abc import ABC, abstractmethod
from typing import Optional
from fastapi import BackgroundTasks
class ISpeakingController(ABC):
@abstractmethod
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
pass
@abstractmethod
async def save_speaking(self, data, background_tasks: BackgroundTasks):
pass
@abstractmethod
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
pass

View File

@@ -1,12 +1,12 @@
from abc import ABC, abstractmethod
class ITrainingController(ABC):
@abstractmethod
async def fetch_tips(self, data):
pass
@abstractmethod
async def get_training_content(self, data):
pass
from abc import ABC, abstractmethod
class ITrainingController(ABC):
@abstractmethod
async def fetch_tips(self, data):
pass
@abstractmethod
async def get_training_content(self, data):
pass

View File

@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from app.dtos.user_batch import BatchUsersDTO
class IUserController(ABC):
@abstractmethod
async def batch_import(self, batch: BatchUsersDTO):
pass

View File

@@ -1,8 +1,8 @@
from abc import ABC, abstractmethod
class IWritingController(ABC):
@abstractmethod
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
pass
from abc import ABC, abstractmethod
class IWritingController(ABC):
@abstractmethod
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
pass

View File

@@ -1,17 +1,19 @@
from .level import LevelController
from .listening import ListeningController
from .reading import ReadingController
from .speaking import SpeakingController
from .writing import WritingController
from .training import TrainingController
from .grade import GradeController
__all__ = [
"LevelController",
"ListeningController",
"ReadingController",
"SpeakingController",
"WritingController",
"TrainingController",
"GradeController"
]
from .level import LevelController
from .listening import ListeningController
from .reading import ReadingController
from .speaking import SpeakingController
from .writing import WritingController
from .training import TrainingController
from .grade import GradeController
from .user import UserController
__all__ = [
"LevelController",
"ListeningController",
"ReadingController",
"SpeakingController",
"WritingController",
"TrainingController",
"GradeController",
"UserController"
]

View File

@@ -1,54 +1,54 @@
import logging
from typing import Dict, List
from app.configs.constants import FilePaths
from app.controllers.abc import IGradeController
from app.dtos.writing import WritingGradeTaskDTO
from app.helpers import FileHelper
from app.services.abc import ISpeakingService, IWritingService, IGradeService
from app.utils import handle_exception
class GradeController(IGradeController):
def __init__(
self,
grade_service: IGradeService,
speaking_service: ISpeakingService,
writing_service: IWritingService
):
self._service = grade_service
self._speaking_service = speaking_service
self._writing_service = writing_service
self._logger = logging.getLogger(__name__)
async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO):
return await self._writing_service.grade_writing_task(task, data.question, data.answer)
@handle_exception(400)
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
return await self._speaking_service.grade_speaking_task(task, answers)
async def grade_short_answers(self, data: Dict):
return await self._service.grade_short_answers(data)
async def grading_summary(self, data: Dict):
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
return await self._service.calculate_grading_summary(extracted_sections)
@staticmethod
def _extract_existing_sections_from_body(my_dict, keys_to_extract):
if 'sections' in my_dict and isinstance(my_dict['sections'], list) and len(my_dict['sections']) > 0:
return list(
filter(
lambda item:
'code' in item and
item['code'] in keys_to_extract and
'grade' in item and
'name' in item,
my_dict['sections']
)
)
import logging
from typing import Dict, List
from app.configs.constants import FilePaths
from app.controllers.abc import IGradeController
from app.dtos.writing import WritingGradeTaskDTO
from app.helpers import FileHelper
from app.services.abc import ISpeakingService, IWritingService, IGradeService
from app.utils import handle_exception
class GradeController(IGradeController):
def __init__(
self,
grade_service: IGradeService,
speaking_service: ISpeakingService,
writing_service: IWritingService
):
self._service = grade_service
self._speaking_service = speaking_service
self._writing_service = writing_service
self._logger = logging.getLogger(__name__)
async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO):
return await self._writing_service.grade_writing_task(task, data.question, data.answer)
@handle_exception(400)
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
return await self._speaking_service.grade_speaking_task(task, answers)
async def grade_short_answers(self, data: Dict):
return await self._service.grade_short_answers(data)
async def grading_summary(self, data: Dict):
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
return await self._service.calculate_grading_summary(extracted_sections)
@staticmethod
def _extract_existing_sections_from_body(my_dict, keys_to_extract):
if 'sections' in my_dict and isinstance(my_dict['sections'], list) and len(my_dict['sections']) > 0:
return list(
filter(
lambda item:
'code' in item and
item['code'] in keys_to_extract and
'grade' in item and
'name' in item,
my_dict['sections']
)
)

View File

@@ -1,23 +1,23 @@
from fastapi import UploadFile
from typing import Dict
from app.controllers.abc import ILevelController
from app.services.abc import ILevelService
class LevelController(ILevelController):
def __init__(self, level_service: ILevelService):
self._service = level_service
async def get_level_exam(self):
return await self._service.get_level_exam()
async def get_level_utas(self):
return await self._service.get_level_utas()
async def upload_level(self, file: UploadFile):
return await self._service.upload_level(file)
async def get_custom_level(self, data: Dict):
return await self._service.get_custom_level(data)
from fastapi import UploadFile
from typing import Dict
from app.controllers.abc import ILevelController
from app.services.abc import ILevelService
class LevelController(ILevelController):
def __init__(self, level_service: ILevelService):
self._service = level_service
async def get_level_exam(self):
return await self._service.get_level_exam()
async def get_level_utas(self):
return await self._service.get_level_utas()
async def upload_level(self, file: UploadFile):
return await self._service.upload_level(file)
async def get_custom_level(self, data: Dict):
return await self._service.get_custom_level(data)

View File

@@ -1,19 +1,19 @@
from typing import List
from app.controllers.abc import IListeningController
from app.dtos.listening import SaveListeningDTO
from app.services.abc import IListeningService
class ListeningController(IListeningController):
def __init__(self, listening_service: IListeningService):
self._service = listening_service
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str
):
return await self._service.get_listening_question(section_id, topic, req_exercises, difficulty)
async def save_listening(self, data: SaveListeningDTO):
return await self._service.save_listening(data.parts, data.minTimer, data.difficulty, data.id)
from typing import List
from app.controllers.abc import IListeningController
from app.dtos.listening import SaveListeningDTO
from app.services.abc import IListeningService
class ListeningController(IListeningController):
def __init__(self, listening_service: IListeningService):
self._service = listening_service
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str
):
return await self._service.get_listening_question(section_id, topic, req_exercises, difficulty)
async def save_listening(self, data: SaveListeningDTO):
return await self._service.save_listening(data.parts, data.minTimer, data.difficulty, data.id)

View File

@@ -1,43 +1,43 @@
import random
import logging
from typing import List
from app.controllers.abc import IReadingController
from app.services.abc import IReadingService
from app.configs.constants import FieldsAndExercises
from app.helpers import ExercisesHelper
class ReadingController(IReadingController):
def __init__(self, reading_service: IReadingService):
self._service = reading_service
self._logger = logging.getLogger(__name__)
self._passages = {
"passage_1": {
"start_id": 1,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_1_EXERCISES
},
"passage_2": {
"start_id": 14,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_2_EXERCISES
},
"passage_3": {
"start_id": 27,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_3_EXERCISES
}
}
async def get_reading_passage(self, passage_id: int, topic: str, req_exercises: List[str], difficulty: str):
passage = self._passages[f'passage_{str(passage_id)}']
if len(req_exercises) == 0:
req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
passage["total_exercises"], len(req_exercises)
)
return await self._service.gen_reading_passage(
passage_id, topic, req_exercises, number_of_exercises_q, difficulty, passage["start_id"]
)
import random
import logging
from typing import List
from app.controllers.abc import IReadingController
from app.services.abc import IReadingService
from app.configs.constants import FieldsAndExercises
from app.helpers import ExercisesHelper
class ReadingController(IReadingController):
def __init__(self, reading_service: IReadingService):
self._service = reading_service
self._logger = logging.getLogger(__name__)
self._passages = {
"passage_1": {
"start_id": 1,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_1_EXERCISES
},
"passage_2": {
"start_id": 14,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_2_EXERCISES
},
"passage_3": {
"start_id": 27,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_3_EXERCISES
}
}
async def get_reading_passage(self, passage_id: int, topic: str, req_exercises: List[str], difficulty: str):
passage = self._passages[f'passage_{str(passage_id)}']
if len(req_exercises) == 0:
req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
passage["total_exercises"], len(req_exercises)
)
return await self._service.gen_reading_passage(
passage_id, topic, req_exercises, number_of_exercises_q, difficulty, passage["start_id"]
)

View File

@@ -1,47 +1,47 @@
import logging
import uuid
from typing import Optional
from fastapi import BackgroundTasks
from app.controllers.abc import ISpeakingController
from app.dtos.speaking import SaveSpeakingDTO
from app.services.abc import ISpeakingService
from app.configs.constants import ExamVariant, MinTimers
from app.configs.question_templates import getSpeakingTemplate
class SpeakingController(ISpeakingController):
def __init__(self, speaking_service: ISpeakingService):
self._service = speaking_service
self._logger = logging.getLogger(__name__)
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
return await self._service.get_speaking_part(task, topic, difficulty, second_topic)
async def save_speaking(self, data: SaveSpeakingDTO, background_tasks: BackgroundTasks):
exercises = data.exercises
min_timer = data.minTimer
template = getSpeakingTemplate()
template["minTimer"] = min_timer
if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
req_id = str(uuid.uuid4())
self._logger.info(f'Received request to save speaking with id: {req_id}')
background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
self._logger.info('Started background task to save speaking.')
# Return response without waiting for create_videos_and_save_to_db to finish
return {**template, "id": req_id}
async def generate_video(self, *args, **kwargs):
return await self._service.generate_video(*args, **kwargs)
import logging
import uuid
from typing import Optional
from fastapi import BackgroundTasks
from app.controllers.abc import ISpeakingController
from app.dtos.speaking import SaveSpeakingDTO
from app.services.abc import ISpeakingService
from app.configs.constants import ExamVariant, MinTimers
from app.configs.question_templates import getSpeakingTemplate
class SpeakingController(ISpeakingController):
def __init__(self, speaking_service: ISpeakingService):
self._service = speaking_service
self._logger = logging.getLogger(__name__)
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
return await self._service.get_speaking_part(task, topic, difficulty, second_topic)
async def save_speaking(self, data: SaveSpeakingDTO, background_tasks: BackgroundTasks):
exercises = data.exercises
min_timer = data.minTimer
template = getSpeakingTemplate()
template["minTimer"] = min_timer
if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
req_id = str(uuid.uuid4())
self._logger.info(f'Received request to save speaking with id: {req_id}')
background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
self._logger.info('Started background task to save speaking.')
# Return response without waiting for create_videos_and_save_to_db to finish
return {**template, "id": req_id}
async def generate_video(self, *args, **kwargs):
return await self._service.generate_video(*args, **kwargs)

View File

@@ -1,17 +1,17 @@
from typing import Dict
from app.controllers.abc import ITrainingController
from app.dtos.training import FetchTipsDTO
from app.services.abc import ITrainingService
class TrainingController(ITrainingController):
def __init__(self, training_service: ITrainingService):
self._service = training_service
async def fetch_tips(self, data: FetchTipsDTO):
return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
async def get_training_content(self, data: Dict):
return await self._service.get_training_content(data)
from typing import Dict
from app.controllers.abc import ITrainingController
from app.dtos.training import FetchTipsDTO
from app.services.abc import ITrainingService
class TrainingController(ITrainingController):
def __init__(self, training_service: ITrainingService):
self._service = training_service
async def fetch_tips(self, data: FetchTipsDTO):
return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
async def get_training_content(self, data: Dict):
return await self._service.get_training_content(data)

View File

@@ -0,0 +1,12 @@
from app.controllers.abc import IUserController
from app.dtos.user_batch import BatchUsersDTO
from app.services.abc import IUserService
class UserController(IUserController):
def __init__(self, user_service: IUserService):
self._service = user_service
async def batch_import(self, batch: BatchUsersDTO):
return await self._service.fetch_tips(batch)

View File

@@ -1,11 +1,11 @@
from app.controllers.abc import IWritingController
from app.services.abc import IWritingService
class WritingController(IWritingController):
def __init__(self, writing_service: IWritingService):
self._service = writing_service
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
return await self._service.get_writing_task_general_question(task, topic, difficulty)
from app.controllers.abc import IWritingController
from app.services.abc import IWritingService
class WritingController(IWritingController):
def __init__(self, writing_service: IWritingService):
self._service = writing_service
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
return await self._service.get_writing_task_general_question(task, topic, difficulty)

View File

@@ -1,57 +1,57 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Union, Optional
from uuid import uuid4, UUID
class Option(BaseModel):
id: str
text: str
class MultipleChoiceQuestion(BaseModel):
id: str
prompt: str
variant: str = "text"
solution: str
options: List[Option]
class MultipleChoiceExercise(BaseModel):
id: UUID = Field(default_factory=uuid4)
type: str = "multipleChoice"
prompt: str = "Select the appropriate option."
questions: List[MultipleChoiceQuestion]
userSolutions: List = Field(default_factory=list)
class FillBlanksWord(BaseModel):
id: str
options: Dict[str, str]
class FillBlanksSolution(BaseModel):
id: str
solution: str
class FillBlanksExercise(BaseModel):
id: UUID = Field(default_factory=uuid4)
type: str = "fillBlanks"
variant: str = "mc"
prompt: str = "Click a blank to select the appropriate word for it."
text: str
solutions: List[FillBlanksSolution]
words: List[FillBlanksWord]
userSolutions: List = Field(default_factory=list)
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
class Part(BaseModel):
exercises: List[Exercise]
context: Optional[str] = Field(default=None)
class Exam(BaseModel):
parts: List[Part]
from pydantic import BaseModel, Field
from typing import List, Dict, Union, Optional
from uuid import uuid4, UUID
class Option(BaseModel):
id: str
text: str
class MultipleChoiceQuestion(BaseModel):
id: str
prompt: str
variant: str = "text"
solution: str
options: List[Option]
class MultipleChoiceExercise(BaseModel):
id: UUID = Field(default_factory=uuid4)
type: str = "multipleChoice"
prompt: str = "Select the appropriate option."
questions: List[MultipleChoiceQuestion]
userSolutions: List = Field(default_factory=list)
class FillBlanksWord(BaseModel):
id: str
options: Dict[str, str]
class FillBlanksSolution(BaseModel):
id: str
solution: str
class FillBlanksExercise(BaseModel):
id: UUID = Field(default_factory=uuid4)
type: str = "fillBlanks"
variant: str = "mc"
prompt: str = "Click a blank to select the appropriate word for it."
text: str
solutions: List[FillBlanksSolution]
words: List[FillBlanksWord]
userSolutions: List = Field(default_factory=list)
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
class Part(BaseModel):
exercises: List[Exercise]
context: Optional[str] = Field(default=None)
class Exam(BaseModel):
parts: List[Part]

View File

@@ -1,14 +1,14 @@
import random
import uuid
from typing import List, Dict
from pydantic import BaseModel
from app.configs.constants import MinTimers, EducationalContent
class SaveListeningDTO(BaseModel):
parts: List[Dict]
minTimer: int = MinTimers.LISTENING_MIN_TIMER_DEFAULT
difficulty: str = random.choice(EducationalContent.DIFFICULTIES)
id: str = str(uuid.uuid4())
import random
import uuid
from typing import List, Dict
from pydantic import BaseModel
from app.configs.constants import MinTimers, EducationalContent
class SaveListeningDTO(BaseModel):
parts: List[Dict]
minTimer: int = MinTimers.LISTENING_MIN_TIMER_DEFAULT
difficulty: str = random.choice(EducationalContent.DIFFICULTIES)
id: str = str(uuid.uuid4())

View File

@@ -1,29 +1,29 @@
from pydantic import BaseModel
from typing import List, Dict, Union, Any, Optional
class Option(BaseModel):
id: str
text: str
class MultipleChoiceQuestion(BaseModel):
type: str = "multipleChoice"
id: str
prompt: str
variant: str = "text"
options: List[Option]
class FillBlanksWord(BaseModel):
type: str = "fillBlanks"
id: str
options: Dict[str, str]
Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]
class Sheet(BaseModel):
batch: Optional[int] = None
components: List[Component]
from pydantic import BaseModel
from typing import List, Dict, Union, Any, Optional
class Option(BaseModel):
id: str
text: str
class MultipleChoiceQuestion(BaseModel):
type: str = "multipleChoice"
id: str
prompt: str
variant: str = "text"
options: List[Option]
class FillBlanksWord(BaseModel):
type: str = "fillBlanks"
id: str
options: Dict[str, str]
Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]
class Sheet(BaseModel):
batch: Optional[int] = None
components: List[Component]

View File

@@ -1,42 +1,42 @@
import random
from typing import List, Dict
from pydantic import BaseModel
from app.configs.constants import MinTimers, AvatarEnum
class SaveSpeakingDTO(BaseModel):
exercises: List[Dict]
minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT
class GradeSpeakingDTO(BaseModel):
question: str
answer: str
class GradeSpeakingAnswersDTO(BaseModel):
answers: List[Dict]
class GenerateVideo1DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
first_topic: str
second_topic: str
class GenerateVideo2DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
prompts: List[str] = []
suffix: str = ""
question: str
topic: str
class GenerateVideo3DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
topic: str
import random
from typing import List, Dict
from pydantic import BaseModel
from app.configs.constants import MinTimers, AvatarEnum
class SaveSpeakingDTO(BaseModel):
exercises: List[Dict]
minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT
class GradeSpeakingDTO(BaseModel):
question: str
answer: str
class GradeSpeakingAnswersDTO(BaseModel):
answers: List[Dict]
class GenerateVideo1DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
first_topic: str
second_topic: str
class GenerateVideo2DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
prompts: List[str] = []
suffix: str = ""
question: str
topic: str
class GenerateVideo3DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
topic: str

View File

@@ -1,37 +1,37 @@
from pydantic import BaseModel
from typing import List
class FetchTipsDTO(BaseModel):
context: str
question: str
answer: str
correct_answer: str
class QueryDTO(BaseModel):
category: str
text: str
class DetailsDTO(BaseModel):
exam_id: str
date: int
performance_comment: str
detailed_summary: str
class WeakAreaDTO(BaseModel):
area: str
comment: str
class TrainingContentDTO(BaseModel):
details: List[DetailsDTO]
weak_areas: List[WeakAreaDTO]
queries: List[QueryDTO]
class TipsDTO(BaseModel):
tip_ids: List[str]
from pydantic import BaseModel
from typing import List
class FetchTipsDTO(BaseModel):
context: str
question: str
answer: str
correct_answer: str
class QueryDTO(BaseModel):
category: str
text: str
class DetailsDTO(BaseModel):
exam_id: str
date: int
performance_comment: str
detailed_summary: str
class WeakAreaDTO(BaseModel):
area: str
comment: str
class TrainingContentDTO(BaseModel):
details: List[DetailsDTO]
weak_areas: List[WeakAreaDTO]
queries: List[QueryDTO]
class TipsDTO(BaseModel):
tip_ids: List[str]

30
app/dtos/user_batch.py Normal file
View File

@@ -0,0 +1,30 @@
import uuid
from typing import Optional
from pydantic import BaseModel, Field
class DemographicInfo(BaseModel):
phone: str
passport_id: Optional[str] = None
country: Optional[str] = None
class UserDTO(BaseModel):
id: uuid.UUID = Field(default_factory=uuid.uuid4)
email: str
name: str
type: str
passport_id: str
passwordHash: str
passwordSalt: str
groupName: Optional[str] = None
corporate: Optional[str] = None
studentID: Optional[str | int] = None
expiryDate: Optional[str] = None
demographicInformation: Optional[DemographicInfo] = None
class BatchUsersDTO(BaseModel):
makerID: str
users: list[UserDTO]

View File

@@ -1,6 +1,6 @@
from pydantic import BaseModel
class WritingGradeTaskDTO(BaseModel):
question: str
answer: str
from pydantic import BaseModel
class WritingGradeTaskDTO(BaseModel):
question: str
answer: str

View File

@@ -1,6 +1,6 @@
from .exceptions import CustomException, UnauthorizedException
__all__ = [
"CustomException",
"UnauthorizedException"
]
from .exceptions import CustomException, UnauthorizedException
__all__ = [
"CustomException",
"UnauthorizedException"
]

View File

@@ -1,17 +1,17 @@
from http import HTTPStatus
class CustomException(Exception):
code = HTTPStatus.INTERNAL_SERVER_ERROR
error_code = HTTPStatus.INTERNAL_SERVER_ERROR
message = HTTPStatus.INTERNAL_SERVER_ERROR.description
def __init__(self, message=None):
if message:
self.message = message
class UnauthorizedException(CustomException):
code = HTTPStatus.UNAUTHORIZED
error_code = HTTPStatus.UNAUTHORIZED
message = HTTPStatus.UNAUTHORIZED.description
from http import HTTPStatus
class CustomException(Exception):
code = HTTPStatus.INTERNAL_SERVER_ERROR
error_code = HTTPStatus.INTERNAL_SERVER_ERROR
message = HTTPStatus.INTERNAL_SERVER_ERROR.description
def __init__(self, message=None):
if message:
self.message = message
class UnauthorizedException(CustomException):
code = HTTPStatus.UNAUTHORIZED
error_code = HTTPStatus.UNAUTHORIZED
message = HTTPStatus.UNAUTHORIZED.description

View File

@@ -1,13 +1,13 @@
from .file import FileHelper
from .text import TextHelper
from .token_counter import count_tokens
from .exercises import ExercisesHelper
from .logger import LoggerHelper
__all__ = [
"FileHelper",
"TextHelper",
"count_tokens",
"ExercisesHelper",
"LoggerHelper"
]
from .file import FileHelper
from .text import TextHelper
from .token_counter import count_tokens
from .exercises import ExercisesHelper
from .logger import LoggerHelper
__all__ = [
"FileHelper",
"TextHelper",
"count_tokens",
"ExercisesHelper",
"LoggerHelper"
]

View File

@@ -1,249 +1,249 @@
import queue
import random
import re
import string
from wonderwords import RandomWord
from .text import TextHelper
class ExercisesHelper:
@staticmethod
def divide_number_into_parts(number, parts):
if number < parts:
return None
part_size = number // parts
remaining = number % parts
q = queue.Queue()
for i in range(parts):
if i < remaining:
q.put(part_size + 1)
else:
q.put(part_size)
return q
@staticmethod
def fix_exercise_ids(exercise, start_id):
# Initialize the starting ID for the first exercise
current_id = start_id
questions = exercise["questions"]
# Iterate through questions and update the "id" value
for question in questions:
question["id"] = str(current_id)
current_id += 1
return exercise
@staticmethod
def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
for i, word in enumerate(words_to_replace, start=start_id):
# Create a case-insensitive regular expression pattern
pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
placeholder = '{{' + str(i) + '}}'
text = pattern.sub(placeholder, text, 1)
return text
@staticmethod
def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
replaced_notes = []
for i, note in enumerate(notes, start=0):
word = words_to_replace[i]
pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
placeholder = '{{' + str(start_id + i) + '}}'
note = pattern.sub(placeholder, note, 1)
replaced_notes.append(note)
return replaced_notes
@staticmethod
def add_random_words_and_shuffle(word_array, num_random_words):
r = RandomWord()
random_words_selected = r.random_words(num_random_words)
combined_array = word_array + random_words_selected
random.shuffle(combined_array)
result = []
for i, word in enumerate(combined_array):
letter = chr(65 + i) # chr(65) is 'A'
result.append({"letter": letter, "word": word})
return result
@staticmethod
def fillblanks_build_solutions_array(words, start_id):
solutions = []
for i, word in enumerate(words, start=start_id):
solutions.append(
{
"id": str(i),
"solution": word
}
)
return solutions
@staticmethod
def remove_excess_questions(questions: [], quantity):
count_true = 0
result = []
for item in reversed(questions):
if item.get('solution') == 'true' and count_true < quantity:
count_true += 1
else:
result.append(item)
result.reverse()
return result
@staticmethod
def build_write_blanks_text(questions: [], start_id):
result = ""
for i, q in enumerate(questions, start=start_id):
placeholder = '{{' + str(i) + '}}'
result = result + q["question"] + placeholder + "\\n"
return result
@staticmethod
def build_write_blanks_text_form(form: [], start_id):
result = ""
replaced_words = []
for i, entry in enumerate(form, start=start_id):
placeholder = '{{' + str(i) + '}}'
# Use regular expression to find the string after ':'
match = re.search(r'(?<=:)\s*(.*)', entry)
# Extract the matched string
original_string = match.group(1)
# Split the string into words
words = re.findall(r'\b\w+\b', original_string)
# Remove words with only one letter
filtered_words = [word for word in words if len(word) > 1]
# Choose a random word from the list of words
selected_word = random.choice(filtered_words)
pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
# Replace the chosen word with the placeholder
replaced_string = pattern.sub(placeholder, original_string, 1)
# Construct the final replaced string
replaced_string = entry.replace(original_string, replaced_string)
result = result + replaced_string + "\\n"
# Save the replaced word or use it as needed
# For example, you can save it to a file or a list
replaced_words.append(selected_word)
return result, replaced_words
@staticmethod
def build_write_blanks_solutions(questions: [], start_id):
solutions = []
for i, q in enumerate(questions, start=start_id):
solution = [q["possible_answers"]] if isinstance(q["possible_answers"], str) else q["possible_answers"]
solutions.append(
{
"id": str(i),
"solution": solution
}
)
return solutions
@staticmethod
def build_write_blanks_solutions_listening(words: [], start_id):
solutions = []
for i, word in enumerate(words, start=start_id):
solution = [word] if isinstance(word, str) else word
solutions.append(
{
"id": str(i),
"solution": solution
}
)
return solutions
@staticmethod
def answer_word_limit_ok(question):
# Check if any option in any solution has more than three words
return not any(
len(option.split()) > 3
for solution in question["solutions"]
for option in solution["solution"]
)
@staticmethod
def assign_letters_to_paragraphs(paragraphs):
result = []
letters = iter(string.ascii_uppercase)
for paragraph in paragraphs.split("\n\n"):
if TextHelper.has_x_words(paragraph, 10):
result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
return result
@staticmethod
def contains_empty_dict(arr):
return any(elem == {} for elem in arr)
@staticmethod
def fix_writing_overall(overall: float, task_response: dict):
grades = [category["grade"] for category in task_response.values()]
if overall > max(grades) or overall < min(grades):
total_sum = sum(grades)
average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
return overall
@staticmethod
def build_options(ideas):
options = []
letters = iter(string.ascii_uppercase)
for idea in ideas:
options.append({
"id": next(letters),
"sentence": idea["from"]
})
return options
@staticmethod
def build_sentences(ideas, start_id):
sentences = []
letters = iter(string.ascii_uppercase)
for idea in ideas:
sentences.append({
"solution": next(letters),
"sentence": idea["idea"]
})
random.shuffle(sentences)
for i, sentence in enumerate(sentences, start=start_id):
sentence["id"] = i
return sentences
@staticmethod
def randomize_mc_options_order(questions):
option_ids = ['A', 'B', 'C', 'D']
for question in questions:
# Store the original solution text
original_solution_text = next(
option['text'] for option in question['options'] if option['id'] == question['solution'])
# Shuffle the options
random.shuffle(question['options'])
# Update the option ids and find the new solution id
for idx, option in enumerate(question['options']):
option['id'] = option_ids[idx]
if option['text'] == original_solution_text:
question['solution'] = option['id']
return questions
import queue
import random
import re
import string
from wonderwords import RandomWord
from .text import TextHelper
class ExercisesHelper:
@staticmethod
def divide_number_into_parts(number, parts):
if number < parts:
return None
part_size = number // parts
remaining = number % parts
q = queue.Queue()
for i in range(parts):
if i < remaining:
q.put(part_size + 1)
else:
q.put(part_size)
return q
@staticmethod
def fix_exercise_ids(exercise, start_id):
# Initialize the starting ID for the first exercise
current_id = start_id
questions = exercise["questions"]
# Iterate through questions and update the "id" value
for question in questions:
question["id"] = str(current_id)
current_id += 1
return exercise
@staticmethod
def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
for i, word in enumerate(words_to_replace, start=start_id):
# Create a case-insensitive regular expression pattern
pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
placeholder = '{{' + str(i) + '}}'
text = pattern.sub(placeholder, text, 1)
return text
@staticmethod
def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
replaced_notes = []
for i, note in enumerate(notes, start=0):
word = words_to_replace[i]
pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
placeholder = '{{' + str(start_id + i) + '}}'
note = pattern.sub(placeholder, note, 1)
replaced_notes.append(note)
return replaced_notes
@staticmethod
def add_random_words_and_shuffle(word_array, num_random_words):
r = RandomWord()
random_words_selected = r.random_words(num_random_words)
combined_array = word_array + random_words_selected
random.shuffle(combined_array)
result = []
for i, word in enumerate(combined_array):
letter = chr(65 + i) # chr(65) is 'A'
result.append({"letter": letter, "word": word})
return result
@staticmethod
def fillblanks_build_solutions_array(words, start_id):
solutions = []
for i, word in enumerate(words, start=start_id):
solutions.append(
{
"id": str(i),
"solution": word
}
)
return solutions
@staticmethod
def remove_excess_questions(questions: [], quantity):
count_true = 0
result = []
for item in reversed(questions):
if item.get('solution') == 'true' and count_true < quantity:
count_true += 1
else:
result.append(item)
result.reverse()
return result
@staticmethod
def build_write_blanks_text(questions: [], start_id):
result = ""
for i, q in enumerate(questions, start=start_id):
placeholder = '{{' + str(i) + '}}'
result = result + q["question"] + placeholder + "\\n"
return result
@staticmethod
def build_write_blanks_text_form(form: [], start_id):
result = ""
replaced_words = []
for i, entry in enumerate(form, start=start_id):
placeholder = '{{' + str(i) + '}}'
# Use regular expression to find the string after ':'
match = re.search(r'(?<=:)\s*(.*)', entry)
# Extract the matched string
original_string = match.group(1)
# Split the string into words
words = re.findall(r'\b\w+\b', original_string)
# Remove words with only one letter
filtered_words = [word for word in words if len(word) > 1]
# Choose a random word from the list of words
selected_word = random.choice(filtered_words)
pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
# Replace the chosen word with the placeholder
replaced_string = pattern.sub(placeholder, original_string, 1)
# Construct the final replaced string
replaced_string = entry.replace(original_string, replaced_string)
result = result + replaced_string + "\\n"
# Save the replaced word or use it as needed
# For example, you can save it to a file or a list
replaced_words.append(selected_word)
return result, replaced_words
@staticmethod
def build_write_blanks_solutions(questions: [], start_id):
solutions = []
for i, q in enumerate(questions, start=start_id):
solution = [q["possible_answers"]] if isinstance(q["possible_answers"], str) else q["possible_answers"]
solutions.append(
{
"id": str(i),
"solution": solution
}
)
return solutions
@staticmethod
def build_write_blanks_solutions_listening(words: [], start_id):
solutions = []
for i, word in enumerate(words, start=start_id):
solution = [word] if isinstance(word, str) else word
solutions.append(
{
"id": str(i),
"solution": solution
}
)
return solutions
@staticmethod
def answer_word_limit_ok(question):
# Check if any option in any solution has more than three words
return not any(
len(option.split()) > 3
for solution in question["solutions"]
for option in solution["solution"]
)
@staticmethod
def assign_letters_to_paragraphs(paragraphs):
result = []
letters = iter(string.ascii_uppercase)
for paragraph in paragraphs.split("\n\n"):
if TextHelper.has_x_words(paragraph, 10):
result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
return result
@staticmethod
def contains_empty_dict(arr):
return any(elem == {} for elem in arr)
@staticmethod
def fix_writing_overall(overall: float, task_response: dict):
grades = [category["grade"] for category in task_response.values()]
if overall > max(grades) or overall < min(grades):
total_sum = sum(grades)
average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
return overall
@staticmethod
def build_options(ideas):
options = []
letters = iter(string.ascii_uppercase)
for idea in ideas:
options.append({
"id": next(letters),
"sentence": idea["from"]
})
return options
@staticmethod
def build_sentences(ideas, start_id):
sentences = []
letters = iter(string.ascii_uppercase)
for idea in ideas:
sentences.append({
"solution": next(letters),
"sentence": idea["idea"]
})
random.shuffle(sentences)
for i, sentence in enumerate(sentences, start=start_id):
sentence["id"] = i
return sentences
@staticmethod
def randomize_mc_options_order(questions):
option_ids = ['A', 'B', 'C', 'D']
for question in questions:
# Store the original solution text
original_solution_text = next(
option['text'] for option in question['options'] if option['id'] == question['solution'])
# Shuffle the options
random.shuffle(question['options'])
# Update the option ids and find the new solution id
for idx, option in enumerate(question['options']):
option['id'] = option_ids[idx]
if option['text'] == original_solution_text:
question['solution'] = option['id']
return questions

View File

@@ -1,95 +1,114 @@
import datetime
from pathlib import Path
import base64
import io
import os
import shutil
import subprocess
from typing import Optional
import numpy as np
import pypandoc
from PIL import Image
import aiofiles
class FileHelper:
@staticmethod
def delete_files_older_than_one_day(directory: str):
current_time = datetime.datetime.now()
for entry in os.scandir(directory):
if entry.is_file():
file_path = Path(entry)
file_name = file_path.name
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
time_difference = current_time - file_modified_time
if time_difference.days > 1 and "placeholder" not in file_name:
file_path.unlink()
print(f"Deleted file: {file_path}")
# Supposedly pandoc covers a wide range of file extensions only tested with docx
@staticmethod
def convert_file_to_pdf(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
'-V', 'geometry:paperwidth=5.5in',
'-V', 'geometry:paperheight=8.5in',
'-V', 'geometry:margin=0.5in',
'-V', 'pagestyle=empty'
])
@staticmethod
def convert_file_to_html(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'html', outputfile=output_path)
@staticmethod
def pdf_to_png(path_id: str):
to_png = f"pdftoppm -png exercises.pdf page"
result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
if result.returncode != 0:
raise Exception(
f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
@staticmethod
def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
with Image.open(io.BytesIO(image_bytes)) as img:
img_gray = img.convert('L')
img_array = np.array(img_gray)
non_white_pixels = np.sum(img_array < 255)
return non_white_pixels <= image_threshold
@classmethod
async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
async with aiofiles.open(image_path, "rb") as image_file:
image_bytes = await image_file.read()
if cls.is_page_blank(image_bytes, image_threshold):
return None
return base64.b64encode(image_bytes).decode('utf-8')
@classmethod
def b64_pngs(cls, path_id: str, files: list[str]):
png_messages = []
for filename in files:
b64_string = cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
if b64_string:
png_messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{b64_string}"
}
})
return png_messages
@staticmethod
def remove_directory(path):
try:
if os.path.exists(path):
if os.path.isdir(path):
shutil.rmtree(path)
except Exception as e:
print(f"An error occurred while trying to remove {path}: {str(e)}")
import base64
import io
import os
import shutil
import subprocess
import uuid
import datetime
from pathlib import Path
from typing import Optional, Tuple
import aiofiles
import numpy as np
import pypandoc
from PIL import Image
class FileHelper:
@staticmethod
def delete_files_older_than_one_day(directory: str):
current_time = datetime.datetime.now()
for entry in os.scandir(directory):
if entry.is_file():
file_path = Path(entry)
file_name = file_path.name
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
time_difference = current_time - file_modified_time
if time_difference.days > 1 and "placeholder" not in file_name:
file_path.unlink()
print(f"Deleted file: {file_path}")
# Supposedly pandoc covers a wide range of file extensions only tested with docx
@staticmethod
def convert_file_to_pdf(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
'-V', 'geometry:paperwidth=5.5in',
'-V', 'geometry:paperheight=8.5in',
'-V', 'geometry:margin=0.5in',
'-V', 'pagestyle=empty'
])
@staticmethod
def convert_file_to_html(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'html', outputfile=output_path)
@staticmethod
def pdf_to_png(path_id: str):
to_png = f"pdftoppm -png exercises.pdf page"
result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
if result.returncode != 0:
raise Exception(
f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
@staticmethod
def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
with Image.open(io.BytesIO(image_bytes)) as img:
img_gray = img.convert('L')
img_array = np.array(img_gray)
non_white_pixels = np.sum(img_array < 255)
return non_white_pixels <= image_threshold
@classmethod
async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
async with aiofiles.open(image_path, "rb") as image_file:
image_bytes = await image_file.read()
if cls.is_page_blank(image_bytes, image_threshold):
return None
return base64.b64encode(image_bytes).decode('utf-8')
@classmethod
async def b64_pngs(cls, path_id: str, files: list[str]):
png_messages = []
for filename in files:
b64_string = await cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
if b64_string:
png_messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{b64_string}"
}
})
return png_messages
@staticmethod
def remove_directory(path):
try:
if os.path.exists(path):
if os.path.isdir(path):
shutil.rmtree(path)
except Exception as e:
print(f"An error occurred while trying to remove {path}: {str(e)}")
@staticmethod
def remove_file(file_path):
try:
if os.path.exists(file_path):
if os.path.isfile(file_path):
os.remove(file_path)
except Exception as e:
print(f"An error occurred while trying to remove the file {file_path}: {str(e)}")
@staticmethod
def save_upload(file) -> Tuple[str, str]:
ext = file.filename.split('.')[-1]
path_id = str(uuid.uuid4())
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
file.save(tmp_filename)
return ext, path_id

View File

@@ -1,23 +1,23 @@
import logging
from functools import wraps
class LoggerHelper:
@staticmethod
def suppress_loggers():
def decorator(f):
@wraps(f)
def wrapped(*args, **kwargs):
root_logger = logging.getLogger()
original_level = root_logger.level
root_logger.setLevel(logging.ERROR)
try:
return f(*args, **kwargs)
finally:
root_logger.setLevel(original_level)
return wrapped
return decorator
import logging
from functools import wraps
class LoggerHelper:
@staticmethod
def suppress_loggers():
def decorator(f):
@wraps(f)
def wrapped(*args, **kwargs):
root_logger = logging.getLogger()
original_level = root_logger.level
root_logger.setLevel(logging.ERROR)
try:
return f(*args, **kwargs)
finally:
root_logger.setLevel(original_level)
return wrapped
return decorator

View File

@@ -1,28 +1,28 @@
from nltk.corpus import words
class TextHelper:
@classmethod
def has_words(cls, text: str):
if not cls._has_common_words(text):
return False
english_words = set(words.words())
words_in_input = text.split()
return any(word.lower() in english_words for word in words_in_input)
@classmethod
def has_x_words(cls, text: str, quantity):
if not cls._has_common_words(text):
return False
english_words = set(words.words())
words_in_input = text.split()
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
return english_word_count >= quantity
@staticmethod
def _has_common_words(text: str):
english_words = {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}
words_in_input = text.split()
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
return english_word_count >= 10
from nltk.corpus import words
class TextHelper:
@classmethod
def has_words(cls, text: str):
if not cls._has_common_words(text):
return False
english_words = set(words.words())
words_in_input = text.split()
return any(word.lower() in english_words for word in words_in_input)
@classmethod
def has_x_words(cls, text: str, quantity):
if not cls._has_common_words(text):
return False
english_words = set(words.words())
words_in_input = text.split()
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
return english_word_count >= quantity
@staticmethod
def _has_common_words(text: str):
english_words = {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}
words_in_input = text.split()
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
return english_word_count >= 10

View File

@@ -1,89 +1,89 @@
# This is a work in progress. There are still bugs. Once it is production-ready this will become a full repo.
import tiktoken
import nltk
def count_tokens(text, model_name="gpt-3.5-turbo", debug=False):
"""
Count the number of tokens in a given text string without using the OpenAI API.
This function tries three methods in the following order:
1. tiktoken (preferred): Accurate token counting similar to the OpenAI API.
2. nltk: Token counting using the Natural Language Toolkit library.
3. split: Simple whitespace-based token counting as a fallback.
Usage:
------
text = "Your text here"
result = count_tokens(text, model_name="gpt-3.5-turbo", debug=True)
print(result)
Required libraries:
-------------------
- tiktoken: Install with 'pip install tiktoken'
- nltk: Install with 'pip install nltk'
Parameters:
-----------
text : str
The text string for which you want to count tokens.
model_name : str, optional
The OpenAI model for which you want to count tokens (default: "gpt-3.5-turbo").
debug : bool, optional
Set to True to print error messages (default: False).
Returns:
--------
result : dict
A dictionary containing the number of tokens and the method used for counting.
"""
# Try using tiktoken
try:
encoding = tiktoken.encoding_for_model(model_name)
num_tokens = len(encoding.encode(text))
result = {"n_tokens": num_tokens, "method": "tiktoken"}
return result
except Exception as e:
if debug:
print(f"Error using tiktoken: {e}")
pass
# Try using nltk
try:
# Passed nltk.download("punkt") to server.py's @asynccontextmanager
tokens = nltk.word_tokenize(text)
result = {"n_tokens": len(tokens), "method": "nltk"}
return result
except Exception as e:
if debug:
print(f"Error using nltk: {e}")
pass
# If nltk and tiktoken fail, use a simple split-based method
tokens = text.split()
result = {"n_tokens": len(tokens), "method": "split"}
return result
class TokenBuffer:
def __init__(self, max_tokens=2048):
self.max_tokens = max_tokens
self.buffer = ""
self.token_lengths = []
self.token_count = 0
def update(self, text, model_name="gpt-3.5-turbo", debug=False):
new_tokens = count_tokens(text, model_name=model_name, debug=debug)["n_tokens"]
self.token_count += new_tokens
self.buffer += text
self.token_lengths.append(new_tokens)
while self.token_count > self.max_tokens:
removed_tokens = self.token_lengths.pop(0)
self.token_count -= removed_tokens
self.buffer = self.buffer.split(" ", removed_tokens)[-1]
def get_buffer(self):
return self.buffer
# This is a work in progress. There are still bugs. Once it is production-ready this will become a full repo.
import tiktoken
import nltk
def count_tokens(text, model_name="gpt-3.5-turbo", debug=False):
"""
Count the number of tokens in a given text string without using the OpenAI API.
This function tries three methods in the following order:
1. tiktoken (preferred): Accurate token counting similar to the OpenAI API.
2. nltk: Token counting using the Natural Language Toolkit library.
3. split: Simple whitespace-based token counting as a fallback.
Usage:
------
text = "Your text here"
result = count_tokens(text, model_name="gpt-3.5-turbo", debug=True)
print(result)
Required libraries:
-------------------
- tiktoken: Install with 'pip install tiktoken'
- nltk: Install with 'pip install nltk'
Parameters:
-----------
text : str
The text string for which you want to count tokens.
model_name : str, optional
The OpenAI model for which you want to count tokens (default: "gpt-3.5-turbo").
debug : bool, optional
Set to True to print error messages (default: False).
Returns:
--------
result : dict
A dictionary containing the number of tokens and the method used for counting.
"""
# Try using tiktoken
try:
encoding = tiktoken.encoding_for_model(model_name)
num_tokens = len(encoding.encode(text))
result = {"n_tokens": num_tokens, "method": "tiktoken"}
return result
except Exception as e:
if debug:
print(f"Error using tiktoken: {e}")
pass
# Try using nltk
try:
# Passed nltk.download("punkt") to server.py's @asynccontextmanager
tokens = nltk.word_tokenize(text)
result = {"n_tokens": len(tokens), "method": "nltk"}
return result
except Exception as e:
if debug:
print(f"Error using nltk: {e}")
pass
# If nltk and tiktoken fail, use a simple split-based method
tokens = text.split()
result = {"n_tokens": len(tokens), "method": "split"}
return result
class TokenBuffer:
def __init__(self, max_tokens=2048):
self.max_tokens = max_tokens
self.buffer = ""
self.token_lengths = []
self.token_count = 0
def update(self, text, model_name="gpt-3.5-turbo", debug=False):
new_tokens = count_tokens(text, model_name=model_name, debug=debug)["n_tokens"]
self.token_count += new_tokens
self.buffer += text
self.token_lengths.append(new_tokens)
while self.token_count > self.max_tokens:
removed_tokens = self.token_lengths.pop(0)
self.token_count -= removed_tokens
self.buffer = self.buffer.split(" ", removed_tokens)[-1]
def get_buffer(self):
return self.buffer

View File

@@ -1,5 +1,5 @@
from .exam import ExamMapper
__all__ = [
"ExamMapper"
]
from .exam import ExamMapper
__all__ = [
"ExamMapper"
]

View File

@@ -1,66 +1,66 @@
from typing import Dict, Any
from pydantic import ValidationError
from app.dtos.exam import (
MultipleChoiceExercise,
FillBlanksExercise,
Part, Exam
)
from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
class ExamMapper:
@staticmethod
def map_to_exam_model(response: Dict[str, Any]) -> Exam:
parts = []
for part in response['parts']:
part_exercises = part['exercises']
context = part.get('context', None)
exercises = []
for exercise in part_exercises:
exercise_type = exercise['type']
if exercise_type == 'multipleChoice':
exercise_model = MultipleChoiceExercise(**exercise)
elif exercise_type == 'fillBlanks':
exercise_model = FillBlanksExercise(**exercise)
else:
raise ValidationError(f"Unknown exercise type: {exercise_type}")
exercises.append(exercise_model)
part_kwargs = {"exercises": exercises}
if context is not None:
part_kwargs["context"] = context
part_model = Part(**part_kwargs)
parts.append(part_model)
return Exam(parts=parts)
@staticmethod
def map_to_sheet(response: Dict[str, Any]) -> Sheet:
components = []
for item in response["components"]:
component_type = item["type"]
if component_type == "multipleChoice":
options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
components.append(MultipleChoiceQuestion(
id=item["id"],
prompt=item["prompt"],
variant=item.get("variant", "text"),
options=options
))
elif component_type == "fillBlanks":
components.append(FillBlanksWord(
id=item["id"],
options=item["options"]
))
else:
components.append(item)
return Sheet(components=components)
from typing import Dict, Any
from pydantic import ValidationError
from app.dtos.exam import (
MultipleChoiceExercise,
FillBlanksExercise,
Part, Exam
)
from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
class ExamMapper:
@staticmethod
def map_to_exam_model(response: Dict[str, Any]) -> Exam:
parts = []
for part in response['parts']:
part_exercises = part['exercises']
context = part.get('context', None)
exercises = []
for exercise in part_exercises:
exercise_type = exercise['type']
if exercise_type == 'multipleChoice':
exercise_model = MultipleChoiceExercise(**exercise)
elif exercise_type == 'fillBlanks':
exercise_model = FillBlanksExercise(**exercise)
else:
raise ValidationError(f"Unknown exercise type: {exercise_type}")
exercises.append(exercise_model)
part_kwargs = {"exercises": exercises}
if context is not None:
part_kwargs["context"] = context
part_model = Part(**part_kwargs)
parts.append(part_model)
return Exam(parts=parts)
@staticmethod
def map_to_sheet(response: Dict[str, Any]) -> Sheet:
components = []
for item in response["components"]:
component_type = item["type"]
if component_type == "multipleChoice":
options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
components.append(MultipleChoiceQuestion(
id=item["id"],
prompt=item["prompt"],
variant=item.get("variant", "text"),
options=options
))
elif component_type == "fillBlanks":
components.append(FillBlanksWord(
id=item["id"],
options=item["options"]
))
else:
components.append(item)
return Sheet(components=components)

View File

@@ -1,9 +1,9 @@
from .authentication import AuthBackend, AuthenticationMiddleware
from .authorization import Authorized, IsAuthenticatedViaBearerToken
__all__ = [
"AuthBackend",
"AuthenticationMiddleware",
"Authorized",
"IsAuthenticatedViaBearerToken"
from .authentication import AuthBackend, AuthenticationMiddleware
from .authorization import Authorized, IsAuthenticatedViaBearerToken
__all__ = [
"AuthBackend",
"AuthenticationMiddleware",
"Authorized",
"IsAuthenticatedViaBearerToken"
]

View File

@@ -1,48 +1,48 @@
import os
from typing import Tuple
import jwt
from jwt import InvalidTokenError
from pydantic import BaseModel, Field
from starlette.authentication import AuthenticationBackend
from starlette.middleware.authentication import (
AuthenticationMiddleware as BaseAuthenticationMiddleware,
)
from starlette.requests import HTTPConnection
class Session(BaseModel):
authenticated: bool = Field(False, description="Is user authenticated?")
class AuthBackend(AuthenticationBackend):
async def authenticate(
self, conn: HTTPConnection
) -> Tuple[bool, Session]:
session = Session()
authorization: str = conn.headers.get("Authorization")
if not authorization:
return False, session
try:
scheme, token = authorization.split(" ")
if scheme.lower() != "bearer":
return False, session
except ValueError:
return False, session
jwt_secret_key = os.getenv("JWT_SECRET_KEY")
if not jwt_secret_key:
return False, session
try:
jwt.decode(token, jwt_secret_key, algorithms=["HS256"])
except InvalidTokenError:
return False, session
session.authenticated = True
return True, session
class AuthenticationMiddleware(BaseAuthenticationMiddleware):
pass
import os
from typing import Tuple
import jwt
from jwt import InvalidTokenError
from pydantic import BaseModel, Field
from starlette.authentication import AuthenticationBackend
from starlette.middleware.authentication import (
AuthenticationMiddleware as BaseAuthenticationMiddleware,
)
from starlette.requests import HTTPConnection
class Session(BaseModel):
authenticated: bool = Field(False, description="Is user authenticated?")
class AuthBackend(AuthenticationBackend):
async def authenticate(
self, conn: HTTPConnection
) -> Tuple[bool, Session]:
session = Session()
authorization: str = conn.headers.get("Authorization")
if not authorization:
return False, session
try:
scheme, token = authorization.split(" ")
if scheme.lower() != "bearer":
return False, session
except ValueError:
return False, session
jwt_secret_key = os.getenv("JWT_SECRET_KEY")
if not jwt_secret_key:
return False, session
try:
jwt.decode(token, jwt_secret_key, algorithms=["HS256"])
except InvalidTokenError:
return False, session
session.authenticated = True
return True, session
class AuthenticationMiddleware(BaseAuthenticationMiddleware):
pass

View File

@@ -1,36 +1,36 @@
from abc import ABC, abstractmethod
from typing import List, Type
from fastapi import Request
from fastapi.openapi.models import APIKey, APIKeyIn
from fastapi.security.base import SecurityBase
from app.exceptions import CustomException, UnauthorizedException
class BaseAuthorization(ABC):
exception = CustomException
@abstractmethod
async def has_permission(self, request: Request) -> bool:
pass
class IsAuthenticatedViaBearerToken(BaseAuthorization):
exception = UnauthorizedException
async def has_permission(self, request: Request) -> bool:
return request.user.authenticated
class Authorized(SecurityBase):
def __init__(self, permissions: List[Type[BaseAuthorization]]):
self.permissions = permissions
self.model: APIKey = APIKey(**{"in": APIKeyIn.header}, name="Authorization")
self.scheme_name = self.__class__.__name__
async def __call__(self, request: Request):
for permission in self.permissions:
cls = permission()
if not await cls.has_permission(request=request):
raise cls.exception
from abc import ABC, abstractmethod
from typing import List, Type
from fastapi import Request
from fastapi.openapi.models import APIKey, APIKeyIn
from fastapi.security.base import SecurityBase
from app.exceptions import CustomException, UnauthorizedException
class BaseAuthorization(ABC):
exception = CustomException
@abstractmethod
async def has_permission(self, request: Request) -> bool:
pass
class IsAuthenticatedViaBearerToken(BaseAuthorization):
exception = UnauthorizedException
async def has_permission(self, request: Request) -> bool:
return request.user.authenticated
class Authorized(SecurityBase):
def __init__(self, permissions: List[Type[BaseAuthorization]]):
self.permissions = permissions
self.model: APIKey = APIKey(**{"in": APIKeyIn.header}, name="Authorization")
self.scheme_name = self.__class__.__name__
async def __call__(self, request: Request):
for permission in self.permissions:
cls = permission()
if not await cls.has_permission(request=request):
raise cls.exception

View File

@@ -1,7 +1,7 @@
from .file_storage import IFileStorage
from .document_store import IDocumentStore
__all__ = [
"IFileStorage",
"IDocumentStore"
from .file_storage import IFileStorage
from .document_store import IDocumentStore
__all__ = [
"IFileStorage",
"IDocumentStore"
]

View File

@@ -1,16 +1,15 @@
from abc import ABC
class IDocumentStore(ABC):
async def save_to_db(self, collection: str, item):
pass
async def save_to_db_with_id(self, collection: str, item, id: str):
pass
async def get_all(self, collection: str):
pass
async def get_doc_by_id(self, collection: str, doc_id: str):
pass
from abc import ABC
from typing import Dict, Optional, List
class IDocumentStore(ABC):
async def save_to_db(self, collection: str, item: Dict, doc_id: Optional[str]) -> Optional[str]:
pass
async def get_all(self, collection: str) -> List[Dict]:
pass
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
pass

View File

@@ -1,16 +1,16 @@
from abc import ABC, abstractmethod
class IFileStorage(ABC):
@abstractmethod
async def download_firebase_file(self, source_blob_name, destination_file_name):
pass
@abstractmethod
async def upload_file_firebase_get_url(self, destination_blob_name, source_file_name):
pass
@abstractmethod
async def make_public(self, blob_name: str):
pass
from abc import ABC, abstractmethod
class IFileStorage(ABC):
@abstractmethod
async def download_firebase_file(self, source_blob_name, destination_file_name):
pass
@abstractmethod
async def upload_file_firebase_get_url(self, destination_blob_name, source_file_name):
pass
@abstractmethod
async def make_public(self, blob_name: str):
pass

View File

@@ -1,8 +1,8 @@
from .document_stores import *
from .firebase import FirebaseStorage
__all__ = [
"FirebaseStorage"
]
__all__.extend(document_stores.__all__)
from .document_stores import *
from app.repositories.impl.file_storage.firebase import FirebaseStorage
__all__ = [
"FirebaseStorage"
]
__all__.extend(document_stores.__all__)

View File

@@ -1,7 +1,7 @@
from .firestore import Firestore
#from .mongo import MongoDB
__all__ = [
"Firestore",
#"MongoDB"
]
from .firestore import Firestore
#from .mongo import MongoDB
__all__ = [
"Firestore",
#"MongoDB"
]

View File

@@ -1,47 +1,47 @@
import logging
from google.cloud.firestore_v1.async_client import AsyncClient
from google.cloud.firestore_v1.async_collection import AsyncCollectionReference
from google.cloud.firestore_v1.async_document import AsyncDocumentReference
from app.repositories.abc import IDocumentStore
class Firestore(IDocumentStore):
def __init__(self, client: AsyncClient):
self._client = client
self._logger = logging.getLogger(__name__)
async def save_to_db(self, collection: str, item):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
update_time, document_ref = await collection_ref.add(item)
if document_ref:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
else:
return None
async def save_to_db_with_id(self, collection: str, item, id: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
document_ref: AsyncDocumentReference = collection_ref.document(id)
await document_ref.set(item)
doc_snapshot = await document_ref.get()
if doc_snapshot.exists:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
else:
return None
async def get_all(self, collection: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
docs = []
async for doc in collection_ref.stream():
docs.append(doc.to_dict())
return docs
async def get_doc_by_id(self, collection: str, doc_id: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
doc_ref: AsyncDocumentReference = collection_ref.document(doc_id)
doc = await doc_ref.get()
if doc.exists:
return doc.to_dict()
return None
import logging
from typing import Optional, List, Dict
from google.cloud.firestore_v1.async_client import AsyncClient
from google.cloud.firestore_v1.async_collection import AsyncCollectionReference
from google.cloud.firestore_v1.async_document import AsyncDocumentReference
from app.repositories.abc import IDocumentStore
class Firestore(IDocumentStore):
def __init__(self, client: AsyncClient):
self._client = client
self._logger = logging.getLogger(__name__)
async def save_to_db(self, collection: str, item, doc_id: Optional[str] = None) -> Optional[str]:
collection_ref: AsyncCollectionReference = self._client.collection(collection)
if doc_id:
document_ref: AsyncDocumentReference = collection_ref.document(doc_id)
await document_ref.set(item)
doc_snapshot = await document_ref.get()
if doc_snapshot.exists:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
else:
update_time, document_ref = await collection_ref.add(item)
if document_ref:
self._logger.info(f"Document added with ID: {document_ref.id}")
return document_ref.id
return None
async def get_all(self, collection: str) -> List[Dict]:
collection_ref: AsyncCollectionReference = self._client.collection(collection)
docs = []
async for doc in collection_ref.stream():
docs.append(doc.to_dict())
return docs
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
collection_ref: AsyncCollectionReference = self._client.collection(collection)
doc_ref: AsyncDocumentReference = collection_ref.document(doc_id)
doc = await doc_ref.get()
if doc.exists:
return doc.to_dict()
return None

View File

@@ -1,36 +1,37 @@
"""import logging
from pymongo import MongoClient
from app.repositories.abc import IDocumentStore
class MongoDB(IDocumentStore):
def __init__(self, client: MongoClient):
self._client = client
self._logger = logging.getLogger(__name__)
def save_to_db(self, collection: str, item):
collection_ref = self._client[collection]
result = collection_ref.insert_one(item)
if result.inserted_id:
self._logger.info(f"Document added with ID: {result.inserted_id}")
return True, str(result.inserted_id)
else:
return False, None
def save_to_db_with_id(self, collection: str, item, doc_id: str):
collection_ref = self._client[collection]
item['_id'] = doc_id
result = collection_ref.replace_one({'_id': id}, item, upsert=True)
if result.upserted_id or result.matched_count:
self._logger.info(f"Document added with ID: {doc_id}")
return True, doc_id
else:
return False, None
def get_all(self, collection: str):
collection_ref = self._client[collection]
all_documents = list(collection_ref.find())
return all_documents
"""
import logging
import uuid
from typing import Optional, List, Dict
from motor.motor_asyncio import AsyncIOMotorDatabase
from app.repositories.abc import IDocumentStore
class MongoDB(IDocumentStore):
def __init__(self, mongo_db: AsyncIOMotorDatabase):
self._mongo_db = mongo_db
self._logger = logging.getLogger(__name__)
async def save_to_db(self, collection: str, item, doc_id: Optional[str] = None) -> Optional[str]:
collection_ref = self._mongo_db[collection]
if doc_id is None:
doc_id = str(uuid.uuid4())
item['id'] = doc_id
result = await collection_ref.insert_one(item)
if result.inserted_id:
# returning id instead of _id
self._logger.info(f"Document added with ID: {doc_id}")
return doc_id
return None
async def get_all(self, collection: str) -> List[Dict]:
cursor = self._mongo_db[collection].find()
return [document async for document in cursor]
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
return await self._mongo_db[collection].find_one({"id": doc_id})

View File

@@ -0,0 +1,5 @@
from .firebase import FirebaseStorage
__all__ = [
"FirebaseStorage"
]

View File

@@ -1,83 +1,83 @@
import logging
from typing import Optional
import aiofiles
from httpx import AsyncClient
from app.repositories.abc import IFileStorage
class FirebaseStorage(IFileStorage):
def __init__(self, client: AsyncClient, token: str, bucket: str):
self._httpx_client = client
self._token = token
self._storage_url = f'https://firebasestorage.googleapis.com/v0/b/{bucket}'
self._logger = logging.getLogger(__name__)
async def download_firebase_file(self, source_blob_name: str, destination_file_name: str) -> Optional[str]:
source_blob_name = source_blob_name.replace('/', '%2F')
download_url = f"{self._storage_url}/o/{source_blob_name}?alt=media"
response = await self._httpx_client.get(
download_url,
headers={'Authorization': f'Firebase {self._token}'}
)
if response.status_code == 200:
async with aiofiles.open(destination_file_name, 'wb') as file:
await file.write(response.content)
self._logger.info(f"File downloaded to {destination_file_name}")
return destination_file_name
else:
self._logger.error(f"Failed to download blob {source_blob_name}. {response.status_code} - {response.content}")
return None
async def upload_file_firebase_get_url(self, destination_blob_name: str, source_file_name: str) -> Optional[str]:
destination_blob_name = destination_blob_name.replace('/', '%2F')
upload_url = f"{self._storage_url}/o/{destination_blob_name}"
async with aiofiles.open(source_file_name, 'rb') as file:
file_bytes = await file.read()
response = await self._httpx_client.post(
upload_url,
headers={
'Authorization': f'Firebase {self._token}',
"X-Goog-Upload-Protocol": "multipart"
},
files={
'metadata': (None, '{"metadata":{"test":"testMetadata"}}', 'application/json'),
'file': file_bytes
}
)
if response.status_code == 200:
self._logger.info(f"File {source_file_name} uploaded to {self._storage_url}/o/{destination_blob_name}.")
# TODO: Test this
#await self.make_public(destination_blob_name)
file_url = f"{self._storage_url}/o/{destination_blob_name}"
return file_url
else:
self._logger.error(f"Failed to upload file {source_file_name}. Error: {response.status_code} - {str(response.content)}")
return None
async def make_public(self, destination_blob_name: str):
acl_url = f"{self._storage_url}/o/{destination_blob_name}/acl"
acl = {'entity': 'allUsers', 'role': 'READER'}
response = await self._httpx_client.post(
acl_url,
headers={
'Authorization': f'Bearer {self._token}',
'Content-Type': 'application/json'
},
json=acl
)
if response.status_code == 200:
self._logger.info(f"Blob {destination_blob_name} is now public.")
else:
self._logger.error(f"Failed to make blob {destination_blob_name} public. {response.status_code} - {response.content}")
import logging
from typing import Optional
import aiofiles
from httpx import AsyncClient
from app.repositories.abc import IFileStorage
class FirebaseStorage(IFileStorage):
def __init__(self, client: AsyncClient, token: str, bucket: str):
self._httpx_client = client
self._token = token
self._storage_url = f'https://firebasestorage.googleapis.com/v0/b/{bucket}'
self._logger = logging.getLogger(__name__)
async def download_firebase_file(self, source_blob_name: str, destination_file_name: str) -> Optional[str]:
source_blob_name = source_blob_name.replace('/', '%2F')
download_url = f"{self._storage_url}/o/{source_blob_name}?alt=media"
response = await self._httpx_client.get(
download_url,
headers={'Authorization': f'Firebase {self._token}'}
)
if response.status_code == 200:
async with aiofiles.open(destination_file_name, 'wb') as file:
await file.write(response.content)
self._logger.info(f"File downloaded to {destination_file_name}")
return destination_file_name
else:
self._logger.error(f"Failed to download blob {source_blob_name}. {response.status_code} - {response.content}")
return None
async def upload_file_firebase_get_url(self, destination_blob_name: str, source_file_name: str) -> Optional[str]:
destination_blob_name = destination_blob_name.replace('/', '%2F')
upload_url = f"{self._storage_url}/o/{destination_blob_name}"
async with aiofiles.open(source_file_name, 'rb') as file:
file_bytes = await file.read()
response = await self._httpx_client.post(
upload_url,
headers={
'Authorization': f'Firebase {self._token}',
"X-Goog-Upload-Protocol": "multipart"
},
files={
'metadata': (None, '{"metadata":{"test":"testMetadata"}}', 'application/json'),
'file': file_bytes
}
)
if response.status_code == 200:
self._logger.info(f"File {source_file_name} uploaded to {self._storage_url}/o/{destination_blob_name}.")
# TODO: Test this
#await self.make_public(destination_blob_name)
file_url = f"{self._storage_url}/o/{destination_blob_name}"
return file_url
else:
self._logger.error(f"Failed to upload file {source_file_name}. Error: {response.status_code} - {str(response.content)}")
return None
async def make_public(self, destination_blob_name: str):
acl_url = f"{self._storage_url}/o/{destination_blob_name}/acl"
acl = {'entity': 'allUsers', 'role': 'READER'}
response = await self._httpx_client.post(
acl_url,
headers={
'Authorization': f'Bearer {self._token}',
'Content-Type': 'application/json'
},
json=acl
)
if response.status_code == 200:
self._logger.info(f"Blob {destination_blob_name} is now public.")
else:
self._logger.error(f"Failed to make blob {destination_blob_name} public. {response.status_code} - {response.content}")

View File

@@ -1,160 +1,156 @@
import json
import os
import pathlib
import logging.config
import logging.handlers
import aioboto3
import contextlib
from contextlib import asynccontextmanager
from collections import defaultdict
from typing import List
from http import HTTPStatus
import httpx
import whisper
from fastapi import FastAPI, Request
from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import RequestValidationError
from fastapi.middleware import Middleware
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import nltk
from dotenv import load_dotenv
from starlette import status
from app.api import router
from app.configs import config_di
from app.exceptions import CustomException
from app.middlewares import AuthenticationMiddleware, AuthBackend
load_dotenv()
@asynccontextmanager
async def lifespan(_app: FastAPI):
"""
Startup and Shutdown logic is in this lifespan method
https://fastapi.tiangolo.com/advanced/events/
"""
# Whisper model
whisper_model = whisper.load_model("base")
# NLTK required datasets download
nltk.download('words')
nltk.download("punkt")
# AWS Polly client instantiation
context_stack = contextlib.AsyncExitStack()
session = aioboto3.Session()
polly_client = await context_stack.enter_async_context(
session.client(
'polly',
region_name='eu-west-1',
aws_secret_access_key=os.getenv("AWS_ACCESS_KEY_ID"),
aws_access_key_id=os.getenv("AWS_SECRET_ACCESS_KEY")
)
)
# HTTP Client
http_client = httpx.AsyncClient()
config_di(
polly_client=polly_client,
http_client=http_client,
whisper_model=whisper_model
)
# Setup logging
config_file = pathlib.Path("./app/configs/logging/logging_config.json")
with open(config_file) as f_in:
config = json.load(f_in)
logging.config.dictConfig(config)
yield
await http_client.aclose()
await polly_client.close()
await context_stack.aclose()
def setup_listeners(_app: FastAPI) -> None:
@_app.exception_handler(RequestValidationError)
async def custom_form_validation_error(request, exc):
"""
Don't delete request param
"""
reformatted_message = defaultdict(list)
for pydantic_error in exc.errors():
loc, msg = pydantic_error["loc"], pydantic_error["msg"]
filtered_loc = loc[1:] if loc[0] in ("body", "query", "path") else loc
field_string = ".".join(filtered_loc)
if field_string == "cookie.refresh_token":
return JSONResponse(
status_code=401,
content={"error_code": 401, "message": HTTPStatus.UNAUTHORIZED.description},
)
reformatted_message[field_string].append(msg)
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content=jsonable_encoder(
{"details": "Invalid request!", "errors": reformatted_message}
),
)
@_app.exception_handler(CustomException)
async def custom_exception_handler(request: Request, exc: CustomException):
"""
Don't delete request param
"""
return JSONResponse(
status_code=exc.code,
content={"error_code": exc.error_code, "message": exc.message},
)
@_app.exception_handler(Exception)
async def default_exception_handler(request: Request, exc: Exception):
"""
Don't delete request param
"""
return JSONResponse(
status_code=500,
content=str(exc),
)
def setup_middleware() -> List[Middleware]:
middleware = [
Middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
),
Middleware(
AuthenticationMiddleware,
backend=AuthBackend()
)
]
return middleware
def create_app() -> FastAPI:
env = os.getenv("ENV")
_app = FastAPI(
docs_url="/docs" if env != "prod" else None,
redoc_url="/redoc" if env != "prod" else None,
middleware=setup_middleware(),
lifespan=lifespan
)
_app.include_router(router)
setup_listeners(_app)
return _app
app = create_app()
import json
import os
import pathlib
import logging.config
import logging.handlers
import aioboto3
import contextlib
from contextlib import asynccontextmanager
from collections import defaultdict
from typing import List
from http import HTTPStatus
import httpx
import whisper
from fastapi import FastAPI, Request
from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import RequestValidationError
from fastapi.middleware import Middleware
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import nltk
from starlette import status
from app.api import router
from app.configs import DependencyInjector
from app.exceptions import CustomException
from app.middlewares import AuthenticationMiddleware, AuthBackend
@asynccontextmanager
async def lifespan(_app: FastAPI):
"""
Startup and Shutdown logic is in this lifespan method
https://fastapi.tiangolo.com/advanced/events/
"""
# Whisper model
whisper_model = whisper.load_model("base")
# NLTK required datasets download
nltk.download('words')
nltk.download("punkt")
# AWS Polly client instantiation
context_stack = contextlib.AsyncExitStack()
session = aioboto3.Session()
polly_client = await context_stack.enter_async_context(
session.client(
'polly',
region_name='eu-west-1',
aws_secret_access_key=os.getenv("AWS_ACCESS_KEY_ID"),
aws_access_key_id=os.getenv("AWS_SECRET_ACCESS_KEY")
)
)
http_client = httpx.AsyncClient()
DependencyInjector(
polly_client,
http_client,
whisper_model
).inject()
# Setup logging
config_file = pathlib.Path("./app/configs/logging/logging_config.json")
with open(config_file) as f_in:
config = json.load(f_in)
logging.config.dictConfig(config)
yield
await http_client.aclose()
await polly_client.close()
await context_stack.aclose()
def setup_listeners(_app: FastAPI) -> None:
@_app.exception_handler(RequestValidationError)
async def custom_form_validation_error(request, exc):
"""
Don't delete request param
"""
reformatted_message = defaultdict(list)
for pydantic_error in exc.errors():
loc, msg = pydantic_error["loc"], pydantic_error["msg"]
filtered_loc = loc[1:] if loc[0] in ("body", "query", "path") else loc
field_string = ".".join(filtered_loc)
if field_string == "cookie.refresh_token":
return JSONResponse(
status_code=401,
content={"error_code": 401, "message": HTTPStatus.UNAUTHORIZED.description},
)
reformatted_message[field_string].append(msg)
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content=jsonable_encoder(
{"details": "Invalid request!", "errors": reformatted_message}
),
)
@_app.exception_handler(CustomException)
async def custom_exception_handler(request: Request, exc: CustomException):
"""
Don't delete request param
"""
return JSONResponse(
status_code=exc.code,
content={"error_code": exc.error_code, "message": exc.message},
)
@_app.exception_handler(Exception)
async def default_exception_handler(request: Request, exc: Exception):
"""
Don't delete request param
"""
return JSONResponse(
status_code=500,
content=str(exc),
)
def setup_middleware() -> List[Middleware]:
middleware = [
Middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
),
Middleware(
AuthenticationMiddleware,
backend=AuthBackend()
)
]
return middleware
def create_app() -> FastAPI:
env = os.getenv("ENV")
_app = FastAPI(
docs_url="/docs" if env != "production" else None,
redoc_url="/redoc" if env != "production" else None,
middleware=setup_middleware(),
lifespan=lifespan
)
_app.include_router(router)
setup_listeners(_app)
return _app
app = create_app()

View File

@@ -1,20 +1,11 @@
from .level import ILevelService
from .listening import IListeningService
from .writing import IWritingService
from .speaking import ISpeakingService
from .reading import IReadingService
from .grade import IGradeService
from .training import ITrainingService
from .kb import IKnowledgeBase
from .third_parties import *
__all__ = [
"ILevelService",
"IListeningService",
"IWritingService",
"ISpeakingService",
"IReadingService",
"IGradeService",
"ITrainingService"
]
__all__.extend(third_parties.__all__)
from .third_parties import *
from .exam import *
from .training import *
from .user import IUserService
__all__ = [
"IUserService"
]
__all__.extend(third_parties.__all__)
__all__.extend(exam.__all__)
__all__.extend(training.__all__)

View File

@@ -0,0 +1,15 @@
from .level import ILevelService
from .listening import IListeningService
from .writing import IWritingService
from .speaking import ISpeakingService
from .reading import IReadingService
from .grade import IGradeService
__all__ = [
"ILevelService",
"IListeningService",
"IWritingService",
"ISpeakingService",
"IReadingService",
"IGradeService",
]

View File

@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from typing import Dict, List
class IGradeService(ABC):
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def calculate_grading_summary(self, extracted_sections: List):
pass
from abc import ABC, abstractmethod
from typing import Dict, List
class IGradeService(ABC):
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def calculate_grading_summary(self, extracted_sections: List):
pass

View File

@@ -1,47 +1,47 @@
from abc import ABC, abstractmethod
import random
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import EducationalContent
class ILevelService(ABC):
@abstractmethod
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
pass
@abstractmethod
async def get_level_utas(self):
pass
@abstractmethod
async def get_custom_level(self, data: Dict):
pass
@abstractmethod
async def upload_level(self, upload: UploadFile) -> Dict:
pass
@abstractmethod
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
pass
@abstractmethod
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
pass
@abstractmethod
async def gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
pass
from abc import ABC, abstractmethod
import random
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import EducationalContent
class ILevelService(ABC):
@abstractmethod
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
pass
@abstractmethod
async def get_level_utas(self):
pass
@abstractmethod
async def get_custom_level(self, data: Dict):
pass
@abstractmethod
async def upload_level(self, upload: UploadFile) -> Dict:
pass
@abstractmethod
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
pass
@abstractmethod
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
pass
@abstractmethod
async def gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
pass

View File

@@ -1,18 +1,18 @@
import queue
from abc import ABC, abstractmethod
from queue import Queue
from typing import Dict, List
class IListeningService(ABC):
@abstractmethod
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
pass
@abstractmethod
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str) -> Dict:
pass
import queue
from abc import ABC, abstractmethod
from queue import Queue
from typing import Dict, List
class IListeningService(ABC):
@abstractmethod
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
pass
@abstractmethod
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str) -> Dict:
pass

View File

@@ -1,22 +1,22 @@
from abc import ABC, abstractmethod
from queue import Queue
from typing import List
class IReadingService(ABC):
@abstractmethod
async def gen_reading_passage(
self,
passage_id: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str,
start_id: int
):
pass
@abstractmethod
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
pass
from abc import ABC, abstractmethod
from queue import Queue
from typing import List
class IReadingService(ABC):
@abstractmethod
async def gen_reading_passage(
self,
passage_id: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str,
start_id: int
):
pass
@abstractmethod
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
pass

View File

@@ -1,29 +1,29 @@
from abc import ABC, abstractmethod
from typing import List, Dict, Optional
class ISpeakingService(ABC):
@abstractmethod
async def get_speaking_part(
self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
) -> Dict:
pass
@abstractmethod
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
async def create_videos_and_save_to_db(self, exercises: List[Dict], template: Dict, req_id: str):
pass
@abstractmethod
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
pass
from abc import ABC, abstractmethod
from typing import List, Dict, Optional
class ISpeakingService(ABC):
@abstractmethod
async def get_speaking_part(
self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
) -> Dict:
pass
@abstractmethod
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
async def create_videos_and_save_to_db(self, exercises: List[Dict], template: Dict, req_id: str):
pass
@abstractmethod
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
pass

View File

@@ -1,11 +1,11 @@
from abc import ABC, abstractmethod
class IWritingService(ABC):
@abstractmethod
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
pass
@abstractmethod
async def grade_writing_task(self, task: int, question: str, answer: str):
pass
from abc import ABC, abstractmethod
class IWritingService(ABC):
@abstractmethod
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
pass
@abstractmethod
async def grade_writing_task(self, task: int, question: str, answer: str):
pass

View File

@@ -1,13 +1,13 @@
from .stt import ISpeechToTextService
from .tts import ITextToSpeechService
from .llm import ILLMService
from .vid_gen import IVideoGeneratorService
from .ai_detector import IAIDetectorService
__all__ = [
"ISpeechToTextService",
"ITextToSpeechService",
"ILLMService",
"IVideoGeneratorService",
"IAIDetectorService"
]
from .stt import ISpeechToTextService
from .tts import ITextToSpeechService
from .llm import ILLMService
from .vid_gen import IVideoGeneratorService
from .ai_detector import IAIDetectorService
__all__ = [
"ISpeechToTextService",
"ITextToSpeechService",
"ILLMService",
"IVideoGeneratorService",
"IAIDetectorService"
]

View File

@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from typing import Dict, Optional
class IAIDetectorService(ABC):
@abstractmethod
async def run_detection(self, text: str):
pass
@abstractmethod
def _parse_detection(self, response: Dict) -> Optional[Dict]:
pass
from abc import ABC, abstractmethod
from typing import Dict, Optional
class IAIDetectorService(ABC):
@abstractmethod
async def run_detection(self, text: str):
pass
@abstractmethod
def _parse_detection(self, response: Dict) -> Optional[Dict]:
pass

View File

@@ -1,38 +1,38 @@
from abc import ABC, abstractmethod
from typing import List, Optional, TypeVar, Callable
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
T = TypeVar('T', bound=BaseModel)
class ILLMService(ABC):
@abstractmethod
async def prediction(
self,
model: str,
messages: List,
fields_to_check: Optional[List[str]],
temperature: float,
check_blacklisted: bool = True,
token_count: int = -1
):
pass
@abstractmethod
async def prediction_override(self, **kwargs):
pass
@abstractmethod
async def pydantic_prediction(
self,
messages: List[ChatCompletionMessageParam],
map_to_model: Callable,
json_scheme: str,
*,
model: Optional[str] = None,
temperature: Optional[float] = None,
max_retries: int = 3
) -> List[T] | T | None:
pass
from abc import ABC, abstractmethod
from typing import List, Optional, TypeVar, Callable
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
T = TypeVar('T', bound=BaseModel)
class ILLMService(ABC):
@abstractmethod
async def prediction(
self,
model: str,
messages: List,
fields_to_check: Optional[List[str]],
temperature: float,
check_blacklisted: bool = True,
token_count: int = -1
):
pass
@abstractmethod
async def prediction_override(self, **kwargs):
pass
@abstractmethod
async def pydantic_prediction(
self,
messages: List[ChatCompletionMessageParam],
map_to_model: Callable,
json_scheme: str,
*,
model: Optional[str] = None,
temperature: Optional[float] = None,
max_retries: int = 3
) -> List[T] | T | None:
pass

View File

@@ -1,8 +1,8 @@
from abc import ABC, abstractmethod
class ISpeechToTextService(ABC):
@abstractmethod
async def speech_to_text(self, file_path):
pass
from abc import ABC, abstractmethod
class ISpeechToTextService(ABC):
@abstractmethod
async def speech_to_text(self, file_path):
pass

View File

@@ -1,22 +1,22 @@
from abc import ABC, abstractmethod
from typing import Union
class ITextToSpeechService(ABC):
@abstractmethod
async def synthesize_speech(self, text: str, voice: str, engine: str, output_format: str):
pass
@abstractmethod
async def text_to_speech(self, text: Union[list[str], str], file_name: str):
pass
@abstractmethod
async def _conversation_to_speech(self, conversation: list):
pass
@abstractmethod
async def _text_to_speech(self, text: str):
pass
from abc import ABC, abstractmethod
from typing import Union
class ITextToSpeechService(ABC):
@abstractmethod
async def synthesize_speech(self, text: str, voice: str, engine: str, output_format: str):
pass
@abstractmethod
async def text_to_speech(self, text: Union[list[str], str], file_name: str):
pass
@abstractmethod
async def _conversation_to_speech(self, conversation: list):
pass
@abstractmethod
async def _text_to_speech(self, text: str):
pass

View File

@@ -1,10 +1,10 @@
from abc import ABC, abstractmethod
from app.configs.constants import AvatarEnum
class IVideoGeneratorService(ABC):
@abstractmethod
async def create_video(self, text: str, avatar: str):
pass
from abc import ABC, abstractmethod
from app.configs.constants import AvatarEnum
class IVideoGeneratorService(ABC):
@abstractmethod
async def create_video(self, text: str, avatar: str):
pass

View File

@@ -0,0 +1,7 @@
from .training import ITrainingService
from .kb import IKnowledgeBase
__all__ = [
"ITrainingService",
"IKnowledgeBase"
]

View File

@@ -1,10 +1,10 @@
from abc import ABC, abstractmethod
from typing import List, Dict
class IKnowledgeBase(ABC):
@abstractmethod
def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
pass
from abc import ABC, abstractmethod
from typing import List, Dict
class IKnowledgeBase(ABC):
@abstractmethod
def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
pass

View File

@@ -1,14 +1,14 @@
from abc import ABC, abstractmethod
from typing import Dict
class ITrainingService(ABC):
@abstractmethod
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
pass
@abstractmethod
async def get_training_content(self, training_content: Dict) -> Dict:
pass
from abc import ABC, abstractmethod
from typing import Dict
class ITrainingService(ABC):
@abstractmethod
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
pass
@abstractmethod
async def get_training_content(self, training_content: Dict) -> Dict:
pass

10
app/services/abc/user.py Normal file
View File

@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from app.dtos.user_batch import BatchUsersDTO
class IUserService(ABC):
@abstractmethod
async def fetch_tips(self, batch: BatchUsersDTO):
pass

View File

@@ -1,19 +1,11 @@
from .level import LevelService
from .listening import ListeningService
from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
from .training import *
from .third_parties import *
__all__ = [
"LevelService",
"ListeningService",
"ReadingService",
"SpeakingService",
"WritingService",
"GradeService",
]
__all__.extend(third_parties.__all__)
__all__.extend(training.__all__)
from .user import UserService
from .training import *
from .third_parties import *
from .exam import *
__all__ = [
"UserService"
]
__all__.extend(third_parties.__all__)
__all__.extend(training.__all__)
__all__.extend(exam.__all__)

View File

@@ -0,0 +1,16 @@
from .level import LevelService
from .listening import ListeningService
from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
__all__ = [
"LevelService",
"ListeningService",
"ReadingService",
"SpeakingService",
"WritingService",
"GradeService",
]

View File

@@ -1,200 +1,200 @@
import json
from typing import List, Dict
from app.configs.constants import GPTModels, TemperatureSettings
from app.services.abc import ILLMService, IGradeService
class GradeService(IGradeService):
def __init__(self, llm: ILLMService):
self._llm = llm
async def grade_short_answers(self, data: Dict):
json_format = {
"exercises": [
{
"id": 1,
"correct": True,
"correct_answer": " correct answer if wrong"
}
]
}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
'Grade these answers according to the text content and write a correct answer if they are '
f'wrong. Text, questions and answers:\n {data}'
)
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["exercises"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def calculate_grading_summary(self, extracted_sections: List):
ret = []
for section in extracted_sections:
openai_response_dict = await self._calculate_section_grade_summary(section)
ret.append(
{
'code': section['code'],
'name': section['name'],
'grade': section['grade'],
'evaluation': openai_response_dict['evaluation'],
'suggestions': openai_response_dict['suggestions'],
'bullet_points': self._parse_bullet_points(openai_response_dict['bullet_points'], section['grade'])
}
)
return {'sections': ret}
async def _calculate_section_grade_summary(self, section):
section_name = section['name']
section_grade = section['grade']
messages = [
{
"role": "user",
"content": (
'You are a IELTS test section grade evaluator. You will receive a IELTS test section name and the '
'grade obtained in the section. You should offer a evaluation comment on this grade and separately '
'suggestions on how to possibly get a better grade.'
)
},
{
"role": "user",
"content": f'Section: {str(section_name)} Grade: {str(section_grade)}',
},
{
"role": "user",
"content": "Speak in third person."
},
{
"role": "user",
"content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."
},
{
"role": "user",
"content": (
"Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, "
"be one paragraph long."
)
},
{
"role": "user",
"content": "Please save the evaluation comment and suggestions generated."
},
{
"role": "user",
"content": f"Offer bullet points to improve the english {str(section_name)} ability."
},
]
if section['code'] == "level":
messages[2:2] = [{
"role": "user",
"content": (
"This section is comprised of multiple choice questions that measure the user's overall english "
"level. These multiple choice questions are about knowledge on vocabulary, syntax, grammar rules, "
"and contextual usage. The grade obtained measures the ability in these areas and english language "
"overall."
)
}]
elif section['code'] == "speaking":
messages[2:2] = [{
"role": "user",
"content": (
"This section is s designed to assess the English language proficiency of individuals who want to "
"study or work in English-speaking countries. The speaking section evaluates a candidate's ability "
"to communicate effectively in spoken English."
)
}]
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
tools = self.get_tools()
res = await self._llm.prediction_override(
model="gpt-3.5-turbo",
max_tokens=chat_config['max_tokens'],
temperature=chat_config['temperature'],
tools=tools,
messages=messages
)
return self._parse_openai_response(res)
@staticmethod
def _parse_openai_response(response):
if 'choices' in response and len(response['choices']) > 0 and 'message' in response['choices'][
0] and 'tool_calls' in response['choices'][0]['message'] and isinstance(
response['choices'][0]['message']['tool_calls'], list) and len(
response['choices'][0]['message']['tool_calls']) > 0 and \
response['choices'][0]['message']['tool_calls'][0]['function']['arguments']:
return json.loads(response['choices'][0]['message']['tool_calls'][0]['function']['arguments'])
else:
return {'evaluation': "", 'suggestions': "", 'bullet_points': []}
@staticmethod
def _parse_bullet_points(bullet_points_str, grade):
max_grade_for_suggestions = 9
if isinstance(bullet_points_str, str) and grade < max_grade_for_suggestions:
# Split the string by '\n'
lines = bullet_points_str.split('\n')
# Remove '-' and trim whitespace from each line
cleaned_lines = [line.replace('-', '').strip() for line in lines]
# Add '.' to lines that don't end with it
return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
else:
return []
@staticmethod
def get_tools():
return [
{
"type": "function",
"function": {
"name": "save_evaluation_and_suggestions",
"description": "Saves the evaluation and suggestions requested by input.",
"parameters": {
"type": "object",
"properties": {
"evaluation": {
"type": "string",
"description": (
"A comment on the IELTS section grade obtained in the specific section and what "
"it could mean without suggestions."
),
},
"suggestions": {
"type": "string",
"description": (
"A small paragraph text with suggestions on how to possibly get a better grade "
"than the one obtained."
),
},
"bullet_points": {
"type": "string",
"description": (
"Text with four bullet points to improve the english speaking ability. Only "
"include text for the bullet points separated by a paragraph."
),
},
},
"required": ["evaluation", "suggestions"],
},
}
}
]
import json
from typing import List, Dict
from app.configs.constants import GPTModels, TemperatureSettings
from app.services.abc import ILLMService, IGradeService
class GradeService(IGradeService):
def __init__(self, llm: ILLMService):
self._llm = llm
async def grade_short_answers(self, data: Dict):
json_format = {
"exercises": [
{
"id": 1,
"correct": True,
"correct_answer": " correct answer if wrong"
}
]
}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
'Grade these answers according to the text content and write a correct answer if they are '
f'wrong. Text, questions and answers:\n {data}'
)
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["exercises"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def calculate_grading_summary(self, extracted_sections: List):
ret = []
for section in extracted_sections:
openai_response_dict = await self._calculate_section_grade_summary(section)
ret.append(
{
'code': section['code'],
'name': section['name'],
'grade': section['grade'],
'evaluation': openai_response_dict['evaluation'],
'suggestions': openai_response_dict['suggestions'],
'bullet_points': self._parse_bullet_points(openai_response_dict['bullet_points'], section['grade'])
}
)
return {'sections': ret}
async def _calculate_section_grade_summary(self, section):
section_name = section['name']
section_grade = section['grade']
messages = [
{
"role": "user",
"content": (
'You are a IELTS test section grade evaluator. You will receive a IELTS test section name and the '
'grade obtained in the section. You should offer a evaluation comment on this grade and separately '
'suggestions on how to possibly get a better grade.'
)
},
{
"role": "user",
"content": f'Section: {str(section_name)} Grade: {str(section_grade)}',
},
{
"role": "user",
"content": "Speak in third person."
},
{
"role": "user",
"content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."
},
{
"role": "user",
"content": (
"Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, "
"be one paragraph long."
)
},
{
"role": "user",
"content": "Please save the evaluation comment and suggestions generated."
},
{
"role": "user",
"content": f"Offer bullet points to improve the english {str(section_name)} ability."
},
]
if section['code'] == "level":
messages[2:2] = [{
"role": "user",
"content": (
"This section is comprised of multiple choice questions that measure the user's overall english "
"level. These multiple choice questions are about knowledge on vocabulary, syntax, grammar rules, "
"and contextual usage. The grade obtained measures the ability in these areas and english language "
"overall."
)
}]
elif section['code'] == "speaking":
messages[2:2] = [{
"role": "user",
"content": (
"This section is s designed to assess the English language proficiency of individuals who want to "
"study or work in English-speaking countries. The speaking section evaluates a candidate's ability "
"to communicate effectively in spoken English."
)
}]
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
tools = self.get_tools()
res = await self._llm.prediction_override(
model="gpt-3.5-turbo",
max_tokens=chat_config['max_tokens'],
temperature=chat_config['temperature'],
tools=tools,
messages=messages
)
return self._parse_openai_response(res)
@staticmethod
def _parse_openai_response(response):
if 'choices' in response and len(response['choices']) > 0 and 'message' in response['choices'][
0] and 'tool_calls' in response['choices'][0]['message'] and isinstance(
response['choices'][0]['message']['tool_calls'], list) and len(
response['choices'][0]['message']['tool_calls']) > 0 and \
response['choices'][0]['message']['tool_calls'][0]['function']['arguments']:
return json.loads(response['choices'][0]['message']['tool_calls'][0]['function']['arguments'])
else:
return {'evaluation': "", 'suggestions': "", 'bullet_points': []}
@staticmethod
def _parse_bullet_points(bullet_points_str, grade):
max_grade_for_suggestions = 9
if isinstance(bullet_points_str, str) and grade < max_grade_for_suggestions:
# Split the string by '\n'
lines = bullet_points_str.split('\n')
# Remove '-' and trim whitespace from each line
cleaned_lines = [line.replace('-', '').strip() for line in lines]
# Add '.' to lines that don't end with it
return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
else:
return []
@staticmethod
def get_tools():
return [
{
"type": "function",
"function": {
"name": "save_evaluation_and_suggestions",
"description": "Saves the evaluation and suggestions requested by input.",
"parameters": {
"type": "object",
"properties": {
"evaluation": {
"type": "string",
"description": (
"A comment on the IELTS section grade obtained in the specific section and what "
"it could mean without suggestions."
),
},
"suggestions": {
"type": "string",
"description": (
"A small paragraph text with suggestions on how to possibly get a better grade "
"than the one obtained."
),
},
"bullet_points": {
"type": "string",
"description": (
"Text with four bullet points to improve the english speaking ability. Only "
"include text for the bullet points separated by a paragraph."
),
},
},
"required": ["evaluation", "suggestions"],
},
}
}
]

View File

@@ -1,5 +1,5 @@
from .level import LevelService
__all__ = [
"LevelService"
from .level import LevelService
__all__ = [
"LevelService"
]

View File

@@ -1,335 +1,335 @@
import queue
import random
from typing import Dict
from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
from app.services.abc import (
ILLMService, ILevelService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
class CustomLevelModule:
def __init__(
self,
llm: ILLMService,
level: ILevelService,
reading: IReadingService,
listening: IListeningService,
writing: IWritingService,
speaking: ISpeakingService
):
self._llm = llm
self._level = level
self._reading = reading
self._listening = listening
self._writing = writing
self._speaking = speaking
# TODO: I've changed this to retrieve the args from the body request and not request query args
async def get_custom_level(self, data: Dict):
nr_exercises = int(data.get('nr_exercises'))
exercise_id = 1
response = {
"exercises": {},
"module": "level"
}
for i in range(1, nr_exercises + 1, 1):
exercise_type = data.get(f'exercise_{i}_type')
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"normal", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"blank_space", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"underline", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
exercise_qty, exercise_id, exercise_text_size
)
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
)
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
1, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
1, exercise_topic, exercise_difficulty, exercise_topic_2
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
3, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
if exercise_ideamatch_qty != -1:
exercises.append('ideaMatch')
exercise_qty_q.put(exercise_ideamatch_qty)
total_qty = total_qty + exercise_ideamatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc3_qty != -1:
exercises.append('multipleChoice3Options')
exercise_qty_q.put(exercise_mc3_qty)
total_qty = total_qty + exercise_mc3_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
return response
import queue
import random
from typing import Dict
from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
from app.services.abc import (
ILLMService, ILevelService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
class CustomLevelModule:
def __init__(
self,
llm: ILLMService,
level: ILevelService,
reading: IReadingService,
listening: IListeningService,
writing: IWritingService,
speaking: ISpeakingService
):
self._llm = llm
self._level = level
self._reading = reading
self._listening = listening
self._writing = writing
self._speaking = speaking
# TODO: I've changed this to retrieve the args from the body request and not request query args
async def get_custom_level(self, data: Dict):
nr_exercises = int(data.get('nr_exercises'))
exercise_id = 1
response = {
"exercises": {},
"module": "level"
}
for i in range(1, nr_exercises + 1, 1):
exercise_type = data.get(f'exercise_{i}_type')
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"normal", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"blank_space", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
response["exercises"][f"exercise_{i}"] = {}
response["exercises"][f"exercise_{i}"]["questions"] = []
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
while exercise_qty > 0:
if exercise_qty - 15 > 0:
qty = 15
else:
qty = exercise_qty
mc_response = await self._level.gen_multiple_choice(
"underline", qty, exercise_id, utas=True,
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
)
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
exercise_id = exercise_id + qty
exercise_qty = exercise_qty - qty
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
exercise_qty, exercise_id, exercise_text_size
)
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
)
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
exercise_id = exercise_id + exercise_qty
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
1, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "writing"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
1, exercise_topic, exercise_difficulty, exercise_topic_2
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
2, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
3, exercise_topic, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
exercise_id = exercise_id + 1
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_fillblanks_qty != -1:
exercises.append('fillBlanks')
exercise_qty_q.put(exercise_fillblanks_qty)
total_qty = total_qty + exercise_fillblanks_qty
if exercise_writeblanks_qty != -1:
exercises.append('writeBlanks')
exercise_qty_q.put(exercise_writeblanks_qty)
total_qty = total_qty + exercise_writeblanks_qty
if exercise_truefalse_qty != -1:
exercises.append('trueFalse')
exercise_qty_q.put(exercise_truefalse_qty)
total_qty = total_qty + exercise_truefalse_qty
if exercise_paragraphmatch_qty != -1:
exercises.append('paragraphMatch')
exercise_qty_q.put(exercise_paragraphmatch_qty)
total_qty = total_qty + exercise_paragraphmatch_qty
if exercise_ideamatch_qty != -1:
exercises.append('ideaMatch')
exercise_qty_q.put(exercise_ideamatch_qty)
total_qty = total_qty + exercise_ideamatch_qty
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
)
response["exercises"][f"exercise_{i}"]["type"] = "reading"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc3_qty != -1:
exercises.append('multipleChoice3Options')
exercise_qty_q.put(exercise_mc3_qty)
total_qty = total_qty + exercise_mc3_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
exercises = []
exercise_qty_q = queue.Queue()
total_qty = 0
if exercise_mc_qty != -1:
exercises.append('multipleChoice')
exercise_qty_q.put(exercise_mc_qty)
total_qty = total_qty + exercise_mc_qty
if exercise_writeblanksquestions_qty != -1:
exercises.append('writeBlanksQuestions')
exercise_qty_q.put(exercise_writeblanksquestions_qty)
total_qty = total_qty + exercise_writeblanksquestions_qty
if exercise_writeblanksfill_qty != -1:
exercises.append('writeBlanksFill')
exercise_qty_q.put(exercise_writeblanksfill_qty)
total_qty = total_qty + exercise_writeblanksfill_qty
if exercise_writeblanksform_qty != -1:
exercises.append('writeBlanksForm')
exercise_qty_q.put(exercise_writeblanksform_qty)
total_qty = total_qty + exercise_writeblanksform_qty
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
)
response["exercises"][f"exercise_{i}"]["type"] = "listening"
exercise_id = exercise_id + total_qty
return response

View File

@@ -1,417 +1,417 @@
import json
import random
import uuid
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
IListeningService
from .custom import CustomLevelModule
from .upload import UploadLevelModule
class LevelService(ILevelService):
def __init__(
self,
llm: ILLMService,
document_store: IDocumentStore,
mc_variants: Dict,
reading_service: IReadingService,
writing_service: IWritingService,
speaking_service: ISpeakingService,
listening_service: IListeningService
):
self._llm = llm
self._document_store = document_store
self._reading_service = reading_service
self._custom_module = CustomLevelModule(
llm, self, reading_service, listening_service, writing_service, speaking_service
)
self._upload_module = UploadLevelModule(llm)
# TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
# mc_variants are stored in ./mc_variants.json
self._mc_variants = mc_variants
async def upload_level(self, upload: UploadFile) -> Dict:
return await self._upload_module.generate_level_from_file(upload)
async def get_custom_level(self, data: Dict):
return await self._custom_module.get_custom_level(data)
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
return {
"exercises": [exercises],
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
# await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
mc_exercises1 = await self.gen_multiple_choice(
"blank_space", 15, 1, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self.gen_multiple_choice(
"blank_space", 15, 16, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self.gen_multiple_choice(
"blank_space", 15, 31, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self.gen_multiple_choice(
"underline", 15, 46, utas=True, all_exams=all_mc_questions
)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self.gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
mc_template = self._mc_variants[mc_variant]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
gen_multiple_choice_for_text: str = (
'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
'punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
else:
if not utas:
all_exams = await self._document_store.get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
"type": "multipleChoice",
}
else:
if all_exams is not None:
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
mc_template = self._mc_variants[mc_variant]["questions"][0]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": (
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
f'it can be easy, intermediate or advanced.'
)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
async def _replace_exercise_if_exists(
self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
mc_variant, utas
)
else:
seen_keys.add(key)
if not utas:
for exam in all_exams:
exam_dict = exam.to_dict()
if len(exam_dict.get("parts", [])) > 0:
exercise_dict = exam_dict.get("parts", [])[0]
if len(exercise_dict.get("exercises", [])) > 0:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exercise_dict.get("exercises", [])[0]["questions"]
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
else:
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
return current_exercise, seen_keys
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
json_template = self._mc_variants["blank_space_text"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. '
'Put the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]
async def gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
passage = await self._reading_service.generate_reading_passage(1, topic)
short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
"content": passage["text"],
"title": passage["title"]
}
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
f'maximum 3 words per answer, about this text:\n"{text}"'
)
},
{
"role": "user",
"content": f'The id starts at {start_id}.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_template = self._mc_variants["text_mc_utas"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_text_multiple_choice_utas(text, mc_quantity, start_id)
else:
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response
import json
import random
import uuid
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
IListeningService
from .custom import CustomLevelModule
from .upload import UploadLevelModule
class LevelService(ILevelService):
def __init__(
self,
llm: ILLMService,
document_store: IDocumentStore,
mc_variants: Dict,
reading_service: IReadingService,
writing_service: IWritingService,
speaking_service: ISpeakingService,
listening_service: IListeningService
):
self._llm = llm
self._document_store = document_store
self._reading_service = reading_service
self._custom_module = CustomLevelModule(
llm, self, reading_service, listening_service, writing_service, speaking_service
)
self._upload_module = UploadLevelModule(llm)
# TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
# mc_variants are stored in ./mc_variants.json
self._mc_variants = mc_variants
async def upload_level(self, upload: UploadFile) -> Dict:
return await self._upload_module.generate_level_from_file(upload)
async def get_custom_level(self, data: Dict):
return await self._custom_module.get_custom_level(data)
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
return {
"exercises": [exercises],
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
# await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
mc_exercises1 = await self.gen_multiple_choice(
"blank_space", 15, 1, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self.gen_multiple_choice(
"blank_space", 15, 16, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self.gen_multiple_choice(
"blank_space", 15, 31, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self.gen_multiple_choice(
"underline", 15, 46, utas=True, all_exams=all_mc_questions
)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self.gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
mc_template = self._mc_variants[mc_variant]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
gen_multiple_choice_for_text: str = (
'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
'punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
else:
if not utas:
all_exams = await self._document_store.get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
"type": "multipleChoice",
}
else:
if all_exams is not None:
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
)
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
mc_template = self._mc_variants[mc_variant]["questions"][0]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": (
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
f'it can be easy, intermediate or advanced.'
)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
async def _replace_exercise_if_exists(
self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
mc_variant, utas
)
else:
seen_keys.add(key)
if not utas:
for exam in all_exams:
exam_dict = exam.to_dict()
if len(exam_dict.get("parts", [])) > 0:
exercise_dict = exam_dict.get("parts", [])[0]
if len(exercise_dict.get("exercises", [])) > 0:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exercise_dict.get("exercises", [])[0]["questions"]
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
else:
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return await self._replace_exercise_if_exists(
all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
seen_keys, mc_variant, utas
)
return current_exercise, seen_keys
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
json_template = self._mc_variants["blank_space_text"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate a text of at least {size} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. '
'Put the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]
async def gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
passage = await self._reading_service.generate_reading_passage(1, topic)
short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
"content": passage["text"],
"title": passage["title"]
}
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
f'maximum 3 words per answer, about this text:\n"{text}"'
)
},
{
"role": "user",
"content": f'The id starts at {start_id}.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_template = self._mc_variants["text_mc_utas"]
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
},
{
"role": "user",
"content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_text_multiple_choice_utas(text, mc_quantity, start_id)
else:
response = ExercisesHelper.fix_exercise_ids(question, start_id)
response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
return response

View File

@@ -1,137 +1,137 @@
{
"normal": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"blank_space": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"underline": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
},
"blank_space_text": {
"question": {
"words": [
{
"id": "1",
"text": "a"
},
{
"id": "2",
"text": "b"
},
{
"id": "3",
"text": "c"
},
{
"id": "4",
"text": "d"
}
],
"text": "text"
}
},
"text_mc_utas": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
{
"normal": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"blank_space": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"underline": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
},
"blank_space_text": {
"question": {
"words": [
{
"id": "1",
"text": "a"
},
{
"id": "2",
"text": "b"
},
{
"id": "3",
"text": "c"
},
{
"id": "4",
"text": "d"
}
],
"text": "text"
}
},
"text_mc_utas": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
}

View File

@@ -1,404 +1,404 @@
import aiofiles
import os
import uuid
from logging import getLogger
from typing import Dict, Any, Tuple, Coroutine
import pdfplumber
from fastapi import UploadFile
from app.services.abc import ILLMService
from app.helpers import LoggerHelper, FileHelper
from app.mappers import ExamMapper
from app.dtos.exam import Exam
from app.dtos.sheet import Sheet
class UploadLevelModule:
def __init__(self, openai: ILLMService):
self._logger = getLogger(__name__)
self._llm = openai
# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
# firestore, return the id right away, in generation view poll for the id
async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
ext, path_id = await self._save_upload(file)
FileHelper.convert_file_to_pdf(
f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
)
file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
if not file_has_images:
FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
completion: Coroutine[Any, Any, Exam] = (
self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
)
response = await completion
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return self.fix_ids(response.dict(exclude_none=True))
return None
@staticmethod
@LoggerHelper.suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool:
with pdfplumber.open(pdf_path) as pdf:
for page in pdf.pages:
if page.images:
return True
return False
@staticmethod
async def _save_upload(file: UploadFile) -> Tuple[str, str]:
ext = file.filename.split('.')[-1]
path_id = str(uuid.uuid4())
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
file_bytes: bytes = await file.read()
async with aiofiles.open(tmp_filename, 'wb') as file:
await file.write(file_bytes)
return ext, path_id
def _level_json_schema(self):
return {
"parts": [
{
"context": "<this attribute is optional you may exclude it if not required>",
"exercises": [
self._multiple_choice_html(),
self._passage_blank_space_html()
]
}
]
}
async def _html_completion(self, path_id: str) -> Exam:
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
html = await f.read()
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": html
}
],
ExamMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_html(self):
return {
"role": "system",
"content": (
'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
'Your current task is to scrape html english questions sheets.\n\n'
'In the question sheet you will only see 4 types of question:\n'
'- blank space multiple choice\n'
'- underline multiple choice\n'
'- reading passage blank space multiple choice\n'
'- reading passage multiple choice\n\n'
'For the first two types of questions the template is the same but the question prompts differ, '
'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
'multiple "_", in the underline you must include in the prompt the <u></u> to '
'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
'For the reading passage exercise you must handle the formatting of the passages. If it is a '
'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
'between paragraphs.\n\n'
'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
'out the best paragraph separation possible.'
'You will place all the information in a single JSON: '
'{"parts": [{"exercises": [{...}], "context": ""}]}\n '
'Where {...} are the exercises templates for each part of a question sheet and the optional field '
'context.'
'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
'so that you can group the exercises by the parts that are in the html, this is crucial since only '
'reading passage multiple choice require context and if the context is included in parts where it '
'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'
'The templates for the exercises are the following:\n'
'- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
f'{self._multiple_choice_html()}\n'
f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
'exercises exclude the context field.'
)
}
@staticmethod
def _multiple_choice_html():
return {
"type": "multipleChoice",
"prompt": "Select the appropriate option.",
"questions": [
{
"id": "<the question id>",
"prompt": "<the question>",
"solution": "<the option id solution>",
"options": [
{
"id": "A",
"text": "<the a option>"
},
{
"id": "B",
"text": "<the b option>"
},
{
"id": "C",
"text": "<the c option>"
},
{
"id": "D",
"text": "<the d option>"
}
]
}
]
}
@staticmethod
def _passage_blank_space_html():
return {
"type": "fillBlanks",
"variant": "mc",
"prompt": "Click a blank to select the appropriate word for it.",
"text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
),
"solutions": [
{
"id": "<question id>",
"solution": "<the option that holds the solution>"
}
],
"words": [
{
"id": "<question id>",
"options": {
"A": "<a option>",
"B": "<b option>",
"C": "<c option>",
"D": "<d option>"
}
}
]
}
async def _png_completion(self, path_id: str) -> Exam:
FileHelper.pdf_to_png(path_id)
tmp_files = os.listdir(f'./tmp/{path_id}')
pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
json_schema = {
"components": [
{"type": "part", "part": "<name or number of the part>"},
self._multiple_choice_png(),
{"type": "blanksPassage", "text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
)},
{"type": "passage", "context": (
"<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
)},
self._passage_blank_space_png()
]
}
components = []
for i in range(len(pages)):
current_page = pages[i]
next_page = pages[i + 1] if i + 1 < len(pages) else None
batch = [current_page, next_page] if next_page else [current_page]
sheet = await self._png_batch(path_id, batch, json_schema)
sheet.batch = i + 1
components.append(sheet.dict())
batches = {"batches": components}
return await self._batches_to_exam_completion(batches)
async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
return await self._llm.pydantic_prediction(
[self._gpt_instructions_png(),
{
"role": "user",
"content": [
*FileHelper.b64_pngs(path_id, files)
]
}
],
ExamMapper.map_to_sheet,
str(json_schema)
)
def _gpt_instructions_png(self):
return {
"role": "system",
"content": (
'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
'Your current task is to scan english questions sheets.\n\n'
'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
'sheet components you will retrieve from the images, the components and their corresponding JSON '
'templates are as follows:\n'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In the blanks prompt you must leave 5 underscores to represent the blank space. '
'In the underlines questions the objective is to pick the words that are incorrect in the given '
'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
'in the order they appear in the question for the options A to D, disreguard options that might be '
'included underneath the underlines question and use the ones you wrapped in <u></u>.'
'In normal you just leave the question as is. '
f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
'- Reading Passages, there are two types of reading passages. Reading passages where you will see '
'blanks represented by a (question id) followed by a line, you must format these types of reading '
'passages to be only the text with the brackets that have the question id and line replaced with '
'"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
'and place 2 newlines between paragraphs. '
'For the reading passages with blanks the template is: {"type": "blanksPassage", '
'"text": "<The whole text for the exercise with replacements for blank spaces and their '
'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '
'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n'
'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
'reading passages are cut off don\'t include them in the JSON.'
)
}
def _multiple_choice_png(self):
multiple_choice = self._multiple_choice_html()["questions"][0]
multiple_choice["type"] = "multipleChoice"
multiple_choice.pop("solution")
return multiple_choice
def _passage_blank_space_png(self):
passage_blank_space = self._passage_blank_space_html()["words"][0]
passage_blank_space["type"] = "fillBlanks"
return passage_blank_space
async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": str(batches)
}
],
ExamMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_batches(self):
return {
"role": "system",
"content": (
'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
'components and solve the questions. Each batch may contain overlapping content with the previous '
'batch, or close enough content which needs to be excluded. The components are as follows:'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
'appropriate option to solve it.'
'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
'the order in which the underlines appear in the prompt you will need to fix it.'
'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
'select the appropriate solution.'
f'The template for these questions is the same: {self._multiple_choice_png()}\n'
'- Reading Passages, there are two types of reading passages with different templates. The one with '
'type "blanksPassage" where the text field holds the passage and a blank is represented by '
'{{<some number>}} and the other one with type "passage" that has the context field with just '
'reading passages. For both of these components you will have to remove any additional data that might '
'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
' if there are any. These components are used in conjunction with other ones.'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n\n'
'Now that you know the possible components here\'s what I want you to do:\n'
'1. Remove duplicates. A batch will have duplicates of other batches and the components of '
'the next batch should always take precedence over the previous one batch, what I mean by this is that '
'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
'you pick the next one.\n'
'2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
'exercise. For the multiple choice question follow the previous instruction to solve them and place '
f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
f'the template for this exercise: {self._passage_blank_space_html()}.\n'
f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
f'You must group the exercises by the parts in the order they appear in the batches components. '
f'The context field of a part is the context of a passage component that has text relevant to normal '
f'multiple choice questions.\n'
'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
'in your response and correctly structure the JSON.'
)
}
@staticmethod
def fix_ids(response):
counter = 1
for part in response["parts"]:
for exercise in part["exercises"]:
if exercise["type"] == "multipleChoice":
for question in exercise["questions"]:
question["id"] = counter
counter += 1
if exercise["type"] == "fillBlanks":
for i in range(len(exercise["words"])):
exercise["words"][i]["id"] = counter
exercise["solutions"][i]["id"] = counter
counter += 1
return response
import aiofiles
import os
import uuid
from logging import getLogger
from typing import Dict, Any, Tuple, Coroutine
import pdfplumber
from fastapi import UploadFile
from app.services.abc import ILLMService
from app.helpers import LoggerHelper, FileHelper
from app.mappers import ExamMapper
from app.dtos.exam import Exam
from app.dtos.sheet import Sheet
class UploadLevelModule:
def __init__(self, openai: ILLMService):
self._logger = getLogger(__name__)
self._llm = openai
# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
# firestore, return the id right away, in generation view poll for the id
async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
ext, path_id = await self._save_upload(file)
FileHelper.convert_file_to_pdf(
f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
)
file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
if not file_has_images:
FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
completion: Coroutine[Any, Any, Exam] = (
self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
)
response = await completion
FileHelper.remove_directory(f'./tmp/{path_id}')
if response:
return self.fix_ids(response.dict(exclude_none=True))
return None
@staticmethod
@LoggerHelper.suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool:
with pdfplumber.open(pdf_path) as pdf:
for page in pdf.pages:
if page.images:
return True
return False
@staticmethod
async def _save_upload(file: UploadFile) -> Tuple[str, str]:
ext = file.filename.split('.')[-1]
path_id = str(uuid.uuid4())
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
file_bytes: bytes = await file.read()
async with aiofiles.open(tmp_filename, 'wb') as file:
await file.write(file_bytes)
return ext, path_id
def _level_json_schema(self):
return {
"parts": [
{
"context": "<this attribute is optional you may exclude it if not required>",
"exercises": [
self._multiple_choice_html(),
self._passage_blank_space_html()
]
}
]
}
async def _html_completion(self, path_id: str) -> Exam:
async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
html = await f.read()
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": html
}
],
ExamMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_html(self):
return {
"role": "system",
"content": (
'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
'Your current task is to scrape html english questions sheets.\n\n'
'In the question sheet you will only see 4 types of question:\n'
'- blank space multiple choice\n'
'- underline multiple choice\n'
'- reading passage blank space multiple choice\n'
'- reading passage multiple choice\n\n'
'For the first two types of questions the template is the same but the question prompts differ, '
'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
'multiple "_", in the underline you must include in the prompt the <u></u> to '
'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
'For the reading passage exercise you must handle the formatting of the passages. If it is a '
'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
'between paragraphs.\n\n'
'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
'out the best paragraph separation possible.'
'You will place all the information in a single JSON: '
'{"parts": [{"exercises": [{...}], "context": ""}]}\n '
'Where {...} are the exercises templates for each part of a question sheet and the optional field '
'context.'
'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
'so that you can group the exercises by the parts that are in the html, this is crucial since only '
'reading passage multiple choice require context and if the context is included in parts where it '
'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'
'The templates for the exercises are the following:\n'
'- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
f'{self._multiple_choice_html()}\n'
f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
'exercises exclude the context field.'
)
}
@staticmethod
def _multiple_choice_html():
return {
"type": "multipleChoice",
"prompt": "Select the appropriate option.",
"questions": [
{
"id": "<the question id>",
"prompt": "<the question>",
"solution": "<the option id solution>",
"options": [
{
"id": "A",
"text": "<the a option>"
},
{
"id": "B",
"text": "<the b option>"
},
{
"id": "C",
"text": "<the c option>"
},
{
"id": "D",
"text": "<the d option>"
}
]
}
]
}
@staticmethod
def _passage_blank_space_html():
return {
"type": "fillBlanks",
"variant": "mc",
"prompt": "Click a blank to select the appropriate word for it.",
"text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
),
"solutions": [
{
"id": "<question id>",
"solution": "<the option that holds the solution>"
}
],
"words": [
{
"id": "<question id>",
"options": {
"A": "<a option>",
"B": "<b option>",
"C": "<c option>",
"D": "<d option>"
}
}
]
}
async def _png_completion(self, path_id: str) -> Exam:
FileHelper.pdf_to_png(path_id)
tmp_files = os.listdir(f'./tmp/{path_id}')
pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
json_schema = {
"components": [
{"type": "part", "part": "<name or number of the part>"},
self._multiple_choice_png(),
{"type": "blanksPassage", "text": (
"<The whole text for the exercise with replacements for blank spaces and their "
"ids with {{<question id>}} with 2 newlines between paragraphs>"
)},
{"type": "passage", "context": (
"<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
)},
self._passage_blank_space_png()
]
}
components = []
for i in range(len(pages)):
current_page = pages[i]
next_page = pages[i + 1] if i + 1 < len(pages) else None
batch = [current_page, next_page] if next_page else [current_page]
sheet = await self._png_batch(path_id, batch, json_schema)
sheet.batch = i + 1
components.append(sheet.dict())
batches = {"batches": components}
return await self._batches_to_exam_completion(batches)
async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
return await self._llm.pydantic_prediction(
[self._gpt_instructions_png(),
{
"role": "user",
"content": [
*FileHelper.b64_pngs(path_id, files)
]
}
],
ExamMapper.map_to_sheet,
str(json_schema)
)
def _gpt_instructions_png(self):
return {
"role": "system",
"content": (
'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
'Your current task is to scan english questions sheets.\n\n'
'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
'sheet components you will retrieve from the images, the components and their corresponding JSON '
'templates are as follows:\n'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In the blanks prompt you must leave 5 underscores to represent the blank space. '
'In the underlines questions the objective is to pick the words that are incorrect in the given '
'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
'in the order they appear in the question for the options A to D, disreguard options that might be '
'included underneath the underlines question and use the ones you wrapped in <u></u>.'
'In normal you just leave the question as is. '
f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
'- Reading Passages, there are two types of reading passages. Reading passages where you will see '
'blanks represented by a (question id) followed by a line, you must format these types of reading '
'passages to be only the text with the brackets that have the question id and line replaced with '
'"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
'and place 2 newlines between paragraphs. '
'For the reading passages with blanks the template is: {"type": "blanksPassage", '
'"text": "<The whole text for the exercise with replacements for blank spaces and their '
'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '
'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n'
'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
'reading passages are cut off don\'t include them in the JSON.'
)
}
def _multiple_choice_png(self):
multiple_choice = self._multiple_choice_html()["questions"][0]
multiple_choice["type"] = "multipleChoice"
multiple_choice.pop("solution")
return multiple_choice
def _passage_blank_space_png(self):
passage_blank_space = self._passage_blank_space_html()["words"][0]
passage_blank_space["type"] = "fillBlanks"
return passage_blank_space
async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
return await self._llm.pydantic_prediction(
[self._gpt_instructions_html(),
{
"role": "user",
"content": str(batches)
}
],
ExamMapper.map_to_exam_model,
str(self._level_json_schema())
)
def _gpt_instructions_batches(self):
return {
"role": "system",
"content": (
'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
'components and solve the questions. Each batch may contain overlapping content with the previous '
'batch, or close enough content which needs to be excluded. The components are as follows:'
'- Part, a standalone part or part of a section of the question sheet: '
'{"type": "part", "part": "<name or number of the part>"}\n'
'- Multiple Choice Question, there are three types of multiple choice questions that differ on '
'the prompt field of the template: blanks, underlines and normal. '
'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
'appropriate option to solve it.'
'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
'the order in which the underlines appear in the prompt you will need to fix it.'
'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
'select the appropriate solution.'
f'The template for these questions is the same: {self._multiple_choice_png()}\n'
'- Reading Passages, there are two types of reading passages with different templates. The one with '
'type "blanksPassage" where the text field holds the passage and a blank is represented by '
'{{<some number>}} and the other one with type "passage" that has the context field with just '
'reading passages. For both of these components you will have to remove any additional data that might '
'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
' if there are any. These components are used in conjunction with other ones.'
'- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
'options with the question id and the options from a to d. The template is: '
f'{self._passage_blank_space_png()}\n\n'
'Now that you know the possible components here\'s what I want you to do:\n'
'1. Remove duplicates. A batch will have duplicates of other batches and the components of '
'the next batch should always take precedence over the previous one batch, what I mean by this is that '
'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
'you pick the next one.\n'
'2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
'exercise. For the multiple choice question follow the previous instruction to solve them and place '
f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
f'the template for this exercise: {self._passage_blank_space_html()}.\n'
f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
f'You must group the exercises by the parts in the order they appear in the batches components. '
f'The context field of a part is the context of a passage component that has text relevant to normal '
f'multiple choice questions.\n'
'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
'in your response and correctly structure the JSON.'
)
}
@staticmethod
def fix_ids(response):
counter = 1
for part in response["parts"]:
for exercise in part["exercises"]:
if exercise["type"] == "multipleChoice":
for question in exercise["questions"]:
question["id"] = counter
counter += 1
if exercise["type"] == "fillBlanks":
for i in range(len(exercise["words"])):
exercise["words"][i]["id"] = counter
exercise["solutions"][i]["id"] = counter
counter += 1
return response

View File

@@ -1,492 +1,492 @@
import queue
import uuid
from logging import getLogger
from queue import Queue
import random
from typing import Dict, List
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
from app.configs.constants import (
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
FieldsAndExercises
)
from app.helpers import ExercisesHelper, FileHelper
class ListeningService(IListeningService):
CONVERSATION_TAIL = (
"Please include random names and genders for the characters in your dialogue. "
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
)
MONOLOGUE_TAIL = (
"Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
)
def __init__(
self, llm: ILLMService,
tts: ITextToSpeechService,
file_storage: IFileStorage,
document_store: IDocumentStore
):
self._llm = llm
self._tts = tts
self._file_storage = file_storage
self._document_store = document_store
self._logger = getLogger(__name__)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"start_id": 1,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"start_id": 11,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue",
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"start_id": 21,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"start_id": 31,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
section = self._sections[f"section_{section_id}"]
if not topic:
topic = random.choice(section["topic"])
if len(req_exercises) == 0:
req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"])
if number_of_exercises_q.empty():
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
section["total_exercises"], len(req_exercises)
)
if start_id == -1:
start_id = section["start_id"]
dialog = await self.generate_listening_question(section_id, topic)
if section_id in {1, 3}:
dialog = self.parse_conversation(dialog)
self._logger.info(f'Generated {section["type"]}: {dialog}')
exercises = await self.generate_listening_exercises(
section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty
)
return {
"exercises": exercises,
"text": dialog,
"difficulty": difficulty
}
async def generate_listening_question(self, section: int, topic: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
async def generate_listening_exercises(
self, section: int, dialog: str,
req_exercises: list[str], number_of_exercises_q: Queue,
start_id: int, difficulty: str
):
dialog_type = self._sections[f'section_{section}']["type"]
exercises = []
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options":
n_options = 4 if "multipleChoice" else 3
question = await self._gen_multiple_choice_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options
)
exercises.append(question)
print("Added multiple choice: " + str(question))
elif req_exercise == "writeBlanksQuestions":
question = await self._gen_write_blanks_questions_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks questions: " + str(question))
elif req_exercise == "writeBlanksFill":
question = await self._gen_write_blanks_notes_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks notes: " + str(question))
elif req_exercise == "writeBlanksForm":
question = await self._gen_write_blanks_form_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks form: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
template = getListeningTemplate()
template['difficulty'] = difficulty
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
file_name = str(uuid.uuid4()) + ".mp3"
sound_file_path = FilePaths.AUDIO_FILES_PATH + file_name
firebase_file_path = FilePaths.FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
if "conversation" in part["text"]:
await self._tts.text_to_speech(part["text"]["conversation"], sound_file_path)
else:
await self._tts.text_to_speech(part["text"], sound_file_path)
file_url = await self._file_storage.upload_file_firebase_get_url(firebase_file_path, sound_file_path)
part_template["audio"]["source"] = file_url
part_template["exercises"] = part["exercises"]
template['parts'].append(part_template)
if min_timer != MinTimers.LISTENING_MIN_TIMER_DEFAULT:
template["minTimer"] = min_timer
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id)
if listening_id:
return {**template, "id": listening_id}
else:
raise Exception("Failed to save question: " + str(parts))
# ==================================================================================================================
# generate_listening_question helpers
# ==================================================================================================================
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
head = (
'Compose an authentic conversation between two individuals in the everyday social context of "'
if section == 1 else
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
f'{head}{topic}". {self.CONVERSATION_TAIL}'
)
}
]
if section == 1:
messages.extend([
{
"role": "user",
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
},
{
"role": "user",
"content": 'Try to have spelling of names (cities, people, etc)'
}
])
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["conversation"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return self._get_conversation_voices(response, True)
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
head = (
'Generate a comprehensive monologue set in the social context of'
if section == 2 else
'Generate a comprehensive and complex monologue on the academic subject of'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["monologue"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return response["monologue"]
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
chosen_voices = []
name_to_voice = {}
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
voice = None
# section 1
if unique_voices_across_segments:
while voice is None:
chosen_voice = self._get_random_voice(segment['gender'])
if chosen_voice not in chosen_voices:
voice = chosen_voice
chosen_voices.append(voice)
# section 3
else:
voice = self._get_random_voice(segment['gender'])
name_to_voice[name] = voice
segment['voice'] = voice
return response
@staticmethod
def _get_random_voice(gender: str):
if gender.lower() == 'male':
available_voices = NeuralVoices.MALE_NEURAL_VOICES
else:
available_voices = NeuralVoices.FEMALE_NEURAL_VOICES
return random.choice(available_voices)['Id']
# ==================================================================================================================
# generate_listening_exercises helpers
# ==================================================================================================================
async def _gen_multiple_choice_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this {dialog_type}:\n"' + text + '"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["questions"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(questions, start_id)["questions"],
"type": "multipleChoice",
}
async def _gen_write_blanks_questions_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Answer the questions below using no more than three words or a number accordingly.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}
async def _gen_write_blanks_notes_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["notes"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this '
'format: {"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = await self._llm.prediction(
GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
words = words["words"][:quantity]
replaced_notes = ExercisesHelper.replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Fill the blank space with the word missing from the audio.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": "\\n".join(replaced_notes),
"type": "writeBlanks"
}
async def _gen_write_blanks_form_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
if dialog_type == "conversation":
messages.append({
"role": "user",
"content": (
'It must be a form and not questions. '
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
)
})
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
parsed_form = parsed_form["form"][:quantity]
replaced_form, words = ExercisesHelper.build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Fill the form with words/numbers missing.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": replaced_form,
"type": "writeBlanks"
}
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
import queue
import uuid
from logging import getLogger
from queue import Queue
import random
from typing import Dict, List
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
from app.configs.constants import (
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
FieldsAndExercises
)
from app.helpers import ExercisesHelper, FileHelper
class ListeningService(IListeningService):
CONVERSATION_TAIL = (
"Please include random names and genders for the characters in your dialogue. "
"Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
)
MONOLOGUE_TAIL = (
"Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
)
def __init__(
self, llm: ILLMService,
tts: ITextToSpeechService,
file_storage: IFileStorage,
document_store: IDocumentStore
):
self._llm = llm
self._tts = tts
self._file_storage = file_storage
self._document_store = document_store
self._logger = getLogger(__name__)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"start_id": 1,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"start_id": 11,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue",
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"start_id": 21,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation",
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"start_id": 31,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
section = self._sections[f"section_{section_id}"]
if not topic:
topic = random.choice(section["topic"])
if len(req_exercises) == 0:
req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"])
if number_of_exercises_q.empty():
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
section["total_exercises"], len(req_exercises)
)
if start_id == -1:
start_id = section["start_id"]
dialog = await self.generate_listening_question(section_id, topic)
if section_id in {1, 3}:
dialog = self.parse_conversation(dialog)
self._logger.info(f'Generated {section["type"]}: {dialog}')
exercises = await self.generate_listening_exercises(
section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty
)
return {
"exercises": exercises,
"text": dialog,
"difficulty": difficulty
}
async def generate_listening_question(self, section: int, topic: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
async def generate_listening_exercises(
self, section: int, dialog: str,
req_exercises: list[str], number_of_exercises_q: Queue,
start_id: int, difficulty: str
):
dialog_type = self._sections[f'section_{section}']["type"]
exercises = []
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options":
n_options = 4 if "multipleChoice" else 3
question = await self._gen_multiple_choice_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options
)
exercises.append(question)
print("Added multiple choice: " + str(question))
elif req_exercise == "writeBlanksQuestions":
question = await self._gen_write_blanks_questions_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks questions: " + str(question))
elif req_exercise == "writeBlanksFill":
question = await self._gen_write_blanks_notes_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks notes: " + str(question))
elif req_exercise == "writeBlanksForm":
question = await self._gen_write_blanks_form_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added write blanks form: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
template = getListeningTemplate()
template['difficulty'] = difficulty
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
file_name = str(uuid.uuid4()) + ".mp3"
sound_file_path = FilePaths.AUDIO_FILES_PATH + file_name
firebase_file_path = FilePaths.FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
if "conversation" in part["text"]:
await self._tts.text_to_speech(part["text"]["conversation"], sound_file_path)
else:
await self._tts.text_to_speech(part["text"], sound_file_path)
file_url = await self._file_storage.upload_file_firebase_get_url(firebase_file_path, sound_file_path)
part_template["audio"]["source"] = file_url
part_template["exercises"] = part["exercises"]
template['parts'].append(part_template)
if min_timer != MinTimers.LISTENING_MIN_TIMER_DEFAULT:
template["minTimer"] = min_timer
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id)
if listening_id:
return {**template, "id": listening_id}
else:
raise Exception("Failed to save question: " + str(parts))
# ==================================================================================================================
# generate_listening_question helpers
# ==================================================================================================================
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
head = (
'Compose an authentic conversation between two individuals in the everyday social context of "'
if section == 1 else
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
},
{
"role": "user",
"content": (
f'{head}{topic}". {self.CONVERSATION_TAIL}'
)
}
]
if section == 1:
messages.extend([
{
"role": "user",
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
},
{
"role": "user",
"content": 'Try to have spelling of names (cities, people, etc)'
}
])
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["conversation"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return self._get_conversation_voices(response, True)
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
head = (
'Generate a comprehensive monologue set in the social context of'
if section == 2 else
'Generate a comprehensive and complex monologue on the academic subject of'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"monologue": "monologue"}')
},
{
"role": "user",
"content": (
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["monologue"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return response["monologue"]
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
chosen_voices = []
name_to_voice = {}
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
voice = None
# section 1
if unique_voices_across_segments:
while voice is None:
chosen_voice = self._get_random_voice(segment['gender'])
if chosen_voice not in chosen_voices:
voice = chosen_voice
chosen_voices.append(voice)
# section 3
else:
voice = self._get_random_voice(segment['gender'])
name_to_voice[name] = voice
segment['voice'] = voice
return response
@staticmethod
def _get_random_voice(gender: str):
if gender.lower() == 'male':
available_voices = NeuralVoices.MALE_NEURAL_VOICES
else:
available_voices = NeuralVoices.FEMALE_NEURAL_VOICES
return random.choice(available_voices)['Id']
# ==================================================================================================================
# generate_listening_exercises helpers
# ==================================================================================================================
async def _gen_multiple_choice_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
'"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
'"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
'energy sources?", "solution": "C", "variant": "text"}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this {dialog_type}:\n"' + text + '"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["questions"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
"questions": ExercisesHelper.fix_exercise_ids(questions, start_id)["questions"],
"type": "multipleChoice",
}
async def _gen_write_blanks_questions_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Answer the questions below using no more than three words or a number accordingly.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}
async def _gen_write_blanks_notes_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"notes": ["note_1", "note_2"]}')
},
{
"role": "user",
"content": (
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
}
]
questions = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["notes"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = questions["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this '
'format: {"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
}
]
words = await self._llm.prediction(
GPTModels.GPT_4_O, word_messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
words = words["words"][:quantity]
replaced_notes = ExercisesHelper.replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Fill the blank space with the word missing from the audio.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": "\\n".join(replaced_notes),
"type": "writeBlanks"
}
async def _gen_write_blanks_form_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"form": ["key: value", "key2: value"]}')
},
{
"role": "user",
"content": (
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
if dialog_type == "conversation":
messages.append({
"role": "user",
"content": (
'It must be a form and not questions. '
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
)
})
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
parsed_form = parsed_form["form"][:quantity]
replaced_form, words = ExercisesHelper.build_write_blanks_text_form(parsed_form, start_id)
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": f"You will hear a {dialog_type}. Fill the form with words/numbers missing.",
"solutions": ExercisesHelper.build_write_blanks_solutions_listening(words, start_id),
"text": replaced_form,
"type": "writeBlanks"
}
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
return "\n".join(readable_text)

View File

@@ -1,349 +1,349 @@
import random
import uuid
from queue import Queue
from typing import List
from app.services.abc import IReadingService, ILLMService
from app.configs.constants import QuestionType, TemperatureSettings, FieldsAndExercises, GPTModels
from app.helpers import ExercisesHelper
class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_reading_passage(
self,
part: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str,
start_id: int
):
passage = await self.generate_reading_passage(part, topic)
exercises = await self._generate_reading_exercises(
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
)
if ExercisesHelper.contains_empty_dict(exercises):
return await self.gen_reading_passage(
part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
)
return {
"exercises": exercises,
"text": {
"content": passage["text"],
"title": passage["title"]
},
"difficulty": difficulty
}
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"title": "title of the text", "text": "generated text"}')
},
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on you generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
FieldsAndExercises.GEN_TEXT_FIELDS,
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def _generate_reading_exercises(
self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
):
exercises = []
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "fillBlanks":
question = await self._gen_summary_fill_blanks_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added fill blanks: " + str(question))
elif req_exercise == "trueFalse":
question = await self._gen_true_false_not_given_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added trueFalse: " + str(question))
elif req_exercise == "writeBlanks":
question = await self._gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
if ExercisesHelper.answer_word_limit_ok(question):
exercises.append(question)
print("Added write blanks: " + str(question))
else:
exercises.append({})
print("Did not add write blanks because it did not respect word limit")
elif req_exercise == "paragraphMatch":
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def _gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": f'Summarize this text: "{text}"'
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
'{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
'Ensure that your statements accurately represent information or inferences from the text, and '
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
f'as appropriate.\n\nReference text:\n\n {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["prompts"]
if len(questions) > quantity:
questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)
for i, question in enumerate(questions, start=start_id):
question["id"] = str(i)
return {
"id": str(uuid.uuid4()),
"prompt": "Do the following statements agree with the information given in the Reading Passage?",
"questions": questions,
"type": "trueFalse"
}
async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'possible answers, must have maximum 3 words per answer, about this text:\n"{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Choose no more than three words and/or a number from the passage for each answer.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}
async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. '
f'The paragraphs are these: {str(paragraphs)}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
headings = response["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
})
random.shuffle(paragraphs)
sentences = []
for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({
"id": i,
"sentence": paragraph["heading"],
"solution": paragraph["letter"]
})
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": options,
"prompt": "Choose the correct heading for paragraphs from the list of headings below.",
"sentences": sentences[:quantity],
"type": "matchSentences"
}
async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"ideas": [ '
'{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
'{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
']}'
)
},
{
"role": "user",
"content": (
f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
f'The text: {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
ideas = response["ideas"]
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": ExercisesHelper.build_options(ideas),
"prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
"sentences": ExercisesHelper.build_sentences(ideas, start_id),
"type": "matchSentences"
}
import random
import uuid
from queue import Queue
from typing import List
from app.services.abc import IReadingService, ILLMService
from app.configs.constants import QuestionType, TemperatureSettings, FieldsAndExercises, GPTModels
from app.helpers import ExercisesHelper
class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
async def gen_reading_passage(
self,
part: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str,
start_id: int
):
passage = await self.generate_reading_passage(part, topic)
exercises = await self._generate_reading_exercises(
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
)
if ExercisesHelper.contains_empty_dict(exercises):
return await self.gen_reading_passage(
part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
)
return {
"exercises": exercises,
"text": {
"content": passage["text"],
"title": passage["title"]
},
"difficulty": difficulty
}
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"title": "title of the text", "text": "generated text"}')
},
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on you generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
FieldsAndExercises.GEN_TEXT_FIELDS,
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def _generate_reading_exercises(
self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
):
exercises = []
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "fillBlanks":
question = await self._gen_summary_fill_blanks_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added fill blanks: " + str(question))
elif req_exercise == "trueFalse":
question = await self._gen_true_false_not_given_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added trueFalse: " + str(question))
elif req_exercise == "writeBlanks":
question = await self._gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
if ExercisesHelper.answer_word_limit_ok(question):
exercises.append(question)
print("Added write blanks: " + str(question))
else:
exercises.append({})
print("Did not add write blanks because it did not respect word limit")
elif req_exercise == "paragraphMatch":
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def _gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": f'Summarize this text: "{text}"'
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
'{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
'Ensure that your statements accurately represent information or inferences from the text, and '
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
f'as appropriate.\n\nReference text:\n\n {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["prompts"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["prompts"]
if len(questions) > quantity:
questions = ExercisesHelper.remove_excess_questions(questions, len(questions) - quantity)
for i, question in enumerate(questions, start=start_id):
question["id"] = str(i)
return {
"id": str(uuid.uuid4()),
"prompt": "Do the following statements agree with the information given in the Reading Passage?",
"questions": questions,
"type": "trueFalse"
}
async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'possible answers, must have maximum 3 words per answer, about this text:\n"{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
questions = response["questions"][:quantity]
return {
"id": str(uuid.uuid4()),
"maxWords": 3,
"prompt": "Choose no more than three words and/or a number from the passage for each answer.",
"solutions": ExercisesHelper.build_write_blanks_solutions(questions, start_id),
"text": ExercisesHelper.build_write_blanks_text(questions, start_id),
"type": "writeBlanks"
}
async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
paragraphs = ExercisesHelper.assign_letters_to_paragraphs(text)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. '
f'The paragraphs are these: {str(paragraphs)}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["headings"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
headings = response["headings"]
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
})
random.shuffle(paragraphs)
sentences = []
for i, paragraph in enumerate(paragraphs, start=start_id):
sentences.append({
"id": i,
"sentence": paragraph["heading"],
"solution": paragraph["letter"]
})
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": options,
"prompt": "Choose the correct heading for paragraphs from the list of headings below.",
"sentences": sentences[:quantity],
"type": "matchSentences"
}
async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"ideas": [ '
'{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
'{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
']}'
)
},
{
"role": "user",
"content": (
f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
f'The text: {text}'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
ideas = response["ideas"]
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": ExercisesHelper.build_options(ideas),
"prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
"sentences": ExercisesHelper.build_sentences(ideas, start_id),
"type": "matchSentences"
}

View File

@@ -1,248 +1,248 @@
from typing import List, Dict
from app.services.abc import IWritingService, ILLMService, IAIDetectorService
from app.configs.constants import GPTModels, TemperatureSettings, FieldsAndExercises
from app.helpers import TextHelper, ExercisesHelper
class WritingService(IWritingService):
def __init__(self, llm: ILLMService, ai_detector: IAIDetectorService):
self._llm = llm
self._ai_detector = ai_detector
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
*self._get_writing_messages(task, topic, difficulty)
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
response = await self._llm.prediction(
llm_model,
messages,
["prompt"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
question = response["prompt"].strip()
return {
"question": self._add_newline_before_hyphen(question) if task == 1 else question,
"difficulty": difficulty,
"topic": topic
}
@staticmethod
def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]:
# TODO: Should the muslim disclaimer be added to task 2?
task_prompt = (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
'advice, or instructions within the letter. Make sure that the generated prompt is '
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
) if task == 1 else (
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
f'analysis of contrasting perspectives on the topic of "{topic}".'
)
task_instructions = (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
'the answer should include.'
) if task == 1 else (
'The question should lead to an answer with either "theories", "complicated information" or '
'be "very descriptive" on the topic.'
)
messages = [
{
"role": "user",
"content": task_prompt
},
{
"role": "user",
"content": task_instructions
}
]
return messages
async def grade_writing_task(self, task: int, question: str, answer: str):
bare_minimum = 100 if task == 1 else 180
if not TextHelper.has_words(answer):
return self._zero_rating("The answer does not contain enough english words.")
elif not TextHelper.has_x_words(answer, bare_minimum):
return self._zero_rating("The answer is insufficient and too small to be graded.")
else:
template = self._get_writing_template()
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {template}'
)
},
{
"role": "user",
"content": (
f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
'Additionally, provide a detailed commentary highlighting both strengths and '
'weaknesses in the response. '
f'\n Question: "{question}" \n Answer: "{answer}"')
}
]
if task == 1:
messages.append({
"role": "user",
"content": (
'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
'"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
)
})
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
temperature = (
TemperatureSettings.GRADING_TEMPERATURE
if task == 1 else
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response = await self._llm.prediction(
llm_model,
messages,
["comment"],
temperature
)
perfect_answer_minimum = 150 if task == 1 else 250
perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum)
response["perfect_answer"] = perfect_answer["perfect_answer"]
response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = await self._get_fixed_text(answer)
ai_detection = await self._ai_detector.run_detection(answer)
if ai_detection is not None:
response['ai_detection'] = ai_detection
return response
async def _get_fixed_text(self, text):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"fixed_text": "fixed test with no misspelling errors"}'
)
},
{
"role": "user",
"content": (
'Fix the errors in the given text and put it in a JSON. '
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
messages,
["fixed_text"],
0.2,
False
)
return response["fixed_text"]
async def _get_perfect_answer(self, question: str, size: int) -> Dict:
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"perfect_answer": "perfect answer for the question"}'
)
},
{
"role": "user",
"content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
},
{
"role": "user",
"content": f'The answer must have at least {size} words'
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["perfect_answer"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
@staticmethod
def _zero_rating(comment: str):
return {
'comment': comment,
'overall': 0,
'task_response': {
'Task Achievement': {
"grade": 0.0,
"comment": ""
},
'Coherence and Cohesion': {
"grade": 0.0,
"comment": ""
},
'Lexical Resource': {
"grade": 0.0,
"comment": ""
},
'Grammatical Range and Accuracy': {
"grade": 0.0,
"comment": ""
}
}
}
@staticmethod
def _get_writing_template():
return {
"comment": "comment about student's response quality",
"overall": 0.0,
"task_response": {
"Task Achievement": {
"grade": 0.0,
"comment": "comment about Task Achievement of the student's response"
},
"Coherence and Cohesion": {
"grade": 0.0,
"comment": "comment about Coherence and Cohesion of the student's response"
},
"Lexical Resource": {
"grade": 0.0,
"comment": "comment about Lexical Resource of the student's response"
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": "comment about Grammatical Range and Accuracy of the student's response"
}
}
}
@staticmethod
def _add_newline_before_hyphen(s):
return s.replace(" -", "\n-")
from typing import List, Dict
from app.services.abc import IWritingService, ILLMService, IAIDetectorService
from app.configs.constants import GPTModels, TemperatureSettings, FieldsAndExercises
from app.helpers import TextHelper, ExercisesHelper
class WritingService(IWritingService):
def __init__(self, llm: ILLMService, ai_detector: IAIDetectorService):
self._llm = llm
self._ai_detector = ai_detector
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
*self._get_writing_messages(task, topic, difficulty)
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
response = await self._llm.prediction(
llm_model,
messages,
["prompt"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
question = response["prompt"].strip()
return {
"question": self._add_newline_before_hyphen(question) if task == 1 else question,
"difficulty": difficulty,
"topic": topic
}
@staticmethod
def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]:
# TODO: Should the muslim disclaimer be added to task 2?
task_prompt = (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
'advice, or instructions within the letter. Make sure that the generated prompt is '
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
) if task == 1 else (
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
f'analysis of contrasting perspectives on the topic of "{topic}".'
)
task_instructions = (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
'the answer should include.'
) if task == 1 else (
'The question should lead to an answer with either "theories", "complicated information" or '
'be "very descriptive" on the topic.'
)
messages = [
{
"role": "user",
"content": task_prompt
},
{
"role": "user",
"content": task_instructions
}
]
return messages
async def grade_writing_task(self, task: int, question: str, answer: str):
bare_minimum = 100 if task == 1 else 180
if not TextHelper.has_words(answer):
return self._zero_rating("The answer does not contain enough english words.")
elif not TextHelper.has_x_words(answer, bare_minimum):
return self._zero_rating("The answer is insufficient and too small to be graded.")
else:
template = self._get_writing_template()
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {template}'
)
},
{
"role": "user",
"content": (
f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
'Additionally, provide a detailed commentary highlighting both strengths and '
'weaknesses in the response. '
f'\n Question: "{question}" \n Answer: "{answer}"')
}
]
if task == 1:
messages.append({
"role": "user",
"content": (
'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
'"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
)
})
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
temperature = (
TemperatureSettings.GRADING_TEMPERATURE
if task == 1 else
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response = await self._llm.prediction(
llm_model,
messages,
["comment"],
temperature
)
perfect_answer_minimum = 150 if task == 1 else 250
perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum)
response["perfect_answer"] = perfect_answer["perfect_answer"]
response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = await self._get_fixed_text(answer)
ai_detection = await self._ai_detector.run_detection(answer)
if ai_detection is not None:
response['ai_detection'] = ai_detection
return response
async def _get_fixed_text(self, text):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"fixed_text": "fixed test with no misspelling errors"}'
)
},
{
"role": "user",
"content": (
'Fix the errors in the given text and put it in a JSON. '
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
)
}
]
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
messages,
["fixed_text"],
0.2,
False
)
return response["fixed_text"]
async def _get_perfect_answer(self, question: str, size: int) -> Dict:
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"perfect_answer": "perfect answer for the question"}'
)
},
{
"role": "user",
"content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
},
{
"role": "user",
"content": f'The answer must have at least {size} words'
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["perfect_answer"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
@staticmethod
def _zero_rating(comment: str):
return {
'comment': comment,
'overall': 0,
'task_response': {
'Task Achievement': {
"grade": 0.0,
"comment": ""
},
'Coherence and Cohesion': {
"grade": 0.0,
"comment": ""
},
'Lexical Resource': {
"grade": 0.0,
"comment": ""
},
'Grammatical Range and Accuracy': {
"grade": 0.0,
"comment": ""
}
}
}
@staticmethod
def _get_writing_template():
return {
"comment": "comment about student's response quality",
"overall": 0.0,
"task_response": {
"Task Achievement": {
"grade": 0.0,
"comment": "comment about Task Achievement of the student's response"
},
"Coherence and Cohesion": {
"grade": 0.0,
"comment": "comment about Coherence and Cohesion of the student's response"
},
"Lexical Resource": {
"grade": 0.0,
"comment": "comment about Lexical Resource of the student's response"
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": "comment about Grammatical Range and Accuracy of the student's response"
}
}
}
@staticmethod
def _add_newline_before_hyphen(s):
return s.replace(" -", "\n-")

View File

@@ -1,13 +1,13 @@
from .aws_polly import AWSPolly
from .heygen import Heygen
from .openai import OpenAI
from .whisper import OpenAIWhisper
from .gpt_zero import GPTZero
__all__ = [
"AWSPolly",
"Heygen",
"OpenAI",
"OpenAIWhisper",
"GPTZero"
]
from .aws_polly import AWSPolly
from .heygen import Heygen
from .openai import OpenAI
from .whisper import OpenAIWhisper
from .gpt_zero import GPTZero
__all__ = [
"AWSPolly",
"Heygen",
"OpenAI",
"OpenAIWhisper",
"GPTZero"
]

View File

@@ -1,87 +1,87 @@
import random
from typing import Union
import aiofiles
from aiobotocore.client import BaseClient
from app.services.abc import ITextToSpeechService
from app.configs.constants import NeuralVoices
class AWSPolly(ITextToSpeechService):
def __init__(self, client: BaseClient):
self._client = client
async def synthesize_speech(self, text: str, voice: str, engine: str = "neural", output_format: str = "mp3"):
tts_response = await self._client.synthesize_speech(
Engine=engine,
Text=text,
OutputFormat=output_format,
VoiceId=voice
)
return await tts_response['AudioStream'].read()
async def text_to_speech(self, text: Union[list[str], str], file_name: str):
if isinstance(text, str):
audio_segments = await self._text_to_speech(text)
elif isinstance(text, list):
audio_segments = await self._conversation_to_speech(text)
else:
raise ValueError("Unsupported argument for text_to_speech")
final_message = await self.synthesize_speech(
"This audio recording, for the listening exercise, has finished.",
"Stephen"
)
# Add finish message
audio_segments.append(final_message)
# Combine the audio segments into a single audio file
combined_audio = b"".join(audio_segments)
# Save the combined audio to a single file
async with aiofiles.open(file_name, "wb") as f:
await f.write(combined_audio)
print("Speech segments saved to " + file_name)
async def _text_to_speech(self, text: str):
voice = random.choice(NeuralVoices.ALL_NEURAL_VOICES)['Id']
# Initialize an empty list to store audio segments
audio_segments = []
for part in self._divide_text(text):
audio_segments.append(await self.synthesize_speech(part, voice))
return audio_segments
async def _conversation_to_speech(self, conversation: list):
# Initialize an empty list to store audio segments
audio_segments = []
# Iterate through the text segments, convert to audio segments, and store them
for segment in conversation:
audio_segments.append(await self.synthesize_speech(segment["text"], segment["voice"]))
return audio_segments
@staticmethod
def _divide_text(text, max_length=3000):
if len(text) <= max_length:
return [text]
divisions = []
current_position = 0
while current_position < len(text):
next_position = min(current_position + max_length, len(text))
next_period_position = text.rfind('.', current_position, next_position)
if next_period_position != -1 and next_period_position > current_position:
divisions.append(text[current_position:next_period_position + 1])
current_position = next_period_position + 1
else:
# If no '.' found in the next chunk, split at max_length
divisions.append(text[current_position:next_position])
current_position = next_position
return divisions
import random
from typing import Union
import aiofiles
from aiobotocore.client import BaseClient
from app.services.abc import ITextToSpeechService
from app.configs.constants import NeuralVoices
class AWSPolly(ITextToSpeechService):
def __init__(self, client: BaseClient):
self._client = client
async def synthesize_speech(self, text: str, voice: str, engine: str = "neural", output_format: str = "mp3"):
tts_response = await self._client.synthesize_speech(
Engine=engine,
Text=text,
OutputFormat=output_format,
VoiceId=voice
)
return await tts_response['AudioStream'].read()
async def text_to_speech(self, text: Union[list[str], str], file_name: str):
if isinstance(text, str):
audio_segments = await self._text_to_speech(text)
elif isinstance(text, list):
audio_segments = await self._conversation_to_speech(text)
else:
raise ValueError("Unsupported argument for text_to_speech")
final_message = await self.synthesize_speech(
"This audio recording, for the listening exercise, has finished.",
"Stephen"
)
# Add finish message
audio_segments.append(final_message)
# Combine the audio segments into a single audio file
combined_audio = b"".join(audio_segments)
# Save the combined audio to a single file
async with aiofiles.open(file_name, "wb") as f:
await f.write(combined_audio)
print("Speech segments saved to " + file_name)
async def _text_to_speech(self, text: str):
voice = random.choice(NeuralVoices.ALL_NEURAL_VOICES)['Id']
# Initialize an empty list to store audio segments
audio_segments = []
for part in self._divide_text(text):
audio_segments.append(await self.synthesize_speech(part, voice))
return audio_segments
async def _conversation_to_speech(self, conversation: list):
# Initialize an empty list to store audio segments
audio_segments = []
# Iterate through the text segments, convert to audio segments, and store them
for segment in conversation:
audio_segments.append(await self.synthesize_speech(segment["text"], segment["voice"]))
return audio_segments
@staticmethod
def _divide_text(text, max_length=3000):
if len(text) <= max_length:
return [text]
divisions = []
current_position = 0
while current_position < len(text):
next_position = min(current_position + max_length, len(text))
next_period_position = text.rfind('.', current_position, next_position)
if next_period_position != -1 and next_period_position > current_position:
divisions.append(text[current_position:next_period_position + 1])
current_position = next_period_position + 1
else:
# If no '.' found in the next chunk, split at max_length
divisions.append(text[current_position:next_position])
current_position = next_position
return divisions

View File

@@ -1,52 +1,52 @@
from logging import getLogger
from typing import Dict, Optional
from httpx import AsyncClient
from app.services.abc.third_parties.ai_detector import IAIDetectorService
class GPTZero(IAIDetectorService):
_GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
def __init__(self, client: AsyncClient, gpt_zero_key: str):
self._header = {
'x-api-key': gpt_zero_key
}
self._http_client = client
self._logger = getLogger(__name__)
async def run_detection(self, text: str):
data = {
'document': text,
'version': '',
'multilingual': False
}
response = await self._http_client.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=data)
if response.status_code != 200:
return None
return self._parse_detection(response.json())
def _parse_detection(self, response: Dict) -> Optional[Dict]:
try:
text_scan = response["documents"][0]
filtered_sentences = [
{
"sentence": item["sentence"],
"highlight_sentence_for_ai": item["highlight_sentence_for_ai"]
}
for item in text_scan["sentences"]
]
return {
"class_probabilities": text_scan["class_probabilities"],
"confidence_category": text_scan["confidence_category"],
"predicted_class": text_scan["predicted_class"],
"sentences": filtered_sentences
}
except Exception as e:
self._logger.error(f'Failed to parse GPT\'s Zero response: {str(e)}')
return None
from logging import getLogger
from typing import Dict, Optional
from httpx import AsyncClient
from app.services.abc.third_parties.ai_detector import IAIDetectorService
class GPTZero(IAIDetectorService):
_GPT_ZERO_ENDPOINT = 'https://api.gptzero.me/v2/predict/text'
def __init__(self, client: AsyncClient, gpt_zero_key: str):
self._header = {
'x-api-key': gpt_zero_key
}
self._http_client = client
self._logger = getLogger(__name__)
async def run_detection(self, text: str):
data = {
'document': text,
'version': '',
'multilingual': False
}
response = await self._http_client.post(self._GPT_ZERO_ENDPOINT, headers=self._header, json=data)
if response.status_code != 200:
return None
return self._parse_detection(response.json())
def _parse_detection(self, response: Dict) -> Optional[Dict]:
try:
text_scan = response["documents"][0]
filtered_sentences = [
{
"sentence": item["sentence"],
"highlight_sentence_for_ai": item["highlight_sentence_for_ai"]
}
for item in text_scan["sentences"]
]
return {
"class_probabilities": text_scan["class_probabilities"],
"confidence_category": text_scan["confidence_category"],
"predicted_class": text_scan["predicted_class"],
"sentences": filtered_sentences
}
except Exception as e:
self._logger.error(f'Failed to parse GPT\'s Zero response: {str(e)}')
return None

Some files were not shown because too many files have changed in this diff Show More