Updated this to the latest version of develop, got rid of most of the duplication, might be missing some packages in toml, needs testing

This commit is contained in:
Carlos Mesquita
2024-08-30 02:35:11 +01:00
parent 3cf9fa5cba
commit f92a803d96
73 changed files with 3642 additions and 2703 deletions

3
.env
View File

@@ -1,7 +1,8 @@
ENV=local
OPENAI_API_KEY=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
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/encoach-staging.json
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af

2
.gitignore vendored
View File

@@ -2,5 +2,5 @@ __pycache__
.idea
.env
.DS_Store
firebase-configs/local.json
.venv
scripts

3
.idea/ielts-be.iml generated
View File

@@ -5,9 +5,10 @@
</component>
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PackageRequirementsSettings">

5
.idea/misc.xml generated
View File

@@ -1,6 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
<component name="Black">
<option name="sdkName" value="Python 3.11 (ielts-be)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (ielts-be)" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View File

@@ -18,12 +18,16 @@ COPY . ./
COPY --from=requirements-stage /tmp/requirements.txt /app/requirements.txt
RUN apt update && apt install -y ffmpeg
RUN pip install openai-whisper
# the openai-whisper model is not compatible with the newer numpy 2.0.0 release
RUN pip install --upgrade numpy<2
RUN apt update && apt install -y \
ffmpeg \
poppler-utils \
texlive-latex-base \
texlive-fonts-recommended \
texlive-latex-extra \
texlive-xetex \
pandoc \
librsvg2-bin \
&& rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir -r /app/requirements.txt

View File

@@ -1,27 +1,5 @@
# Disclaimer
Latest refactor from develop's branch commit 5d5cd21 2024-08-28
I didn't fully test all the endpoints, the main purpose of this release was for ielts-be to be async but I've also
separated logic through different layers, removed some duplication and implemented dependency injection, so there
could be errors and extensive testing is needed before even considering deploying (if you're even considering it).
The version this was refactored from was master's branch commit a4caecd 2024-06-13
# Changes
Since one of my use cases is load testing with 5000 concurrent users and ielts-be is sync, I've refactored ielts-be
into this fastapi app.
The ielts-be Dockerfile runs the container with:
```CMD exec gunicorn --bind 0.0.0.0:5000 --workers 1 --threads 8 --timeout 0 app:app```
And since gunicorn uses WSGI and ielts-be has mostly sync I/O blocking operations, every time a request encounters
an I/O blocking operation a thread is blocked. Since this config is 1 worker with 8 threads, the container
will only be able to handle 8 concurrent requests at a time before gcloud run cold starts another instance.
Flask was built with WSGI in mind, having Quart as its async alternative; even though you can serve Flask
with uvicorn using the [asgiref](https://pypi.org/project/asgiref/) adapter, FastAPI has better performance
than both alternatives and the sync calls would need to be modified either way.
# Endpoints
@@ -29,34 +7,38 @@ In ielts-ui I've added a wrapper to every backend request in '/src/utils/transla
new endpoints if the "BACKEND_TYPE" environment variable is set to "async", if the env variable is not present or
with another value, the wrapper will return the old endpoint.
| Method | ielts-be | This one |
|--------|--------------------------------------|------------------------------------------|
| GET | /healthcheck | /api/healthcheck |
| GET | /listening_section_1 | /api/listening/section/1 |
| GET | /listening_section_2 | /api/listening/section/2 |
| GET | /listening_section_3 | /api/listening/section/3 |
| GET | /listening_section_4 | /api/listening/section/4 |
| POST | /listening | /api/listening |
| POST | /writing_task1 | /api/grade/writing/1 |
| POST | /writing_task2 | /api/grade/writing/2 |
| GET | /writing_task1_general | /api/writing/1 |
| GET | /writing_task2_general | /api/writing/2 |
| POST | /speaking_task_1 | /api/grade/speaking/1 |
| POST | /speaking_task_2 | /api/grade/speaking/2 |
| POST | /speaking_task_3 | /api/grade/speaking/3 |
| GET | /speaking_task_1 | /api/speaking/1 |
| GET | /speaking_task_2 | /api/speaking/2 |
| GET | /speaking_task_3 | /api/speaking/3 |
| POST | /speaking | /api/speaking |
| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video |
| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video |
| GET | /reading_passage_1 | /api/reading/passage/1 |
| GET | /reading_passage_2 | /api/reading/passage/2 |
| GET | /reading_passage_3 | /api/reading/passage/3 |
| GET | /level | /api/level |
| GET | /level_utas | /api/level/utas |
| POST | /fetch_tips | /api/training/tips |
| POST | /grading_summary | /api/grade/summary |
| Method | ielts-be | This one |
|--------|--------------------------------------|---------------------------------------------|
| GET | /healthcheck | /api/healthcheck |
| GET | /listening_section_1 | /api/listening/section/1 |
| GET | /listening_section_2 | /api/listening/section/2 |
| GET | /listening_section_3 | /api/listening/section/3 |
| GET | /listening_section_4 | /api/listening/section/4 |
| POST | /listening | /api/listening |
| POST | /writing_task1 | /api/grade/writing/1 |
| POST | /writing_task2 | /api/grade/writing/2 |
| GET | /writing_task1_general | /api/writing/1 |
| GET | /writing_task2_general | /api/writing/2 |
| POST | /speaking_task_1 | /api/grade/speaking/1 |
| POST | /speaking_task_2 | /api/grade/speaking/2 |
| POST | /speaking_task_3 | /api/grade/speaking/3 |
| GET | /speaking_task_1 | /api/speaking/1 |
| GET | /speaking_task_2 | /api/speaking/2 |
| GET | /speaking_task_3 | /api/speaking/3 |
| POST | /speaking | /api/speaking |
| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video |
| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video |
| GET | /reading_passage_1 | /api/reading/passage/1 |
| GET | /reading_passage_2 | /api/reading/passage/2 |
| GET | /reading_passage_3 | /api/reading/passage/3 |
| GET | /level | /api/level |
| GET | /level_utas | /api/level/utas |
| POST | /fetch_tips | /api/training/tips |
| POST | /grading_summary | /api/grade/summary |
| POST | /grade_short_answers | /api/grade/short_answers |
| POST | /upload_level | /api/level/upload |
| POST | /training_content | /api/training/ |
| POST | /custom_level | /api/level/custom |
# Run the app
@@ -64,9 +46,7 @@ This is for Windows, creating venv and activating it may differ based on your OS
1. python -m venv env
2. env\Scripts\activate
3. pip install openai-whisper
4. pip install --upgrade numpy<2
5. pip install poetry
6. poetry install
7. python main.py
3. pip install poetry
4. poetry install
5. python app.py

1184
app.py

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,8 @@ from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Depends, Path, Request
from app.controllers.abc import IGradeController
from app.dtos import WritingGradeTaskDTO
from app.dtos.writing import WritingGradeTaskDTO
from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
controller = "grade_controller"
@@ -22,18 +23,29 @@ async def grade_writing_task(
return await grade_controller.grade_writing_task(task, data)
@grade_router.post(
'/speaking/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_2(
data: GradeSpeakingDTO,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_speaking_task(2, [data.dict()])
@grade_router.post(
'/speaking/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task(
request: Request,
async def grade_speaking_task_1_and_3(
data: GradeSpeakingAnswersDTO,
task: int = Path(..., ge=1, le=3),
grade_controller: IGradeController = Depends(Provide[controller])
):
data = await request.json()
return await grade_controller.grade_speaking_task(task, data)
return await grade_controller.grade_speaking_task(task, data.answers)
@grade_router.post(
@@ -47,3 +59,16 @@ async def grading_summary(
):
data = await request.json()
return await grade_controller.grading_summary(data)
@grade_router.post(
'/short_answers',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_short_answers(
request: Request,
grade_controller: IGradeController = Depends(Provide[controller])
):
data = await request.json()
return await grade_controller.grade_short_answers(data)

View File

@@ -1,5 +1,5 @@
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, UploadFile, Request
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ILevelController
@@ -27,4 +27,29 @@ async def get_level_exam(
async def get_level_utas(
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.get_level_exam()
return await level_controller.get_level_utas()
@level_router.post(
'/upload',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def upload(
file: UploadFile,
level_controller: ILevelController = Depends(Provide[controller])
):
return await level_controller.upload_level(file)
@level_router.post(
'/custom',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def custom_level(
request: Request,
level_controller: ILevelController = Depends(Provide[controller])
):
data = await request.json()
return await level_controller.get_custom_level(data)

View File

@@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, Path
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import IListeningController
from app.configs.constants import EducationalContent
from app.dtos import SaveListeningDTO
from app.dtos.listening import SaveListeningDTO
controller = "listening_controller"

View File

@@ -6,24 +6,40 @@ from fastapi import APIRouter, Path, Query, Depends, BackgroundTasks
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent
from app.controllers.abc import ISpeakingController
from app.dtos import SaveSpeakingDTO, SpeakingGenerateVideoDTO, SpeakingGenerateInteractiveVideoDTO
from app.dtos.speaking import (
SaveSpeakingDTO, GenerateVideo1DTO, GenerateVideo2DTO, GenerateVideo3DTO
)
controller = "speaking_controller"
speaking_router = APIRouter()
@speaking_router.get(
'/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
first_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
second_topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.get_speaking_part(1, first_topic, difficulty, second_topic)
@speaking_router.get(
'/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
task: int = Path(..., ge=1, le=3),
task: int = Path(..., ge=2, le=3),
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.get_speaking_task(task, topic, difficulty)
return await speaking_controller.get_speaking_part(task, topic, difficulty)
@speaking_router.post(
@@ -40,24 +56,42 @@ async def save_speaking(
@speaking_router.post(
'/generate_speaking_video',
'/generate_video/1',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_speaking_video(
data: SpeakingGenerateVideoDTO,
async def generate_video_1(
data: GenerateVideo1DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_speaking_video(data)
return await speaking_controller.generate_video(
1, data.avatar, data.first_topic, data.questions, second_topic=data.second_topic
)
@speaking_router.post(
'/generate_interactive_video',
'/generate_video/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_interactive_video(
data: SpeakingGenerateInteractiveVideoDTO,
async def generate_video_2(
data: GenerateVideo2DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_interactive_video(data)
return await speaking_controller.generate_video(
2, data.avatar, data.topic, [data.question], prompts=data.prompts, suffix=data.suffix
)
@speaking_router.post(
'/generate_video/3',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_3(
data: GenerateVideo3DTO,
speaking_controller: ISpeakingController = Depends(Provide[controller])
):
return await speaking_controller.generate_video(
3, data.avatar, data.topic, data.questions
)

View File

@@ -1,7 +1,7 @@
from dependency_injector.wiring import Provide, inject
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, Request
from app.dtos import TipsDTO
from app.dtos.training import FetchTipsDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.controllers.abc import ITrainingController
@@ -15,7 +15,20 @@ training_router = APIRouter()
)
@inject
async def get_reading_passage(
data: TipsDTO,
data: FetchTipsDTO,
training_controller: ITrainingController = Depends(Provide[controller])
):
return await training_controller.fetch_tips(data)
@training_router.post(
'/',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def training_content(
request: Request,
training_controller: ITrainingController = Depends(Provide[controller])
):
data = await request.json()
return await training_controller.get_training_content(data)

View File

@@ -2,7 +2,7 @@ from enum import Enum
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
"discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
"discrimination", "politic", "christianity", "islam", "christian", "christians",
"jews", "jew", "discrimination", "discriminatory"]
@@ -11,6 +11,26 @@ class ExamVariant(Enum):
PARTIAL = "partial"
class CustomLevelExerciseTypes(Enum):
MULTIPLE_CHOICE_4 = "multiple_choice_4"
MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
BLANK_SPACE_TEXT = "blank_space_text"
READING_PASSAGE_UTAS = "reading_passage_utas"
WRITING_LETTER = "writing_letter"
WRITING_2 = "writing_2"
SPEAKING_1 = "speaking_1"
SPEAKING_2 = "speaking_2"
SPEAKING_3 = "speaking_3"
READING_1 = "reading_1"
READING_2 = "reading_2"
READING_3 = "reading_3"
LISTENING_1 = "listening_1"
LISTENING_2 = "listening_2"
LISTENING_3 = "listening_3"
LISTENING_4 = "listening_4"
class QuestionType(Enum):
LISTENING_SECTION_1 = "Listening Section 1"
LISTENING_SECTION_2 = "Listening Section 2"
@@ -63,7 +83,14 @@ class FieldsAndExercises:
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions']
LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions']
LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
TOTAL_READING_PASSAGE_1_EXERCISES = 13
TOTAL_READING_PASSAGE_2_EXERCISES = 13
@@ -218,7 +245,6 @@ class EducationalContent:
"Space Exploration",
"Artificial Intelligence",
"Climate Change",
"World Religions",
"The Human Brain",
"Renewable Energy",
"Cultural Diversity",

View File

@@ -1,3 +1,4 @@
import json
import os
from dependency_injector import providers, containers
@@ -6,6 +7,7 @@ from openai import AsyncOpenAI
from httpx import AsyncClient as HTTPClient
from google.cloud.firestore_v1 import AsyncClient as FirestoreClient
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from app.repositories.impl import *
from app.services.impl import *
@@ -60,16 +62,26 @@ def config_di(
writing_service = providers.Factory(WritingService, llm=llm, ai_detector=ai_detector)
with open('app/services/impl/level/mc_variants.json', 'r') as file:
mc_variants = json.load(file)
level_service = providers.Factory(
LevelService, llm=llm, document_store=firestore, reading_service=reading_service
LevelService, llm=llm, document_store=firestore, mc_variants=mc_variants, reading_service=reading_service,
writing_service=writing_service, speaking_service=speaking_service, listening_service=listening_service
)
grade_service = providers.Factory(
GradeService, llm=llm
)
embeddings = SentenceTransformer('all-MiniLM-L6-v2')
training_kb = providers.Factory(
TrainingContentKnowledgeBase, embeddings=embeddings
)
training_service = providers.Factory(
TrainingService, llm=llm
TrainingService, llm=llm, firestore=firestore, training_kb=training_kb
)
# Controllers

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Dict
from typing import Dict, List
class IGradeController(ABC):
@@ -9,18 +9,14 @@ class IGradeController(ABC):
pass
@abstractmethod
async def grade_speaking_task(self, task: int, data: Dict):
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def grading_summary(self, data: Dict):
pass
@abstractmethod
async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str):
pass
@abstractmethod
async def _grade_speaking_task3(self, answers: Dict):
pass

View File

@@ -1,5 +1,8 @@
from abc import ABC, abstractmethod
from fastapi import UploadFile
from typing import Dict
class ILevelController(ABC):
@@ -10,3 +13,11 @@ class ILevelController(ABC):
@abstractmethod
async def get_level_utas(self):
pass
@abstractmethod
async def upload_level(self, file: UploadFile):
pass
@abstractmethod
async def get_custom_level(self, data: Dict):
pass

View File

@@ -1,11 +1,13 @@
from abc import ABC, abstractmethod
from typing import Optional
from fastapi import BackgroundTasks
class ISpeakingController(ABC):
@abstractmethod
async def get_speaking_task(self, task: int, topic: str, difficulty: str):
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
pass
@abstractmethod
@@ -13,9 +15,11 @@ class ISpeakingController(ABC):
pass
@abstractmethod
async def generate_speaking_video(self, data):
pass
@abstractmethod
async def generate_interactive_video(self, data):
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
pass

View File

@@ -6,3 +6,7 @@ class ITrainingController(ABC):
@abstractmethod
async def fetch_tips(self, data):
pass
@abstractmethod
async def get_training_content(self, data):
pass

View File

@@ -1,17 +1,12 @@
import logging
import os
import uuid
from typing import Dict
from fastapi import HTTPException
from pydantic import ValidationError
from typing import Dict, List
from app.configs.constants import FilePaths
from app.controllers.abc import IGradeController
from app.dtos.speaking import SpeakingGradeTask1And2DTO, SpeakingGradeTask3DTO
from app.dtos.writing import WritingGradeTaskDTO
from app.helpers import IOHelper
from app.helpers import FileHelper
from app.services.abc import ISpeakingService, IWritingService, IGradeService
from app.utils import handle_exception
class GradeController(IGradeController):
@@ -28,47 +23,20 @@ class GradeController(IGradeController):
self._logger = logging.getLogger(__name__)
async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO):
try:
return await self._writing_service.grade_writing_task(task, data.question, data.answer)
except Exception as e:
return str(e)
return await self._writing_service.grade_writing_task(task, data.question, data.answer)
async def grade_speaking_task(self, task: int, data: Dict):
try:
if task in {1, 2}:
body = SpeakingGradeTask1And2DTO(**data)
return await self._grade_speaking_task_1_2(task, body.question, body.answer)
else:
body = SpeakingGradeTask3DTO(**data)
return await self._grade_speaking_task3(body.answers)
except ValidationError as e:
raise HTTPException(status_code=422, detail=e.errors())
@handle_exception(400)
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
return await self._speaking_service.grade_speaking_task(task, answers)
async def grade_short_answers(self, data: Dict):
return await self._service.grade_short_answers(data)
async def grading_summary(self, data: Dict):
try:
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
return await self._service.calculate_grading_summary(extracted_sections)
except Exception as e:
return str(e)
async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str):
sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
try:
IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
return await self._speaking_service.grade_speaking_task_1_and_2(
task, question, answer_firebase_path, sound_file_name
)
except Exception as e:
os.remove(sound_file_name)
return str(e), 400
async def _grade_speaking_task3(self, answers: Dict):
try:
IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
return await self._speaking_service.grade_speaking_task_3(answers)
except Exception as e:
return str(e), 400
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
return await self._service.calculate_grading_summary(extracted_sections)
@staticmethod
def _extract_existing_sections_from_body(my_dict, keys_to_extract):

View File

@@ -1,3 +1,6 @@
from fastapi import UploadFile
from typing import Dict
from app.controllers.abc import ILevelController
from app.services.abc import ILevelService
@@ -8,13 +11,13 @@ class LevelController(ILevelController):
self._service = level_service
async def get_level_exam(self):
try:
return await self._service.get_level_exam()
except Exception as e:
return str(e)
return await self._service.get_level_exam()
async def get_level_utas(self):
try:
return await self._service.get_level_utas()
except Exception as e:
return str(e)
return await self._service.get_level_utas()
async def upload_level(self, file: UploadFile):
return await self._service.upload_level(file)
async def get_custom_level(self, data: Dict):
return await self._service.get_custom_level(data)

View File

@@ -1,97 +1,19 @@
import random
import logging
from typing import List
from app.controllers.abc import IListeningController
from app.dtos import SaveListeningDTO
from app.dtos.listening import SaveListeningDTO
from app.services.abc import IListeningService
from app.helpers import IOHelper, ExercisesHelper
from app.configs.constants import (
FilePaths, EducationalContent, FieldsAndExercises
)
class ListeningController(IListeningController):
def __init__(self, listening_service: IListeningService):
self._service = listening_service
self._logger = logging.getLogger(__name__)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"type": "conversation",
"start_id": 1
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"type": "monologue",
"start_id": 11
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"type": "conversation",
"start_id": 21
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"type": "monologue",
"start_id": 31
}
}
async def get_listening_question(self, section_id: int, topic: str, req_exercises: List[str], difficulty: str):
try:
IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
section = self._sections[f"section_{str(section_id)}"]
if not topic:
topic = random.choice(section["topic"])
if len(req_exercises) == 0:
req_exercises = random.sample(FieldsAndExercises.LISTENING_EXERCISE_TYPES, section["exercise_sample_size"])
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(section["total_exercises"], len(req_exercises))
dialog = await self._service.generate_listening_question(section_id, topic)
if section_id in {1, 3}:
dialog = self.parse_conversation(dialog)
self._logger.info(f'Generated {section["type"]}: {str(dialog)}')
exercises = await self._service.generate_listening_exercises(
section_id, str(dialog), req_exercises, number_of_exercises_q, section["start_id"], difficulty
)
return {
"exercises": exercises,
"text": dialog,
"difficulty": difficulty
}
except Exception as e:
return str(e)
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str
):
return await self._service.get_listening_question(section_id, topic, req_exercises, difficulty)
async def save_listening(self, data: SaveListeningDTO):
try:
return await self._service.save_listening(data.parts, data.minTimer, data.difficulty)
except Exception as e:
return str(e)
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
return "\n".join(readable_text)
return await self._service.save_listening(data.parts, data.minTimer, data.difficulty, data.id)

View File

@@ -15,29 +15,29 @@ class ReadingController(IReadingController):
self._logger = logging.getLogger(__name__)
self._passages = {
"passage_1": {
"start_id": 1,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_1_EXERCISES
},
"passage_2": {
"start_id": 14,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_2_EXERCISES
},
"passage_3": {
"start_id": 27,
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_3_EXERCISES
}
}
async def get_reading_passage(self, passage_id: int, topic: str, req_exercises: List[str], difficulty: str):
try:
passage = self._passages[f'passage_{str(passage_id)}']
passage = self._passages[f'passage_{str(passage_id)}']
if len(req_exercises) == 0:
req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
if len(req_exercises) == 0:
req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
passage["total_exercises"], len(req_exercises)
)
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
passage["total_exercises"], len(req_exercises)
)
return await self._service.gen_reading_passage(
passage_id, topic, req_exercises, number_of_exercises_q, difficulty
)
except Exception as e:
return str(e)
return await self._service.gen_reading_passage(
passage_id, topic, req_exercises, number_of_exercises_q, difficulty, passage["start_id"]
)

View File

@@ -1,13 +1,12 @@
import logging
import uuid
from typing import Optional
from fastapi import BackgroundTasks
from app.controllers.abc import ISpeakingController
from app.dtos import (
SaveSpeakingDTO, SpeakingGenerateVideoDTO,
SpeakingGenerateInteractiveVideoDTO
)
from app.dtos.speaking import SaveSpeakingDTO
from app.services.abc import ISpeakingService
from app.configs.constants import ExamVariant, MinTimers
from app.configs.question_templates import getSpeakingTemplate
@@ -19,45 +18,30 @@ class SpeakingController(ISpeakingController):
self._service = speaking_service
self._logger = logging.getLogger(__name__)
async def get_speaking_task(self, task: int, topic: str, difficulty: str):
try:
return await self._service.get_speaking_task(task, topic, difficulty)
except Exception as e:
return str(e)
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
return await self._service.get_speaking_part(task, topic, difficulty, second_topic)
async def save_speaking(self, data: SaveSpeakingDTO, background_tasks: BackgroundTasks):
try:
exercises = data.exercises
min_timer = data.minTimer
exercises = data.exercises
min_timer = data.minTimer
template = getSpeakingTemplate()
template["minTimer"] = min_timer
template = getSpeakingTemplate()
template["minTimer"] = min_timer
if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
template["variant"] = ExamVariant.PARTIAL.value
else:
template["variant"] = ExamVariant.FULL.value
req_id = str(uuid.uuid4())
self._logger.info(f'Received request to save speaking with id: {req_id}')
req_id = str(uuid.uuid4())
self._logger.info(f'Received request to save speaking with id: {req_id}')
background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
self._logger.info('Started background task to save speaking.')
self._logger.info('Started background task to save speaking.')
# Return response without waiting for create_videos_and_save_to_db to finish
return {**template, "id": req_id}
except Exception as e:
return str(e)
# Return response without waiting for create_videos_and_save_to_db to finish
return {**template, "id": req_id}
async def generate_speaking_video(self, data: SpeakingGenerateVideoDTO):
try:
return await self._service.generate_speaking_video(data.question, data.topic, data.avatar, data.prompts)
except Exception as e:
return str(e)
async def generate_interactive_video(self, data: SpeakingGenerateInteractiveVideoDTO):
try:
return await self._service.generate_interactive_video(data.questions, data.topic, data.avatar)
except Exception as e:
return str(e)
async def generate_video(self, *args, **kwargs):
return await self._service.generate_video(*args, **kwargs)

View File

@@ -1,5 +1,7 @@
from typing import Dict
from app.controllers.abc import ITrainingController
from app.dtos import TipsDTO
from app.dtos.training import FetchTipsDTO
from app.services.abc import ITrainingService
@@ -8,8 +10,8 @@ class TrainingController(ITrainingController):
def __init__(self, training_service: ITrainingService):
self._service = training_service
async def fetch_tips(self, data: TipsDTO):
try:
return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
except Exception as e:
return str(e)
async def fetch_tips(self, data: FetchTipsDTO):
return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
async def get_training_content(self, data: Dict):
return await self._service.get_training_content(data)

View File

@@ -8,7 +8,4 @@ class WritingController(IWritingController):
self._service = writing_service
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
try:
return await self._service.get_writing_task_general_question(task, topic, difficulty)
except Exception as e:
return str(e)
return await self._service.get_writing_task_general_question(task, topic, difficulty)

View File

@@ -1,19 +0,0 @@
from .listening import SaveListeningDTO
from .speaking import (
SaveSpeakingDTO, SpeakingGradeTask1And2DTO,
SpeakingGradeTask3DTO, SpeakingGenerateVideoDTO,
SpeakingGenerateInteractiveVideoDTO
)
from .training import TipsDTO
from .writing import WritingGradeTaskDTO
__all__ = [
"SaveListeningDTO",
"SaveSpeakingDTO",
"SpeakingGradeTask1And2DTO",
"SpeakingGradeTask3DTO",
"SpeakingGenerateVideoDTO",
"SpeakingGenerateInteractiveVideoDTO",
"TipsDTO",
"WritingGradeTaskDTO"
]

57
app/dtos/exam.py Normal file
View File

@@ -0,0 +1,57 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Union, Optional
from uuid import uuid4, UUID
class Option(BaseModel):
    """One selectable answer option of a multiple-choice question."""
    id: str  # option identifier referenced by MultipleChoiceQuestion.solution
    text: str  # display text of the option
class MultipleChoiceQuestion(BaseModel):
    """A single multiple-choice question with its options and correct answer."""
    id: str
    prompt: str  # question text shown to the user
    variant: str = "text"  # rendering variant; only "text" visible in this file
    solution: str  # id of the correct Option
    options: List[Option]
class MultipleChoiceExercise(BaseModel):
    """An exercise grouping several multiple-choice questions."""
    id: UUID = Field(default_factory=uuid4)  # fresh UUID per instance
    type: str = "multipleChoice"  # discriminator used when mapping raw dicts
    prompt: str = "Select the appropriate option."
    questions: List[MultipleChoiceQuestion]
    userSolutions: List = Field(default_factory=list)  # starts empty
class FillBlanksWord(BaseModel):
    """Candidate words for one blank: a mapping of option key to word."""
    id: str  # id of the blank these options belong to
    options: Dict[str, str]
class FillBlanksSolution(BaseModel):
    """The correct answer for one blank."""
    id: str  # id of the blank
    solution: str
class FillBlanksExercise(BaseModel):
    """A fill-in-the-blanks exercise over a text passage."""
    id: UUID = Field(default_factory=uuid4)  # fresh UUID per instance
    type: str = "fillBlanks"  # discriminator used when mapping raw dicts
    variant: str = "mc"  # presumably multiple-choice blanks — confirm with the frontend
    prompt: str = "Click a blank to select the appropriate word for it."
    text: str  # passage containing the blanks
    solutions: List[FillBlanksSolution]  # one correct word per blank
    words: List[FillBlanksWord]  # candidate options per blank
    userSolutions: List = Field(default_factory=list)  # starts empty
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
class Part(BaseModel):
    """One part of an exam: its exercises plus an optional context passage."""
    exercises: List[Exercise]
    context: Optional[str] = Field(default=None)  # e.g. a reading/listening passage
class Exam(BaseModel):
    """Top-level exam model: an ordered list of parts."""
    parts: List[Part]

View File

@@ -1,4 +1,5 @@
import random
import uuid
from typing import List, Dict
from pydantic import BaseModel
@@ -10,3 +11,4 @@ class SaveListeningDTO(BaseModel):
parts: List[Dict]
minTimer: int = MinTimers.LISTENING_MIN_TIMER_DEFAULT
difficulty: str = random.choice(EducationalContent.DIFFICULTIES)
id: str = str(uuid.uuid4())

29
app/dtos/sheet.py Normal file
View File

@@ -0,0 +1,29 @@
from pydantic import BaseModel
from typing import List, Dict, Union, Any, Optional
class Option(BaseModel):
    """One selectable answer option of a sheet multiple-choice question."""
    id: str
    text: str
class MultipleChoiceQuestion(BaseModel):
    """Answer-sheet view of a multiple-choice question (note: no solution field)."""
    type: str = "multipleChoice"  # discriminator used by ExamMapper.map_to_sheet
    id: str
    prompt: str
    variant: str = "text"
    options: List[Option]
class FillBlanksWord(BaseModel):
    """Answer-sheet view of a fill-blanks blank and its candidate words."""
    type: str = "fillBlanks"  # discriminator used by ExamMapper.map_to_sheet
    id: str
    options: Dict[str, str]
Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]
class Sheet(BaseModel):
    """A sheet of components; unknown component shapes pass through as dicts."""
    batch: Optional[int] = None  # presumably a batch/page index — confirm with callers
    components: List[Component]

View File

@@ -11,23 +11,31 @@ class SaveSpeakingDTO(BaseModel):
minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT
class SpeakingGradeTask1And2DTO(BaseModel):
class GradeSpeakingDTO(BaseModel):
question: str
answer: str
class SpeakingGradeTask3DTO(BaseModel):
answers: Dict
class GradeSpeakingAnswersDTO(BaseModel):
answers: List[Dict]
class SpeakingGenerateVideoDTO(BaseModel):
class GenerateVideo1DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
first_topic: str
second_topic: str
class GenerateVideo2DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
prompts: List[str] = []
suffix: str = ""
question: str
topic: str
class SpeakingGenerateInteractiveVideoDTO(BaseModel):
class GenerateVideo3DTO(BaseModel):
avatar: str = (random.choice(list(AvatarEnum))).value
questions: List[str]
topic: str

View File

@@ -1,8 +1,37 @@
from pydantic import BaseModel
from typing import List
class TipsDTO(BaseModel):
class FetchTipsDTO(BaseModel):
context: str
question: str
answer: str
correct_answer: str
class QueryDTO(BaseModel):
category: str
text: str
class DetailsDTO(BaseModel):
exam_id: str
date: int
performance_comment: str
detailed_summary: str
class WeakAreaDTO(BaseModel):
area: str
comment: str
class TrainingContentDTO(BaseModel):
details: List[DetailsDTO]
weak_areas: List[WeakAreaDTO]
queries: List[QueryDTO]
class TipsDTO(BaseModel):
tip_ids: List[str]

View File

@@ -1,11 +1,13 @@
from .io import IOHelper
from .text_helper import TextHelper
from .file import FileHelper
from .text import TextHelper
from .token_counter import count_tokens
from .exercises_helper import ExercisesHelper
from .exercises import ExercisesHelper
from .logger import LoggerHelper
__all__ = [
"IOHelper",
"FileHelper",
"TextHelper",
"count_tokens",
"ExercisesHelper"
"ExercisesHelper",
"LoggerHelper"
]

View File

@@ -4,7 +4,7 @@ import re
import string
from wonderwords import RandomWord
from .text_helper import TextHelper
from .text import TextHelper
class ExercisesHelper:
@@ -70,7 +70,12 @@ class ExercisesHelper:
random.shuffle(combined_array)
return combined_array
result = []
for i, word in enumerate(combined_array):
letter = chr(65 + i) # chr(65) is 'A'
result.append({"letter": letter, "word": word})
return result
@staticmethod
def fillblanks_build_solutions_array(words, start_id):
@@ -187,9 +192,58 @@ class ExercisesHelper:
@staticmethod
def fix_writing_overall(overall: float, task_response: dict):
if overall > max(task_response.values()) or overall < min(task_response.values()):
total_sum = sum(task_response.values())
average = total_sum / len(task_response.values())
grades = [category["grade"] for category in task_response.values()]
if overall > max(grades) or overall < min(grades):
total_sum = sum(grades)
average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
return overall
@staticmethod
def build_options(ideas):
options = []
letters = iter(string.ascii_uppercase)
for idea in ideas:
options.append({
"id": next(letters),
"sentence": idea["from"]
})
return options
@staticmethod
def build_sentences(ideas, start_id):
sentences = []
letters = iter(string.ascii_uppercase)
for idea in ideas:
sentences.append({
"solution": next(letters),
"sentence": idea["idea"]
})
random.shuffle(sentences)
for i, sentence in enumerate(sentences, start=start_id):
sentence["id"] = i
return sentences
@staticmethod
def randomize_mc_options_order(questions):
option_ids = ['A', 'B', 'C', 'D']
for question in questions:
# Store the original solution text
original_solution_text = next(
option['text'] for option in question['options'] if option['id'] == question['solution'])
# Shuffle the options
random.shuffle(question['options'])
# Update the option ids and find the new solution id
for idx, option in enumerate(question['options']):
option['id'] = option_ids[idx]
if option['text'] == original_solution_text:
question['solution'] = option['id']
return questions

95
app/helpers/file.py Normal file
View File

@@ -0,0 +1,95 @@
import datetime
from pathlib import Path
import base64
import io
import os
import shutil
import subprocess
from typing import Optional
import numpy as np
import pypandoc
from PIL import Image
import aiofiles
class FileHelper:
    """File-system housekeeping and document-conversion utilities."""

    @staticmethod
    def delete_files_older_than_one_day(directory: str):
        """Delete regular files in *directory* last modified more than one day ago.

        Files whose name contains "placeholder" are always kept.
        """
        current_time = datetime.datetime.now()
        for entry in os.scandir(directory):
            if not entry.is_file():
                continue
            file_path = Path(entry)
            file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
            time_difference = current_time - file_modified_time
            if time_difference.days > 1 and "placeholder" not in file_path.name:
                file_path.unlink()
                print(f"Deleted file: {file_path}")

    # Supposedly pandoc covers a wide range of file extensions; only tested with docx.
    @staticmethod
    def convert_file_to_pdf(input_path: str, output_path: str):
        """Convert *input_path* to a small-format PDF at *output_path* via pandoc."""
        pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
            '-V', 'geometry:paperwidth=5.5in',
            '-V', 'geometry:paperheight=8.5in',
            '-V', 'geometry:margin=0.5in',
            '-V', 'pagestyle=empty'
        ])

    @staticmethod
    def convert_file_to_html(input_path: str, output_path: str):
        """Convert *input_path* to HTML at *output_path* via pandoc."""
        pypandoc.convert_file(input_path, 'html', outputfile=output_path)

    @staticmethod
    def pdf_to_png(path_id: str):
        """Render ./tmp/<path_id>/exercises.pdf into page-*.png files via pdftoppm.

        Raises:
            Exception: when the pdftoppm subprocess exits non-zero.
        """
        to_png = f"pdftoppm -png exercises.pdf page"
        result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(
                f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")

    @staticmethod
    def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
        """Return True when the image has at most *image_threshold* non-white pixels."""
        with Image.open(io.BytesIO(image_bytes)) as img:
            img_gray = img.convert('L')  # grayscale so "white" is a single 255 value
            img_array = np.array(img_gray)
            non_white_pixels = np.sum(img_array < 255)
            return non_white_pixels <= image_threshold

    @classmethod
    async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
        """Read *image_path* asynchronously; return its base64 text, or None if blank."""
        async with aiofiles.open(image_path, "rb") as image_file:
            image_bytes = await image_file.read()
        if cls.is_page_blank(image_bytes, image_threshold):
            return None
        return base64.b64encode(image_bytes).decode('utf-8')

    @classmethod
    def b64_pngs(cls, path_id: str, files: list[str]):
        """Build OpenAI-style image_url message parts for non-blank PNGs in ./tmp/<path_id>.

        Bug fix: the previous implementation called the async ``_encode_image``
        without awaiting it, so ``b64_string`` was a (truthy) coroutine and every
        URL embedded a coroutine repr instead of base64 data. The files are now
        read synchronously so this method can stay synchronous for its callers.
        """
        png_messages = []
        for filename in files:
            with open(os.path.join(f'./tmp/{path_id}', filename), "rb") as image_file:
                image_bytes = image_file.read()
            if cls.is_page_blank(image_bytes):
                continue  # skip blank pages entirely
            b64_string = base64.b64encode(image_bytes).decode('utf-8')
            png_messages.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{b64_string}"
                }
            })
        return png_messages

    @staticmethod
    def remove_directory(path):
        """Best-effort recursive removal of *path*; errors are printed, never raised."""
        try:
            if os.path.exists(path) and os.path.isdir(path):
                shutil.rmtree(path)
        except Exception as e:
            print(f"An error occurred while trying to remove {path}: {str(e)}")

View File

@@ -1,20 +0,0 @@
import datetime
import os
from pathlib import Path
class IOHelper:
@staticmethod
def delete_files_older_than_one_day(directory: str):
current_time = datetime.datetime.now()
for entry in os.scandir(directory):
if entry.is_file():
file_path = Path(entry)
file_name = file_path.name
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
time_difference = current_time - file_modified_time
if time_difference.days > 1 and "placeholder" not in file_name:
file_path.unlink()
print(f"Deleted file: {file_path}")

23
app/helpers/logger.py Normal file
View File

@@ -0,0 +1,23 @@
import logging
from functools import wraps
class LoggerHelper:
    """Logging-related helpers."""

    @staticmethod
    def suppress_loggers():
        """Decorator factory that silences log records below ERROR for a call.

        While the wrapped function runs, the root logger level is raised to
        ERROR; the previous level is restored afterwards even if the call
        raises.
        """
        def decorator(func):
            @wraps(func)
            def inner(*call_args, **call_kwargs):
                root = logging.getLogger()
                saved_level = root.level
                root.setLevel(logging.ERROR)
                try:
                    return func(*call_args, **call_kwargs)
                finally:
                    # Restore whatever level was in effect before the call.
                    root.setLevel(saved_level)
            return inner
        return decorator

5
app/mappers/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
from .exam import ExamMapper
__all__ = [
"ExamMapper"
]

66
app/mappers/exam.py Normal file
View File

@@ -0,0 +1,66 @@
from typing import Dict, Any
from pydantic import ValidationError
from app.dtos.exam import (
MultipleChoiceExercise,
FillBlanksExercise,
Part, Exam
)
from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
class ExamMapper:
    """Maps raw dict responses (e.g. parsed LLM JSON) onto typed exam/sheet models."""

    @staticmethod
    def map_to_exam_model(response: Dict[str, Any]) -> Exam:
        """Build an ``Exam`` from ``response['parts']``.

        Raises:
            ValueError: for an exercise ``type`` other than 'multipleChoice'
                or 'fillBlanks'.

        Bug fix: the original raised ``pydantic.ValidationError(msg)``, but
        ``ValidationError`` cannot be constructed from a plain message string,
        so the raise itself would fail with a TypeError; ``ValueError`` is the
        appropriate exception here.
        """
        parts = []
        for part in response['parts']:
            exercises = []
            for exercise in part['exercises']:
                exercise_type = exercise['type']
                if exercise_type == 'multipleChoice':
                    exercises.append(MultipleChoiceExercise(**exercise))
                elif exercise_type == 'fillBlanks':
                    exercises.append(FillBlanksExercise(**exercise))
                else:
                    raise ValueError(f"Unknown exercise type: {exercise_type}")
            part_kwargs = {"exercises": exercises}
            context = part.get('context', None)
            if context is not None:
                # Only pass context when present so the model default applies otherwise.
                part_kwargs["context"] = context
            parts.append(Part(**part_kwargs))
        return Exam(parts=parts)

    @staticmethod
    def map_to_sheet(response: Dict[str, Any]) -> Sheet:
        """Build a ``Sheet`` from ``response['components']``.

        Known component types become their models; unknown components are
        passed through unchanged (``Sheet`` accepts plain dicts).
        """
        components = []
        for item in response["components"]:
            component_type = item["type"]
            if component_type == "multipleChoice":
                options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
                components.append(MultipleChoiceQuestion(
                    id=item["id"],
                    prompt=item["prompt"],
                    variant=item.get("variant", "text"),
                    options=options
                ))
            elif component_type == "fillBlanks":
                components.append(FillBlanksWord(
                    id=item["id"],
                    options=item["options"]
                ))
            else:
                components.append(item)
        return Sheet(components=components)

View File

@@ -11,3 +11,6 @@ class IDocumentStore(ABC):
async def get_all(self, collection: str):
pass
async def get_doc_by_id(self, collection: str, doc_id: str):
pass

View File

@@ -15,9 +15,9 @@ class Firestore(IDocumentStore):
update_time, document_ref = await collection_ref.add(item)
if document_ref:
self._logger.info(f"Document added with ID: {document_ref.id}")
return True, document_ref.id
return document_ref.id
else:
return False, None
return None
async def save_to_db_with_id(self, collection: str, item, id: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
@@ -26,9 +26,9 @@ class Firestore(IDocumentStore):
doc_snapshot = await document_ref.get()
if doc_snapshot.exists:
self._logger.info(f"Document added with ID: {document_ref.id}")
return True, document_ref.id
return document_ref.id
else:
return False, None
return None
async def get_all(self, collection: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
@@ -36,3 +36,12 @@ class Firestore(IDocumentStore):
async for doc in collection_ref.stream():
docs.append(doc.to_dict())
return docs
async def get_doc_by_id(self, collection: str, doc_id: str):
collection_ref: AsyncCollectionReference = self._client.collection(collection)
doc_ref: AsyncDocumentReference = collection_ref.document(doc_id)
doc = await doc_ref.get()
if doc.exists:
return doc.to_dict()
return None

View File

@@ -116,6 +116,16 @@ def setup_listeners(_app: FastAPI) -> None:
content={"error_code": exc.error_code, "message": exc.message},
)
@_app.exception_handler(Exception)
async def default_exception_handler(request: Request, exc: Exception):
"""
Don't delete request param
"""
return JSONResponse(
status_code=500,
content=str(exc),
)
def setup_middleware() -> List[Middleware]:
middleware = [
@@ -135,9 +145,10 @@ def setup_middleware() -> List[Middleware]:
def create_app() -> FastAPI:
env = os.getenv("ENV")
_app = FastAPI(
docs_url=None,
redoc_url=None,
docs_url="/docs" if env != "prod" else None,
redoc_url="/redoc" if env != "prod" else None,
middleware=setup_middleware(),
lifespan=lifespan
)

View File

@@ -5,6 +5,7 @@ from .speaking import ISpeakingService
from .reading import IReadingService
from .grade import IGradeService
from .training import ITrainingService
from .kb import IKnowledgeBase
from .third_parties import *
__all__ = [

View File

@@ -4,20 +4,10 @@ from typing import Dict, List
class IGradeService(ABC):
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass
@abstractmethod
async def calculate_grading_summary(self, extracted_sections: List):
pass
@abstractmethod
async def _calculate_section_grade_summary(self, section):
pass
@staticmethod
@abstractmethod
def _parse_openai_response(response):
pass
@staticmethod
@abstractmethod
def _parse_bullet_points(bullet_points_str, grade):
pass

10
app/services/abc/kb.py Normal file
View File

@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from typing import List, Dict
class IKnowledgeBase(ABC):
    """Abstract interface for querying a knowledge base."""

    @abstractmethod
    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Return up to *top_k* entries matching *query* within *category*."""
        pass

View File

@@ -1,10 +1,19 @@
from abc import ABC, abstractmethod
import random
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import EducationalContent
class ILevelService(ABC):
@abstractmethod
async def get_level_exam(self):
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
pass
@abstractmethod
@@ -12,13 +21,27 @@ class ILevelService(ABC):
pass
@abstractmethod
async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
async def get_custom_level(self, data: Dict):
pass
@abstractmethod
async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
async def upload_level(self, upload: UploadFile) -> Dict:
pass
@abstractmethod
async def _generate_single_mc_level_question(self):
async def gen_multiple_choice(
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
pass
@abstractmethod
async def gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
pass
@abstractmethod
async def gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
pass

View File

@@ -1,68 +1,18 @@
import queue
from abc import ABC, abstractmethod
from queue import Queue
from typing import Dict
from typing import Dict, List
class IListeningService(ABC):
@abstractmethod
async def generate_listening_question(self, section: int, topic: str) -> Dict:
pass
@abstractmethod
async def generate_listening_exercises(
self, section: int, dialog: str,
req_exercises: list[str], exercises_queue: Queue,
start_id: int, difficulty: str
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
pass
@abstractmethod
async def save_listening(self, parts, min_timer, difficulty):
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str) -> Dict:
pass
# ==================================================================================================================
# Helpers
# ==================================================================================================================
@abstractmethod
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
pass
@abstractmethod
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
pass
@abstractmethod
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
pass
@staticmethod
@abstractmethod
def _get_random_voice(gender: str):
pass
@abstractmethod
async def _gen_multiple_choice_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
):
pass
@abstractmethod
async def _gen_write_blanks_questions_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
):
pass
@abstractmethod
async def _gen_write_blanks_notes_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
):
pass
@abstractmethod
async def _gen_write_blanks_form_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
):
pass

View File

@@ -2,8 +2,6 @@ from abc import ABC, abstractmethod
from queue import Queue
from typing import List
from app.configs.constants import QuestionType
class IReadingService(ABC):
@@ -14,36 +12,11 @@ class IReadingService(ABC):
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str
):
pass
# ==================================================================================================================
# Helpers
# ==================================================================================================================
@abstractmethod
async def generate_reading_passage(self, q_type: QuestionType, topic: str):
pass
@abstractmethod
async def _generate_reading_exercises(
self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
difficulty: str,
start_id: int
):
pass
@abstractmethod
async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
pass
@abstractmethod
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
pass
@abstractmethod
async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
pass
@abstractmethod
async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
pass

View File

@@ -1,21 +1,17 @@
from abc import ABC, abstractmethod
from typing import List, Dict
from typing import List, Dict, Optional
class ISpeakingService(ABC):
@abstractmethod
async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
async def get_speaking_part(
self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
) -> Dict:
pass
@abstractmethod
async def grade_speaking_task_1_and_2(
self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
):
pass
@abstractmethod
async def grade_speaking_task_3(self, answers: Dict, task: int = 3):
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
pass
@abstractmethod
@@ -23,35 +19,11 @@ class ISpeakingService(ABC):
pass
@abstractmethod
async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
pass
@abstractmethod
async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
pass
# ==================================================================================================================
# Helpers
# ==================================================================================================================
@staticmethod
@abstractmethod
def _zero_rating(comment: str):
pass
@staticmethod
@abstractmethod
def _calculate_overall(response: Dict):
pass
@abstractmethod
async def _get_speaking_corrections(self, text):
pass
@abstractmethod
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
pass
@abstractmethod
async def _create_video(self, question: str, avatar: str, error_message: str):
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
pass

View File

@@ -1,6 +1,10 @@
from abc import ABC, abstractmethod
from typing import List, Optional
from typing import List, Optional, TypeVar, Callable
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
T = TypeVar('T', bound=BaseModel)
class ILLMService(ABC):
@@ -19,3 +23,16 @@ class ILLMService(ABC):
@abstractmethod
async def prediction_override(self, **kwargs):
pass
@abstractmethod
async def pydantic_prediction(
self,
messages: List[ChatCompletionMessageParam],
map_to_model: Callable,
json_scheme: str,
*,
model: Optional[str] = None,
temperature: Optional[float] = None,
max_retries: int = 3
) -> List[T] | T | None:
pass

View File

@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
from typing import Dict
class ITrainingService(ABC):
@@ -7,7 +9,6 @@ class ITrainingService(ABC):
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
pass
@staticmethod
@abstractmethod
def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
async def get_training_content(self, training_content: Dict) -> Dict:
pass

View File

@@ -1,6 +1,4 @@
from abc import ABC, abstractmethod
from typing import Dict
class IWritingService(ABC):
@@ -11,22 +9,3 @@ class IWritingService(ABC):
@abstractmethod
async def grade_writing_task(self, task: int, question: str, answer: str):
pass
# ==================================================================================================================
# Helpers
# ==================================================================================================================
@staticmethod
@abstractmethod
def _get_writing_prompt(task: int, topic: str, difficulty: str):
pass
@staticmethod
@abstractmethod
async def _get_fixed_text(self, text):
pass
@staticmethod
@abstractmethod
def _zero_rating(comment: str):
pass

View File

@@ -4,7 +4,7 @@ from .reading import ReadingService
from .speaking import SpeakingService
from .writing import WritingService
from .grade import GradeService
from .training import TrainingService
from .training import *
from .third_parties import *
__all__ = [
@@ -14,6 +14,6 @@ __all__ = [
"SpeakingService",
"WritingService",
"GradeService",
"TrainingService"
]
__all__.extend(third_parties.__all__)
__all__.extend(training.__all__)

View File

@@ -1,42 +1,47 @@
import json
from typing import List
import copy
from typing import List, Dict
from app.configs.constants import GPTModels, TemperatureSettings
from app.services.abc import ILLMService, IGradeService
class GradeService(IGradeService):
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
tools = [{
"type": "function",
"function": {
"name": "save_evaluation_and_suggestions",
"description": "Saves the evaluation and suggestions requested by input.",
"parameters": {
"type": "object",
"properties": {
"evaluation": {
"type": "string",
"description": "A comment on the IELTS section grade obtained in the specific section and what it could mean without suggestions.",
},
"suggestions": {
"type": "string",
"description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.",
},
"bullet_points": {
"type": "string",
"description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. ",
},
},
"required": ["evaluation", "suggestions"],
},
}
}]
def __init__(self, llm: ILLMService):
self._llm = llm
async def grade_short_answers(self, data: Dict):
json_format = {
"exercises": [
{
"id": 1,
"correct": True,
"correct_answer": " correct answer if wrong"
}
]
}
messages = [
{
"role": "system",
"content": f'You are a helpful assistant designed to output JSON on this format: {json_format}'
},
{
"role": "user",
"content": (
'Grade these answers according to the text content and write a correct answer if they are '
f'wrong. Text, questions and answers:\n {data}'
)
}
]
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
["exercises"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
async def calculate_grading_summary(self, extracted_sections: List):
ret = []
@@ -116,8 +121,8 @@ class GradeService(IGradeService):
)
}]
chat_config = copy.deepcopy(self.chat_config)
tools = copy.deepcopy(self.tools)
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
tools = self.get_tools()
res = await self._llm.prediction_override(
model="gpt-3.5-turbo",
@@ -154,3 +159,42 @@ class GradeService(IGradeService):
return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
else:
return []
@staticmethod
def get_tools():
return [
{
"type": "function",
"function": {
"name": "save_evaluation_and_suggestions",
"description": "Saves the evaluation and suggestions requested by input.",
"parameters": {
"type": "object",
"properties": {
"evaluation": {
"type": "string",
"description": (
"A comment on the IELTS section grade obtained in the specific section and what "
"it could mean without suggestions."
),
},
"suggestions": {
"type": "string",
"description": (
"A small paragraph text with suggestions on how to possibly get a better grade "
"than the one obtained."
),
},
"bullet_points": {
"type": "string",
"description": (
"Text with four bullet points to improve the english speaking ability. Only "
"include text for the bullet points separated by a paragraph."
),
},
},
"required": ["evaluation", "suggestions"],
},
}
}
]

View File

@@ -1,506 +0,0 @@
import json
import random
import uuid
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService
class LevelService(ILevelService):
def __init__(
    self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService
):
    """Store the injected LLM, document-store and reading-service dependencies."""
    self._llm = llm
    self._document_store = document_store
    self._reading_service = reading_service
async def get_level_exam(self):
    """Build the standard level exam: 25 multiple-choice questions, 25-minute timer."""
    number_of_exercises = 25
    exercises = await self._gen_multiple_choice_level(number_of_exercises)
    return {
        "exercises": [exercises],  # single exercise group containing all questions
        "isDiagnostic": False,
        "minTimer": 25,
        "module": "level"
    }
async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
    """Generate *quantity* multiple-choice questions for the level exam via the LLM.

    Retries recursively until the model returns exactly *quantity* questions,
    then de-duplicates each question against previously stored "level" exams
    before re-numbering ids from *start_id*.
    """
    gen_multiple_choice_for_text = (
        f'Generate {str(quantity)} multiple choice questions of 4 options for an english level exam, some easy '
        'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
        'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
        'punctuation. Make sure every question only has 1 correct answer.'
    )
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"questions": [{"id": "9", "options": '
                '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
                '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
                '"prompt": "Which of the following is a conjunction?", '
                '"solution": "A", "variant": "text"}]}'
            )
        },
        {
            "role": "user",
            "content": gen_multiple_choice_for_text
        }
    ]
    question = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    if len(question["questions"]) != quantity:
        # Wrong count from the model: retry from scratch (NOTE: unbounded recursion).
        return await self._gen_multiple_choice_level(quantity, start_id)
    else:
        # Compare against every stored level exam to avoid repeating questions.
        all_exams = await self._document_store.get_all("level")
        seen_keys = set()
        for i in range(len(question["questions"])):
            question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
                all_exams, question["questions"][i], question, seen_keys
            )
        return {
            "id": str(uuid.uuid4()),
            "prompt": "Select the appropriate option.",
            "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
            "type": "multipleChoice",
        }
async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
    """Return ``(exercise, seen_keys)``, regenerating the exercise (recursively)
    while it duplicates either the current batch or any stored exam."""
    # Extracting relevant fields for comparison
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
    # Check if the key is in the set
    if key in seen_keys:
        # Duplicate within the current batch: generate a fresh question and re-check it.
        return await self._replace_exercise_if_exists(
            all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
        )
    else:
        seen_keys.add(key)
        for exam in all_exams:
            exam_dict = exam.to_dict()
            # Same prompt plus any option text matching the stored exercise's first
            # option counts as a duplicate against persisted exams.
            if any(
                exercise["prompt"] == current_exercise["prompt"] and
                any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                    current_exercise["options"])
                for exercise in exam_dict.get("exercises", [])[0]["questions"]
            ):
                return await self._replace_exercise_if_exists(
                    all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
                )
        return current_exercise, seen_keys
async def _generate_single_mc_level_question(self):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
'{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}'
)
},
{
"role": "user",
"content": (
'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
'intermediate or advanced.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
    async def get_level_utas(self):
        """Assemble the five-part UTAS level exam (old, pre-refactor variant).

        Parts 1-3: blank-space multiple choice (15 questions each, ids 1-45),
        part 4: underlined-error multiple choice (ids 46-60),
        parts 5-6: two blank-space ("cloze") texts (ids 61-86),
        part 7: a reading passage with short-answer and MC questions (ids 87+).

        The ``print(json.dumps(...))`` calls are debug output; presumably left
        in for manual inspection of generated exams -- TODO confirm/remove.
        """
        # Formats
        mc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the correct word or group of words that completes the sentences.",
            "questions": None,
            "type": "multipleChoice",
            "part": 1
        }
        umc = {
            "id": str(uuid.uuid4()),
            "prompt": "Choose the underlined word or group of words that is not correct.",
            "questions": None,
            "type": "multipleChoice",
            "part": 2
        }
        bs_1 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 3
        }
        bs_2 = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and write the correct word for each space.",
            "questions": None,
            "type": "blankSpaceText",
            "part": 4
        }
        reading = {
            "id": str(uuid.uuid4()),
            "prompt": "Read the text and answer the questions below.",
            "questions": None,
            "type": "readingExercises",
            "part": 5
        }
        # Each generated part is appended so later parts can de-duplicate against it.
        all_mc_questions = []
        # PART 1
        mc_exercises1 = await self._gen_multiple_choice_blank_space_utas(15, 1, all_mc_questions)
        print(json.dumps(mc_exercises1, indent=4))
        all_mc_questions.append(mc_exercises1)
        # PART 2
        mc_exercises2 = await self._gen_multiple_choice_blank_space_utas(15, 16, all_mc_questions)
        print(json.dumps(mc_exercises2, indent=4))
        all_mc_questions.append(mc_exercises2)
        # PART 3
        mc_exercises3 = await self._gen_multiple_choice_blank_space_utas(15, 31, all_mc_questions)
        print(json.dumps(mc_exercises3, indent=4))
        all_mc_questions.append(mc_exercises3)
        # Parts 1-3 are flattened into a single 45-question MC section.
        mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
        print(json.dumps(mc_exercises, indent=4))
        mc["questions"] = mc_exercises
        # Underlined mc
        underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
        print(json.dumps(underlined_mc, indent=4))
        umc["questions"] = underlined_mc
        # Blank Space text 1
        blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
        print(json.dumps(blank_space_text_1, indent=4))
        bs_1["questions"] = blank_space_text_1
        # Blank Space text 2
        blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
        print(json.dumps(blank_space_text_2, indent=4))
        bs_2["questions"] = blank_space_text_2
        # Reading text
        reading_text = await self._gen_reading_passage_utas(87, 10, 4)
        print(json.dumps(reading_text, indent=4))
        reading["questions"] = reading_text
        return {
            "exercises": {
                "blankSpaceMultipleChoice": mc,
                "underlinedMultipleChoice": umc,
                "blankSpaceText1": bs_1,
                "blankSpaceText2": bs_2,
                "readingExercises": reading,
            },
            "isDiagnostic": False,
            "minTimer": 25,
            "module": "level"
        }
    async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
        """Generate ``quantity`` blank-space multiple-choice questions for the UTAS exam.

        ``all_exams`` holds parts generated earlier in the same run and is used
        for de-duplication.  Returns the LLM response with question ids rebased
        to start at ``start_id``.
        """
        gen_multiple_choice_for_text = (
            f'Generate {str(quantity)} multiple choice blank space questions of 4 options for an english '
            'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure '
            'that the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
            'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
        )
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on this format: '
                    '{"questions": [{"id": "9", "options": [{"id": "A", "text": '
                    '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                    '"Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}]}')
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text
            }
        ]
        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        if len(question["questions"]) != quantity:
            # Model returned the wrong number of questions: regenerate.
            # NOTE(review): the retry goes through _gen_multiple_choice_level (the
            # non-UTAS variant) and drops ``all_exams`` -- confirm this is intended.
            return await self._gen_multiple_choice_level(quantity, start_id)
        else:
            seen_keys = set()
            for i in range(len(question["questions"])):
                # De-duplicate each question against the earlier parts of this run.
                question["questions"][i], seen_keys = await self._replace_exercise_if_exists_utas(
                    all_exams,
                    question["questions"][i],
                    question,
                    seen_keys
                )
            return ExercisesHelper.fix_exercise_ids(question, start_id)
async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return self._replace_exercise_if_exists_utas(
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
)
else:
seen_keys.add(key)
for exam in all_exams:
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return self._replace_exercise_if_exists_utas(
all_exams, await self._generate_single_mc_level_question(), current_exam, seen_keys
)
return current_exercise, seen_keys
async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
json_format = {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
gen_multiple_choice_for_text = (
f'Generate {str(quantity)} multiple choice questions of 4 options for an english '
'level exam, some easy questions, some intermediate questions and some advanced questions. Ensure that '
'the questions cover a range of topics such as verb tense, subject-verb agreement, pronoun usage, '
'sentence structure, and punctuation. Make sure every question only has 1 correct answer.'
)
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": gen_multiple_choice_for_text
},
{
"role": "user",
"content": (
'The type of multiple choice is the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != quantity:
return await self._gen_multiple_choice_level(quantity, start_id)
else:
return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]
async def _gen_blank_space_text_utas(
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
json_format = {
"question": {
"words": [
{
"id": "1",
"text": "a"
},
{
"id": "2",
"text": "b"
},
{
"id": "3",
"text": "c"
},
{
"id": "4",
"text": "d"
}
],
"text": "text"
}
}
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": f'Generate a text of at least {str(size)} words about the topic {topic}.'
},
{
"role": "user",
"content": (
f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
'The ids must be ordered throughout the text and the words must be replaced only once. Put '
'the removed words and respective ids on the words array of the json in the correct order.'
)
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question["question"]
async def _gen_reading_passage_utas(
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
):
passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
"content": passage["text"],
"title": passage["title"]
}
}
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": (
'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
'maximum 3 words per answer, about this text:\n"' + text + '"')
},
{
"role": "user",
"content": 'The id starts at ' + str(start_id) + '.'
}
]
return (
await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
)["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
json_format = {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
messages = [
{
"role": "system",
"content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
},
{
"role": "user",
"content": 'Generate ' + str(
mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
},
{
"role": "user",
"content": 'Make sure every question only has 1 correct answer.'
}
]
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if len(question["questions"]) != mc_quantity:
return await self._gen_multiple_choice_level(mc_quantity, start_id)
else:
return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]

View File

@@ -0,0 +1,5 @@
from .level import LevelService
__all__ = [
"LevelService"
]

View File

@@ -0,0 +1,335 @@
import queue
import random
from typing import Dict
from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
from app.services.abc import (
ILLMService, ILevelService, IReadingService,
IWritingService, IListeningService, ISpeakingService
)
class CustomLevelModule:
    """Assembles a fully custom "level" exam from a request payload.

    Each requested exercise is described by ``exercise_<i>_*`` keys in the
    payload and is generated by delegating to the matching service (level,
    reading, listening, writing or speaking).
    """

    def __init__(
        self,
        llm: ILLMService,
        level: ILevelService,
        reading: IReadingService,
        listening: IListeningService,
        writing: IWritingService,
        speaking: ISpeakingService
    ):
        # Collaborating services; all generation work is delegated to these.
        self._llm = llm
        self._level = level
        self._reading = reading
        self._listening = listening
        self._writing = writing
        self._speaking = speaking

    @staticmethod
    def _build_section_plan(sections):
        """Return ``(names, qty_queue, total_qty)`` for a reading/listening part.

        ``sections`` is an ordered list of ``(exercise_name, quantity)`` pairs;
        a quantity of -1 means "not requested" and the pair is skipped.  The
        FIFO queue holds the quantities in the same order as the names.
        """
        names = []
        qty_queue = queue.Queue()
        total_qty = 0
        for name, qty in sections:
            if qty != -1:
                names.append(name)
                qty_queue.put(qty)
                total_qty += qty
        return names, qty_queue, total_qty

    async def _append_mc_questions(self, mc_variant, remaining, exercise_id, bucket):
        """Append ``remaining`` multiple-choice questions of ``mc_variant`` to ``bucket``.

        Questions are requested in batches of at most 15 (mirroring the original
        chunking); ``bucket`` is also passed to the generator as the
        de-duplication context.  Returns the next free exercise id.
        """
        while remaining > 0:
            batch = min(remaining, 15)
            mc_response = await self._level.gen_multiple_choice(
                mc_variant, batch, exercise_id, utas=True, all_exams=bucket
            )
            bucket.extend(mc_response["questions"])
            exercise_id += batch
            remaining -= batch
        return exercise_id

    # TODO: I've changed this to retrieve the args from the body request and not request query args
    async def get_custom_level(self, data: Dict):
        """Build a custom exam described by ``data``.

        ``data`` must contain ``nr_exercises`` and, for each i in
        1..nr_exercises, an ``exercise_<i>_type`` plus optional per-type
        parameters (difficulty, topic, quantities...). Question ids are
        assigned sequentially across all exercises.
        """
        nr_exercises = int(data.get('nr_exercises'))
        exercise_id = 1
        response = {
            "exercises": {},
            "module": "level"
        }
        for i in range(1, nr_exercises + 1):
            # Per-exercise request parameters (all optional except the type).
            exercise_type = data.get(f'exercise_{i}_type')
            exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
            exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
            exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
            exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
            exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
            exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
            exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
            exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
            exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
            exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
            exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
            exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
            exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
            exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
            exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
            exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
            slot = f"exercise_{i}"
            if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
                response["exercises"][slot] = {"questions": [], "type": "multipleChoice"}
                exercise_id = await self._append_mc_questions(
                    "normal", exercise_qty, exercise_id, response["exercises"][slot]["questions"]
                )
            elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
                response["exercises"][slot] = {"questions": [], "type": "multipleChoice"}
                exercise_id = await self._append_mc_questions(
                    "blank_space", exercise_qty, exercise_id, response["exercises"][slot]["questions"]
                )
            elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
                response["exercises"][slot] = {"questions": [], "type": "multipleChoice"}
                exercise_id = await self._append_mc_questions(
                    "underline", exercise_qty, exercise_id, response["exercises"][slot]["questions"]
                )
            elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
                response["exercises"][slot] = await self._level.gen_blank_space_text_utas(
                    exercise_qty, exercise_id, exercise_text_size
                )
                response["exercises"][slot]["type"] = "blankSpaceText"
                exercise_id = exercise_id + exercise_qty
            elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
                response["exercises"][slot] = await self._level.gen_reading_passage_utas(
                    exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
                )
                response["exercises"][slot]["type"] = "readingExercises"
                # NOTE(review): increments by ``exercise_qty`` (default -1) although this part is
                # sized by exercise_sa_qty + exercise_mc_qty -- confirm the intended increment.
                exercise_id = exercise_id + exercise_qty
            elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
                response["exercises"][slot] = await self._writing.get_writing_task_general_question(
                    1, exercise_topic, exercise_difficulty
                )
                response["exercises"][slot]["type"] = "writing"
                exercise_id = exercise_id + 1
            elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
                response["exercises"][slot] = await self._writing.get_writing_task_general_question(
                    2, exercise_topic, exercise_difficulty
                )
                response["exercises"][slot]["type"] = "writing"
                exercise_id = exercise_id + 1
            elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
                response["exercises"][slot] = await self._speaking.get_speaking_part(
                    1, exercise_topic, exercise_difficulty, exercise_topic_2
                )
                response["exercises"][slot]["type"] = "interactiveSpeaking"
                exercise_id = exercise_id + 1
            elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
                response["exercises"][slot] = await self._speaking.get_speaking_part(
                    2, exercise_topic, exercise_difficulty
                )
                response["exercises"][slot]["type"] = "speaking"
                exercise_id = exercise_id + 1
            elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
                response["exercises"][slot] = await self._speaking.get_speaking_part(
                    3, exercise_topic, exercise_difficulty
                )
                response["exercises"][slot]["type"] = "interactiveSpeaking"
                exercise_id = exercise_id + 1
            elif exercise_type in (
                CustomLevelExerciseTypes.READING_1.value, CustomLevelExerciseTypes.READING_2.value
            ):
                part = 1 if exercise_type == CustomLevelExerciseTypes.READING_1.value else 2
                exercises, exercise_qty_q, total_qty = self._build_section_plan([
                    ('fillBlanks', exercise_fillblanks_qty),
                    ('writeBlanks', exercise_writeblanks_qty),
                    ('trueFalse', exercise_truefalse_qty),
                    ('paragraphMatch', exercise_paragraphmatch_qty),
                ])
                response["exercises"][slot] = await self._reading.gen_reading_passage(
                    part, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
                )
                response["exercises"][slot]["type"] = "reading"
                exercise_id = exercise_id + total_qty
            elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
                exercises, exercise_qty_q, total_qty = self._build_section_plan([
                    ('fillBlanks', exercise_fillblanks_qty),
                    ('writeBlanks', exercise_writeblanks_qty),
                    ('trueFalse', exercise_truefalse_qty),
                    ('paragraphMatch', exercise_paragraphmatch_qty),
                    ('ideaMatch', exercise_ideamatch_qty),
                ])
                # BUG FIX: parts 1 and 2 call gen_reading_passage(..., qty_queue, difficulty, id);
                # part 3 previously passed the last two arguments in swapped order.
                response["exercises"][slot] = await self._reading.gen_reading_passage(
                    3, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
                )
                response["exercises"][slot]["type"] = "reading"
                exercise_id = exercise_id + total_qty
            elif exercise_type in (
                CustomLevelExerciseTypes.LISTENING_1.value, CustomLevelExerciseTypes.LISTENING_4.value
            ):
                part = 1 if exercise_type == CustomLevelExerciseTypes.LISTENING_1.value else 4
                exercises, exercise_qty_q, total_qty = self._build_section_plan([
                    ('multipleChoice', exercise_mc_qty),
                    ('writeBlanksQuestions', exercise_writeblanksquestions_qty),
                    ('writeBlanksFill', exercise_writeblanksfill_qty),
                    ('writeBlanksForm', exercise_writeblanksform_qty),
                ])
                response["exercises"][slot] = await self._listening.get_listening_question(
                    part, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
                )
                response["exercises"][slot]["type"] = "listening"
                exercise_id = exercise_id + total_qty
            elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
                exercises, exercise_qty_q, total_qty = self._build_section_plan([
                    ('multipleChoice', exercise_mc_qty),
                    ('writeBlanksQuestions', exercise_writeblanksquestions_qty),
                ])
                response["exercises"][slot] = await self._listening.get_listening_question(
                    2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
                )
                response["exercises"][slot]["type"] = "listening"
                exercise_id = exercise_id + total_qty
            elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
                exercises, exercise_qty_q, total_qty = self._build_section_plan([
                    ('multipleChoice3Options', exercise_mc3_qty),
                    ('writeBlanksQuestions', exercise_writeblanksquestions_qty),
                ])
                response["exercises"][slot] = await self._listening.get_listening_question(
                    3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
                )
                response["exercises"][slot]["type"] = "listening"
                exercise_id = exercise_id + total_qty
        return response

View File

@@ -0,0 +1,417 @@
import json
import random
import uuid
from typing import Dict
from fastapi import UploadFile
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
from app.helpers import ExercisesHelper
from app.repositories.abc import IDocumentStore
from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
IListeningService
from .custom import CustomLevelModule
from .upload import UploadLevelModule
class LevelService(ILevelService):
    def __init__(
        self,
        llm: ILLMService,
        document_store: IDocumentStore,
        mc_variants: Dict,
        reading_service: IReadingService,
        writing_service: IWritingService,
        speaking_service: ISpeakingService,
        listening_service: IListeningService
    ):
        """Wire up the level service and its custom/upload sub-modules.

        ``mc_variants`` maps a variant name ("normal", "blank_space",
        "underline", "blank_space_text") to the JSON template shown to the LLM.
        """
        self._llm = llm
        self._document_store = document_store
        self._reading_service = reading_service
        # The custom-exam module reuses this service (passed as ``level``) plus the
        # other skill services to build arbitrary exam layouts.
        self._custom_module = CustomLevelModule(
            llm, self, reading_service, listening_service, writing_service, speaking_service
        )
        self._upload_module = UploadLevelModule(llm)
        # TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
        # mc_variants are stored in ./mc_variants.json
        self._mc_variants = mc_variants
    async def upload_level(self, upload: UploadFile) -> Dict:
        """Generate a level exam from an uploaded file (delegates to the upload module)."""
        return await self._upload_module.generate_level_from_file(upload)
    async def get_custom_level(self, data: Dict):
        """Build a fully custom exam from request payload ``data`` (delegates to the custom module)."""
        return await self._custom_module.get_custom_level(data)
async def get_level_exam(
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
exercises = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)
return {
"exercises": [exercises],
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
# Formats
mc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the correct word or group of words that completes the sentences.",
"questions": None,
"type": "multipleChoice",
"part": 1
}
umc = {
"id": str(uuid.uuid4()),
"prompt": "Choose the underlined word or group of words that is not correct.",
"questions": None,
"type": "multipleChoice",
"part": 2
}
bs_1 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 3
}
bs_2 = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and write the correct word for each space.",
"questions": None,
"type": "blankSpaceText",
"part": 4
}
reading = {
"id": str(uuid.uuid4()),
"prompt": "Read the text and answer the questions below.",
"questions": None,
"type": "readingExercises",
"part": 5
}
all_mc_questions = []
# PART 1
# await self._gen_multiple_choice("normal", number_of_exercises, utas=False)
mc_exercises1 = await self.gen_multiple_choice(
"blank_space", 15, 1, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises1, indent=4))
all_mc_questions.append(mc_exercises1)
# PART 2
mc_exercises2 = await self.gen_multiple_choice(
"blank_space", 15, 16, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises2, indent=4))
all_mc_questions.append(mc_exercises2)
# PART 3
mc_exercises3 = await self.gen_multiple_choice(
"blank_space", 15, 31, utas=True, all_exams=all_mc_questions
)
print(json.dumps(mc_exercises3, indent=4))
all_mc_questions.append(mc_exercises3)
mc_exercises = mc_exercises1['questions'] + mc_exercises2['questions'] + mc_exercises3['questions']
print(json.dumps(mc_exercises, indent=4))
mc["questions"] = mc_exercises
# Underlined mc
underlined_mc = await self.gen_multiple_choice(
"underline", 15, 46, utas=True, all_exams=all_mc_questions
)
print(json.dumps(underlined_mc, indent=4))
umc["questions"] = underlined_mc
# Blank Space text 1
blank_space_text_1 = await self.gen_blank_space_text_utas(12, 61, 250)
print(json.dumps(blank_space_text_1, indent=4))
bs_1["questions"] = blank_space_text_1
# Blank Space text 2
blank_space_text_2 = await self.gen_blank_space_text_utas(14, 73, 350)
print(json.dumps(blank_space_text_2, indent=4))
bs_2["questions"] = blank_space_text_2
# Reading text
reading_text = await self.gen_reading_passage_utas(87, 10, 4)
print(json.dumps(reading_text, indent=4))
reading["questions"] = reading_text
return {
"exercises": {
"blankSpaceMultipleChoice": mc,
"underlinedMultipleChoice": umc,
"blankSpaceText1": bs_1,
"blankSpaceText2": bs_2,
"readingExercises": reading,
},
"isDiagnostic": diagnostic,
"minTimer": min_timer,
"module": "level"
}
    async def gen_multiple_choice(
        self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
    ):
        """Generate ``quantity`` multiple-choice questions of the given variant.

        ``mc_variant`` selects the JSON template shown to the LLM ("normal",
        "blank_space" or "underline"); templates come from ``self._mc_variants``.

        With ``utas=False``, questions are de-duplicated against every stored
        "level" exam document and wrapped in a full exercise dict.  With
        ``utas=True``, de-duplication runs only against the caller-supplied
        ``all_exams`` (if any) and the raw response -- a dict with a
        "questions" key, ids rebased and option order shuffled -- is returned.
        """
        mc_template = self._mc_variants[mc_variant]
        # "blank_space" adds extra wording into the user prompt below.
        blank_mod = " blank space " if mc_variant == "blank_space" else " "
        gen_multiple_choice_for_text: str = (
            'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )
        messages = [
            {
                "role": "system",
                "content": (
                    f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
                )
            },
            {
                "role": "user",
                "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
            }
        ]
        if mc_variant == "underline":
            # Extra instructions + worked example for the underlined-error variant.
            messages.append({
                "role": "user",
                "content": (
                    'The type of multiple choice in the prompt has wrong words or group of words and the options '
                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
                    'the boss <u>is</u> nice."\n'
                    'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
                )
            })
        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        if len(question["questions"]) != quantity:
            # Model returned the wrong number of questions: retry from scratch.
            return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)
        else:
            if not utas:
                # De-duplicate against every previously stored level exam document.
                all_exams = await self._document_store.get_all("level")
                seen_keys = set()
                for i in range(len(question["questions"])):
                    question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
                        all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
                    )
                return {
                    "id": str(uuid.uuid4()),
                    "prompt": "Select the appropriate option.",
                    "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
                    "type": "multipleChoice",
                }
            else:
                if all_exams is not None:
                    # De-duplicate only against the parts the caller already generated.
                    seen_keys = set()
                    for i in range(len(question["questions"])):
                        question["questions"][i], seen_keys = await self._replace_exercise_if_exists(
                            all_exams, question["questions"][i], question, seen_keys, mc_variant, utas
                        )
                response = ExercisesHelper.fix_exercise_ids(question, start_id)
                response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
                return response
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
mc_template = self._mc_variants[mc_variant]["questions"][0]
blank_mod = " blank space " if mc_variant == "blank_space" else " "
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
)
},
{
"role": "user",
"content": (
f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
f'it can be easy, intermediate or advanced.'
)
}
]
if mc_variant == "underline":
messages.append({
"role": "user",
"content": (
'The type of multiple choice in the prompt has wrong words or group of words and the options '
'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
'the boss <u>is</u> nice."\n'
'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
)
})
question = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
return question
async def _replace_exercise_if_exists(
    self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
    """Deduplicate a generated MC exercise against this exam and stored exams.

    If ``current_exercise`` duplicates one already seen in this generation run
    (``seen_keys``) or one persisted in ``all_exams``, a replacement question is
    generated and re-checked recursively; otherwise the exercise is returned
    unchanged. Returns ``(exercise, seen_keys)``.

    NOTE(review): the recursion has no depth bound — if the LLM keeps producing
    duplicates this can recurse indefinitely. TODO confirm a retry cap is not
    needed.
    """
    # Extracting relevant fields for comparison: prompt plus the sorted option
    # texts form the identity of an exercise within this generation run.
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
    # Check if the key is in the set
    if key in seen_keys:
        # Already produced in this run: regenerate and re-validate the new one.
        return await self._replace_exercise_if_exists(
            all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam, seen_keys,
            mc_variant, utas
        )
    else:
        seen_keys.add(key)
    if not utas:
        # Non-UTAS: stored exams are document-store objects; duplicates live under
        # parts[0].exercises[0].questions.
        for exam in all_exams:
            exam_dict = exam.to_dict()
            if len(exam_dict.get("parts", [])) > 0:
                exercise_dict = exam_dict.get("parts", [])[0]
                if len(exercise_dict.get("exercises", [])) > 0:
                    # NOTE(review): only the FIRST option's text of each stored
                    # exercise is compared against the candidate's options —
                    # presumably a cheap similarity check; confirm this is intended
                    # rather than comparing all options.
                    if any(
                        exercise["prompt"] == current_exercise["prompt"] and
                        any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                            current_exercise["options"])
                        for exercise in exercise_dict.get("exercises", [])[0]["questions"]
                    ):
                        return await self._replace_exercise_if_exists(
                            all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
                            seen_keys, mc_variant, utas
                        )
    else:
        # UTAS: stored exams are plain dicts with a top-level "questions" list.
        for exam in all_exams:
            if any(
                exercise["prompt"] == current_exercise["prompt"] and
                any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                    current_exercise["options"])
                for exercise in exam.get("questions", [])
            ):
                return await self._replace_exercise_if_exists(
                    all_exams, await self._generate_single_multiple_choice(mc_variant), current_exam,
                    seen_keys, mc_variant, utas
                )
    # No duplicate found anywhere: keep the exercise as-is.
    return current_exercise, seen_keys
async def gen_blank_space_text_utas(
    self, quantity: int, start_id: int, size: int, topic: str = None
):
    """Generate a UTAS fill-the-blanks text exercise via the LLM.

    :param quantity: how many words to blank out of the generated text.
    :param start_id: first id used for the blanks ({{id}} placeholders).
    :param size: minimum word count of the generated text.
    :param topic: topic for the text; a random MTI topic is picked when omitted.
    :return: the "question" object from the LLM response (text + removed words).
    """
    # BUG FIX: the default used to be `topic=random.choice(EducationalContent.MTI_TOPICS)`,
    # which Python evaluates ONCE at function definition, so every call without an
    # explicit topic reused the same "random" topic. Pick a fresh one per call.
    if topic is None:
        topic = random.choice(EducationalContent.MTI_TOPICS)
    json_template = self._mc_variants["blank_space_text"]
    messages = [
        {
            "role": "system",
            "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
        },
        {
            "role": "user",
            "content": f'Generate a text of at least {size} words about the topic {topic}.'
        },
        {
            "role": "user",
            "content": (
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'The ids must be ordered throughout the text and the words must be replaced only once. '
                'Put the removed words and respective ids on the words array of the json in the correct order.'
            )
        }
    ]
    question = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    return question["question"]
async def gen_reading_passage_utas(
    self, start_id, sa_quantity: int, mc_quantity: int, topic: str = None
):
    """Generate a UTAS reading exercise: passage + short answers + multiple choice.

    :param start_id: first question id; short answers take ids first, MC follows.
    :param sa_quantity: number of short-answer questions.
    :param mc_quantity: number of multiple-choice questions.
    :param topic: passage topic; a random MTI topic is picked when omitted.
    """
    # BUG FIX: the default used to be `topic=random.choice(EducationalContent.MTI_TOPICS)`,
    # evaluated once at import time — all calls shared one topic. Choose per call.
    if topic is None:
        topic = random.choice(EducationalContent.MTI_TOPICS)
    # Part 1 passage (easiest difficulty tier of the reading service).
    passage = await self._reading_service.generate_reading_passage(1, topic)
    short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
    # MC ids continue after the short-answer ids.
    mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
    return {
        "exercises": {
            "shortAnswer": short_answer,
            "multipleChoice": mc_exercises,
        },
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        }
    }
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
    """Ask the LLM for `sa_quantity` short-answer questions about `text`.

    Ids are numbered from `start_id`; each answer is capped at 3 words by the
    prompt. Returns the parsed "questions" list.
    """
    schema = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
    prompt_parts = [
        ("system", f'You are a helpful assistant designed to output JSON on this format: {schema}'),
        ("user", (
            f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
            f'maximum 3 words per answer, about this text:\n"{text}"'
        )),
        ("user", f'The id starts at {start_id}.'),
    ]
    messages = [{"role": role, "content": content} for role, content in prompt_parts]
    result = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    return result["questions"]
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
    """Ask the LLM for `mc_quantity` 4-option MC questions about `text`.

    Retries when the model returns the wrong number of questions; on success,
    ids are renumbered from `start_id` and option order is randomized.
    """
    json_template = self._mc_variants["text_mc_utas"]
    messages = [
        {
            "role": "system",
            "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
        },
        {
            "role": "user",
            "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
        },
        {
            "role": "user",
            "content": 'Make sure every question only has 1 correct answer.'
        }
    ]
    question = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    if len(question["questions"]) != mc_quantity:
        # BUG FIX: the retry used to pass (text, mc_quantity, start_id), swapping
        # start_id and mc_quantity relative to the signature — retried calls then
        # requested `start_id` questions numbered from `mc_quantity`.
        return await self._gen_text_multiple_choice_utas(text, start_id, mc_quantity)
    else:
        response = ExercisesHelper.fix_exercise_ids(question, start_id)
        response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
        return response

View File

@@ -0,0 +1,137 @@
{
"normal": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"blank_space": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "And"
},
{
"id": "B",
"text": "Cat"
},
{
"id": "C",
"text": "Happy"
},
{
"id": "D",
"text": "Jump"
}
],
"prompt": "Which of the following is a conjunction?",
"solution": "A",
"variant": "text"
}
]
},
"underline": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
},
"blank_space_text": {
"question": {
"words": [
{
"id": "1",
"text": "a"
},
{
"id": "2",
"text": "b"
},
{
"id": "3",
"text": "c"
},
{
"id": "4",
"text": "d"
}
],
"text": "text"
}
},
"text_mc_utas": {
"questions": [
{
"id": "9",
"options": [
{
"id": "A",
"text": "a"
},
{
"id": "B",
"text": "b"
},
{
"id": "C",
"text": "c"
},
{
"id": "D",
"text": "d"
}
],
"prompt": "prompt",
"solution": "A",
"variant": "text"
}
]
}
}

View File

@@ -0,0 +1,404 @@
import aiofiles
import os
import uuid
from logging import getLogger
from typing import Dict, Any, Tuple, Coroutine
import pdfplumber
from fastapi import UploadFile
from app.services.abc import ILLMService
from app.helpers import LoggerHelper, FileHelper
from app.mappers import ExamMapper
from app.dtos.exam import Exam
from app.dtos.sheet import Sheet
class UploadLevelModule:
    """Turns an uploaded exercise-sheet file into a level-exam JSON structure.

    Converts the upload to PDF, then either scrapes an HTML conversion (text-only
    sheets) or OCR-scans page PNGs (sheets with images) through the LLM.
    """

    def __init__(self, openai: ILLMService):
        # Module-scoped logger for tracing the upload pipeline.
        self._logger = getLogger(__name__)
        # LLM backend used by both the HTML-scrape and PNG-OCR paths.
        self._llm = openai
# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
# firestore, return the id right away, in generation view poll for the id
async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
    """Convert an uploaded sheet into a level-exam dict, or None on failure.

    Pipeline: save upload -> convert to PDF -> if the PDF has images, OCR the
    pages as PNGs; otherwise convert to HTML and scrape it. The temp directory
    is removed afterwards and question ids are renumbered sequentially.
    """
    ext, path_id = await self._save_upload(file)
    FileHelper.convert_file_to_pdf(
        f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.pdf'
    )
    file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
    if not file_has_images:
        # HTML conversion is only needed (and only reliable) for text-only sheets.
        FileHelper.convert_file_to_html(f'./tmp/{path_id}/uploaded.{ext}', f'./tmp/{path_id}/exercises.html')
    completion: Coroutine[Any, Any, Exam] = (
        self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
    )
    response = await completion
    # Clean up the per-upload temp directory regardless of which path ran.
    FileHelper.remove_directory(f'./tmp/{path_id}')
    if response:
        return self.fix_ids(response.dict(exclude_none=True))
    return None
@staticmethod
@LoggerHelper.suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool:
    """Return True when any page of the PDF at `pdf_path` contains images."""
    with pdfplumber.open(pdf_path) as pdf:
        # any() short-circuits on the first page that has images.
        return any(page.images for page in pdf.pages)
@staticmethod
async def _save_upload(file: UploadFile) -> Tuple[str, str]:
    """Persist an upload under a fresh ./tmp/<uuid>/ directory.

    :param file: the incoming multipart upload.
    :return: (extension, path_id) so callers can locate the saved file.
    """
    ext = file.filename.split('.')[-1]
    path_id = str(uuid.uuid4())
    os.makedirs(f'./tmp/{path_id}', exist_ok=True)
    tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
    file_bytes: bytes = await file.read()
    # FIX: the handle used to be bound to `file`, shadowing the UploadFile
    # parameter inside the `with` block; use a distinct name.
    async with aiofiles.open(tmp_filename, 'wb') as out_file:
        await out_file.write(file_bytes)
    return ext, path_id
def _level_json_schema(self):
    """Skeleton of the whole-level JSON the LLM must produce: parts -> exercises."""
    part_template = {
        "context": "<this attribute is optional you may exclude it if not required>",
        "exercises": [
            self._multiple_choice_html(),
            self._passage_blank_space_html()
        ]
    }
    return {"parts": [part_template]}
async def _html_completion(self, path_id: str) -> Exam:
    """Scrape the converted HTML sheet into an Exam via the LLM."""
    async with aiofiles.open(f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8') as f:
        html = await f.read()
    # The raw HTML is sent as the user message; the system message carries the
    # scraping instructions and the target JSON schema.
    return await self._llm.pydantic_prediction(
        [self._gpt_instructions_html(),
         {
             "role": "user",
             "content": html
         }
         ],
        ExamMapper.map_to_exam_model,
        str(self._level_json_schema())
    )
def _gpt_instructions_html(self):
    """System message instructing the LLM to scrape an HTML question sheet.

    Describes the 4 supported question types, how to format reading passages
    and blanks, and embeds the exercise JSON templates. The text is a runtime
    prompt and must not be reworded casually.
    """
    return {
        "role": "system",
        "content": (
            'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
            'Your current task is to scrape html english questions sheets.\n\n'
            'In the question sheet you will only see 4 types of question:\n'
            '- blank space multiple choice\n'
            '- underline multiple choice\n'
            '- reading passage blank space multiple choice\n'
            '- reading passage multiple choice\n\n'
            'For the first two types of questions the template is the same but the question prompts differ, '
            'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
            'multiple "_", in the underline you must include in the prompt the <u></u> to '
            'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'
            'For the reading passage exercise you must handle the formatting of the passages. If it is a '
            'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
            'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
            'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
            'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
            'between paragraphs.\n\n'
            'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
            'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
            'out the best paragraph separation possible.'
            'You will place all the information in a single JSON: '
            '{"parts": [{"exercises": [{...}], "context": ""}]}\n '
            'Where {...} are the exercises templates for each part of a question sheet and the optional field '
            'context.'
            'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
            'so that you can group the exercises by the parts that are in the html, this is crucial since only '
            'reading passage multiple choice require context and if the context is included in parts where it '
            'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'
            'The templates for the exercises are the following:\n'
            '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
            f'{self._multiple_choice_html()}\n'
            f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'
            'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
            'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
            'exercises exclude the context field.'
        )
    }
@staticmethod
def _multiple_choice_html():
    """JSON template the LLM fills for one multiple-choice exercise."""
    option_placeholders = {
        "A": "<the a option>",
        "B": "<the b option>",
        "C": "<the c option>",
        "D": "<the d option>",
    }
    return {
        "type": "multipleChoice",
        "prompt": "Select the appropriate option.",
        "questions": [
            {
                "id": "<the question id>",
                "prompt": "<the question>",
                "solution": "<the option id solution>",
                "options": [
                    {"id": option_id, "text": text}
                    for option_id, text in option_placeholders.items()
                ]
            }
        ]
    }
@staticmethod
def _passage_blank_space_html():
    """JSON template the LLM fills for a fill-the-blanks reading passage.

    Blanks in the passage text are encoded as {{<question id>}} placeholders;
    `words` carries the per-blank options and `solutions` the per-blank answers.
    """
    return {
        "type": "fillBlanks",
        "variant": "mc",
        "prompt": "Click a blank to select the appropriate word for it.",
        "text": (
            "<The whole text for the exercise with replacements for blank spaces and their "
            "ids with {{<question id>}} with 2 newlines between paragraphs>"
        ),
        "solutions": [
            {
                "id": "<question id>",
                "solution": "<the option that holds the solution>"
            }
        ],
        "words": [
            {
                "id": "<question id>",
                "options": {
                    "A": "<a option>",
                    "B": "<b option>",
                    "C": "<c option>",
                    "D": "<d option>"
                }
            }
        ]
    }
async def _png_completion(self, path_id: str) -> Exam:
    """OCR path: render the PDF to PNG pages, scan them in overlapping
    two-page batches, then merge all batches into one Exam.

    Batches overlap (pages [1,2], [2,3], ...) so content that spans a page
    break is seen whole by at least one batch; the merge step deduplicates.
    """
    FileHelper.pdf_to_png(path_id)
    tmp_files = os.listdir(f'./tmp/{path_id}')
    pages = [f for f in tmp_files if f.startswith('page-') and f.endswith('.png')]
    # Sort numerically by the page index embedded in "page-<n>.png".
    pages.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
    json_schema = {
        "components": [
            {"type": "part", "part": "<name or number of the part>"},
            self._multiple_choice_png(),
            {"type": "blanksPassage", "text": (
                "<The whole text for the exercise with replacements for blank spaces and their "
                "ids with {{<question id>}} with 2 newlines between paragraphs>"
            )},
            {"type": "passage", "context": (
                "<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>"
            )},
            self._passage_blank_space_png()
        ]
    }
    components = []
    for i in range(len(pages)):
        current_page = pages[i]
        next_page = pages[i + 1] if i + 1 < len(pages) else None
        # Last batch is a single page; all others are overlapping pairs.
        batch = [current_page, next_page] if next_page else [current_page]
        sheet = await self._png_batch(path_id, batch, json_schema)
        sheet.batch = i + 1
        components.append(sheet.dict())
    batches = {"batches": components}
    return await self._batches_to_exam_completion(batches)
async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
    """Scan one batch of page PNGs into a Sheet of components via the LLM.

    The pages are sent base64-encoded as the user message content.
    """
    return await self._llm.pydantic_prediction(
        [self._gpt_instructions_png(),
         {
             "role": "user",
             "content": [
                 *FileHelper.b64_pngs(path_id, files)
             ]
         }
         ],
        ExamMapper.map_to_sheet,
        str(json_schema)
    )
def _gpt_instructions_png(self):
    """System message instructing the LLM to OCR-scan page images into
    sheet components (parts, MC questions, passages, blanks options).

    Runtime prompt text — do not reword casually.
    """
    return {
        "role": "system",
        "content": (
            'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
            'Your current task is to scan english questions sheets.\n\n'
            'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
            'sheet components you will retrieve from the images, the components and their corresponding JSON '
            'templates are as follows:\n'
            '- Part, a standalone part or part of a section of the question sheet: '
            '{"type": "part", "part": "<name or number of the part>"}\n'
            '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
            'the prompt field of the template: blanks, underlines and normal. '
            'In the blanks prompt you must leave 5 underscores to represent the blank space. '
            'In the underlines questions the objective is to pick the words that are incorrect in the given '
            'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
            'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
            'in the order they appear in the question for the options A to D, disreguard options that might be '
            'included underneath the underlines question and use the ones you wrapped in <u></u>.'
            'In normal you just leave the question as is. '
            f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'
            '- Reading Passages, there are two types of reading passages. Reading passages where you will see '
            'blanks represented by a (question id) followed by a line, you must format these types of reading '
            'passages to be only the text with the brackets that have the question id and line replaced with '
            '"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
            'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
            'and place 2 newlines between paragraphs. '
            'For the reading passages with blanks the template is: {"type": "blanksPassage", '
            '"text": "<The whole text for the exercise with replacements for blank spaces and their '
            'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '
            'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
            'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'
            '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
            'options with the question id and the options from a to d. The template is: '
            f'{self._passage_blank_space_png()}\n'
            'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
            'reading passages are cut off don\'t include them in the JSON.'
        )
    }
def _multiple_choice_png(self):
    """Per-question MC template for the OCR path: the html question template
    tagged with a type and with the solution field removed (OCR does not solve).
    """
    base_question = self._multiple_choice_html()["questions"][0]
    template = {key: value for key, value in base_question.items() if key != "solution"}
    template["type"] = "multipleChoice"
    return template
def _passage_blank_space_png(self):
    """Blanks-options component template for the OCR path: one `words` entry
    of the html fill-blanks template, tagged with a type.
    """
    template = dict(self._passage_blank_space_html()["words"][0])
    template["type"] = "fillBlanks"
    return template
async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
    """Merge the per-page component batches into a single solved Exam.

    BUG FIX: this previously sent `_gpt_instructions_html()` — the HTML-scraper
    prompt — with batch input, while `_gpt_instructions_batches()` (the prompt
    written specifically for deduplicating/merging/solving batch components)
    was defined but never used. Use the batch-merging instructions here.
    """
    return await self._llm.pydantic_prediction(
        [self._gpt_instructions_batches(),
         {
             "role": "user",
             "content": str(batches)
         }
         ],
        ExamMapper.map_to_exam_model,
        str(self._level_json_schema())
    )
def _gpt_instructions_batches(self):
    """System message instructing the LLM to merge overlapping OCR batches:
    deduplicate components (later batch wins), solve the questions, and
    restructure the result into the level JSON schema.

    Runtime prompt text — do not reword casually (typos such as "helpfull"
    and "fullfill" are preserved as-is).
    """
    return {
        "role": "system",
        "content": (
            'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
            'components and solve the questions. Each batch may contain overlapping content with the previous '
            'batch, or close enough content which needs to be excluded. The components are as follows:'
            '- Part, a standalone part or part of a section of the question sheet: '
            '{"type": "part", "part": "<name or number of the part>"}\n'
            '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
            'the prompt field of the template: blanks, underlines and normal. '
            'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
            'appropriate option to solve it.'
            'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
            'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
            'the order in which the underlines appear in the prompt you will need to fix it.'
            'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
            'select the appropriate solution.'
            f'The template for these questions is the same: {self._multiple_choice_png()}\n'
            '- Reading Passages, there are two types of reading passages with different templates. The one with '
            'type "blanksPassage" where the text field holds the passage and a blank is represented by '
            '{{<some number>}} and the other one with type "passage" that has the context field with just '
            'reading passages. For both of these components you will have to remove any additional data that might '
            'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
            ' if there are any. These components are used in conjunction with other ones.'
            '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
            'options with the question id and the options from a to d. The template is: '
            f'{self._passage_blank_space_png()}\n\n'
            'Now that you know the possible components here\'s what I want you to do:\n'
            '1. Remove duplicates. A batch will have duplicates of other batches and the components of '
            'the next batch should always take precedence over the previous one batch, what I mean by this is that '
            'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
            'you pick the next one.\n'
            '2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
            'exercise. For the multiple choice question follow the previous instruction to solve them and place '
            f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
            'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
            f'the template for this exercise: {self._passage_blank_space_html()}.\n'
            f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
            f'You must group the exercises by the parts in the order they appear in the batches components. '
            f'The context field of a part is the context of a passage component that has text relevant to normal '
            f'multiple choice questions.\n'
            'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
            'in your response and correctly structure the JSON.'
        )
    }
@staticmethod
def fix_ids(response):
counter = 1
for part in response["parts"]:
for exercise in part["exercises"]:
if exercise["type"] == "multipleChoice":
for question in exercise["questions"]:
question["id"] = counter
counter += 1
if exercise["type"] == "fillBlanks":
for i in range(len(exercise["words"])):
exercise["words"][i]["id"] = counter
exercise["solutions"][i]["id"] = counter
counter += 1
return response

View File

@@ -1,15 +1,18 @@
import queue
import uuid
from logging import getLogger
from queue import Queue
import random
from typing import Dict
from typing import Dict, List
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
from app.configs.constants import (
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
FieldsAndExercises
)
from app.helpers import ExercisesHelper
from app.helpers import ExercisesHelper, FileHelper
class ListeningService(IListeningService):
@@ -33,25 +36,83 @@ class ListeningService(IListeningService):
self._tts = tts
self._file_storage = file_storage
self._document_store = document_store
self._logger = getLogger(__name__)
self._sections = {
"section_1": {
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
"start_id": 1,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation"
"type": "conversation",
},
"section_2": {
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
"start_id": 11,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
"type": "monologue",
},
"section_3": {
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
"exercise_sample_size": 1,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
"start_id": 21,
"generate_dialogue": self._generate_listening_conversation,
"type": "conversation"
"type": "conversation",
},
"section_4": {
"topic": EducationalContent.ACADEMIC_SUBJECTS,
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
"exercise_sample_size": 2,
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
"start_id": 31,
"generate_dialogue": self._generate_listening_monologue,
"type": "monologue"
}
}
async def get_listening_question(
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
number_of_exercises_q=queue.Queue(), start_id=-1
):
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
section = self._sections[f"section_{section_id}"]
if not topic:
topic = random.choice(section["topic"])
if len(req_exercises) == 0:
req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"])
if number_of_exercises_q.empty():
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
section["total_exercises"], len(req_exercises)
)
if start_id == -1:
start_id = section["start_id"]
dialog = await self.generate_listening_question(section_id, topic)
if section_id in {1, 3}:
dialog = self.parse_conversation(dialog)
self._logger.info(f'Generated {section["type"]}: {dialog}')
exercises = await self.generate_listening_exercises(
section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty
)
return {
"exercises": exercises,
"text": dialog,
"difficulty": difficulty
}
async def generate_listening_question(self, section: int, topic: str):
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
@@ -67,9 +128,10 @@ class ListeningService(IListeningService):
for req_exercise in req_exercises:
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "multipleChoice":
if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options":
n_options = 4 if "multipleChoice" else 3
question = await self._gen_multiple_choice_exercise_listening(
dialog_type, dialog, number_of_exercises, start_id, difficulty
dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options
)
exercises.append(question)
@@ -100,10 +162,9 @@ class ListeningService(IListeningService):
return exercises
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str):
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
template = getListeningTemplate()
template['difficulty'] = difficulty
listening_id = str(uuid.uuid4())
for i, part in enumerate(parts, start=0):
part_template = getListeningPartTemplate()
@@ -127,8 +188,8 @@ class ListeningService(IListeningService):
else:
template["variant"] = ExamVariant.FULL.value
(result, listening_id) = await self._document_store.save_to_db_with_id("listening", template, listening_id)
if result:
listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id)
if listening_id:
return {**template, "id": listening_id}
else:
raise Exception("Failed to save question: " + str(parts))
@@ -160,6 +221,20 @@ class ListeningService(IListeningService):
}
]
if section == 1:
messages.extend([
{
"role": "user",
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
},
{
"role": "user",
"content": 'Try to have spelling of names (cities, people, etc)'
}
])
response = await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
@@ -170,7 +245,11 @@ class ListeningService(IListeningService):
return self._get_conversation_voices(response, True)
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
context = 'social context' if section == 2 else 'academic subject'
head = (
'Generate a comprehensive monologue set in the social context of'
if section == 2 else
'Generate a comprehensive and complex monologue on the academic subject of'
)
messages = [
{
@@ -182,7 +261,7 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
f'Generate a comprehensive monologue set in the {context} of "{topic}". {self.MONOLOGUE_TAIL}'
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
)
}
]
@@ -233,7 +312,7 @@ class ListeningService(IListeningService):
# ==================================================================================================================
async def _gen_multiple_choice_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
):
messages = [
{
@@ -248,8 +327,8 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty multiple choice questions of 4 options '
f'for this {dialog_type}:\n"' + text + '"')
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
f'options for this {dialog_type}:\n"' + text + '"')
}
]
@@ -268,7 +347,7 @@ class ListeningService(IListeningService):
}
async def _gen_write_blanks_questions_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
@@ -280,7 +359,7 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
}
]
@@ -300,7 +379,7 @@ class ListeningService(IListeningService):
}
async def _gen_write_blanks_notes_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
@@ -312,7 +391,7 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
f'Generate {str(quantity)} {difficulty} difficulty notes taken from this '
f'Generate {quantity} {difficulty} difficulty notes taken from this '
f'{dialog_type}:\n"{text}"'
)
@@ -357,7 +436,7 @@ class ListeningService(IListeningService):
}
async def _gen_write_blanks_form_exercise_listening(
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
):
messages = [
{
@@ -369,12 +448,21 @@ class ListeningService(IListeningService):
{
"role": "user",
"content": (
f'Generate a form with {str(quantity)} {difficulty} difficulty key-value pairs '
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
f'about this {dialog_type}:\n"{text}"'
)
}
]
if dialog_type == "conversation":
messages.append({
"role": "user",
"content": (
'It must be a form and not questions. '
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
)
})
parsed_form = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
@@ -391,3 +479,14 @@ class ListeningService(IListeningService):
"type": "writeBlanks"
}
@staticmethod
def parse_conversation(conversation_data):
conversation_list = conversation_data.get('conversation', [])
readable_text = []
for message in conversation_list:
name = message.get('name', 'Unknown')
text = message.get('text', '')
readable_text.append(f"{name}: {text}")
return "\n".join(readable_text)

View File

@@ -12,42 +12,25 @@ class ReadingService(IReadingService):
def __init__(self, llm: ILLMService):
self._llm = llm
self._passages = {
"passage_1": {
"question_type": QuestionType.READING_PASSAGE_1,
"start_id": 1
},
"passage_2": {
"question_type": QuestionType.READING_PASSAGE_2,
"start_id": 14
},
"passage_3": {
"question_type": QuestionType.READING_PASSAGE_3,
"start_id": 27
}
}
async def gen_reading_passage(
self,
passage_id: int,
part: int,
topic: str,
req_exercises: List[str],
number_of_exercises_q: Queue,
difficulty: str
difficulty: str,
start_id: int
):
_passage = self._passages[f'passage_{str(passage_id)}']
passage = await self.generate_reading_passage(_passage["question_type"], topic)
if passage == "":
return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
start_id = _passage["start_id"]
passage = await self.generate_reading_passage(part, topic)
exercises = await self._generate_reading_exercises(
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
)
if ExercisesHelper.contains_empty_dict(exercises):
return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
return await self.gen_reading_passage(
part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
)
return {
"exercises": exercises,
@@ -58,7 +41,17 @@ class ReadingService(IReadingService):
"difficulty": difficulty
}
async def generate_reading_passage(self, q_type: QuestionType, topic: str):
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
part_system_message = {
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
"3": (
'The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.'
)
}
messages = [
{
"role": "system",
@@ -69,17 +62,26 @@ class ReadingService(IReadingService):
{
"role": "user",
"content": (
f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
f'on the topic of "{topic}". The passage should offer a substantial amount of '
'information, analysis, or narrative relevant to the chosen subject matter. This text '
'passage aims to serve as the primary reading section of an IELTS test, providing an '
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
'does not contain forbidden subjects in muslim countries.'
)
},
{
"role": "system",
"content": part_system_message[str(part)]
}
]
if part == 3:
messages.append({
"role": "user",
"content": "Use real text excerpts on you generated passage and cite the sources."
})
return await self._llm.prediction(
GPTModels.GPT_4_O,
messages,
@@ -95,11 +97,15 @@ class ReadingService(IReadingService):
number_of_exercises = number_of_exercises_q.get()
if req_exercise == "fillBlanks":
question = await self._gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
question = await self._gen_summary_fill_blanks_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added fill blanks: " + str(question))
elif req_exercise == "trueFalse":
question = await self._gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty)
question = await self._gen_true_false_not_given_exercise(
passage, number_of_exercises, start_id, difficulty
)
exercises.append(question)
print("Added trueFalse: " + str(question))
elif req_exercise == "writeBlanks":
@@ -114,32 +120,28 @@ class ReadingService(IReadingService):
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
return exercises
async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
async def _gen_summary_fill_blanks_exercise(
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{ "summary": "summary", "words": ["word_1", "word_2"] }')
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
)
},
{
"role": "user",
"content": (
f'Summarize this text: "{text}"'
)
},
{
"role": "user",
"content": (
f'Select {str(quantity)} {difficulty} difficulty words, it must be words and not '
'expressions, from the summary.'
)
"content": f'Summarize this text: "{text}"'
}
]
@@ -148,22 +150,45 @@ class ReadingService(IReadingService):
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], 5)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"words": ["word_1", "word_2"] }'
)
},
{
"role": "user",
"content": (
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
f'from this:\n{response["summary"]}'
)
}
]
words_response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response["words"] = words_response["words"]
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
response["summary"], response["words"], start_id
)
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
return {
"allowRepetition": True,
"id": str(uuid.uuid4()),
"prompt": (
"Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
"more words than spaces so you will not use them all. You may use any of the words more than once."
),
"solutions": solutions,
"text": replaced_summary,
"type": "fillBlanks",
"words": options_words
}
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
@@ -210,7 +235,8 @@ class ReadingService(IReadingService):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
)
},
{
"role": "user",
@@ -243,7 +269,8 @@ class ReadingService(IReadingService):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
)
},
{
"role": "user",
@@ -262,7 +289,7 @@ class ReadingService(IReadingService):
options = []
for i, paragraph in enumerate(paragraphs, start=0):
paragraph["heading"] = headings[i]
paragraph["heading"] = headings[i]["heading"]
options.append({
"id": paragraph["letter"],
"sentence": paragraph["paragraph"]
@@ -285,3 +312,38 @@ class ReadingService(IReadingService):
"sentences": sentences[:quantity],
"type": "matchSentences"
}
async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
"""Generate an idea/author matching exercise from a reading passage.

Asks the LLM for `quantity` ideas/theories/opinions found in `text`,
together with who voiced each one, then shapes the result into a
"matchSentences" exercise dict (the same exercise type used by the
paragraph-matching exercise in this service).
`start_id` is forwarded to ExercisesHelper.build_sentences — presumably a
numbering offset for the generated sentences; confirm against the helper.
"""
# The system message pins the JSON schema: {"ideas": [{"idea", "from"}, ...]}.
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"ideas": [ '
'{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
'{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
']}'
)
},
{
"role": "user",
"content": (
f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
f'The text: {text}'
)
}
]
# ["ideas"] is the field the LLM wrapper validates in the parsed response.
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["ideas"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
ideas = response["ideas"]
return {
"id": str(uuid.uuid4()),
"allowRepetition": False,
"options": ExercisesHelper.build_options(ideas),
"prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
"sentences": ExercisesHelper.build_sentences(ideas, start_id),
"type": "matchSentences"
}

View File

@@ -3,7 +3,7 @@ import os
import re
import uuid
import random
from typing import Dict, List
from typing import Dict, List, Optional
from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
@@ -27,29 +27,49 @@ class SpeakingService(ISpeakingService):
self._document_store = document_store
self._stt = stt
self._logger = logging.getLogger(__name__)
# TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
# either the difficulty in the request or a random one yet the prompt doesn't change
self._tasks = {
"task_1": {
"get": {
"json_template": (
'{"topic": "topic", "question": "question"}'
),
"json_template": {
"first_topic": "topic 1",
"second_topic": "topic 2",
"questions": [
(
"Introductory question about the first topic, starting the topic with "
"'Let's talk about x' and then the question."
),
"Follow up question about the first topic",
"Follow up question about the first topic",
"Question about second topic",
"Follow up question about the second topic",
]
},
"prompt": (
'Craft a thought-provoking question of {difficulty} difficulty for IELTS Speaking Part 1 '
'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
'that encourages candidates to delve deeply into personal experiences, preferences, or '
'insights on the topic of "{topic}". Instruct the candidate to offer not only detailed '
'descriptions but also provide nuanced explanations, examples, or anecdotes to enrich '
'their response. Make sure that the generated question does not contain forbidden subjects in '
'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
'Make sure that the generated question does not contain forbidden subjects in '
'muslim countries.'
)
}
},
"task_2": {
"get": {
"json_template": (
'{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}'
),
"json_template": {
"topic": "topic",
"question": "question",
"prompts": [
"prompt_1",
"prompt_2",
"prompt_3"
],
"suffix": "And explain why..."
},
"prompt": (
'Create a question of {difficulty} difficulty for IELTS Speaking Part 2 '
'Create a question of medium difficulty for IELTS Speaking Part 2 '
'that encourages candidates to narrate a personal experience or story related to the topic '
'of "{topic}". Include 3 prompts that guide the candidate to describe '
'specific aspects of the experience, such as details about the situation, '
@@ -60,11 +80,18 @@ class SpeakingService(ISpeakingService):
},
"task_3": {
"get": {
"json_template": (
'{"topic": "topic", "questions": ["question", "question", "question"]}'
),
"json_template": {
"topic": "topic",
"questions": [
"Introductory question about the topic.",
"Follow up question about the topic",
"Follow up question about the topic",
"Follow up question about the topic",
"Follow up question about the topic"
]
},
"prompt": (
'Formulate a set of 3 questions of {difficulty} difficulty for IELTS Speaking Part 3 '
'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3'
'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
'related to the topic. Make sure that the generated question does not contain forbidden '
@@ -74,28 +101,57 @@ class SpeakingService(ISpeakingService):
},
}
async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
task_values = self._tasks[f'task_{task_id}']['get']
async def get_speaking_part(
self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
) -> Dict:
task_values = self._tasks[f'task_{part}']['get']
if part == 1:
task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
else:
task_prompt = task_values["prompt"].format(topic=topic)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: ' +
task_values["json_template"]
'You are a helpful assistant designed to output JSON on this format: '
f'{task_values["json_template"]}'
)
},
{
"role": "user",
"content": str(task_values["prompt"]).format(topic=topic, difficulty=difficulty)
"content": task_prompt
}
]
part_specific = {
"1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
"2": (
'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
'that start with "And explain why".'
)
}
if part in {1, 2}:
messages.append({
"role": "user",
"content": part_specific[str(part)]
})
if part in {1, 3}:
messages.append({
"role": "user",
"content": 'They must be 1 single question each and not be double-barreled questions.'
})
fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS
response = await self._llm.prediction(
GPTModels.GPT_4_O, messages, FieldsAndExercises.GEN_FIELDS, TemperatureSettings.GEN_QUESTION_TEMPERATURE
GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
# TODO: this was on GET /speaking_task_3 don't know if it is intentional only for 3
if task_id == 3:
if part == 3:
# Remove the numbers from the questions only if the string starts with a number
response["questions"] = [
re.sub(r"^\d+\.\s*", "", question)
@@ -103,117 +159,15 @@ class SpeakingService(ISpeakingService):
for question in response["questions"]
]
response["type"] = task_id
response["type"] = part
response["difficulty"] = difficulty
response["topic"] = topic
if part in {2, 3}:
response["topic"] = topic
return response
async def grade_speaking_task_1_and_2(
self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
):
request_id = uuid.uuid4()
req_data = {
"question": question,
"answer": answer_firebase_path
}
self._logger.info(
f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
f'Use this id to track the logs: {str(request_id)} - Request data: {str(req_data)}'
)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {answer_firebase_path}')
await self._file_storage.download_firebase_file(answer_firebase_path, sound_file_name)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloaded file {answer_firebase_path} to {sound_file_name}')
answer = await self._stt.speech_to_text(sound_file_name)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer}')
if TextHelper.has_x_words(answer, 20):
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"comment": "comment about answer quality", "overall": 0.0, '
'"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
'"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
},
{
"role": "user",
"content": (
f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
'assign a score of 0 if the response fails to address the question. Additionally, provide '
'detailed commentary highlighting both strengths and weaknesses in the response.'
f'\n Question: "{question}" \n Answer: "{answer}"')
}
]
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answer.')
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
messages,
["comment"],
TemperatureSettings.GRADING_TEMPERATURE
)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answer graded: {str(response)}')
perfect_answer_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"answer": "perfect answer"}'
)
},
{
"role": "user",
"content": (
'Provide a perfect answer according to ielts grading system to the following '
f'Speaking Part {task} question: "{question}"')
}
]
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting perfect answer.')
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
perfect_answer_messages,
["answer"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response['perfect_answer'] = response["answer"]
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Perfect answer: ' + response['perfect_answer'])
response['transcript'] = answer
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting fixed text.')
response['fixed_text'] = await self._get_speaking_corrections(answer)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Fixed text: ' + response['fixed_text'])
if response["overall"] == "0.0" or response["overall"] == 0.0:
response["overall"] = self._calculate_overall(response)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
return response
else:
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - '
f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
)
return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
# TODO: When there's more time grade_speaking_task_1_2 can be merged with this, when there's more time
async def grade_speaking_task_3(self, answers: Dict, task: int = 3):
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
request_id = uuid.uuid4()
self._logger.info(
f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
@@ -222,157 +176,219 @@ class SpeakingService(ISpeakingService):
text_answers = []
perfect_answers = []
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
)
if task != 2:
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
)
for item in answers:
sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {item["answer"]}')
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Downloading file {item["answer"]}')
await self._file_storage.download_firebase_file(item["answer"], sound_file_name)
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - '
'Downloaded file ' + item["answer"] + f' to {sound_file_name}'
f'POST - speaking_task_{task} - {request_id} - '
f'Downloaded file {item["answer"]} to {sound_file_name}'
)
answer_text = await self._stt.speech_to_text(sound_file_name)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer_text}')
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Transcripted answer: {answer_text}')
text_answers.append(answer_text)
item["answer"] = answer_text
os.remove(sound_file_name)
# TODO: This will end the grading of all answers if a single one does not have enough words
# don't know if this is intended
if not TextHelper.has_x_words(answer_text, 20):
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - '
f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}')
f'POST - speaking_task_{task} - {request_id} - '
f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}'
)
return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
perfect_answer_messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"answer": "perfect answer"}'
)
},
{
"role": "user",
"content": (
'Provide a perfect answer according to ielts grading system to the following '
f'Speaking Part {task} question: "{item["question"]}"'
)
}
]
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - '
f'POST - speaking_task_{task} - {request_id} - '
f'Requesting perfect answer for question: {item["question"]}'
)
perfect_answers.append(await self._get_perfect_answer(task, item["question"]))
perfect_answers.append(
await self._llm.prediction(
GPTModels.GPT_3_5_TURBO,
perfect_answer_messages,
["answer"],
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if task in {1, 3}:
self._logger.info(
f'POST - speaking_task_{task} - {request_id} - Formatting answers and questions for prompt.'
)
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"comment": "comment about answer quality", "overall": 0.0, '
'"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
'"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
}
]
message = (
f"Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a "
"strict assessment that penalizes errors. Deduct points for deviations from the task, and "
"assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
"commentary highlighting both strengths and weaknesses in the response."
"\n\n The questions and answers are: \n\n'")
formatted_text = ""
for i, entry in enumerate(answers, start=1):
formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - Formatting answers and questions for prompt.'
)
self._logger.info(
f'POST - speaking_task_{task} - {request_id} - '
f'Formatted answers and questions for prompt: {formatted_text}'
)
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
else:
questions_and_answers = f'\n Question: "{answers[0]["question"]}" \n Answer: "{answers[0]["answer"]}"'
formatted_text = ""
for i, entry in enumerate(answers, start=1):
formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting grading of the answer(s).')
response = await self._grade_task(task, questions_and_answers)
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - Formatted answers and questions for prompt: {formatted_text}'
)
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Answer(s) graded: {response}')
message += formatted_text
if task in {1, 3}:
self._logger.info(
f'POST - speaking_task_{task} - {request_id} - Adding perfect answer(s) to response.')
messages.append({
"role": "user",
"content": message
})
# TODO: check if it is answer["answer"] instead
for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answers.')
self._logger.info(
f'POST - speaking_task_{task} - {request_id} - Adding transcript and fixed texts to response.'
)
response = await self._llm.prediction(
GPTModels.GPT_3_5_TURBO, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
)
for i, answer in enumerate(text_answers, start=1):
response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
else:
response['transcript'] = answers[0]["answer"]
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answers graded: {str(response)}')
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting fixed text.')
response['fixed_text'] = await self._get_speaking_corrections(answers[0]["answer"])
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Fixed text: {response["fixed_text"]}')
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Adding perfect answers to response.')
for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer
self._logger.info(
f'POST - speaking_task_{task} - {str(request_id)} - Adding transcript and fixed texts to response.'
)
for i, answer in enumerate(text_answers, start=1):
response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
if response["overall"] == "0.0" or response["overall"] == 0.0:
response["overall"] = self._calculate_overall(response)
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
response['perfect_answer'] = perfect_answers[0]["answer"]
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Final response: {response}')
return response
# ==================================================================================================================
# grade_speaking_task helpers
# ==================================================================================================================
async def _get_perfect_answer(self, task: int, question: str):
"""Request a model ("perfect") answer for a Speaking Part `task` question.

Returns the parsed JSON from the LLM wrapper, validated to contain an
"answer" field. Part 1 uses GPT-4o with a length constraint; the other
parts use GPT-3.5 Turbo.
"""
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
)
},
{
"role": "user",
"content": (
'Provide a perfect answer according to ielts grading system to the following '
f'Speaking Part {task} question: "{question}"'
)
}
]
# Part 1 answers are kept short (2-3 sentences) via an extra instruction.
if task == 1:
messages.append({
"role": "user",
"content": 'The answer must be 2 or 3 sentences long.'
})
# Stronger model only for part 1; the rest fall back to GPT-3.5 Turbo.
gpt_model = GPTModels.GPT_4_O if task == 1 else GPTModels.GPT_3_5_TURBO
return await self._llm.prediction(
gpt_model, messages, ["answer"], TemperatureSettings.GRADING_TEMPERATURE
)
async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
"""Grade a speaking task's answers with the LLM using IELTS criteria.

`questions_and_answers` is the already-formatted Q/A text appended to the
grading prompt. The response is parsed JSON shaped by
self._grade_template() and validated to contain a "comment" field.
"""
messages = [
{
"role": "system",
"content": (
f'You are a helpful assistant designed to output JSON on this format: {self._grade_template()}'
)
},
{
"role": "user",
"content": (
f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
'assign a score of 0 if the response fails to address the question. Additionally, provide '
'detailed commentary highlighting both strengths and weaknesses in the response.'
) + questions_and_answers
}
]
# Per-part grading instructions; keys are stringified part numbers.
task_specific = {
"1": (
'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
'student that they should be.'
),
"2": 'Address the student as "you"',
"3": 'Address the student as "you" and pay special attention to coherence between the answers.'
}
messages.append({
"role": "user",
"content": task_specific[str(task)]
})
# Parts 1 and 3 are graded from transcripts of multiple answers, so ask
# the model to treat the text as heard speech and to comment at length.
if task in {1, 3}:
messages.extend([
{
"role": "user",
"content": (
'For pronunciations act as if you heard the answers and they were transcripted '
'as you heard them.'
)
},
{
"role": "user",
"content": 'The comments must be long, detailed, justify the grading and suggest improvements.'
}
])
return await self._llm.prediction(
GPTModels.GPT_4_O, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
)
@staticmethod
def _fix_speaking_overall(overall: float, task_response: dict):
grades = [category["grade"] for category in task_response.values()]
if overall > max(grades) or overall < min(grades):
total_sum = sum(grades)
average = total_sum / len(grades)
rounded_average = round(average, 0)
return rounded_average
return overall
@staticmethod
def _zero_rating(comment: str):
return {
"comment": comment,
"overall": 0,
"task_response": {
"Fluency and Coherence": 0,
"Lexical Resource": 0,
"Grammatical Range and Accuracy": 0,
"Pronunciation": 0
"Fluency and Coherence": {
"grade": 0.0,
"comment": ""
},
"Lexical Resource": {
"grade": 0.0,
"comment": ""
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": ""
},
"Pronunciation": {
"grade": 0.0,
"comment": ""
}
}
}
@staticmethod
def _calculate_overall(response: Dict):
return round(
(
response["task_response"]["Fluency and Coherence"] +
response["task_response"]["Lexical Resource"] +
response["task_response"]["Grammatical Range and Accuracy"] +
response["task_response"]["Pronunciation"]
) / 4, 1
)
async def _get_speaking_corrections(self, text):
messages = [
{
@@ -409,6 +425,7 @@ class SpeakingService(ISpeakingService):
self._logger.info(f'Saved speaking to DB with id {req_id} : {str(template)}')
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
avatar = (random.choice(list(AvatarEnum))).value
template_index = part - 1
# Using list comprehension to find the element with the desired value in the 'type' field
@@ -418,26 +435,12 @@ class SpeakingService(ISpeakingService):
if found_exercises:
exercise = found_exercises[0]
self._logger.info(f'Creating video for speaking part {part}')
if part in {1, 2}:
result = await self._create_video(
exercise["question"],
(random.choice(list(AvatarEnum))).value,
f'Failed to create video for part {part} question: {str(exercise["question"])}'
)
if result is not None:
if part == 2:
template["exercises"][template_index]["prompts"] = exercise["prompts"]
template["exercises"][template_index]["text"] = exercise["question"]
template["exercises"][template_index]["title"] = exercise["topic"]
template["exercises"][template_index]["video_url"] = result["video_url"]
template["exercises"][template_index]["video_path"] = result["video_path"]
else:
if part in {1, 3}:
questions = []
for question in exercise["questions"]:
result = await self._create_video(
question,
(random.choice(list(AvatarEnum))).value,
avatar,
f'Failed to create video for part {part} question: {str(exercise["question"])}'
)
if result is not None:
@@ -449,63 +452,139 @@ class SpeakingService(ISpeakingService):
questions.append(video)
template["exercises"][template_index]["prompts"] = questions
template["exercises"][template_index]["title"] = exercise["topic"]
if part == 1:
template["exercises"][template_index]["first_title"] = exercise["first_topic"]
template["exercises"][template_index]["second_title"] = exercise["second_topic"]
else:
template["exercises"][template_index]["title"] = exercise["topic"]
else:
result = await self._create_video(
exercise["question"],
avatar,
f'Failed to create video for part {part} question: {str(exercise["question"])}'
)
if result is not None:
template["exercises"][template_index]["prompts"] = exercise["prompts"]
template["exercises"][template_index]["text"] = exercise["question"]
template["exercises"][template_index]["title"] = exercise["topic"]
template["exercises"][template_index]["video_url"] = result["video_url"]
template["exercises"][template_index]["video_path"] = result["video_path"]
if not found_exercises:
template["exercises"].pop(template_index)
return template
# TODO: Check if it is intended to log the original question
async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
if len(prompts) > 0:
question = original_question + " In your answer you should consider: " + " ".join(prompts)
else:
question = original_question
error_msg = f'Failed to create video for part 1 question: {original_question}'
result = await self._create_video(
question,
avatar,
error_msg
async def generate_video(
self, part: int, avatar: str, topic: str, questions: list[str],
*,
second_topic: Optional[str] = None,
prompts: Optional[list[str]] = None,
suffix: Optional[str] = None,
):
request_id = str(uuid.uuid4())
# TODO: request data
self._logger.info(
f'POST - generate_video_{part} - Received request to generate video {part}. '
f'Use this id to track the logs: {request_id} - Request data: " + str(request.get_json())'
)
if result is not None:
return {
"text": original_question,
"prompts": prompts,
"title": topic,
**result,
"type": "speaking",
"id": uuid.uuid4()
}
else:
return str(error_msg)
part_questions = self._get_part_questions(part, questions, avatar)
videos = []
async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
sp_questions = []
self._logger.info('Creating videos for speaking part 3')
for question in questions:
self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating videos for speaking part {part}.')
for question in part_questions:
self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating video for question: {question}')
result = await self._create_video(
question,
avatar,
f'Failed to create video for part 3 question: {question}'
'POST - generate_video_{p} - {r} - Failed to create video for part {p} question: {q}'.format(
p=part, r=request_id, q=question
)
)
if result is not None:
self._logger.info(f'POST - generate_video_{part} - {request_id} - Video created')
self._logger.info(
f'POST - generate_video_{part} - {request_id} - Uploaded video to firebase: {result["video_url"]}'
)
video = {
"text": question,
**result
"video_path": result["video_path"],
"video_url": result["video_url"]
}
sp_questions.append(video)
videos.append(video)
return {
"prompts": sp_questions,
"title": topic,
"type": "interactiveSpeaking",
"id": uuid.uuid4()
}
if part == 2 and len(videos) == 0:
raise Exception(f'Failed to create video for part 2 question: {questions[0]}')
return self._get_part_response(part, topic, videos, second_topic, prompts, suffix)
@staticmethod
def _get_part_questions(part: int, questions: list[str], avatar: str):
part_questions: list[str] = []
if part == 1:
id_to_name = {
"5912afa7c77c47d3883af3d874047aaf": "MATTHEW",
"9e58d96a383e4568a7f1e49df549e0e4": "VERA",
"d2cdd9c0379a4d06ae2afb6e5039bd0c": "EDWARD",
"045cb5dcd00042b3a1e4f3bc1c12176b": "TANYA",
"1ae1e5396cc444bfad332155fdb7a934": "KAYLA",
"0ee6aa7cc1084063a630ae514fccaa31": "JEROME",
"5772cff935844516ad7eeff21f839e43": "TYLER",
}
part_questions.extend(
[
"Hello my name is " + id_to_name.get(avatar) + ", what is yours?",
"Do you work or do you study?",
*questions
]
)
elif part == 2:
# Removed as the examiner should not say what is on the card.
# question = question + " In your answer you should consider: " + " ".join(prompts) + suffix
part_questions.append(f'{questions[0]}\nYou have 1 minute to take notes.')
elif part == 3:
part_questions = questions
return part_questions
@staticmethod
def _get_part_response(
part: int,
topic: str,
videos: list[dict],
second_topic: Optional[str],
prompts: Optional[list[str]],
suffix: Optional[str]
):
response = {}
if part == 1:
response = {
"prompts": videos,
"first_title": topic,
"second_title": second_topic,
"type": "interactiveSpeaking"
}
if part == 2:
response = {
"prompts": prompts,
"title": topic,
"suffix": suffix,
"type": "speaking",
# includes text, video_url and video_path
**videos[0]
}
if part == 3:
response = {
"prompts": videos,
"title": topic,
"type": "interactiveSpeaking",
}
response["id"] = str(uuid.uuid4())
return response
async def _create_video(self, question: str, avatar: str, error_message: str):
result = await self._vid_gen.create_video(question, avatar)
@@ -519,3 +598,36 @@ class SpeakingService(ISpeakingService):
}
self._logger.error(error_message)
return None
@staticmethod
def _grade_template():
return {
"comment": "extensive comment about answer quality",
"overall": 0.0,
"task_response": {
"Fluency and Coherence": {
"grade": 0.0,
"comment": (
"extensive comment about fluency and coherence, use examples to justify the grade awarded."
)
},
"Lexical Resource": {
"grade": 0.0,
"comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": (
"extensive comment about grammatical range and accuracy, use examples to justify the "
"grade awarded."
)
},
"Pronunciation": {
"grade": 0.0,
"comment": (
"extensive comment about pronunciation on the transcribed answer, use examples to justify the "
"grade awarded."
)
}
}
}

View File

@@ -1,13 +1,16 @@
import json
import re
import logging
from typing import List, Optional
from typing import List, Optional, Callable, TypeVar
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from app.services.abc import ILLMService
from app.helpers import count_tokens
from app.configs.constants import BLACKLISTED_WORDS
from pydantic import BaseModel
T = TypeVar('T', bound=BaseModel)
class OpenAI(ILLMService):
@@ -18,6 +21,7 @@ class OpenAI(ILLMService):
def __init__(self, client: AsyncOpenAI):
self._client = client
self._logger = logging.getLogger(__name__)
self._default_model = "gpt-4o-2024-08-06"
async def prediction(
self,
@@ -94,4 +98,53 @@ class OpenAI(ILLMService):
@staticmethod
def _check_fields(obj, fields):
return all(field in obj for field in fields)
return all(field in obj for field in fields)
async def pydantic_prediction(
    self,
    messages: "List[ChatCompletionMessageParam]",
    map_to_model: Callable,
    json_scheme: str,
    *,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    max_retries: int = 3
) -> "List[T] | T | None":
    """Request a JSON response from the model and map it onto a pydantic model.

    :param messages: chat history sent to the model.
    :param map_to_model: callable turning the parsed JSON dict into DTO(s);
        it should raise on invalid payloads so the retry loop can kick in.
    :param json_scheme: schema text re-sent to the model on a malformed reply.
    :param model: optional model override; defaults to self._default_model.
    :param temperature: optional sampling temperature (0.0 is honoured).
    :param max_retries: attempts before giving up.
    :return: the mapped model(s), or None after max_retries failures.
    """
    params = {
        "messages": messages,
        "response_format": {"type": "json_object"},
        "model": model if model else self._default_model
    }
    # Bug fix: `if temperature:` silently dropped an explicit temperature of
    # 0.0; only skip the parameter when the caller did not provide one.
    if temperature is not None:
        params["temperature"] = temperature
    for _ in range(max_retries):
        result = await self._client.chat.completions.create(**params)
        result_content = result.choices[0].message.content
        try:
            return map_to_model(json.loads(result_content))
        except Exception as e:  # malformed JSON or failed DTO validation
            self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
            # Replace the conversation with a repair prompt that quotes the
            # bad reply and the expected schema.
            params["messages"] = [
                {
                    "role": "user",
                    "content": (
                        "Your previous response wasn't in the json format I've explicitly told you to output. "
                        "In your next response, you will fix it and return me just the json I've asked."
                    )
                },
                {
                    "role": "user",
                    "content": (
                        f"Previous response: {result_content}\n"
                        f"JSON format: {json_scheme}"
                    )
                }
            ]
    self._logger.error("Max retries exceeded!")
    return None

View File

@@ -1,68 +0,0 @@
import re
from functools import reduce
from app.configs.constants import TemperatureSettings, GPTModels
from app.helpers import count_tokens
from app.services.abc import ILLMService, ITrainingService
class TrainingService(ITrainingService):
    """Generates study tips for incorrectly answered IELTS exam questions via the LLM."""

    def __init__(self, llm: ILLMService):
        # LLM backend used for every prediction this service makes.
        self._llm = llm

    async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
        """Return an LLM-generated tip explaining why `answer` is wrong.

        `context` may be None or empty, in which case no context message is
        sent to the model.
        """
        messages = self._get_question_tips(question, answer, correct_answer, context)
        # Total token count across all message contents, forwarded so the LLM
        # service can size the request. NOTE(review): assumes count_tokens()
        # returns a dict with an 'n_tokens' key - confirm in app.helpers.
        token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
                             map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
        response = await self._llm.prediction(
            GPTModels.GPT_3_5_TURBO,
            messages,
            None,
            TemperatureSettings.TIPS_TEMPERATURE,
            token_count=token_count
        )
        if isinstance(response, str):
            # The model sometimes prefixes its reply with a "label: "; strip it.
            response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
        return response

    @staticmethod
    def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
        """Build the chat messages that ask the LLM to explain a wrong answer."""
        messages = [
            {
                "role": "user",
                "content": (
                    "You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
                    "help students understand why it was a wrong answer and gives helpful insight for the future. "
                    "The tip should refer to the context and question."
                ),
            }
        ]
        # Context is optional; only include it when non-empty.
        if not (context is None or context == ""):
            messages.append({
                "role": "user",
                "content": f"This is the context for the question: {context}",
            })
        messages.extend([
            {
                "role": "user",
                "content": f"This is the question: {question}",
            },
            {
                "role": "user",
                "content": f"This is the answer: {answer}",
            },
            {
                "role": "user",
                "content": f"This is the correct answer: {correct_answer}",
            }
        ])
        return messages

View File

@@ -0,0 +1,7 @@
from .training import TrainingService
from .kb import TrainingContentKnowledgeBase
__all__ = [
"TrainingService",
"TrainingContentKnowledgeBase"
]

View File

@@ -0,0 +1,88 @@
import json
import os
from logging import getLogger
from typing import Dict, List
import faiss
import pickle
from app.services.abc import IKnowledgeBase
class TrainingContentKnowledgeBase(IKnowledgeBase):
    """FAISS-backed knowledge base of study tips, with one vector index per tip category."""

    def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
        """`embeddings` is a sentence-embedding model exposing .encode(list[str])."""
        self._embedding_model = embeddings
        self._tips = None  # self._read_json(path)
        self._category_metadata = None
        self._indices = None
        # Bug fix: the logger must exist *before* load_indices_and_metadata()
        # runs, since that method logs - the previous order raised
        # AttributeError on self._logger during construction.
        self._logger = getLogger(__name__)
        self.load_indices_and_metadata()

    @staticmethod
    def _read_json(path: str) -> Dict[str, any]:
        """Load and parse a UTF-8 JSON file."""
        with open(path, 'r', encoding="utf-8") as json_file:
            return json.loads(json_file.read())

    def print_category_count(self):
        """Debug helper: print how many tips exist per normalised category."""
        category_tips = {}
        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    category = tip['category'].lower().replace(" ", "_")
                    # Bug fix: the first tip of each category used to be
                    # recorded as 0, undercounting every category by one.
                    category_tips[category] = category_tips.get(category, 0) + 1
        print(category_tips)

    def create_embeddings_and_save_them(self) -> None:
        """Encode all tips and persist one FAISS index per category plus shared metadata."""
        category_embeddings = {}
        category_metadata = {}
        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    category = tip['category'].lower().replace(" ", "_")
                    if category not in category_embeddings:
                        category_embeddings[category] = []
                        category_metadata[category] = []
                    category_embeddings[category].append(tip['embedding'])
                    category_metadata[category].append({"id": tip['id'], "text": tip['text']})
        category_indices = {}
        for category, embeddings in category_embeddings.items():
            # NOTE(review): tip['embedding'] is fed through the encoder here,
            # which suggests the field actually holds raw tip text - confirm.
            embeddings_array = self._embedding_model.encode(embeddings)
            index = faiss.IndexFlatL2(embeddings_array.shape[1])
            index.add(embeddings_array)
            category_indices[category] = index
            faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")
        with open("./faiss/tips_metadata.pkl", "wb") as f:
            pickle.dump(category_metadata, f)

    def load_indices_and_metadata(
        self,
        directory: str = './faiss',
        suffix: str = '_tips_index.faiss',
        metadata_path: str = './faiss/tips_metadata.pkl'
    ):
        """Load every per-category FAISS index in `directory` plus the tip metadata pickle."""
        files = os.listdir(directory)
        self._indices = {}
        for file in files:
            if file.endswith(suffix):
                # The filename prefix (before the suffix) is the category name.
                self._indices[file[:-len(suffix)]] = faiss.read_index(f'{directory}/{file}')
                self._logger.info(f'Loaded embeddings for {file[:-len(suffix)]} category.')
        with open(metadata_path, 'rb') as f:
            self._category_metadata = pickle.load(f)
        self._logger.info("Loaded tips metadata")

    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Return metadata of the `top_k` tips nearest to `query` in `category`'s index."""
        query_embedding = self._embedding_model.encode([query])
        index = self._indices[category]
        distances, neighbour_ids = index.search(query_embedding, top_k)
        return [self._category_metadata[category][i] for i in neighbour_ids[0]]

View File

@@ -0,0 +1,459 @@
import re
from datetime import datetime
from functools import reduce
from logging import getLogger
from typing import Dict, List
from app.configs.constants import TemperatureSettings, GPTModels
from app.helpers import count_tokens
from app.repositories.abc import IDocumentStore
from app.services.abc import ILLMService, ITrainingService, IKnowledgeBase
from app.dtos.training import *
class TrainingService(ITrainingService):
    """Builds personalised training content from a student's exam statistics.

    Combines exam stats, LLM analysis of the student's answers and tips
    retrieved from a FAISS knowledge base into a single 'training' document
    that is persisted to the document store and returned by id.
    """

    # Tip categories the LLM is allowed to target when writing KB queries.
    TOOLS = [
        'critical_thinking',
        'language_for_writing',
        'reading_skills',
        'strategy',
        'words',
        'writing_skills'
    ]

    # strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing

    def __init__(self, llm: ILLMService, firestore: IDocumentStore, training_kb: IKnowledgeBase):
        self._llm = llm          # LLM backend for analysis and predictions
        self._db = firestore     # document store holding exams and training docs
        self._kb = training_kb   # FAISS knowledge base of study tips
        self._logger = getLogger(__name__)
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
    """Return an LLM-generated tip explaining why `answer` is wrong.

    `context` may be None or empty, in which case no context message is sent.
    """
    messages = self._get_question_tips(question, answer, correct_answer, context)
    # Total token count across all message contents, forwarded so the LLM
    # service can size the request. NOTE(review): assumes count_tokens()
    # returns a dict with an 'n_tokens' key - confirm in app.helpers.
    token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
                         map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
    response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        messages,
        None,
        TemperatureSettings.TIPS_TEMPERATURE,
        token_count=token_count
    )
    if isinstance(response, str):
        # The model sometimes prefixes its reply with a "label: "; strip it.
        response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
    return response
@staticmethod
def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
messages = [
{
"role": "user",
"content": (
"You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
"help students understand why it was a wrong answer and gives helpful insight for the future. "
"The tip should refer to the context and question."
),
}
]
if not (context is None or context == ""):
messages.append({
"role": "user",
"content": f"This is the context for the question: {context}",
})
messages.extend([
{
"role": "user",
"content": f"This is the question: {question}",
},
{
"role": "user",
"content": f"This is the answer: {answer}",
},
{
"role": "user",
"content": f"This is the correct answer: {correct_answer}",
}
])
return messages
async def get_training_content(self, training_content: Dict) -> Dict:
    """Build and persist a training document for a user's exam stats.

    `training_content` must contain "userID" and "stats" (raw stat docs).
    Returns {"id": <document id>} of the saved training document.
    """
    user, stats = training_content["userID"], training_content["stats"]
    # Group the raw stats into per-session exam data plus a summary map.
    exam_data, exam_map = await self._sort_out_solutions(stats)
    # LLM pass 1: per-exam details, weak areas and knowledge-base queries.
    # NOTE(review): pydantic_prediction can return None after repeated
    # malformed replies, which would raise AttributeError below - confirm.
    training_content = await self._get_exam_details_and_tips(exam_data)
    tips = self._query_kb(training_content.queries)
    # LLM pass 2: filter the retrieved tips down to the useful ones.
    usefull_tips = await self._get_usefull_tips(exam_data, tips)
    exam_map = self._merge_exam_map_with_details(exam_map, training_content.details)
    weak_areas = {"weak_areas": []}
    for area in training_content.weak_areas:
        weak_areas["weak_areas"].append(area.dict())
    # Timestamp is stored in epoch milliseconds.
    training_doc = {
        'created_at': int(datetime.now().timestamp() * 1000),
        **exam_map,
        **usefull_tips.dict(),
        **weak_areas,
        "user": user
    }
    doc_id = await self._db.save_to_db('training', training_doc)
    return {
        "id": doc_id
    }
@staticmethod
def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
new_exam_map = {"exams": []}
for detail in details:
new_exam_map["exams"].append({
"id": detail.exam_id,
"date": detail.date,
"performance_comment": detail.performance_comment,
"detailed_summary": detail.detailed_summary,
**exam_map[detail.exam_id]
})
return new_exam_map
def _query_kb(self, queries: List[QueryDTO]):
    """Run the LLM-suggested queries against the knowledge base and collect tips.

    The LLM uses the public TOOLS names; this maps them onto the actual
    index names on disk. The "words" tool has no single index, so it fans
    out to both word-related indices. Unknown categories are logged and
    skipped. Returns {"tips": [...]}.
    """
    # TOOLS name -> on-disk FAISS index name.
    map_categories = {
        "critical_thinking": "ct_focus",
        "language_for_writing": "language_for_writing",
        "reading_skills": "reading_skill",
        "strategy": "strategy",
        "writing_skills": "writing_skill"
    }
    tips = {"tips": []}
    for query in queries:
        if query.category == "words":
            tips["tips"].extend(
                self._kb.query_knowledge_base(query.text, "word_link")
            )
            tips["tips"].extend(
                self._kb.query_knowledge_base(query.text, "word_partners")
            )
        else:
            if query.category in map_categories:
                tips["tips"].extend(
                    self._kb.query_knowledge_base(query.text, map_categories[query.category])
                )
            else:
                # The model hallucinated a category; skip it but keep a trace.
                self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
    return tips
async def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
    """LLM pass: summarise each exam attempt, list weak areas, propose KB queries.

    Returns a TrainingContentDTO mapped via _map_gpt_response, or None if the
    model never produced valid JSON within the retry budget.
    """
    # Schema the model must fill; also re-sent on malformed replies.
    json_schema = (
        '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
        ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
    )
    messages = [
        {
            "role": "user",
            "content": (
                f"I'm going to provide you with exam data, you will take the exam data and fill this json "
                f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
                'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
                'summary of the student\'s performance, "weak_areas" are identified areas'
                ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
                ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
                'for tips that will be displayed to the student, the category attribute is a collection of '
                'embeddings and the text will be the text used to query the knowledge base. The categories are '
                f'the following [{", ".join(self.TOOLS)}]. The exam data will be a json where the key of the field '
                '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
                ' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
                'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
                'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
                'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
                'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
                'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
                'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
                'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
            )
        },
        {
            "role": "user",
            "content": f'Exam Data: {str(exam_data)}'
        }
    ]
    return await self._llm.pydantic_prediction(messages, self._map_gpt_response, json_schema)
async def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
    """LLM pass: pick which retrieved tips are actually useful for this student.

    Returns a TipsDTO carrying the chosen tip ids, or None if the model never
    produced valid JSON within the retry budget.
    """
    json_schema = (
        '{ "tip_ids": [] }'
    )
    messages = [
        {
            "role": "user",
            "content": (
                f"I'm going to provide you with tips and I want you to return to me the tips that "
                f"can be usefull for the student that made the exam that I'm going to send you, return "
                f"me the tip ids in this json format {json_schema}."
            )
        },
        {
            "role": "user",
            "content": f'Exam Data: {str(exam_data)}'
        },
        {
            "role": "user",
            "content": f'Tips: {str(tips)}'
        }
    ]
    return await self._llm.pydantic_prediction(messages, lambda response: TipsDTO(**response), json_schema)
@staticmethod
def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
    """Convert the raw JSON dict returned by the LLM into a TrainingContentDTO.

    Raises (KeyError / pydantic validation error) on malformed input, which
    pydantic_prediction's retry loop relies on.
    """
    parsed_response = {
        "details": [DetailsDTO(**detail) for detail in response["details"]],
        "weak_areas": [WeakAreaDTO(**area) for area in response["weak_areas"]],
        "queries": [QueryDTO(**query) for query in response["queries"]]
    }
    return TrainingContentDTO(**parsed_response)
async def _sort_out_solutions(self, stats):
grouped_stats = {}
for stat in stats:
session_key = f'{str(stat["date"])}-{stat["user"]}'
module = stat["module"]
exam_id = stat["exam"]
if session_key not in grouped_stats:
grouped_stats[session_key] = {}
if module not in grouped_stats[session_key]:
grouped_stats[session_key][module] = {
"stats": [],
"exam_id": exam_id
}
grouped_stats[session_key][module]["stats"].append(stat)
exercises = {}
exam_map = {}
for session_key, modules in grouped_stats.items():
exercises[session_key] = {}
for module, module_stats in modules.items():
exercises[session_key][module] = {}
exam_id = module_stats["exam_id"]
if exam_id not in exercises[session_key][module]:
exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
exam_total_questions = 0
exam_total_correct = 0
for stat in module_stats["stats"]:
exam_total_questions += stat["score"]["total"]
exam_total_correct += stat["score"]["correct"]
exercises[session_key][module][exam_id]["date"] = stat["date"]
if session_key not in exam_map:
exam_map[session_key] = {"stat_ids": [], "score": 0}
exam_map[session_key]["stat_ids"].append(stat["id"])
exam = await self._db.get_doc_by_id(module, exam_id)
if module == "listening":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_listening_solutions(stat, exam))
elif module == "reading":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_reading_solutions(stat, exam))
elif module == "writing":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_writing_prompts_and_answers(stat, exam)
)
elif module == "speaking":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_speaking_solutions(stat, exam)
)
elif module == "level":
exercises[session_key][module][exam_id]["exercises"].extend(
self._get_level_solutions(stat, exam)
)
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
exam_map[session_key]["module"] = module
return {"exams": exercises}, exam_map
def _get_writing_prompts_and_answers(self, stat, exam):
result = []
try:
exercises = []
for solution in stat['solutions']:
answer = solution['solution']
exercise_id = solution['id']
exercises.append({
"exercise_id": exercise_id,
"answer": answer
})
for exercise in exercises:
for exam_exercise in exam["exercises"]:
if exam_exercise["id"] == exercise["exercise_id"]:
result.append({
"exercise": exam_exercise["prompt"],
"answer": exercise["answer"]
})
except KeyError as e:
self._logger.warning(f"Malformed stat object: {str(e)}")
return result
@staticmethod
def _get_mc_question(exercise, stat):
shuffle_maps = stat.get("shuffleMaps", [])
answer = stat["solutions"] if len(shuffle_maps) == 0 else []
if len(shuffle_maps) != 0:
for solution in stat["solutions"]:
shuffle_map = [
item["map"] for item in shuffle_maps
if item["questionID"] == solution["question"]
]
answer.append({
"question": solution["question"],
"option": shuffle_map[solution["option"]]
})
return {
"question": exercise["prompt"],
"exercise": exercise["questions"],
"answer": stat["solutions"]
}
@staticmethod
def _swap_key_name(d, original_key, new_key):
d[new_key] = d.pop(original_key)
return d
def _get_level_solutions(self, stat, exam):
    """Pair a level-test stat with its exercise definition from the exam doc.

    Supports "fillBlanks" and "multipleChoice" exercise types. Malformed
    documents are logged and whatever was collected so far is returned.
    """
    result = []
    try:
        for part in exam["parts"]:
            for exercise in part["exercises"]:
                if exercise["id"] == stat["exercise"]:
                    if stat["type"] == "fillBlanks":
                        result.append({
                            "prompt": exercise["prompt"],
                            "template": exercise["text"],
                            "words": exercise["words"],
                            "solutions": exercise["solutions"],
                            # Rename 'solution' -> 'option' so the answer
                            # shape matches the other modules.
                            "answer": [
                                self._swap_key_name(item, 'solution', 'option')
                                for item in stat["solutions"]
                            ]
                        })
                    elif stat["type"] == "multipleChoice":
                        result.append(self._get_mc_question(exercise, stat))
    except KeyError as e:
        # Best effort: a malformed stat/exam document yields a partial result.
        self._logger.warning(f"Malformed stat object: {str(e)}")
    return result
def _get_listening_solutions(self, stat, exam):
    """Pair a listening stat with its exercise definition from the exam doc.

    Supports "writeBlanks", "fillBlanks" and "multipleChoice" exercise types.
    Malformed documents are logged and a partial result is returned.
    """
    result = []
    try:
        for part in exam["parts"]:
            for exercise in part["exercises"]:
                if exercise["id"] == stat["exercise"]:
                    if stat["type"] == "writeBlanks":
                        result.append({
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            # NOTE(review): this branch uses the key
                            # "solution" while the others use "solutions" -
                            # confirm whether the asymmetry is intended.
                            "solution": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "fillBlanks":
                        result.append({
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "words": exercise["words"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "multipleChoice":
                        result.append(self._get_mc_question(exercise, stat))
    except KeyError as e:
        # Best effort: a malformed stat/exam document yields a partial result.
        self._logger.warning(f"Malformed stat object: {str(e)}")
    return result
@staticmethod
def _find_shuffle_map(shuffle_maps, question_id):
return next((item["map"] for item in shuffle_maps if item["questionID"] == question_id), None)
def _get_speaking_solutions(self, stat, exam):
    """Collect the grader's per-criterion comments plus question/transcript pairs.

    Returns a single-element list so it composes with the other module
    extractors. Malformed documents are logged and the partial result is
    still returned (possibly just {}).
    """
    result = {}
    try:
        result = {
            # Per-criterion grader comments from the stored evaluation.
            "comments": {
                key: value['comment'] for key, value in stat['solutions'][0]['evaluation']['task_response'].items()}
            ,
            "exercises": {}
        }
        for exercise in exam["exercises"]:
            if exercise["id"] == stat["exercise"]:
                if stat["type"] == "interactiveSpeaking":
                    # First record every question...
                    for i in range(len(exercise["prompts"])):
                        result["exercises"][f"exercise_{i+1}"] = {
                            "question": exercise["prompts"][i]["text"]
                        }
                    # ...then attach the matching transcript for each one
                    # (empty string when the evaluation has no transcript).
                    for i in range(len(exercise["prompts"])):
                        answer = stat['solutions'][0]["evaluation"].get(f'transcript_{i+1}', '')
                        result["exercises"][f"exercise_{i+1}"]["answer"] = answer
                elif stat["type"] == "speaking":
                    result["exercises"]["exercise_1"] = {
                        "question": exercise["text"],
                        # NOTE: the stray f-prefix on 'transcript' is harmless
                        # (the literal has no placeholders).
                        "answer": stat['solutions'][0]["evaluation"].get(f'transcript', '')
                    }
    except KeyError as e:
        # Best effort: a malformed stat/exam document yields a partial result.
        self._logger.warning(f"Malformed stat object: {str(e)}")
    return [result]
def _get_reading_solutions(self, stat, exam):
    """Pair a reading stat with its exercise definition (plus passage text).

    Supports "fillBlanks", "writeBlanks", "trueFalse" and "matchSentences"
    exercise types. Malformed documents are logged and a partial result is
    returned.
    """
    result = []
    try:
        for part in exam["parts"]:
            # The reading passage the exercises in this part refer to.
            text = part["text"]
            for exercise in part["exercises"]:
                if exercise["id"] == stat["exercise"]:
                    if stat["type"] == "fillBlanks":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "words": exercise["words"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "writeBlanks":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "template": exercise["text"],
                            "solutions": exercise["solutions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "trueFalse":
                        result.append({
                            "text": text,
                            "questions": exercise["questions"],
                            "answer": stat["solutions"]
                        })
                    elif stat["type"] == "matchSentences":
                        result.append({
                            "text": text,
                            "question": exercise["prompt"],
                            "sentences": exercise["sentences"],
                            "options": exercise["options"],
                            "answer": stat["solutions"]
                        })
    except KeyError as e:
        # Best effort: a malformed stat/exam document yields a partial result.
        self._logger.warning(f"Malformed stat object: {str(e)}")
    return result

View File

@@ -1,5 +1,7 @@
from typing import List, Dict
from app.services.abc import IWritingService, ILLMService, IAIDetectorService
from app.configs.constants import GPTModels, TemperatureSettings
from app.configs.constants import GPTModels, TemperatureSettings, FieldsAndExercises
from app.helpers import TextHelper, ExercisesHelper
@@ -17,10 +19,7 @@ class WritingService(IWritingService):
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
)
},
{
"role": "user",
"content": self._get_writing_prompt(task, topic, difficulty)
}
*self._get_writing_messages(task, topic, difficulty)
]
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
@@ -32,15 +31,18 @@ class WritingService(IWritingService):
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
question = response["prompt"].strip()
return {
"question": response["prompt"].strip(),
"question": self._add_newline_before_hyphen(question) if task == 1 else question,
"difficulty": difficulty,
"topic": topic
}
@staticmethod
def _get_writing_prompt(task: int, topic: str, difficulty: str):
return (
def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]:
# TODO: Should the muslim disclaimer be added to task 2?
task_prompt = (
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
'student to compose a letter. The prompt should present a specific scenario or situation, '
f'based on the topic of "{topic}", requiring the student to provide information, '
@@ -52,32 +54,41 @@ class WritingService(IWritingService):
f'analysis of contrasting perspectives on the topic of "{topic}".'
)
task_instructions = (
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
'the answer should include.'
) if task == 1 else (
'The question should lead to an answer with either "theories", "complicated information" or '
'be "very descriptive" on the topic.'
)
messages = [
{
"role": "user",
"content": task_prompt
},
{
"role": "user",
"content": task_instructions
}
]
return messages
async def grade_writing_task(self, task: int, question: str, answer: str):
bare_minimum = 100 if task == 1 else 180
minimum = 150 if task == 1 else 250
# TODO: left as is, don't know if this is intended or not
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
temperature = (
TemperatureSettings.GRADING_TEMPERATURE
if task == 1 else
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
if not TextHelper.has_words(answer):
return self._zero_rating("The answer does not contain enough english words.")
elif not TextHelper.has_x_words(answer, bare_minimum):
return self._zero_rating("The answer is insufficient and too small to be graded.")
else:
template = self._get_writing_template()
messages = [
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"perfect_answer": "example perfect answer", "comment": '
'"comment about answer quality", "overall": 0.0, "task_response": '
'{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
'"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }'
f'You are a helpful assistant designed to output JSON on this format: {template}'
)
},
{
@@ -86,16 +97,28 @@ class WritingService(IWritingService):
f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
'from the task, and assign a score of 0 if the response fails to address the question. '
f'Additionally, provide an exemplary answer with a minimum of {minimum} words, along with a '
'detailed commentary highlighting both strengths and weaknesses in the response. '
'Additionally, provide a detailed commentary highlighting both strengths and '
'weaknesses in the response. '
f'\n Question: "{question}" \n Answer: "{answer}"')
},
{
"role": "user",
"content": f'The perfect answer must have at least {minimum} words.'
}
]
if task == 1:
messages.append({
"role": "user",
"content": (
'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
'"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
)
})
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
temperature = (
TemperatureSettings.GRADING_TEMPERATURE
if task == 1 else
TemperatureSettings.GEN_QUESTION_TEMPERATURE
)
response = await self._llm.prediction(
llm_model,
messages,
@@ -103,6 +126,10 @@ class WritingService(IWritingService):
temperature
)
perfect_answer_minimum = 150 if task == 1 else 250
perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum)
response["perfect_answer"] = perfect_answer["perfect_answer"]
response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"])
response['fixed_text'] = await self._get_fixed_text(answer)
@@ -114,13 +141,20 @@ class WritingService(IWritingService):
async def _get_fixed_text(self, text):
messages = [
{"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
'{"fixed_text": "fixed test with no misspelling errors"}')
},
{"role": "user", "content": (
{
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: '
'{"fixed_text": "fixed test with no misspelling errors"}'
)
},
{
"role": "user",
"content": (
'Fix the errors in the given text and put it in a JSON. '
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"')
}
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
)
}
]
response = await self._llm.prediction(
@@ -132,16 +166,83 @@ class WritingService(IWritingService):
)
return response["fixed_text"]
async def _get_perfect_answer(self, question: str, size: int) -> Dict:
    """Ask the LLM for a model answer of at least `size` words for `question`.

    Returns the parsed JSON response: {"perfect_answer": "..."}.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"perfect_answer": "perfect answer for the question"}'
            )
        },
        {
            "role": "user",
            "content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
        },
        {
            "role": "user",
            "content": f'The answer must have at least {size} words'
        }
    ]
    # GPT-4o for quality; the LLM service validates the "perfect_answer" field.
    return await self._llm.prediction(
        GPTModels.GPT_4_O,
        messages,
        ["perfect_answer"],
        TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
@staticmethod
def _zero_rating(comment: str):
return {
'comment': comment,
'overall': 0,
'task_response': {
'Coherence and Cohesion': 0,
'Grammatical Range and Accuracy': 0,
'Lexical Resource': 0,
'Task Achievement': 0
'Task Achievement': {
"grade": 0.0,
"comment": ""
},
'Coherence and Cohesion': {
"grade": 0.0,
"comment": ""
},
'Lexical Resource': {
"grade": 0.0,
"comment": ""
},
'Grammatical Range and Accuracy': {
"grade": 0.0,
"comment": ""
}
}
}
@staticmethod
def _get_writing_template():
return {
"comment": "comment about student's response quality",
"overall": 0.0,
"task_response": {
"Task Achievement": {
"grade": 0.0,
"comment": "comment about Task Achievement of the student's response"
},
"Coherence and Cohesion": {
"grade": 0.0,
"comment": "comment about Coherence and Cohesion of the student's response"
},
"Lexical Resource": {
"grade": 0.0,
"comment": "comment about Lexical Resource of the student's response"
},
"Grammatical Range and Accuracy": {
"grade": 0.0,
"comment": "comment about Grammatical Range and Accuracy of the student's response"
}
}
}
@staticmethod
def _add_newline_before_hyphen(s):
return s.replace(" -", "\n-")

5
app/utils/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
from .handle_exception import handle_exception
__all__ = [
"handle_exception"
]

View File

@@ -0,0 +1,15 @@
import functools
from typing import Callable, Any
from fastapi import Response
def handle_exception(status_code: int = 500):
    """Decorator factory for async endpoints: turn any raised exception into a
    plain-text HTTP response with the given status code.

    NOTE(review): str(e) may expose internal error details to clients -
    confirm that is acceptable.
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                result = await func(*args, **kwargs)
            except Exception as exc:
                return Response(content=str(exc), status_code=status_code)
            return result
        return wrapper
    return decorator

View File

@@ -1,5 +1,5 @@
[tool.poetry]
name = "encoach-be"
name = "ielts-be"
version = "0.1.0"
description = ""
authors = ["Ecrop Devteam <company@ecrop.dev>"]
@@ -17,6 +17,14 @@ firebase-admin = "^6.5.0"
wonderwords = "^2.2.0"
dependency-injector = "^4.41.0"
openai = "^1.37.0"
python-multipart = "0.0.9"
faiss-cpu = "1.8.0.post1"
pypandoc = "1.13"
pdfplumber = "0.11.3"
numpy = "1.26.4"
pillow = "10.4.0"
sentence-transformers = "3.0.1"
openai-whisper = "20231117"
[build-system]

1
tmp/placeholder.txt Normal file
View File

@@ -0,0 +1 @@
THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO