Updated this to the latest version of develop and got rid of most of the duplication. Some packages might be missing from the toml file; needs testing.
This commit is contained in:
3
.env
3
.env
@@ -1,7 +1,8 @@
|
||||
ENV=local
|
||||
OPENAI_API_KEY=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
|
||||
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
|
||||
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
|
||||
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/storied-phalanx-349916.json
|
||||
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/encoach-staging.json
|
||||
HEY_GEN_TOKEN=MjY4MDE0MjdjZmNhNDFmYTlhZGRkNmI3MGFlMzYwZDItMTY5NTExNzY3MA==
|
||||
|
||||
GPT_ZERO_API_KEY=0195b9bb24c5439899f71230809c74af
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -2,5 +2,5 @@ __pycache__
|
||||
.idea
|
||||
.env
|
||||
.DS_Store
|
||||
firebase-configs/local.json
|
||||
.venv
|
||||
scripts
|
||||
|
||||
3
.idea/ielts-be.iml
generated
3
.idea/ielts-be.iml
generated
@@ -5,9 +5,10 @@
|
||||
</component>
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PackageRequirementsSettings">
|
||||
|
||||
5
.idea/misc.xml
generated
5
.idea/misc.xml
generated
@@ -1,6 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.11 (ielts-be)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (ielts-be)" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
||||
16
Dockerfile
16
Dockerfile
@@ -18,12 +18,16 @@ COPY . ./
|
||||
|
||||
COPY --from=requirements-stage /tmp/requirements.txt /app/requirements.txt
|
||||
|
||||
RUN apt update && apt install -y ffmpeg
|
||||
|
||||
RUN pip install openai-whisper
|
||||
|
||||
# The openai-whisper model is not compatible with the newer 2.0.0 numpy release
|
||||
RUN pip install --upgrade numpy<2
|
||||
RUN apt update && apt install -y \
|
||||
ffmpeg \
|
||||
poppler-utils \
|
||||
texlive-latex-base \
|
||||
texlive-fonts-recommended \
|
||||
texlive-latex-extra \
|
||||
texlive-xetex \
|
||||
pandoc \
|
||||
librsvg2-bin \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt
|
||||
|
||||
|
||||
92
README.md
92
README.md
@@ -1,27 +1,5 @@
|
||||
# Disclaimer
|
||||
Latest refactor from the develop branch, commit 5d5cd21 (2024-08-28).
|
||||
|
||||
I didn't fully test all the endpoints. The main purpose of this release was to make ielts-be async, but I've also
|
||||
separated logic through different layers, removed some duplication and implemented dependency injection, so there
|
||||
could be errors and extensive testing is needed before even considering deploying (if you're even considering it).
|
||||
|
||||
The version this was refactored from was the master branch, commit a4caecd (2024-06-13).
|
||||
|
||||
# Changes
|
||||
|
||||
Since one of my use cases is load testing with 5000 concurrent users and ielts-be is sync, I've refactored ielts-be
|
||||
into this fastapi app.
|
||||
|
||||
The ielts-be Dockerfile runs the container with:
|
||||
|
||||
```CMD exec gunicorn --bind 0.0.0.0:5000 --workers 1 --threads 8 --timeout 0 app:app```
|
||||
|
||||
And since gunicorn uses WSGI and ielts-be has mostly sync I/O blocking operations, every time a request encounters
|
||||
an I/O blocking operation a thread is blocked. Since this config is 1 worker with 8 threads, the container
|
||||
will only be able to handle 8 concurrent requests at a time before gcloud run cold starts another instance.
|
||||
|
||||
Flask was built with WSGI in mind, with Quart as its async alternative. Even though you can serve Flask
|
||||
with uvicorn using the [asgiref](https://pypi.org/project/asgiref/) adapter, FastAPI has better performance
|
||||
than both alternatives and the sync calls would need to be modified either way.
|
||||
|
||||
# Endpoints
|
||||
|
||||
@@ -29,34 +7,38 @@ In ielts-ui I've added a wrapper to every backend request in '/src/utils/transla
|
||||
new endpoints if the "BACKEND_TYPE" environment variable is set to "async", if the env variable is not present or
|
||||
with another value, the wrapper will return the old endpoint.
|
||||
|
||||
| Method | ielts-be | This one |
|
||||
|--------|--------------------------------------|------------------------------------------|
|
||||
| GET | /healthcheck | /api/healthcheck |
|
||||
| GET | /listening_section_1 | /api/listening/section/1 |
|
||||
| GET | /listening_section_2 | /api/listening/section/2 |
|
||||
| GET | /listening_section_3 | /api/listening/section/3 |
|
||||
| GET | /listening_section_4 | /api/listening/section/4 |
|
||||
| POST | /listening | /api/listening |
|
||||
| POST | /writing_task1 | /api/grade/writing/1 |
|
||||
| POST | /writing_task2 | /api/grade/writing/2 |
|
||||
| GET | /writing_task1_general | /api/writing/1 |
|
||||
| GET | /writing_task2_general | /api/writing/2 |
|
||||
| POST | /speaking_task_1 | /api/grade/speaking/1 |
|
||||
| POST | /speaking_task_2 | /api/grade/speaking/2 |
|
||||
| POST | /speaking_task_3 | /api/grade/speaking/3 |
|
||||
| GET | /speaking_task_1 | /api/speaking/1 |
|
||||
| GET | /speaking_task_2 | /api/speaking/2 |
|
||||
| GET | /speaking_task_3 | /api/speaking/3 |
|
||||
| POST | /speaking | /api/speaking |
|
||||
| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video |
|
||||
| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video |
|
||||
| GET | /reading_passage_1 | /api/reading/passage/1 |
|
||||
| GET | /reading_passage_2 | /api/reading/passage/2 |
|
||||
| GET | /reading_passage_3 | /api/reading/passage/3 |
|
||||
| GET | /level | /api/level |
|
||||
| GET | /level_utas | /api/level/utas |
|
||||
| POST | /fetch_tips | /api/training/tips |
|
||||
| POST | /grading_summary | /api/grade/summary |
|
||||
| Method | ielts-be | This one |
|
||||
|--------|--------------------------------------|---------------------------------------------|
|
||||
| GET | /healthcheck | /api/healthcheck |
|
||||
| GET | /listening_section_1 | /api/listening/section/1 |
|
||||
| GET | /listening_section_2 | /api/listening/section/2 |
|
||||
| GET | /listening_section_3 | /api/listening/section/3 |
|
||||
| GET | /listening_section_4 | /api/listening/section/4 |
|
||||
| POST | /listening | /api/listening |
|
||||
| POST | /writing_task1 | /api/grade/writing/1 |
|
||||
| POST | /writing_task2 | /api/grade/writing/2 |
|
||||
| GET | /writing_task1_general | /api/writing/1 |
|
||||
| GET | /writing_task2_general | /api/writing/2 |
|
||||
| POST | /speaking_task_1 | /api/grade/speaking/1 |
|
||||
| POST | /speaking_task_2 | /api/grade/speaking/2 |
|
||||
| POST | /speaking_task_3 | /api/grade/speaking/3 |
|
||||
| GET | /speaking_task_1 | /api/speaking/1 |
|
||||
| GET | /speaking_task_2 | /api/speaking/2 |
|
||||
| GET | /speaking_task_3 | /api/speaking/3 |
|
||||
| POST | /speaking | /api/speaking |
|
||||
| POST | /speaking/generate_speaking_video | /api/speaking/generate_speaking_video |
|
||||
| POST | /speaking/generate_interactive_video | /api/speaking/generate_interactive_video |
|
||||
| GET | /reading_passage_1 | /api/reading/passage/1 |
|
||||
| GET | /reading_passage_2 | /api/reading/passage/2 |
|
||||
| GET | /reading_passage_3 | /api/reading/passage/3 |
|
||||
| GET | /level | /api/level |
|
||||
| GET | /level_utas | /api/level/utas |
|
||||
| POST | /fetch_tips | /api/training/tips |
|
||||
| POST | /grading_summary | /api/grade/summary |
|
||||
| POST | /grade_short_answers | /api/grade/short_answers |
|
||||
| POST | /upload_level | /api/level/upload |
|
||||
| POST | /training_content | /api/training/ |
|
||||
| POST | /custom_level | /api/level/custom |
|
||||
|
||||
# Run the app
|
||||
|
||||
@@ -64,9 +46,7 @@ This is for Windows, creating venv and activating it may differ based on your OS
|
||||
|
||||
1. python -m venv env
|
||||
2. env\Scripts\activate
|
||||
3. pip install openai-whisper
|
||||
4. pip install --upgrade numpy<2
|
||||
5. pip install poetry
|
||||
6. poetry install
|
||||
7. python main.py
|
||||
3. pip install poetry
|
||||
4. poetry install
|
||||
5. python app.py
|
||||
|
||||
|
||||
@@ -2,7 +2,8 @@ from dependency_injector.wiring import inject, Provide
|
||||
from fastapi import APIRouter, Depends, Path, Request
|
||||
|
||||
from app.controllers.abc import IGradeController
|
||||
from app.dtos import WritingGradeTaskDTO
|
||||
from app.dtos.writing import WritingGradeTaskDTO
|
||||
from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO
|
||||
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
|
||||
|
||||
controller = "grade_controller"
|
||||
@@ -22,18 +23,29 @@ async def grade_writing_task(
|
||||
return await grade_controller.grade_writing_task(task, data)
|
||||
|
||||
|
||||
@grade_router.post(
    '/speaking/2',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_2(
    data: GradeSpeakingDTO,
    grade_controller: IGradeController = Depends(Provide[controller])
):
    """Grade a single speaking task 2 answer.

    The parsed request body is converted to a plain dict and wrapped in a
    one-element list, so the controller receives the same list-of-answers
    shape it is given for the other speaking tasks.

    Returns whatever ``grade_controller.grade_speaking_task`` returns.
    """
    return await grade_controller.grade_speaking_task(2, [data.dict()])
|
||||
|
||||
|
||||
@grade_router.post(
|
||||
'/speaking/{task}',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
)
|
||||
@inject
|
||||
async def grade_speaking_task(
|
||||
request: Request,
|
||||
async def grade_speaking_task_1_and_3(
|
||||
data: GradeSpeakingAnswersDTO,
|
||||
task: int = Path(..., ge=1, le=3),
|
||||
grade_controller: IGradeController = Depends(Provide[controller])
|
||||
):
|
||||
data = await request.json()
|
||||
return await grade_controller.grade_speaking_task(task, data)
|
||||
return await grade_controller.grade_speaking_task(task, data.answers)
|
||||
|
||||
|
||||
@grade_router.post(
|
||||
@@ -47,3 +59,16 @@ async def grading_summary(
|
||||
):
|
||||
data = await request.json()
|
||||
return await grade_controller.grading_summary(data)
|
||||
|
||||
|
||||
@grade_router.post(
    '/short_answers',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_short_answers(
    request: Request,
    grade_controller: IGradeController = Depends(Provide[controller])
):
    """Grade short answers from the raw JSON request body.

    The body is read with ``request.json()`` and passed to the controller
    unchanged; no DTO validation is applied at this layer.

    Returns whatever ``grade_controller.grade_short_answers`` returns.
    """
    data = await request.json()
    return await grade_controller.grade_short_answers(data)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from dependency_injector.wiring import Provide, inject
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi import APIRouter, Depends, UploadFile, Request
|
||||
|
||||
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
|
||||
from app.controllers.abc import ILevelController
|
||||
@@ -27,4 +27,29 @@ async def get_level_exam(
|
||||
async def get_level_utas(
|
||||
level_controller: ILevelController = Depends(Provide[controller])
|
||||
):
|
||||
return await level_controller.get_level_exam()
|
||||
return await level_controller.get_level_utas()
|
||||
|
||||
|
||||
@level_router.post(
    '/upload',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def upload(
    file: UploadFile,
    level_controller: ILevelController = Depends(Provide[controller])
):
    """Accept an uploaded file and hand it to the level controller.

    The ``UploadFile`` is forwarded as-is; this handler does not read or
    inspect its contents.

    Returns whatever ``level_controller.upload_level`` returns.
    """
    return await level_controller.upload_level(file)
|
||||
|
||||
|
||||
@level_router.post(
    '/custom',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def custom_level(
    request: Request,
    level_controller: ILevelController = Depends(Provide[controller])
):
    """Build a custom level exam from the raw JSON request body.

    The body is read with ``request.json()`` and passed to the controller
    unchanged; no DTO validation is applied at this layer.

    Returns whatever ``level_controller.get_custom_level`` returns.
    """
    data = await request.json()
    return await level_controller.get_custom_level(data)
|
||||
|
||||
@@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, Path
|
||||
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
|
||||
from app.controllers.abc import IListeningController
|
||||
from app.configs.constants import EducationalContent
|
||||
from app.dtos import SaveListeningDTO
|
||||
from app.dtos.listening import SaveListeningDTO
|
||||
|
||||
|
||||
controller = "listening_controller"
|
||||
|
||||
@@ -6,24 +6,40 @@ from fastapi import APIRouter, Path, Query, Depends, BackgroundTasks
|
||||
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
|
||||
from app.configs.constants import EducationalContent
|
||||
from app.controllers.abc import ISpeakingController
|
||||
from app.dtos import SaveSpeakingDTO, SpeakingGenerateVideoDTO, SpeakingGenerateInteractiveVideoDTO
|
||||
from app.dtos.speaking import (
|
||||
SaveSpeakingDTO, GenerateVideo1DTO, GenerateVideo2DTO, GenerateVideo3DTO
|
||||
)
|
||||
|
||||
controller = "speaking_controller"
|
||||
speaking_router = APIRouter()
|
||||
|
||||
|
||||
@speaking_router.get(
    '/1',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_speaking_task(
    first_topic: str = Query(default=None),
    second_topic: str = Query(default=None),
    difficulty: str = Query(default=None),
    speaking_controller: ISpeakingController = Depends(Provide[controller])
):
    """Return speaking part 1 content for two topics at a given difficulty.

    Any of ``first_topic``, ``second_topic`` and ``difficulty`` may be omitted
    from the query string; each omitted value is picked at random for this
    request from ``EducationalContent.MTI_TOPICS`` or
    ``EducationalContent.DIFFICULTIES``.

    Returns whatever ``speaking_controller.get_speaking_part`` returns.
    """
    # Default values in a signature are evaluated once, when the function is
    # defined. Putting random.choice(...) there would freeze a single "random"
    # topic/difficulty for the whole process lifetime, so the choice is made
    # here, per request, instead.
    if first_topic is None:
        first_topic = random.choice(EducationalContent.MTI_TOPICS)
    if second_topic is None:
        second_topic = random.choice(EducationalContent.MTI_TOPICS)
    if difficulty is None:
        difficulty = random.choice(EducationalContent.DIFFICULTIES)

    return await speaking_controller.get_speaking_part(1, first_topic, difficulty, second_topic)
|
||||
|
||||
|
||||
@speaking_router.get(
|
||||
'/{task}',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
)
|
||||
@inject
|
||||
async def get_speaking_task(
|
||||
task: int = Path(..., ge=1, le=3),
|
||||
task: int = Path(..., ge=2, le=3),
|
||||
topic: str = Query(default=random.choice(EducationalContent.MTI_TOPICS)),
|
||||
difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)),
|
||||
speaking_controller: ISpeakingController = Depends(Provide[controller])
|
||||
):
|
||||
return await speaking_controller.get_speaking_task(task, topic, difficulty)
|
||||
return await speaking_controller.get_speaking_part(task, topic, difficulty)
|
||||
|
||||
|
||||
@speaking_router.post(
|
||||
@@ -40,24 +56,42 @@ async def save_speaking(
|
||||
|
||||
|
||||
@speaking_router.post(
|
||||
'/generate_speaking_video',
|
||||
'/generate_video/1',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
)
|
||||
@inject
|
||||
async def generate_speaking_video(
|
||||
data: SpeakingGenerateVideoDTO,
|
||||
async def generate_video_1(
|
||||
data: GenerateVideo1DTO,
|
||||
speaking_controller: ISpeakingController = Depends(Provide[controller])
|
||||
):
|
||||
return await speaking_controller.generate_speaking_video(data)
|
||||
return await speaking_controller.generate_video(
|
||||
1, data.avatar, data.first_topic, data.questions, second_topic=data.second_topic
|
||||
)
|
||||
|
||||
|
||||
@speaking_router.post(
|
||||
'/generate_interactive_video',
|
||||
'/generate_video/2',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
)
|
||||
@inject
|
||||
async def generate_interactive_video(
|
||||
data: SpeakingGenerateInteractiveVideoDTO,
|
||||
async def generate_video_2(
|
||||
data: GenerateVideo2DTO,
|
||||
speaking_controller: ISpeakingController = Depends(Provide[controller])
|
||||
):
|
||||
return await speaking_controller.generate_interactive_video(data)
|
||||
return await speaking_controller.generate_video(
|
||||
2, data.avatar, data.topic, [data.question], prompts=data.prompts, suffix=data.suffix
|
||||
)
|
||||
|
||||
|
||||
@speaking_router.post(
    '/generate_video/3',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def generate_video_3(
    data: GenerateVideo3DTO,
    speaking_controller: ISpeakingController = Depends(Provide[controller])
):
    """Request video generation for speaking part 3.

    Passes the avatar, topic and question list from the request body to the
    controller with the part number fixed to 3. None of the optional keyword
    arguments of ``generate_video`` are supplied for this part.

    Returns whatever ``speaking_controller.generate_video`` returns.
    """
    return await speaking_controller.generate_video(
        3, data.avatar, data.topic, data.questions
    )
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from dependency_injector.wiring import Provide, inject
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
|
||||
from app.dtos import TipsDTO
|
||||
from app.dtos.training import FetchTipsDTO
|
||||
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
|
||||
from app.controllers.abc import ITrainingController
|
||||
|
||||
@@ -15,7 +15,20 @@ training_router = APIRouter()
|
||||
)
|
||||
@inject
|
||||
async def get_reading_passage(
|
||||
data: TipsDTO,
|
||||
data: FetchTipsDTO,
|
||||
training_controller: ITrainingController = Depends(Provide[controller])
|
||||
):
|
||||
return await training_controller.fetch_tips(data)
|
||||
|
||||
|
||||
@training_router.post(
    '/',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def training_content(
    request: Request,
    training_controller: ITrainingController = Depends(Provide[controller])
):
    """Produce training content from the raw JSON request body.

    The body is read with ``request.json()`` and passed to the controller
    unchanged; no DTO validation is applied at this layer.

    Returns whatever ``training_controller.get_training_content`` returns.
    """
    data = await request.json()
    return await training_controller.get_training_content(data)
|
||||
|
||||
@@ -2,7 +2,7 @@ from enum import Enum
|
||||
|
||||
BLACKLISTED_WORDS = ["jesus", "sex", "gay", "lesbian", "homosexual", "god", "angel", "pornography", "beer", "wine",
|
||||
"cocaine", "alcohol", "nudity", "lgbt", "casino", "gambling", "catholicism",
|
||||
"discrimination", "politics", "politic", "christianity", "islam", "christian", "christians",
|
||||
"discrimination", "politic", "christianity", "islam", "christian", "christians",
|
||||
"jews", "jew", "discrimination", "discriminatory"]
|
||||
|
||||
|
||||
@@ -11,6 +11,26 @@ class ExamVariant(Enum):
|
||||
PARTIAL = "partial"
|
||||
|
||||
|
||||
class CustomLevelExerciseTypes(Enum):
    """Exercise type identifiers accepted for custom level exams.

    Each member's value is the lowercase string form of its name.
    """

    # Level-style exercises
    MULTIPLE_CHOICE_4 = "multiple_choice_4"
    MULTIPLE_CHOICE_BLANK_SPACE = "multiple_choice_blank_space"
    MULTIPLE_CHOICE_UNDERLINED = "multiple_choice_underlined"
    BLANK_SPACE_TEXT = "blank_space_text"
    READING_PASSAGE_UTAS = "reading_passage_utas"

    # Writing
    WRITING_LETTER = "writing_letter"
    WRITING_2 = "writing_2"

    # Speaking
    SPEAKING_1 = "speaking_1"
    SPEAKING_2 = "speaking_2"
    SPEAKING_3 = "speaking_3"

    # Reading
    READING_1 = "reading_1"
    READING_2 = "reading_2"
    READING_3 = "reading_3"

    # Listening
    LISTENING_1 = "listening_1"
    LISTENING_2 = "listening_2"
    LISTENING_3 = "listening_3"
    LISTENING_4 = "listening_4"
|
||||
|
||||
|
||||
class QuestionType(Enum):
|
||||
LISTENING_SECTION_1 = "Listening Section 1"
|
||||
LISTENING_SECTION_2 = "Listening Section 2"
|
||||
@@ -63,7 +83,14 @@ class FieldsAndExercises:
|
||||
GEN_TEXT_FIELDS = ['title']
|
||||
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
|
||||
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
|
||||
READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
|
||||
|
||||
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
|
||||
LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
|
||||
'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
|
||||
LISTENING_2_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions']
|
||||
LISTENING_3_EXERCISE_TYPES = ['multipleChoice3Options', 'writeBlanksQuestions']
|
||||
LISTENING_4_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
|
||||
|
||||
TOTAL_READING_PASSAGE_1_EXERCISES = 13
|
||||
TOTAL_READING_PASSAGE_2_EXERCISES = 13
|
||||
@@ -218,7 +245,6 @@ class EducationalContent:
|
||||
"Space Exploration",
|
||||
"Artificial Intelligence",
|
||||
"Climate Change",
|
||||
"World Religions",
|
||||
"The Human Brain",
|
||||
"Renewable Energy",
|
||||
"Cultural Diversity",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from dependency_injector import providers, containers
|
||||
@@ -6,6 +7,7 @@ from openai import AsyncOpenAI
|
||||
from httpx import AsyncClient as HTTPClient
|
||||
from google.cloud.firestore_v1 import AsyncClient as FirestoreClient
|
||||
from dotenv import load_dotenv
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from app.repositories.impl import *
|
||||
from app.services.impl import *
|
||||
@@ -60,16 +62,26 @@ def config_di(
|
||||
|
||||
writing_service = providers.Factory(WritingService, llm=llm, ai_detector=ai_detector)
|
||||
|
||||
with open('app/services/impl/level/mc_variants.json', 'r') as file:
|
||||
mc_variants = json.load(file)
|
||||
|
||||
level_service = providers.Factory(
|
||||
LevelService, llm=llm, document_store=firestore, reading_service=reading_service
|
||||
LevelService, llm=llm, document_store=firestore, mc_variants=mc_variants, reading_service=reading_service,
|
||||
writing_service=writing_service, speaking_service=speaking_service, listening_service=listening_service
|
||||
)
|
||||
|
||||
grade_service = providers.Factory(
|
||||
GradeService, llm=llm
|
||||
)
|
||||
|
||||
embeddings = SentenceTransformer('all-MiniLM-L6-v2')
|
||||
|
||||
training_kb = providers.Factory(
|
||||
TrainingContentKnowledgeBase, embeddings=embeddings
|
||||
)
|
||||
|
||||
training_service = providers.Factory(
|
||||
TrainingService, llm=llm
|
||||
TrainingService, llm=llm, firestore=firestore, training_kb=training_kb
|
||||
)
|
||||
|
||||
# Controllers
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
class IGradeController(ABC):
|
||||
@@ -9,18 +9,14 @@ class IGradeController(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def grade_speaking_task(self, task: int, data: Dict):
|
||||
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def grade_short_answers(self, data: Dict):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def grading_summary(self, data: Dict):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _grade_speaking_task3(self, answers: Dict):
|
||||
pass
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from fastapi import UploadFile
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class ILevelController(ABC):
|
||||
|
||||
@@ -10,3 +13,11 @@ class ILevelController(ABC):
|
||||
@abstractmethod
|
||||
async def get_level_utas(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def upload_level(self, file: UploadFile):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_custom_level(self, data: Dict):
|
||||
pass
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import BackgroundTasks
|
||||
|
||||
|
||||
class ISpeakingController(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def get_speaking_task(self, task: int, topic: str, difficulty: str):
|
||||
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -13,9 +15,11 @@ class ISpeakingController(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def generate_speaking_video(self, data):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def generate_interactive_video(self, data):
|
||||
async def generate_video(
|
||||
self, part: int, avatar: str, topic: str, questions: list[str],
|
||||
*,
|
||||
second_topic: Optional[str] = None,
|
||||
prompts: Optional[list[str]] = None,
|
||||
suffix: Optional[str] = None,
|
||||
):
|
||||
pass
|
||||
|
||||
@@ -6,3 +6,7 @@ class ITrainingController(ABC):
|
||||
@abstractmethod
|
||||
async def fetch_tips(self, data):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_training_content(self, data):
|
||||
pass
|
||||
|
||||
@@ -1,17 +1,12 @@
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import HTTPException
|
||||
from pydantic import ValidationError
|
||||
from typing import Dict, List
|
||||
|
||||
from app.configs.constants import FilePaths
|
||||
from app.controllers.abc import IGradeController
|
||||
from app.dtos.speaking import SpeakingGradeTask1And2DTO, SpeakingGradeTask3DTO
|
||||
from app.dtos.writing import WritingGradeTaskDTO
|
||||
from app.helpers import IOHelper
|
||||
from app.helpers import FileHelper
|
||||
from app.services.abc import ISpeakingService, IWritingService, IGradeService
|
||||
from app.utils import handle_exception
|
||||
|
||||
|
||||
class GradeController(IGradeController):
|
||||
@@ -28,47 +23,20 @@ class GradeController(IGradeController):
|
||||
self._logger = logging.getLogger(__name__)
|
||||
|
||||
async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO):
|
||||
try:
|
||||
return await self._writing_service.grade_writing_task(task, data.question, data.answer)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
return await self._writing_service.grade_writing_task(task, data.question, data.answer)
|
||||
|
||||
async def grade_speaking_task(self, task: int, data: Dict):
|
||||
try:
|
||||
if task in {1, 2}:
|
||||
body = SpeakingGradeTask1And2DTO(**data)
|
||||
return await self._grade_speaking_task_1_2(task, body.question, body.answer)
|
||||
else:
|
||||
body = SpeakingGradeTask3DTO(**data)
|
||||
return await self._grade_speaking_task3(body.answers)
|
||||
except ValidationError as e:
|
||||
raise HTTPException(status_code=422, detail=e.errors())
|
||||
@handle_exception(400)
|
||||
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
|
||||
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
|
||||
return await self._speaking_service.grade_speaking_task(task, answers)
|
||||
|
||||
async def grade_short_answers(self, data: Dict):
|
||||
return await self._service.grade_short_answers(data)
|
||||
|
||||
async def grading_summary(self, data: Dict):
|
||||
try:
|
||||
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
|
||||
extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
|
||||
return await self._service.calculate_grading_summary(extracted_sections)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
async def _grade_speaking_task_1_2(self, task: int, question: str, answer_firebase_path: str):
|
||||
sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
|
||||
try:
|
||||
IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
|
||||
return await self._speaking_service.grade_speaking_task_1_and_2(
|
||||
task, question, answer_firebase_path, sound_file_name
|
||||
)
|
||||
except Exception as e:
|
||||
os.remove(sound_file_name)
|
||||
return str(e), 400
|
||||
|
||||
async def _grade_speaking_task3(self, answers: Dict):
|
||||
try:
|
||||
IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
|
||||
return await self._speaking_service.grade_speaking_task_3(answers)
|
||||
except Exception as e:
|
||||
return str(e), 400
|
||||
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
|
||||
extracted_sections = self._extract_existing_sections_from_body(data, section_keys)
|
||||
return await self._service.calculate_grading_summary(extracted_sections)
|
||||
|
||||
@staticmethod
|
||||
def _extract_existing_sections_from_body(my_dict, keys_to_extract):
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
from fastapi import UploadFile
|
||||
from typing import Dict
|
||||
|
||||
from app.controllers.abc import ILevelController
|
||||
from app.services.abc import ILevelService
|
||||
|
||||
@@ -8,13 +11,13 @@ class LevelController(ILevelController):
|
||||
self._service = level_service
|
||||
|
||||
async def get_level_exam(self):
|
||||
try:
|
||||
return await self._service.get_level_exam()
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
return await self._service.get_level_exam()
|
||||
|
||||
async def get_level_utas(self):
|
||||
try:
|
||||
return await self._service.get_level_utas()
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
return await self._service.get_level_utas()
|
||||
|
||||
async def upload_level(self, file: UploadFile):
|
||||
return await self._service.upload_level(file)
|
||||
|
||||
async def get_custom_level(self, data: Dict):
|
||||
return await self._service.get_custom_level(data)
|
||||
|
||||
@@ -1,97 +1,19 @@
|
||||
import random
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
from app.controllers.abc import IListeningController
|
||||
from app.dtos import SaveListeningDTO
|
||||
from app.dtos.listening import SaveListeningDTO
|
||||
from app.services.abc import IListeningService
|
||||
from app.helpers import IOHelper, ExercisesHelper
|
||||
from app.configs.constants import (
|
||||
FilePaths, EducationalContent, FieldsAndExercises
|
||||
)
|
||||
|
||||
|
||||
class ListeningController(IListeningController):
|
||||
|
||||
def __init__(self, listening_service: IListeningService):
|
||||
self._service = listening_service
|
||||
self._logger = logging.getLogger(__name__)
|
||||
self._sections = {
|
||||
"section_1": {
|
||||
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
|
||||
"exercise_sample_size": 1,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
|
||||
"type": "conversation",
|
||||
"start_id": 1
|
||||
},
|
||||
"section_2": {
|
||||
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
|
||||
"exercise_sample_size": 2,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
|
||||
"type": "monologue",
|
||||
"start_id": 11
|
||||
},
|
||||
"section_3": {
|
||||
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
|
||||
"exercise_sample_size": 1,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
|
||||
"type": "conversation",
|
||||
"start_id": 21
|
||||
},
|
||||
"section_4": {
|
||||
"topic": EducationalContent.ACADEMIC_SUBJECTS,
|
||||
"exercise_sample_size": 2,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
|
||||
"type": "monologue",
|
||||
"start_id": 31
|
||||
}
|
||||
}
|
||||
|
||||
async def get_listening_question(self, section_id: int, topic: str, req_exercises: List[str], difficulty: str):
|
||||
try:
|
||||
IOHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
|
||||
section = self._sections[f"section_{str(section_id)}"]
|
||||
if not topic:
|
||||
topic = random.choice(section["topic"])
|
||||
|
||||
if len(req_exercises) == 0:
|
||||
req_exercises = random.sample(FieldsAndExercises.LISTENING_EXERCISE_TYPES, section["exercise_sample_size"])
|
||||
|
||||
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(section["total_exercises"], len(req_exercises))
|
||||
|
||||
dialog = await self._service.generate_listening_question(section_id, topic)
|
||||
|
||||
if section_id in {1, 3}:
|
||||
dialog = self.parse_conversation(dialog)
|
||||
|
||||
self._logger.info(f'Generated {section["type"]}: {str(dialog)}')
|
||||
|
||||
exercises = await self._service.generate_listening_exercises(
|
||||
section_id, str(dialog), req_exercises, number_of_exercises_q, section["start_id"], difficulty
|
||||
)
|
||||
|
||||
return {
|
||||
"exercises": exercises,
|
||||
"text": dialog,
|
||||
"difficulty": difficulty
|
||||
}
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
async def get_listening_question(
|
||||
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str
|
||||
):
|
||||
return await self._service.get_listening_question(section_id, topic, req_exercises, difficulty)
|
||||
|
||||
async def save_listening(self, data: SaveListeningDTO):
|
||||
try:
|
||||
return await self._service.save_listening(data.parts, data.minTimer, data.difficulty)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
@staticmethod
|
||||
def parse_conversation(conversation_data):
|
||||
conversation_list = conversation_data.get('conversation', [])
|
||||
readable_text = []
|
||||
|
||||
for message in conversation_list:
|
||||
name = message.get('name', 'Unknown')
|
||||
text = message.get('text', '')
|
||||
readable_text.append(f"{name}: {text}")
|
||||
|
||||
return "\n".join(readable_text)
|
||||
return await self._service.save_listening(data.parts, data.minTimer, data.difficulty, data.id)
|
||||
|
||||
@@ -15,29 +15,29 @@ class ReadingController(IReadingController):
|
||||
self._logger = logging.getLogger(__name__)
|
||||
self._passages = {
|
||||
"passage_1": {
|
||||
"start_id": 1,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_1_EXERCISES
|
||||
},
|
||||
"passage_2": {
|
||||
"start_id": 14,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_2_EXERCISES
|
||||
},
|
||||
"passage_3": {
|
||||
"start_id": 27,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_READING_PASSAGE_3_EXERCISES
|
||||
}
|
||||
}
|
||||
|
||||
async def get_reading_passage(self, passage_id: int, topic: str, req_exercises: List[str], difficulty: str):
|
||||
try:
|
||||
passage = self._passages[f'passage_{str(passage_id)}']
|
||||
passage = self._passages[f'passage_{str(passage_id)}']
|
||||
|
||||
if len(req_exercises) == 0:
|
||||
req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
|
||||
if len(req_exercises) == 0:
|
||||
req_exercises = random.sample(FieldsAndExercises.READING_EXERCISE_TYPES, 2)
|
||||
|
||||
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
|
||||
passage["total_exercises"], len(req_exercises)
|
||||
)
|
||||
number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
|
||||
passage["total_exercises"], len(req_exercises)
|
||||
)
|
||||
|
||||
return await self._service.gen_reading_passage(
|
||||
passage_id, topic, req_exercises, number_of_exercises_q, difficulty
|
||||
)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
return await self._service.gen_reading_passage(
|
||||
passage_id, topic, req_exercises, number_of_exercises_q, difficulty, passage["start_id"]
|
||||
)
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import BackgroundTasks
|
||||
|
||||
from app.controllers.abc import ISpeakingController
|
||||
from app.dtos import (
|
||||
SaveSpeakingDTO, SpeakingGenerateVideoDTO,
|
||||
SpeakingGenerateInteractiveVideoDTO
|
||||
)
|
||||
from app.dtos.speaking import SaveSpeakingDTO
|
||||
|
||||
from app.services.abc import ISpeakingService
|
||||
from app.configs.constants import ExamVariant, MinTimers
|
||||
from app.configs.question_templates import getSpeakingTemplate
|
||||
@@ -19,45 +18,30 @@ class SpeakingController(ISpeakingController):
|
||||
self._service = speaking_service
|
||||
self._logger = logging.getLogger(__name__)
|
||||
|
||||
async def get_speaking_task(self, task: int, topic: str, difficulty: str):
|
||||
try:
|
||||
return await self._service.get_speaking_task(task, topic, difficulty)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
async def get_speaking_part(self, task: int, topic: str, difficulty: str, second_topic: Optional[str] = None):
|
||||
return await self._service.get_speaking_part(task, topic, difficulty, second_topic)
|
||||
|
||||
async def save_speaking(self, data: SaveSpeakingDTO, background_tasks: BackgroundTasks):
|
||||
try:
|
||||
exercises = data.exercises
|
||||
min_timer = data.minTimer
|
||||
exercises = data.exercises
|
||||
min_timer = data.minTimer
|
||||
|
||||
template = getSpeakingTemplate()
|
||||
template["minTimer"] = min_timer
|
||||
template = getSpeakingTemplate()
|
||||
template["minTimer"] = min_timer
|
||||
|
||||
if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
|
||||
template["variant"] = ExamVariant.PARTIAL.value
|
||||
else:
|
||||
template["variant"] = ExamVariant.FULL.value
|
||||
if min_timer < MinTimers.SPEAKING_MIN_TIMER_DEFAULT:
|
||||
template["variant"] = ExamVariant.PARTIAL.value
|
||||
else:
|
||||
template["variant"] = ExamVariant.FULL.value
|
||||
|
||||
req_id = str(uuid.uuid4())
|
||||
self._logger.info(f'Received request to save speaking with id: {req_id}')
|
||||
req_id = str(uuid.uuid4())
|
||||
self._logger.info(f'Received request to save speaking with id: {req_id}')
|
||||
|
||||
background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
|
||||
background_tasks.add_task(self._service.create_videos_and_save_to_db, exercises, template, req_id)
|
||||
|
||||
self._logger.info('Started background task to save speaking.')
|
||||
self._logger.info('Started background task to save speaking.')
|
||||
|
||||
# Return response without waiting for create_videos_and_save_to_db to finish
|
||||
return {**template, "id": req_id}
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
# Return response without waiting for create_videos_and_save_to_db to finish
|
||||
return {**template, "id": req_id}
|
||||
|
||||
async def generate_speaking_video(self, data: SpeakingGenerateVideoDTO):
|
||||
try:
|
||||
return await self._service.generate_speaking_video(data.question, data.topic, data.avatar, data.prompts)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
async def generate_interactive_video(self, data: SpeakingGenerateInteractiveVideoDTO):
|
||||
try:
|
||||
return await self._service.generate_interactive_video(data.questions, data.topic, data.avatar)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
async def generate_video(self, *args, **kwargs):
|
||||
return await self._service.generate_video(*args, **kwargs)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from typing import Dict
|
||||
|
||||
from app.controllers.abc import ITrainingController
|
||||
from app.dtos import TipsDTO
|
||||
from app.dtos.training import FetchTipsDTO
|
||||
from app.services.abc import ITrainingService
|
||||
|
||||
|
||||
@@ -8,8 +10,8 @@ class TrainingController(ITrainingController):
|
||||
def __init__(self, training_service: ITrainingService):
|
||||
self._service = training_service
|
||||
|
||||
async def fetch_tips(self, data: TipsDTO):
|
||||
try:
|
||||
return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
async def fetch_tips(self, data: FetchTipsDTO):
|
||||
return await self._service.fetch_tips(data.context, data.question, data.answer, data.correct_answer)
|
||||
|
||||
async def get_training_content(self, data: Dict):
|
||||
return await self._service.get_training_content(data)
|
||||
|
||||
@@ -8,7 +8,4 @@ class WritingController(IWritingController):
|
||||
self._service = writing_service
|
||||
|
||||
async def get_writing_task_general_question(self, task: int, topic: str, difficulty: str):
|
||||
try:
|
||||
return await self._service.get_writing_task_general_question(task, topic, difficulty)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
return await self._service.get_writing_task_general_question(task, topic, difficulty)
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
from .listening import SaveListeningDTO
|
||||
from .speaking import (
|
||||
SaveSpeakingDTO, SpeakingGradeTask1And2DTO,
|
||||
SpeakingGradeTask3DTO, SpeakingGenerateVideoDTO,
|
||||
SpeakingGenerateInteractiveVideoDTO
|
||||
)
|
||||
from .training import TipsDTO
|
||||
from .writing import WritingGradeTaskDTO
|
||||
|
||||
__all__ = [
|
||||
"SaveListeningDTO",
|
||||
"SaveSpeakingDTO",
|
||||
"SpeakingGradeTask1And2DTO",
|
||||
"SpeakingGradeTask3DTO",
|
||||
"SpeakingGenerateVideoDTO",
|
||||
"SpeakingGenerateInteractiveVideoDTO",
|
||||
"TipsDTO",
|
||||
"WritingGradeTaskDTO"
|
||||
]
|
||||
|
||||
57
app/dtos/exam.py
Normal file
57
app/dtos/exam.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Union, Optional
|
||||
from uuid import uuid4, UUID
|
||||
|
||||
|
||||
class Option(BaseModel):
|
||||
id: str
|
||||
text: str
|
||||
|
||||
|
||||
class MultipleChoiceQuestion(BaseModel):
|
||||
id: str
|
||||
prompt: str
|
||||
variant: str = "text"
|
||||
solution: str
|
||||
options: List[Option]
|
||||
|
||||
|
||||
class MultipleChoiceExercise(BaseModel):
|
||||
id: UUID = Field(default_factory=uuid4)
|
||||
type: str = "multipleChoice"
|
||||
prompt: str = "Select the appropriate option."
|
||||
questions: List[MultipleChoiceQuestion]
|
||||
userSolutions: List = Field(default_factory=list)
|
||||
|
||||
|
||||
class FillBlanksWord(BaseModel):
|
||||
id: str
|
||||
options: Dict[str, str]
|
||||
|
||||
|
||||
class FillBlanksSolution(BaseModel):
|
||||
id: str
|
||||
solution: str
|
||||
|
||||
|
||||
class FillBlanksExercise(BaseModel):
|
||||
id: UUID = Field(default_factory=uuid4)
|
||||
type: str = "fillBlanks"
|
||||
variant: str = "mc"
|
||||
prompt: str = "Click a blank to select the appropriate word for it."
|
||||
text: str
|
||||
solutions: List[FillBlanksSolution]
|
||||
words: List[FillBlanksWord]
|
||||
userSolutions: List = Field(default_factory=list)
|
||||
|
||||
|
||||
Exercise = Union[MultipleChoiceExercise, FillBlanksExercise]
|
||||
|
||||
|
||||
class Part(BaseModel):
|
||||
exercises: List[Exercise]
|
||||
context: Optional[str] = Field(default=None)
|
||||
|
||||
|
||||
class Exam(BaseModel):
|
||||
parts: List[Part]
|
||||
@@ -1,4 +1,5 @@
|
||||
import random
|
||||
import uuid
|
||||
from typing import List, Dict
|
||||
|
||||
from pydantic import BaseModel
|
||||
@@ -10,3 +11,4 @@ class SaveListeningDTO(BaseModel):
|
||||
parts: List[Dict]
|
||||
minTimer: int = MinTimers.LISTENING_MIN_TIMER_DEFAULT
|
||||
difficulty: str = random.choice(EducationalContent.DIFFICULTIES)
|
||||
id: str = str(uuid.uuid4())
|
||||
|
||||
29
app/dtos/sheet.py
Normal file
29
app/dtos/sheet.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Dict, Union, Any, Optional
|
||||
|
||||
|
||||
class Option(BaseModel):
|
||||
id: str
|
||||
text: str
|
||||
|
||||
|
||||
class MultipleChoiceQuestion(BaseModel):
|
||||
type: str = "multipleChoice"
|
||||
id: str
|
||||
prompt: str
|
||||
variant: str = "text"
|
||||
options: List[Option]
|
||||
|
||||
|
||||
class FillBlanksWord(BaseModel):
|
||||
type: str = "fillBlanks"
|
||||
id: str
|
||||
options: Dict[str, str]
|
||||
|
||||
|
||||
Component = Union[MultipleChoiceQuestion, FillBlanksWord, Dict[str, Any]]
|
||||
|
||||
|
||||
class Sheet(BaseModel):
|
||||
batch: Optional[int] = None
|
||||
components: List[Component]
|
||||
@@ -11,23 +11,31 @@ class SaveSpeakingDTO(BaseModel):
|
||||
minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT
|
||||
|
||||
|
||||
class SpeakingGradeTask1And2DTO(BaseModel):
|
||||
class GradeSpeakingDTO(BaseModel):
|
||||
question: str
|
||||
answer: str
|
||||
|
||||
|
||||
class SpeakingGradeTask3DTO(BaseModel):
|
||||
answers: Dict
|
||||
class GradeSpeakingAnswersDTO(BaseModel):
|
||||
answers: List[Dict]
|
||||
|
||||
|
||||
class SpeakingGenerateVideoDTO(BaseModel):
|
||||
class GenerateVideo1DTO(BaseModel):
|
||||
avatar: str = (random.choice(list(AvatarEnum))).value
|
||||
questions: List[str]
|
||||
first_topic: str
|
||||
second_topic: str
|
||||
|
||||
|
||||
class GenerateVideo2DTO(BaseModel):
|
||||
avatar: str = (random.choice(list(AvatarEnum))).value
|
||||
prompts: List[str] = []
|
||||
suffix: str = ""
|
||||
question: str
|
||||
topic: str
|
||||
|
||||
|
||||
class SpeakingGenerateInteractiveVideoDTO(BaseModel):
|
||||
class GenerateVideo3DTO(BaseModel):
|
||||
avatar: str = (random.choice(list(AvatarEnum))).value
|
||||
questions: List[str]
|
||||
topic: str
|
||||
|
||||
@@ -1,8 +1,37 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
|
||||
class TipsDTO(BaseModel):
|
||||
class FetchTipsDTO(BaseModel):
|
||||
context: str
|
||||
question: str
|
||||
answer: str
|
||||
correct_answer: str
|
||||
|
||||
|
||||
class QueryDTO(BaseModel):
|
||||
category: str
|
||||
text: str
|
||||
|
||||
|
||||
class DetailsDTO(BaseModel):
|
||||
exam_id: str
|
||||
date: int
|
||||
performance_comment: str
|
||||
detailed_summary: str
|
||||
|
||||
|
||||
class WeakAreaDTO(BaseModel):
|
||||
area: str
|
||||
comment: str
|
||||
|
||||
|
||||
class TrainingContentDTO(BaseModel):
|
||||
details: List[DetailsDTO]
|
||||
weak_areas: List[WeakAreaDTO]
|
||||
queries: List[QueryDTO]
|
||||
|
||||
|
||||
class TipsDTO(BaseModel):
|
||||
tip_ids: List[str]
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
from .io import IOHelper
|
||||
from .text_helper import TextHelper
|
||||
from .file import FileHelper
|
||||
from .text import TextHelper
|
||||
from .token_counter import count_tokens
|
||||
from .exercises_helper import ExercisesHelper
|
||||
from .exercises import ExercisesHelper
|
||||
from .logger import LoggerHelper
|
||||
|
||||
__all__ = [
|
||||
"IOHelper",
|
||||
"FileHelper",
|
||||
"TextHelper",
|
||||
"count_tokens",
|
||||
"ExercisesHelper"
|
||||
"ExercisesHelper",
|
||||
"LoggerHelper"
|
||||
]
|
||||
|
||||
@@ -4,7 +4,7 @@ import re
|
||||
import string
|
||||
from wonderwords import RandomWord
|
||||
|
||||
from .text_helper import TextHelper
|
||||
from .text import TextHelper
|
||||
|
||||
|
||||
class ExercisesHelper:
|
||||
@@ -70,7 +70,12 @@ class ExercisesHelper:
|
||||
|
||||
random.shuffle(combined_array)
|
||||
|
||||
return combined_array
|
||||
result = []
|
||||
for i, word in enumerate(combined_array):
|
||||
letter = chr(65 + i) # chr(65) is 'A'
|
||||
result.append({"letter": letter, "word": word})
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def fillblanks_build_solutions_array(words, start_id):
|
||||
@@ -187,9 +192,58 @@ class ExercisesHelper:
|
||||
|
||||
@staticmethod
|
||||
def fix_writing_overall(overall: float, task_response: dict):
|
||||
if overall > max(task_response.values()) or overall < min(task_response.values()):
|
||||
total_sum = sum(task_response.values())
|
||||
average = total_sum / len(task_response.values())
|
||||
grades = [category["grade"] for category in task_response.values()]
|
||||
|
||||
if overall > max(grades) or overall < min(grades):
|
||||
total_sum = sum(grades)
|
||||
average = total_sum / len(grades)
|
||||
rounded_average = round(average, 0)
|
||||
return rounded_average
|
||||
|
||||
return overall
|
||||
|
||||
@staticmethod
|
||||
def build_options(ideas):
|
||||
options = []
|
||||
letters = iter(string.ascii_uppercase)
|
||||
for idea in ideas:
|
||||
options.append({
|
||||
"id": next(letters),
|
||||
"sentence": idea["from"]
|
||||
})
|
||||
return options
|
||||
|
||||
@staticmethod
|
||||
def build_sentences(ideas, start_id):
|
||||
sentences = []
|
||||
letters = iter(string.ascii_uppercase)
|
||||
for idea in ideas:
|
||||
sentences.append({
|
||||
"solution": next(letters),
|
||||
"sentence": idea["idea"]
|
||||
})
|
||||
|
||||
random.shuffle(sentences)
|
||||
for i, sentence in enumerate(sentences, start=start_id):
|
||||
sentence["id"] = i
|
||||
return sentences
|
||||
|
||||
@staticmethod
|
||||
def randomize_mc_options_order(questions):
|
||||
option_ids = ['A', 'B', 'C', 'D']
|
||||
|
||||
for question in questions:
|
||||
# Store the original solution text
|
||||
original_solution_text = next(
|
||||
option['text'] for option in question['options'] if option['id'] == question['solution'])
|
||||
|
||||
# Shuffle the options
|
||||
random.shuffle(question['options'])
|
||||
|
||||
# Update the option ids and find the new solution id
|
||||
for idx, option in enumerate(question['options']):
|
||||
option['id'] = option_ids[idx]
|
||||
if option['text'] == original_solution_text:
|
||||
question['solution'] = option['id']
|
||||
|
||||
return questions
|
||||
95
app/helpers/file.py
Normal file
95
app/helpers/file.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pypandoc
|
||||
from PIL import Image
|
||||
|
||||
import aiofiles
|
||||
|
||||
|
||||
class FileHelper:
    """Stateless helpers for temp-file cleanup, pandoc conversion and PDF page images."""

    @staticmethod
    def delete_files_older_than_one_day(directory: str):
        """Delete regular files in *directory* last modified more than one day ago.

        Files whose name contains ``"placeholder"`` are always kept, and
        sub-directories are not entered. Each deletion is printed.
        """
        # Compare the full duration: ``timedelta.days > 1`` would only become
        # true once a file is at least two days old.
        cutoff = datetime.datetime.now() - datetime.timedelta(days=1)

        for entry in os.scandir(directory):
            if not entry.is_file():
                continue
            file_path = Path(entry)
            if "placeholder" in file_path.name:
                continue
            file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
            if file_modified_time < cutoff:
                file_path.unlink()
                print(f"Deleted file: {file_path}")

    # Supposedly pandoc covers a wide range of file extensions only tested with docx
    @staticmethod
    def convert_file_to_pdf(input_path: str, output_path: str):
        """Convert *input_path* to a PDF at *output_path* through pandoc.

        The page is set to 5.5in x 8.5in with 0.5in margins and an empty
        page style.
        """
        pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
            '-V', 'geometry:paperwidth=5.5in',
            '-V', 'geometry:paperheight=8.5in',
            '-V', 'geometry:margin=0.5in',
            '-V', 'pagestyle=empty'
        ])

    @staticmethod
    def convert_file_to_html(input_path: str, output_path: str):
        """Convert *input_path* to HTML at *output_path* through pandoc."""
        pypandoc.convert_file(input_path, 'html', outputfile=output_path)

    @staticmethod
    def pdf_to_png(path_id: str):
        """Run ``pdftoppm`` on ``exercises.pdf`` inside ``./tmp/<path_id>``.

        The command writes its PNG output with the ``page`` prefix into that
        same directory.

        Raises:
            Exception: if the command exits with a non-zero status; the
                message carries the command and its stderr.
        """
        # The command is a constant; path_id only selects the working directory.
        to_png = "pdftoppm -png exercises.pdf page"
        result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(
                f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")

    @staticmethod
    def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
        """Return True when the image has at most *image_threshold* non-white pixels.

        The image is converted to 8-bit grayscale; every pixel below 255
        counts as non-white.
        """
        with Image.open(io.BytesIO(image_bytes)) as img:
            img_gray = img.convert('L')
            img_array = np.array(img_gray)
            non_white_pixels = np.sum(img_array < 255)

        # Cast so the result is a plain bool, as annotated, not a numpy bool.
        return bool(non_white_pixels <= image_threshold)

    @classmethod
    def _b64_unless_blank(cls, image_bytes: bytes, image_threshold=10) -> Optional[str]:
        """Base64-encode *image_bytes*, or return None when the page is blank."""
        if cls.is_page_blank(image_bytes, image_threshold):
            return None
        return base64.b64encode(image_bytes).decode('utf-8')

    @classmethod
    async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
        """Asynchronously read *image_path* and return it base64-encoded, or None if blank."""
        async with aiofiles.open(image_path, "rb") as image_file:
            image_bytes = await image_file.read()

        return cls._b64_unless_blank(image_bytes, image_threshold)

    @classmethod
    def b64_pngs(cls, path_id: str, files: list[str]):
        """Build ``image_url`` message parts for the non-blank PNGs of a request.

        Args:
            path_id: name of the working directory under ``./tmp``.
            files: file names inside that directory.

        Returns:
            A list of ``{"type": "image_url", "image_url": {"url": <data URL>}}``
            dicts, one per non-blank image, in the order of *files*.
        """
        png_messages = []
        for filename in files:
            # This method is synchronous, so it cannot await _encode_image:
            # calling it bare yields a coroutine object, which is always
            # truthy and would be formatted into the URL. Read the file
            # synchronously instead.
            with open(os.path.join(f'./tmp/{path_id}', filename), "rb") as image_file:
                image_bytes = image_file.read()

            b64_string = cls._b64_unless_blank(image_bytes)
            if b64_string:
                png_messages.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{b64_string}"
                    }
                })
        return png_messages

    @staticmethod
    def remove_directory(path):
        """Recursively remove *path* if it is an existing directory.

        Best effort: any error is printed and swallowed, never raised.
        """
        try:
            if os.path.exists(path):
                if os.path.isdir(path):
                    shutil.rmtree(path)
        except Exception as e:
            print(f"An error occurred while trying to remove {path}: {str(e)}")
|
||||
@@ -1,20 +0,0 @@
|
||||
import datetime
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class IOHelper:
|
||||
|
||||
@staticmethod
|
||||
def delete_files_older_than_one_day(directory: str):
|
||||
current_time = datetime.datetime.now()
|
||||
|
||||
for entry in os.scandir(directory):
|
||||
if entry.is_file():
|
||||
file_path = Path(entry)
|
||||
file_name = file_path.name
|
||||
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
|
||||
time_difference = current_time - file_modified_time
|
||||
if time_difference.days > 1 and "placeholder" not in file_name:
|
||||
file_path.unlink()
|
||||
print(f"Deleted file: {file_path}")
|
||||
23
app/helpers/logger.py
Normal file
23
app/helpers/logger.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
|
||||
class LoggerHelper:
    """Logging-related decorators."""

    @staticmethod
    def suppress_loggers():
        """Return a decorator that runs a function with the root logger at ERROR.

        The root logger's level is saved before the call and restored
        afterwards, whether the call returns or raises. The wrapper calls the
        function synchronously, so for an ``async def`` function the level is
        restored as soon as the coroutine object is created, not when it
        finishes.
        """

        def decorator(f):
            @wraps(f)
            def wrapped(*args, **kwargs):
                root = logging.getLogger()
                saved_level = root.level
                root.setLevel(logging.ERROR)
                try:
                    result = f(*args, **kwargs)
                finally:
                    # Always undo the temporary level, even on exceptions.
                    root.setLevel(saved_level)
                return result

            return wrapped

        return decorator
|
||||
5
app/mappers/__init__.py
Normal file
5
app/mappers/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .exam import ExamMapper
|
||||
|
||||
__all__ = [
|
||||
"ExamMapper"
|
||||
]
|
||||
66
app/mappers/exam.py
Normal file
66
app/mappers/exam.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from typing import Dict, Any
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
||||
from app.dtos.exam import (
|
||||
MultipleChoiceExercise,
|
||||
FillBlanksExercise,
|
||||
Part, Exam
|
||||
)
|
||||
from app.dtos.sheet import Sheet, Option, MultipleChoiceQuestion, FillBlanksWord
|
||||
|
||||
|
||||
class ExamMapper:
    """Turn plain response dicts into the ``Exam`` and ``Sheet`` DTOs."""

    @staticmethod
    def map_to_exam_model(response: Dict[str, Any]) -> Exam:
        """Build an ``Exam`` from a dict with a ``"parts"`` list.

        Every part must carry an ``"exercises"`` list and may carry a
        ``"context"``. Each exercise dict is dispatched on its ``"type"``:
        ``"multipleChoice"`` or ``"fillBlanks"``.

        Raises:
            ValueError: if an exercise has any other ``"type"``.
            KeyError: if ``"parts"``, ``"exercises"`` or ``"type"`` is missing.
        """
        parts = []
        for part in response['parts']:
            part_exercises = part['exercises']
            context = part.get('context', None)

            exercises = []
            for exercise in part_exercises:
                exercise_type = exercise['type']
                if exercise_type == 'multipleChoice':
                    exercise_model = MultipleChoiceExercise(**exercise)
                elif exercise_type == 'fillBlanks':
                    exercise_model = FillBlanksExercise(**exercise)
                else:
                    # pydantic's ValidationError cannot be built from a bare
                    # message (doing so raises TypeError), so report the bad
                    # input with ValueError instead.
                    raise ValueError(f"Unknown exercise type: {exercise_type}")

                exercises.append(exercise_model)

            # Only pass context when present so Part keeps its own default.
            part_kwargs = {"exercises": exercises}
            if context is not None:
                part_kwargs["context"] = context

            part_model = Part(**part_kwargs)
            parts.append(part_model)

        return Exam(parts=parts)

    @staticmethod
    def map_to_sheet(response: Dict[str, Any]) -> Sheet:
        """Build a ``Sheet`` from a dict with a ``"components"`` list.

        ``"multipleChoice"`` and ``"fillBlanks"`` items become their typed
        models; an item of any other ``"type"`` is kept as the raw dict.

        Raises:
            KeyError: if ``"components"`` or a required item key is missing.
        """
        components = []

        for item in response["components"]:
            component_type = item["type"]

            if component_type == "multipleChoice":
                options = [Option(id=opt["id"], text=opt["text"]) for opt in item["options"]]
                components.append(MultipleChoiceQuestion(
                    id=item["id"],
                    prompt=item["prompt"],
                    variant=item.get("variant", "text"),
                    options=options
                ))
            elif component_type == "fillBlanks":
                components.append(FillBlanksWord(
                    id=item["id"],
                    options=item["options"]
                ))
            else:
                components.append(item)

        return Sheet(components=components)
|
||||
@@ -11,3 +11,6 @@ class IDocumentStore(ABC):
|
||||
|
||||
async def get_all(self, collection: str):
|
||||
pass
|
||||
|
||||
async def get_doc_by_id(self, collection: str, doc_id: str):
|
||||
pass
|
||||
|
||||
@@ -15,9 +15,9 @@ class Firestore(IDocumentStore):
|
||||
update_time, document_ref = await collection_ref.add(item)
|
||||
if document_ref:
|
||||
self._logger.info(f"Document added with ID: {document_ref.id}")
|
||||
return True, document_ref.id
|
||||
return document_ref.id
|
||||
else:
|
||||
return False, None
|
||||
return None
|
||||
|
||||
async def save_to_db_with_id(self, collection: str, item, id: str):
|
||||
collection_ref: AsyncCollectionReference = self._client.collection(collection)
|
||||
@@ -26,9 +26,9 @@ class Firestore(IDocumentStore):
|
||||
doc_snapshot = await document_ref.get()
|
||||
if doc_snapshot.exists:
|
||||
self._logger.info(f"Document added with ID: {document_ref.id}")
|
||||
return True, document_ref.id
|
||||
return document_ref.id
|
||||
else:
|
||||
return False, None
|
||||
return None
|
||||
|
||||
async def get_all(self, collection: str):
|
||||
collection_ref: AsyncCollectionReference = self._client.collection(collection)
|
||||
@@ -36,3 +36,12 @@ class Firestore(IDocumentStore):
|
||||
async for doc in collection_ref.stream():
|
||||
docs.append(doc.to_dict())
|
||||
return docs
|
||||
|
||||
async def get_doc_by_id(self, collection: str, doc_id: str):
    """Fetch the document *doc_id* from *collection*.

    Returns the document as a dict, or ``None`` when it does not exist.
    """
    document: AsyncDocumentReference = self._client.collection(collection).document(doc_id)
    snapshot = await document.get()
    return snapshot.to_dict() if snapshot.exists else None
|
||||
|
||||
@@ -116,6 +116,16 @@ def setup_listeners(_app: FastAPI) -> None:
|
||||
content={"error_code": exc.error_code, "message": exc.message},
|
||||
)
|
||||
|
||||
@_app.exception_handler(Exception)
|
||||
async def default_exception_handler(request: Request, exc: Exception):
|
||||
"""
|
||||
Don't delete request param
|
||||
"""
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content=str(exc),
|
||||
)
|
||||
|
||||
|
||||
def setup_middleware() -> List[Middleware]:
|
||||
middleware = [
|
||||
@@ -135,9 +145,10 @@ def setup_middleware() -> List[Middleware]:
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
|
||||
env = os.getenv("ENV")
|
||||
_app = FastAPI(
|
||||
docs_url=None,
|
||||
redoc_url=None,
|
||||
docs_url="/docs" if env != "prod" else None,
|
||||
redoc_url="/redoc" if env != "prod" else None,
|
||||
middleware=setup_middleware(),
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
@@ -5,6 +5,7 @@ from .speaking import ISpeakingService
|
||||
from .reading import IReadingService
|
||||
from .grade import IGradeService
|
||||
from .training import ITrainingService
|
||||
from .kb import IKnowledgeBase
|
||||
from .third_parties import *
|
||||
|
||||
__all__ = [
|
||||
|
||||
@@ -4,20 +4,10 @@ from typing import Dict, List
|
||||
|
||||
class IGradeService(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def grade_short_answers(self, data: Dict):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def calculate_grading_summary(self, extracted_sections: List):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _calculate_section_grade_summary(self, section):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _parse_openai_response(response):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _parse_bullet_points(bullet_points_str, grade):
|
||||
pass
|
||||
|
||||
10
app/services/abc/kb.py
Normal file
10
app/services/abc/kb.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
class IKnowledgeBase(ABC):
    """Abstract interface for a knowledge base that can be queried by category."""

    @abstractmethod
    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Look up *query* within *category*.

        Implementations return a list of string-to-string dicts; *top_k*
        presumably caps how many entries come back (confirm against the
        concrete implementation).
        """
|
||||
@@ -1,10 +1,19 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import random
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import UploadFile
|
||||
|
||||
from app.configs.constants import EducationalContent
|
||||
|
||||
|
||||
class ILevelService(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def get_level_exam(self):
|
||||
async def get_level_exam(
|
||||
self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
|
||||
) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -12,13 +21,27 @@ class ILevelService(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
|
||||
async def get_custom_level(self, data: Dict):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
|
||||
async def upload_level(self, upload: UploadFile) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _generate_single_mc_level_question(self):
|
||||
async def gen_multiple_choice(
|
||||
self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def gen_blank_space_text_utas(
|
||||
self, quantity: int, start_id: int, size: int, topic=random.choice(EducationalContent.MTI_TOPICS)
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def gen_reading_passage_utas(
|
||||
self, start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(EducationalContent.MTI_TOPICS)
|
||||
):
|
||||
pass
|
||||
|
||||
@@ -1,68 +1,18 @@
|
||||
import queue
|
||||
from abc import ABC, abstractmethod
|
||||
from queue import Queue
|
||||
from typing import Dict
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
class IListeningService(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def generate_listening_question(self, section: int, topic: str) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def generate_listening_exercises(
|
||||
self, section: int, dialog: str,
|
||||
req_exercises: list[str], exercises_queue: Queue,
|
||||
start_id: int, difficulty: str
|
||||
async def get_listening_question(
|
||||
self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
|
||||
number_of_exercises_q=queue.Queue(), start_id=-1
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def save_listening(self, parts, min_timer, difficulty):
|
||||
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str) -> Dict:
|
||||
pass
|
||||
|
||||
# ==================================================================================================================
|
||||
# Helpers
|
||||
# ==================================================================================================================
|
||||
|
||||
@abstractmethod
|
||||
async def _generate_listening_conversation(self, section: int, topic: str) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _get_random_voice(gender: str):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_multiple_choice_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_write_blanks_questions_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_write_blanks_notes_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_write_blanks_form_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@ from abc import ABC, abstractmethod
|
||||
from queue import Queue
|
||||
from typing import List
|
||||
|
||||
from app.configs.constants import QuestionType
|
||||
|
||||
|
||||
class IReadingService(ABC):
|
||||
|
||||
@@ -14,36 +12,11 @@ class IReadingService(ABC):
|
||||
topic: str,
|
||||
req_exercises: List[str],
|
||||
number_of_exercises_q: Queue,
|
||||
difficulty: str
|
||||
):
|
||||
pass
|
||||
|
||||
# ==================================================================================================================
|
||||
# Helpers
|
||||
# ==================================================================================================================
|
||||
|
||||
@abstractmethod
|
||||
async def generate_reading_passage(self, q_type: QuestionType, topic: str):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _generate_reading_exercises(
|
||||
self, passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty
|
||||
difficulty: str,
|
||||
start_id: int
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_write_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _gen_paragraph_match_exercise(self, text: str, quantity: int, start_id):
|
||||
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
|
||||
pass
|
||||
|
||||
@@ -1,21 +1,17 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Dict
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
|
||||
class ISpeakingService(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
|
||||
async def get_speaking_part(
|
||||
self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
|
||||
) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def grade_speaking_task_1_and_2(
|
||||
self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def grade_speaking_task_3(self, answers: Dict, task: int = 3):
|
||||
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -23,35 +19,11 @@ class ISpeakingService(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
|
||||
pass
|
||||
|
||||
# ==================================================================================================================
|
||||
# Helpers
|
||||
# ==================================================================================================================
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _zero_rating(comment: str):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _calculate_overall(response: Dict):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _get_speaking_corrections(self, text):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def _create_video(self, question: str, avatar: str, error_message: str):
|
||||
async def generate_video(
|
||||
self, part: int, avatar: str, topic: str, questions: list[str],
|
||||
*,
|
||||
second_topic: Optional[str] = None,
|
||||
prompts: Optional[list[str]] = None,
|
||||
suffix: Optional[str] = None,
|
||||
):
|
||||
pass
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, TypeVar, Callable
|
||||
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
from pydantic import BaseModel
|
||||
|
||||
T = TypeVar('T', bound=BaseModel)
|
||||
|
||||
class ILLMService(ABC):
|
||||
|
||||
@@ -19,3 +23,16 @@ class ILLMService(ABC):
|
||||
@abstractmethod
|
||||
async def prediction_override(self, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def pydantic_prediction(
|
||||
self,
|
||||
messages: List[ChatCompletionMessageParam],
|
||||
map_to_model: Callable,
|
||||
json_scheme: str,
|
||||
*,
|
||||
model: Optional[str] = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_retries: int = 3
|
||||
) -> List[T] | T | None:
|
||||
pass
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class ITrainingService(ABC):
|
||||
|
||||
@@ -7,7 +9,6 @@ class ITrainingService(ABC):
|
||||
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
|
||||
async def get_training_content(self, training_content: Dict) -> Dict:
|
||||
pass
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class IWritingService(ABC):
|
||||
|
||||
@@ -11,22 +9,3 @@ class IWritingService(ABC):
|
||||
@abstractmethod
|
||||
async def grade_writing_task(self, task: int, question: str, answer: str):
|
||||
pass
|
||||
|
||||
# ==================================================================================================================
|
||||
# Helpers
|
||||
# ==================================================================================================================
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _get_writing_prompt(task: int, topic: str, difficulty: str):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
async def _get_fixed_text(self, text):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def _zero_rating(comment: str):
|
||||
pass
|
||||
|
||||
@@ -4,7 +4,7 @@ from .reading import ReadingService
|
||||
from .speaking import SpeakingService
|
||||
from .writing import WritingService
|
||||
from .grade import GradeService
|
||||
from .training import TrainingService
|
||||
from .training import *
|
||||
from .third_parties import *
|
||||
|
||||
__all__ = [
|
||||
@@ -14,6 +14,6 @@ __all__ = [
|
||||
"SpeakingService",
|
||||
"WritingService",
|
||||
"GradeService",
|
||||
"TrainingService"
|
||||
]
|
||||
__all__.extend(third_parties.__all__)
|
||||
__all__.extend(training.__all__)
|
||||
|
||||
@@ -1,42 +1,47 @@
|
||||
import json
|
||||
from typing import List
|
||||
import copy
|
||||
from typing import List, Dict
|
||||
|
||||
from app.configs.constants import GPTModels, TemperatureSettings
|
||||
from app.services.abc import ILLMService, IGradeService
|
||||
|
||||
|
||||
class GradeService(IGradeService):
|
||||
|
||||
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
|
||||
tools = [{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "save_evaluation_and_suggestions",
|
||||
"description": "Saves the evaluation and suggestions requested by input.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"evaluation": {
|
||||
"type": "string",
|
||||
"description": "A comment on the IELTS section grade obtained in the specific section and what it could mean without suggestions.",
|
||||
},
|
||||
"suggestions": {
|
||||
"type": "string",
|
||||
"description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.",
|
||||
},
|
||||
"bullet_points": {
|
||||
"type": "string",
|
||||
"description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. ",
|
||||
},
|
||||
},
|
||||
"required": ["evaluation", "suggestions"],
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def __init__(self, llm: ILLMService):
|
||||
self._llm = llm
|
||||
|
||||
async def grade_short_answers(self, data: Dict):
    """Ask the LLM to grade short answers and supply corrections for wrong ones.

    Args:
        data: Text, questions and answers to grade; it is interpolated
            verbatim into the user prompt.

    Returns:
        Whatever ``self._llm.prediction`` returns for the request; the call
        asks for an ``"exercises"`` field in the response.
    """
    json_format = {
        "exercises": [
            {
                "id": 1,
                "correct": True,
                "correct_answer": " correct answer if wrong"
            }
        ]
    }

    # Serialise with json.dumps: interpolating the dict directly would embed
    # its Python repr (single quotes, ``True``), which is not valid JSON even
    # though the prompt tells the model to answer in "this format".
    system_prompt = (
        'You are a helpful assistant designed to output JSON on this format: '
        f'{json.dumps(json_format)}'
    )
    user_prompt = (
        'Grade these answers according to the text content and write a correct answer if they are '
        f'wrong. Text, questions and answers:\n {data}'
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    return await self._llm.prediction(
        GPTModels.GPT_4_O,
        messages,
        ["exercises"],
        TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
|
||||
|
||||
async def calculate_grading_summary(self, extracted_sections: List):
|
||||
ret = []
|
||||
|
||||
@@ -116,8 +121,8 @@ class GradeService(IGradeService):
|
||||
)
|
||||
}]
|
||||
|
||||
chat_config = copy.deepcopy(self.chat_config)
|
||||
tools = copy.deepcopy(self.tools)
|
||||
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
|
||||
tools = self.get_tools()
|
||||
|
||||
res = await self._llm.prediction_override(
|
||||
model="gpt-3.5-turbo",
|
||||
@@ -154,3 +159,42 @@ class GradeService(IGradeService):
|
||||
return [line + '.' if line and not line.endswith('.') else line for line in cleaned_lines]
|
||||
else:
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def get_tools():
|
||||
return [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "save_evaluation_and_suggestions",
|
||||
"description": "Saves the evaluation and suggestions requested by input.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"evaluation": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"A comment on the IELTS section grade obtained in the specific section and what "
|
||||
"it could mean without suggestions."
|
||||
),
|
||||
},
|
||||
"suggestions": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"A small paragraph text with suggestions on how to possibly get a better grade "
|
||||
"than the one obtained."
|
||||
),
|
||||
},
|
||||
"bullet_points": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Text with four bullet points to improve the english speaking ability. Only "
|
||||
"include text for the bullet points separated by a paragraph."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["evaluation", "suggestions"],
|
||||
},
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1,506 +0,0 @@
|
||||
import json
|
||||
import random
|
||||
import uuid
|
||||
|
||||
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent, QuestionType
|
||||
from app.helpers import ExercisesHelper
|
||||
from app.repositories.abc import IDocumentStore
|
||||
from app.services.abc import ILevelService, ILLMService, IReadingService
|
||||
|
||||
|
||||
class LevelService(ILevelService):
    """Builds English level exams (generic and UTAS-style) out of LLM-generated exercises.

    All generation goes through ``self._llm.prediction``; previously stored
    exams are read from the document store to avoid repeating questions.
    """

    # Shared prompt fragments (kept byte-identical to the strings that used to
    # be repeated inline in several methods).
    _JSON_PREAMBLE = 'You are a helpful assistant designed to output JSON on this format: '
    _SINGLE_MC_EXAMPLE = (
        '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
        '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
        '"prompt": "Which of the following is a conjunction?", '
        '"solution": "A", "variant": "text"}'
    )
    _MC_LIST_EXAMPLE = '{"questions": [' + _SINGLE_MC_EXAMPLE + ']}'

    def __init__(
            self, llm: ILLMService, document_store: IDocumentStore, reading_service: IReadingService
    ):
        self._llm = llm
        self._document_store = document_store
        self._reading_service = reading_service

    # ==================================================================================================================
    # Generic level exam
    # ==================================================================================================================

    async def get_level_exam(self):
        """Return a level exam made of a single 25-question multiple-choice exercise."""
        number_of_exercises = 25
        exercises = await self._gen_multiple_choice_level(number_of_exercises)
        return {
            "exercises": [exercises],
            "isDiagnostic": False,
            "minTimer": 25,
            "module": "level"
        }

    async def _gen_multiple_choice_level(self, quantity: int, start_id=1):
        """Generate ``quantity`` 4-option questions that do not repeat stored level exams.

        Returns:
            A ``multipleChoice`` exercise dict whose question ids start at ``start_id``.
        """
        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + self._MC_LIST_EXAMPLE},
            {"role": "user", "content": self._level_mc_prompt(quantity, 'multiple choice questions')},
        ]
        question = await self._predict_exact_questions(messages, quantity)

        all_exams = await self._document_store.get_all("level")
        seen_keys = set()
        for index, exercise in enumerate(question["questions"]):
            question["questions"][index], seen_keys = await self._replace_exercise_if_exists(
                all_exams, exercise, question, seen_keys
            )
        return {
            "id": str(uuid.uuid4()),
            "prompt": "Select the appropriate option.",
            "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
            "type": "multipleChoice",
        }

    async def _replace_exercise_if_exists(self, all_exams, current_exercise, current_exam, seen_keys):
        """Return an exercise that repeats neither ``seen_keys`` nor any stored exam.

        Args:
            all_exams: Stored exam documents; each must expose ``to_dict()``.
            current_exercise: Candidate question (needs ``prompt`` and ``options``).
            current_exam: Unused; kept for signature compatibility.
            seen_keys: Set of keys already used in the exam being built (mutated).

        Returns:
            ``(exercise, seen_keys)``.
        """
        stored_questions = []
        for exam in all_exams:
            stored_questions.extend(self._first_exercise_questions(exam.to_dict()))
        return await self._pick_unique_exercise(current_exercise, seen_keys, stored_questions)

    async def _generate_single_mc_level_question(self):
        """Ask the LLM for one replacement multiple-choice question."""
        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + self._SINGLE_MC_EXAMPLE},
            {
                "role": "user",
                "content": (
                    'Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
                    'intermediate or advanced.'
                )
            }
        ]

        return await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )

    # ==================================================================================================================
    # UTAS level exam
    # ==================================================================================================================

    async def get_level_utas(self):
        """Return the five-part UTAS level exam payload."""
        # Part 1: three batches of 15 blank-space questions (ids 1-45). Each
        # batch is checked against the batches generated before it.
        mc_batches = []
        for first_id in (1, 16, 31):
            batch = await self._gen_multiple_choice_blank_space_utas(15, first_id, mc_batches)
            self._debug_dump(batch)
            mc_batches.append(batch)
        mc_exercises = [question for batch in mc_batches for question in batch['questions']]
        self._debug_dump(mc_exercises)

        # Part 2: underlined multiple choice (ids 46-60).
        underlined_mc = await self._gen_multiple_choice_underlined_utas(15, 46)
        self._debug_dump(underlined_mc)

        # Parts 3 and 4: blank-space texts.
        blank_space_text_1 = await self._gen_blank_space_text_utas(12, 61, 250)
        self._debug_dump(blank_space_text_1)
        blank_space_text_2 = await self._gen_blank_space_text_utas(14, 73, 350)
        self._debug_dump(blank_space_text_2)

        # Part 5: reading passage with 10 short answers and 4 multiple choice.
        reading_text = await self._gen_reading_passage_utas(87, 10, 4)
        self._debug_dump(reading_text)

        return {
            "exercises": {
                "blankSpaceMultipleChoice": self._utas_section(
                    "Choose the correct word or group of words that completes the sentences.",
                    mc_exercises, "multipleChoice", 1
                ),
                "underlinedMultipleChoice": self._utas_section(
                    "Choose the underlined word or group of words that is not correct.",
                    underlined_mc, "multipleChoice", 2
                ),
                "blankSpaceText1": self._utas_section(
                    "Read the text and write the correct word for each space.",
                    blank_space_text_1, "blankSpaceText", 3
                ),
                "blankSpaceText2": self._utas_section(
                    "Read the text and write the correct word for each space.",
                    blank_space_text_2, "blankSpaceText", 4
                ),
                "readingExercises": self._utas_section(
                    "Read the text and answer the questions below.",
                    reading_text, "readingExercises", 5
                ),
            },
            "isDiagnostic": False,
            "minTimer": 25,
            "module": "level"
        }

    async def _gen_multiple_choice_blank_space_utas(self, quantity: int, start_id: int, all_exams):
        """Generate ``quantity`` blank-space questions not present in ``all_exams``.

        Args:
            all_exams: Previously generated batches (dicts with a ``questions`` list).

        Returns:
            The LLM result dict with its ``questions`` renumbered from ``start_id``.
        """
        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + self._MC_LIST_EXAMPLE},
            {
                "role": "user",
                "content": self._level_mc_prompt(quantity, 'multiple choice blank space questions')
            },
        ]
        # A wrong-sized answer is retried with this same prompt. The old code
        # fell back to _gen_multiple_choice_level, which generated a different
        # kind of question and ignored ``all_exams``.
        question = await self._predict_exact_questions(messages, quantity)

        seen_keys = set()
        for index, exercise in enumerate(question["questions"]):
            question["questions"][index], seen_keys = await self._replace_exercise_if_exists_utas(
                all_exams, exercise, question, seen_keys
            )
        return ExercisesHelper.fix_exercise_ids(question, start_id)

    async def _replace_exercise_if_exists_utas(self, all_exams, current_exercise, current_exam, seen_keys):
        """UTAS variant of ``_replace_exercise_if_exists`` working on plain batch dicts.

        The previous implementation returned the recursive call without
        ``await``, handing the caller a coroutine instead of the
        ``(exercise, seen_keys)`` tuple whenever a duplicate was found.
        """
        stored_questions = [
            question for exam in all_exams for question in exam.get("questions", [])
        ]
        return await self._pick_unique_exercise(current_exercise, seen_keys, stored_questions)

    async def _gen_multiple_choice_underlined_utas(self, quantity: int, start_id: int):
        """Generate ``quantity`` "find the wrong underlined part" questions.

        Returns:
            The list of questions, renumbered from ``start_id``.
        """
        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + str(self._mc_json_format())},
            {"role": "user", "content": self._level_mc_prompt(quantity, 'multiple choice questions')},
            {
                "role": "user",
                "content": (
                    'The type of multiple choice is the prompt has wrong words or group of words and the options '
                    'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                    'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
                    'the boss <u>is</u> nice."\nOptions:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
                )
            }
        ]
        # Retrying here (instead of delegating to _gen_multiple_choice_level)
        # keeps both the exercise kind and the list return type on every path.
        question = await self._predict_exact_questions(messages, quantity)
        return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]

    async def _gen_blank_space_text_utas(self, quantity: int, start_id: int, size: int, topic=None):
        """Generate a text of at least ``size`` words with ``quantity`` words blanked out.

        Args:
            topic: Text topic; when ``None`` a random ``EducationalContent.MTI_TOPICS``
                entry is chosen on each call (a ``random.choice`` default in the
                signature would be evaluated only once, at definition time).

        Returns:
            The ``question`` object from the LLM response.
        """
        if topic is None:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        json_format = {
            "question": {
                "words": [
                    {"id": "1", "text": "a"},
                    {"id": "2", "text": "b"},
                    {"id": "3", "text": "c"},
                    {"id": "4", "text": "d"}
                ],
                "text": "text"
            }
        }

        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + str(json_format)},
            {
                "role": "user",
                "content": f'Generate a text of at least {str(size)} words about the topic {topic}.'
            },
            {
                "role": "user",
                "content": (
                    f'From the generated text choose {str(quantity)} words (cannot be sequential words) to replace '
                    # Deliberately not an f-string: the doubled braces are sent to the model as written.
                    'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                    'The ids must be ordered throughout the text and the words must be replaced only once. Put '
                    'the removed words and respective ids on the words array of the json in the correct order.'
                )
            }
        ]

        question = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return question["question"]

    async def _gen_reading_passage_utas(self, start_id, sa_quantity: int, mc_quantity: int, topic=None):
        """Generate a reading passage plus short-answer and multiple-choice exercises.

        Short-answer ids start at ``start_id``; multiple-choice ids follow them.

        Args:
            topic: Passage topic; when ``None`` a random ``EducationalContent.MTI_TOPICS``
                entry is chosen on each call.
        """
        if topic is None:
            topic = random.choice(EducationalContent.MTI_TOPICS)

        passage = await self._reading_service.generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
        short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
        mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
        return {
            "exercises": {
                "shortAnswer": short_answer,
                "multipleChoice": mc_exercises,
            },
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }

    async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
        """Generate ``sa_quantity`` short-answer questions (with possible answers) about ``text``."""
        json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + str(json_format)},
            {
                "role": "user",
                "content": (
                    'Generate ' + str(sa_quantity) + ' short answer questions, and the possible answers, must have '
                    'maximum 3 words per answer, about this text:\n"' + text + '"'
                )
            },
            {"role": "user", "content": 'The id starts at ' + str(start_id) + '.'}
        ]

        response = await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
        return response["questions"]

    async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
        """Generate ``mc_quantity`` 4-option questions about ``text``.

        Returns:
            The list of questions, renumbered from ``start_id``.
        """
        messages = [
            {"role": "system", "content": self._JSON_PREAMBLE + str(self._mc_json_format())},
            {
                "role": "user",
                "content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
            },
            {"role": "user", "content": 'Make sure every question only has 1 correct answer.'}
        ]
        # Retry with the same prompt: the old fallback produced generic level
        # questions unrelated to ``text`` and returned a dict instead of a list.
        question = await self._predict_exact_questions(messages, mc_quantity)
        return ExercisesHelper.fix_exercise_ids(question, start_id)["questions"]

    # ==================================================================================================================
    # Helpers
    # ==================================================================================================================

    async def _predict_exact_questions(self, messages, quantity: int):
        """Query the LLM until its ``questions`` list has exactly ``quantity`` entries.

        NOTE: like the recursive retry it replaces, this has no attempt limit.
        """
        while True:
            result = await self._llm.prediction(
                GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
            )
            if len(result["questions"]) == quantity:
                return result

    async def _pick_unique_exercise(self, candidate, seen_keys, stored_questions):
        """Regenerate ``candidate`` until it clashes with neither ``seen_keys`` nor ``stored_questions``.

        Every key that gets examined is added to ``seen_keys`` (even when the
        candidate is then rejected for matching a stored question).
        """
        while True:
            key = self._exercise_key(candidate)
            if key not in seen_keys:
                seen_keys.add(key)
                if not any(self._is_same_exercise(stored, candidate) for stored in stored_questions):
                    return candidate, seen_keys
            candidate = await self._generate_single_mc_level_question()

    @staticmethod
    def _exercise_key(exercise):
        """Identity of a question: its prompt plus the sorted option texts."""
        return exercise['prompt'], tuple(sorted(option['text'] for option in exercise['options']))

    @staticmethod
    def _is_same_exercise(stored, candidate):
        """True when prompts match and the stored first option appears among the candidate's options."""
        return stored["prompt"] == candidate["prompt"] and any(
            stored["options"][0]["text"] == option["text"] for option in candidate["options"]
        )

    @staticmethod
    def _first_exercise_questions(exam_dict):
        """Return the questions of a stored exam's first exercise, or ``[]`` when unavailable.

        Exams built by ``get_level_utas`` keep ``exercises`` as a dict and an exam
        may have no exercises at all; indexing ``[0]`` blindly raised on both.
        """
        exercises = exam_dict.get("exercises")
        if not isinstance(exercises, list) or not exercises:
            return []
        first = exercises[0]
        if not isinstance(first, dict):
            return []
        return first.get("questions") or []

    @staticmethod
    def _level_mc_prompt(quantity: int, kind: str):
        """User prompt asking for ``quantity`` level-exam questions of the given ``kind``."""
        return (
            f'Generate {str(quantity)} {kind} of 4 options for an english level exam, some easy '
            'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
            'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
            'punctuation. Make sure every question only has 1 correct answer.'
        )

    @staticmethod
    def _mc_json_format():
        """Example structure (rendered with ``str``) for a list of 4-option questions."""
        return {
            "questions": [
                {
                    "id": "9",
                    "options": [
                        {"id": "A", "text": "a"},
                        {"id": "B", "text": "b"},
                        {"id": "C", "text": "c"},
                        {"id": "D", "text": "d"}
                    ],
                    "prompt": "prompt",
                    "solution": "A",
                    "variant": "text"
                }
            ]
        }

    @staticmethod
    def _utas_section(prompt: str, questions, exercise_type: str, part: int):
        """Assemble one UTAS exam section with a fresh uuid."""
        return {
            "id": str(uuid.uuid4()),
            "prompt": prompt,
            "questions": questions,
            "type": exercise_type,
            "part": part
        }

    @staticmethod
    def _debug_dump(payload):
        """Print ``payload`` as indented JSON (the original inline debug output, kept in one place)."""
        print(json.dumps(payload, indent=4))
|
||||
5
app/services/impl/level/__init__.py
Normal file
5
app/services/impl/level/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .level import LevelService
|
||||
|
||||
__all__ = [
|
||||
"LevelService"
|
||||
]
|
||||
335
app/services/impl/level/custom.py
Normal file
335
app/services/impl/level/custom.py
Normal file
@@ -0,0 +1,335 @@
|
||||
import queue
|
||||
import random
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from app.configs.constants import CustomLevelExerciseTypes, EducationalContent
|
||||
from app.services.abc import (
|
||||
ILLMService, ILevelService, IReadingService,
|
||||
IWritingService, IListeningService, ISpeakingService
|
||||
)
|
||||
|
||||
|
||||
class CustomLevelModule:
|
||||
|
||||
def __init__(
        self,
        llm: ILLMService,
        level: ILevelService,
        reading: IReadingService,
        listening: IListeningService,
        writing: IWritingService,
        speaking: ISpeakingService
):
    """Keep references to the LLM service and to each exam-module service.

    Nothing is called here; every collaborator is stored on a private
    attribute named after its parameter (``llm`` -> ``self._llm`` and so on).
    """
    # Module services.
    self._level, self._reading = level, reading
    self._listening, self._writing = listening, writing
    self._speaking = speaking
    # Shared LLM client.
    self._llm = llm
|
||||
|
||||
# TODO: I've changed this to retrieve the args from the body request and not request query args
|
||||
async def get_custom_level(self, data: Dict):
|
||||
nr_exercises = int(data.get('nr_exercises'))
|
||||
|
||||
exercise_id = 1
|
||||
response = {
|
||||
"exercises": {},
|
||||
"module": "level"
|
||||
}
|
||||
for i in range(1, nr_exercises + 1, 1):
|
||||
exercise_type = data.get(f'exercise_{i}_type')
|
||||
exercise_difficulty = data.get(f'exercise_{i}_difficulty', random.choice(['easy', 'medium', 'hard']))
|
||||
exercise_qty = int(data.get(f'exercise_{i}_qty', -1))
|
||||
exercise_topic = data.get(f'exercise_{i}_topic', random.choice(EducationalContent.TOPICS))
|
||||
exercise_topic_2 = data.get(f'exercise_{i}_topic_2', random.choice(EducationalContent.TOPICS))
|
||||
exercise_text_size = int(data.get(f'exercise_{i}_text_size', 700))
|
||||
exercise_sa_qty = int(data.get(f'exercise_{i}_sa_qty', -1))
|
||||
exercise_mc_qty = int(data.get(f'exercise_{i}_mc_qty', -1))
|
||||
exercise_mc3_qty = int(data.get(f'exercise_{i}_mc3_qty', -1))
|
||||
exercise_fillblanks_qty = int(data.get(f'exercise_{i}_fillblanks_qty', -1))
|
||||
exercise_writeblanks_qty = int(data.get(f'exercise_{i}_writeblanks_qty', -1))
|
||||
exercise_writeblanksquestions_qty = int(data.get(f'exercise_{i}_writeblanksquestions_qty', -1))
|
||||
exercise_writeblanksfill_qty = int(data.get(f'exercise_{i}_writeblanksfill_qty', -1))
|
||||
exercise_writeblanksform_qty = int(data.get(f'exercise_{i}_writeblanksform_qty', -1))
|
||||
exercise_truefalse_qty = int(data.get(f'exercise_{i}_truefalse_qty', -1))
|
||||
exercise_paragraphmatch_qty = int(data.get(f'exercise_{i}_paragraphmatch_qty', -1))
|
||||
exercise_ideamatch_qty = int(data.get(f'exercise_{i}_ideamatch_qty', -1))
|
||||
|
||||
if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
|
||||
response["exercises"][f"exercise_{i}"] = {}
|
||||
response["exercises"][f"exercise_{i}"]["questions"] = []
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
|
||||
while exercise_qty > 0:
|
||||
if exercise_qty - 15 > 0:
|
||||
qty = 15
|
||||
else:
|
||||
qty = exercise_qty
|
||||
|
||||
mc_response = await self._level.gen_multiple_choice(
|
||||
"normal", qty, exercise_id, utas=True,
|
||||
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
|
||||
response["exercises"][f"exercise_{i}"] = {}
|
||||
response["exercises"][f"exercise_{i}"]["questions"] = []
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
|
||||
while exercise_qty > 0:
|
||||
if exercise_qty - 15 > 0:
|
||||
qty = 15
|
||||
else:
|
||||
qty = exercise_qty
|
||||
|
||||
mc_response = await self._level.gen_multiple_choice(
|
||||
"blank_space", qty, exercise_id, utas=True,
|
||||
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
|
||||
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
|
||||
response["exercises"][f"exercise_{i}"] = {}
|
||||
response["exercises"][f"exercise_{i}"]["questions"] = []
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "multipleChoice"
|
||||
while exercise_qty > 0:
|
||||
if exercise_qty - 15 > 0:
|
||||
qty = 15
|
||||
else:
|
||||
qty = exercise_qty
|
||||
|
||||
mc_response = await self._level.gen_multiple_choice(
|
||||
"underline", qty, exercise_id, utas=True,
|
||||
all_exams=response["exercises"][f"exercise_{i}"]["questions"]
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["questions"].extend(mc_response["questions"])
|
||||
|
||||
exercise_id = exercise_id + qty
|
||||
exercise_qty = exercise_qty - qty
|
||||
|
||||
elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._level.gen_blank_space_text_utas(
|
||||
exercise_qty, exercise_id, exercise_text_size
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "blankSpaceText"
|
||||
exercise_id = exercise_id + exercise_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_PASSAGE_UTAS.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._level.gen_reading_passage_utas(
|
||||
exercise_id, exercise_sa_qty, exercise_mc_qty, exercise_topic
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "readingExercises"
|
||||
exercise_id = exercise_id + exercise_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.WRITING_LETTER.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
|
||||
1, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "writing"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.WRITING_2.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._writing.get_writing_task_general_question(
|
||||
2, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "writing"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_1.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
|
||||
1, exercise_topic, exercise_difficulty, exercise_topic_2
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_2.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
|
||||
2, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "speaking"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.SPEAKING_3.value:
|
||||
response["exercises"][f"exercise_{i}"] = await self._speaking.get_speaking_part(
|
||||
3, exercise_topic, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "interactiveSpeaking"
|
||||
exercise_id = exercise_id + 1
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_1.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_fillblanks_qty != -1:
|
||||
exercises.append('fillBlanks')
|
||||
exercise_qty_q.put(exercise_fillblanks_qty)
|
||||
total_qty = total_qty + exercise_fillblanks_qty
|
||||
if exercise_writeblanks_qty != -1:
|
||||
exercises.append('writeBlanks')
|
||||
exercise_qty_q.put(exercise_writeblanks_qty)
|
||||
total_qty = total_qty + exercise_writeblanks_qty
|
||||
if exercise_truefalse_qty != -1:
|
||||
exercises.append('trueFalse')
|
||||
exercise_qty_q.put(exercise_truefalse_qty)
|
||||
total_qty = total_qty + exercise_truefalse_qty
|
||||
if exercise_paragraphmatch_qty != -1:
|
||||
exercises.append('paragraphMatch')
|
||||
exercise_qty_q.put(exercise_paragraphmatch_qty)
|
||||
total_qty = total_qty + exercise_paragraphmatch_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
|
||||
1, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "reading"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_2.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_fillblanks_qty != -1:
|
||||
exercises.append('fillBlanks')
|
||||
exercise_qty_q.put(exercise_fillblanks_qty)
|
||||
total_qty = total_qty + exercise_fillblanks_qty
|
||||
if exercise_writeblanks_qty != -1:
|
||||
exercises.append('writeBlanks')
|
||||
exercise_qty_q.put(exercise_writeblanks_qty)
|
||||
total_qty = total_qty + exercise_writeblanks_qty
|
||||
if exercise_truefalse_qty != -1:
|
||||
exercises.append('trueFalse')
|
||||
exercise_qty_q.put(exercise_truefalse_qty)
|
||||
total_qty = total_qty + exercise_truefalse_qty
|
||||
if exercise_paragraphmatch_qty != -1:
|
||||
exercises.append('paragraphMatch')
|
||||
exercise_qty_q.put(exercise_paragraphmatch_qty)
|
||||
total_qty = total_qty + exercise_paragraphmatch_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
|
||||
2, exercise_topic, exercises, exercise_qty_q, exercise_difficulty, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "reading"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.READING_3.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_fillblanks_qty != -1:
|
||||
exercises.append('fillBlanks')
|
||||
exercise_qty_q.put(exercise_fillblanks_qty)
|
||||
total_qty = total_qty + exercise_fillblanks_qty
|
||||
if exercise_writeblanks_qty != -1:
|
||||
exercises.append('writeBlanks')
|
||||
exercise_qty_q.put(exercise_writeblanks_qty)
|
||||
total_qty = total_qty + exercise_writeblanks_qty
|
||||
if exercise_truefalse_qty != -1:
|
||||
exercises.append('trueFalse')
|
||||
exercise_qty_q.put(exercise_truefalse_qty)
|
||||
total_qty = total_qty + exercise_truefalse_qty
|
||||
if exercise_paragraphmatch_qty != -1:
|
||||
exercises.append('paragraphMatch')
|
||||
exercise_qty_q.put(exercise_paragraphmatch_qty)
|
||||
total_qty = total_qty + exercise_paragraphmatch_qty
|
||||
if exercise_ideamatch_qty != -1:
|
||||
exercises.append('ideaMatch')
|
||||
exercise_qty_q.put(exercise_ideamatch_qty)
|
||||
total_qty = total_qty + exercise_ideamatch_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._reading.gen_reading_passage(
|
||||
3, exercise_topic, exercises, exercise_qty_q, exercise_id, exercise_difficulty
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "reading"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_1.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc_qty != -1:
|
||||
exercises.append('multipleChoice')
|
||||
exercise_qty_q.put(exercise_mc_qty)
|
||||
total_qty = total_qty + exercise_mc_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
if exercise_writeblanksfill_qty != -1:
|
||||
exercises.append('writeBlanksFill')
|
||||
exercise_qty_q.put(exercise_writeblanksfill_qty)
|
||||
total_qty = total_qty + exercise_writeblanksfill_qty
|
||||
if exercise_writeblanksform_qty != -1:
|
||||
exercises.append('writeBlanksForm')
|
||||
exercise_qty_q.put(exercise_writeblanksform_qty)
|
||||
total_qty = total_qty + exercise_writeblanksform_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
1, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_2.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc_qty != -1:
|
||||
exercises.append('multipleChoice')
|
||||
exercise_qty_q.put(exercise_mc_qty)
|
||||
total_qty = total_qty + exercise_mc_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
2, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_3.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc3_qty != -1:
|
||||
exercises.append('multipleChoice3Options')
|
||||
exercise_qty_q.put(exercise_mc3_qty)
|
||||
total_qty = total_qty + exercise_mc3_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
3, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
elif exercise_type == CustomLevelExerciseTypes.LISTENING_4.value:
|
||||
exercises = []
|
||||
exercise_qty_q = queue.Queue()
|
||||
total_qty = 0
|
||||
if exercise_mc_qty != -1:
|
||||
exercises.append('multipleChoice')
|
||||
exercise_qty_q.put(exercise_mc_qty)
|
||||
total_qty = total_qty + exercise_mc_qty
|
||||
if exercise_writeblanksquestions_qty != -1:
|
||||
exercises.append('writeBlanksQuestions')
|
||||
exercise_qty_q.put(exercise_writeblanksquestions_qty)
|
||||
total_qty = total_qty + exercise_writeblanksquestions_qty
|
||||
if exercise_writeblanksfill_qty != -1:
|
||||
exercises.append('writeBlanksFill')
|
||||
exercise_qty_q.put(exercise_writeblanksfill_qty)
|
||||
total_qty = total_qty + exercise_writeblanksfill_qty
|
||||
if exercise_writeblanksform_qty != -1:
|
||||
exercises.append('writeBlanksForm')
|
||||
exercise_qty_q.put(exercise_writeblanksform_qty)
|
||||
total_qty = total_qty + exercise_writeblanksform_qty
|
||||
|
||||
response["exercises"][f"exercise_{i}"] = await self._listening.get_listening_question(
|
||||
4, exercise_topic, exercises, exercise_difficulty, exercise_qty_q, exercise_id
|
||||
)
|
||||
response["exercises"][f"exercise_{i}"]["type"] = "listening"
|
||||
|
||||
exercise_id = exercise_id + total_qty
|
||||
|
||||
return response
|
||||
417
app/services/impl/level/level.py
Normal file
417
app/services/impl/level/level.py
Normal file
@@ -0,0 +1,417 @@
|
||||
import json
|
||||
import random
|
||||
import uuid
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import UploadFile
|
||||
|
||||
from app.configs.constants import GPTModels, TemperatureSettings, EducationalContent
|
||||
from app.helpers import ExercisesHelper
|
||||
from app.repositories.abc import IDocumentStore
|
||||
from app.services.abc import ILevelService, ILLMService, IReadingService, IWritingService, ISpeakingService, \
|
||||
IListeningService
|
||||
from .custom import CustomLevelModule
|
||||
from .upload import UploadLevelModule
|
||||
|
||||
|
||||
class LevelService(ILevelService):
|
||||
|
||||
def __init__(
        self,
        llm: ILLMService,
        document_store: IDocumentStore,
        mc_variants: Dict,
        reading_service: IReadingService,
        writing_service: IWritingService,
        speaking_service: ISpeakingService,
        listening_service: IListeningService
):
    """Store the collaborators and build the custom/upload sub-modules.

    :param llm: LLM service used for every prediction.
    :param document_store: store queried for previously saved "level" exams.
    :param mc_variants: JSON templates per multiple choice variant.
    :param reading_service: used to generate reading passages.
    :param writing_service: forwarded to the custom level module.
    :param speaking_service: forwarded to the custom level module.
    :param listening_service: forwarded to the custom level module.
    """
    # TODO: normal and blank spaces only differ on "multiple choice blank space questions" in the prompt
    # mc_variants are stored in ./mc_variants.json
    self._mc_variants = mc_variants

    self._llm = llm
    self._document_store = document_store
    self._reading_service = reading_service

    # The custom module calls back into this service for the level-specific generators.
    self._custom_module = CustomLevelModule(
        llm=llm,
        level=self,
        reading=reading_service,
        listening=listening_service,
        writing=writing_service,
        speaking=speaking_service,
    )
    self._upload_module = UploadLevelModule(openai=llm)
|
||||
|
||||
async def upload_level(self, upload: UploadFile) -> Dict:
    """Delegate to the upload module to turn the uploaded file into a level exam."""
    generated_level = await self._upload_module.generate_level_from_file(upload)
    return generated_level
|
||||
|
||||
async def get_custom_level(self, data: Dict):
    """Delegate to the custom level module to build the exam described by ``data``."""
    custom_level = await self._custom_module.get_custom_level(data)
    return custom_level
|
||||
|
||||
async def get_level_exam(
        self, number_of_exercises: int = 25, min_timer: int = 25, diagnostic: bool = False
) -> Dict:
    """Build a level exam made of a single "normal" multiple choice exercise.

    :param number_of_exercises: how many multiple choice questions to generate.
    :param min_timer: value reported under ``minTimer``.
    :param diagnostic: value reported under ``isDiagnostic``.
    :return: exam dictionary with ``exercises``, ``isDiagnostic``, ``minTimer`` and ``module``.
    """
    multiple_choice = await self.gen_multiple_choice("normal", number_of_exercises, utas=False)

    exam = {"exercises": [multiple_choice]}
    exam["isDiagnostic"] = diagnostic
    exam["minTimer"] = min_timer
    exam["module"] = "level"
    return exam
|
||||
|
||||
async def get_level_utas(self, diagnostic: bool = False, min_timer: int = 25):
    """Build the fixed five-part UTAS level exam.

    Parts: 45 blank space multiple choice questions (ids 1-45, generated in
    three batches of 15), 15 underlined multiple choice questions (from id
    46), two blank space texts (12 words from id 61, 14 words from id 73)
    and one reading passage (from id 87, 10 short answer + 4 multiple choice).

    :param diagnostic: value reported under ``isDiagnostic``.
    :param min_timer: value reported under ``minTimer``.
    :return: exam dictionary with ``exercises``, ``isDiagnostic``, ``minTimer`` and ``module``.
    """

    def section(prompt: str, exercise_type: str, part: int):
        # Common envelope of every part; "questions" is filled in below.
        return {
            "id": str(uuid.uuid4()),
            "prompt": prompt,
            "questions": None,
            "type": exercise_type,
            "part": part
        }

    mc = section(
        "Choose the correct word or group of words that completes the sentences.", "multipleChoice", 1
    )
    umc = section(
        "Choose the underlined word or group of words that is not correct.", "multipleChoice", 2
    )
    bs_1 = section("Read the text and write the correct word for each space.", "blankSpaceText", 3)
    bs_2 = section("Read the text and write the correct word for each space.", "blankSpaceText", 4)
    reading = section("Read the text and answer the questions below.", "readingExercises", 5)

    # Every finished batch is handed to the next call so duplicates can be replaced.
    all_mc_questions = []

    # Blank space multiple choice: three batches of 15 with consecutive ids.
    mc_exercises = []
    for start_id in (1, 16, 31):
        batch = await self.gen_multiple_choice(
            "blank_space", 15, start_id, utas=True, all_exams=all_mc_questions
        )
        all_mc_questions.append(batch)
        mc_exercises = mc_exercises + batch['questions']
    mc["questions"] = mc_exercises

    # Underlined mc
    underlined_mc = await self.gen_multiple_choice(
        "underline", 15, 46, utas=True, all_exams=all_mc_questions
    )
    # gen_multiple_choice returns a dict wrapping the list; store the list itself so this
    # part has the same shape as the blank space multiple choice part.
    umc["questions"] = underlined_mc["questions"]

    # Blank Space text 1
    bs_1["questions"] = await self.gen_blank_space_text_utas(12, 61, 250)

    # Blank Space text 2
    bs_2["questions"] = await self.gen_blank_space_text_utas(14, 73, 350)

    # Reading text
    reading["questions"] = await self.gen_reading_passage_utas(87, 10, 4)

    return {
        "exercises": {
            "blankSpaceMultipleChoice": mc,
            "underlinedMultipleChoice": umc,
            "blankSpaceText1": bs_1,
            "blankSpaceText2": bs_2,
            "readingExercises": reading,
        },
        "isDiagnostic": diagnostic,
        "minTimer": min_timer,
        "module": "level"
    }
|
||||
|
||||
async def gen_multiple_choice(
        self, mc_variant: str, quantity: int, start_id: int = 1, *, utas: bool = False, all_exams=None
):
    """Generate ``quantity`` multiple choice questions of the given variant.

    The LLM is asked again (recursively) whenever it returns a different
    number of questions than requested.

    :param mc_variant: key into the multiple choice templates (``self._mc_variants``).
    :param quantity: number of questions to generate.
    :param start_id: id given to the first question.
    :param utas: when False, duplicates are checked against the "level"
        documents of the document store and the result is wrapped with
        ``id``/``prompt``/``type``; when True, duplicates are checked against
        ``all_exams`` (if given) and the option order is randomised.
    :param all_exams: previously generated exams to de-duplicate against (``utas`` only).
    :return: dictionary holding the generated ``questions``.
    """
    mc_template = self._mc_variants[mc_variant]
    blank_mod = " blank space " if mc_variant == "blank_space" else " "

    gen_multiple_choice_for_text: str = (
        'Generate {quantity} multiple choice{blank}questions of 4 options for an english level exam, some easy '
        'questions, some intermediate questions and some advanced questions. Ensure that the questions cover '
        'a range of topics such as verb tense, subject-verb agreement, pronoun usage, sentence structure, and '
        'punctuation. Make sure every question only has 1 correct answer.'
    )

    system_message = {
        "role": "system",
        "content": (
            f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
        )
    }
    request_message = {
        "role": "user",
        "content": gen_multiple_choice_for_text.format(quantity=str(quantity), blank=blank_mod)
    }
    messages = [system_message, request_message]

    if mc_variant == "underline":
        # Extra guidance: the options are the underlined fragments of the prompt.
        underline_hint = (
            'The type of multiple choice in the prompt has wrong words or group of words and the options '
            'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
            'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
            'the boss <u>is</u> nice."\n'
            'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
        )
        messages.append({"role": "user", "content": underline_hint})

    question = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )

    # Wrong number of questions: start over with the same arguments.
    if len(question["questions"]) != quantity:
        return await self.gen_multiple_choice(mc_variant, quantity, start_id, utas=utas, all_exams=all_exams)

    if not utas:
        all_exams = await self._document_store.get_all("level")

    # Non-UTAS exams are always de-duplicated; UTAS ones only when exams were supplied.
    if not utas or all_exams is not None:
        seen_keys = set()
        for index, generated in enumerate(question["questions"]):
            question["questions"][index], seen_keys = await self._replace_exercise_if_exists(
                all_exams, generated, question, seen_keys, mc_variant, utas
            )

    if not utas:
        return {
            "id": str(uuid.uuid4()),
            "prompt": "Select the appropriate option.",
            "questions": ExercisesHelper.fix_exercise_ids(question, start_id)["questions"],
            "type": "multipleChoice",
        }

    response = ExercisesHelper.fix_exercise_ids(question, start_id)
    response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
    return response
|
||||
|
||||
async def _generate_single_multiple_choice(self, mc_variant: str = "normal"):
    """Ask the LLM for one multiple choice question of the given variant.

    :param mc_variant: key into the multiple choice templates; the first
        question of that template is used as the output format.
    :return: the LLM prediction (requested with an ``options`` field).
    """
    mc_template = self._mc_variants[mc_variant]["questions"][0]
    blank_mod = " blank space " if mc_variant == "blank_space" else " "

    messages = []
    messages.append({
        "role": "system",
        "content": (
            f'You are a helpful assistant designed to output JSON on this format: {mc_template}'
        )
    })
    messages.append({
        "role": "user",
        "content": (
            f'Generate 1 multiple choice {blank_mod} question of 4 options for an english level exam, '
            f'it can be easy, intermediate or advanced.'
        )
    })

    if mc_variant == "underline":
        # Extra guidance: the options are the underlined fragments of the prompt.
        underline_hint = (
            'The type of multiple choice in the prompt has wrong words or group of words and the options '
            'are to find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
            'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> '
            'the boss <u>is</u> nice."\n'
            'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"'
        )
        messages.append({"role": "user", "content": underline_hint})

    return await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["options"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
|
||||
|
||||
async def _replace_exercise_if_exists(
        self, all_exams, current_exercise, current_exam, seen_keys, mc_variant: str, utas: bool = False
):
    """Return ``current_exercise`` or, if it is a duplicate, a freshly generated replacement.

    A question is a duplicate when its (prompt, sorted option texts) key was
    already seen in this batch, or when a stored question has the same prompt
    and the text of its first option equals one of the candidate's option texts.
    Replacements are generated one at a time and checked again recursively.

    :param all_exams: exams to compare against; store documents exposing
        ``to_dict()`` when ``utas`` is False, dictionaries with a
        ``questions`` list when it is True.
    :param current_exercise: candidate question (``prompt`` and ``options``).
    :param current_exam: only forwarded to the recursive calls.
    :param seen_keys: set of keys already accepted; updated in place.
    :param mc_variant: variant used when a replacement has to be generated.
    :param utas: selects how ``all_exams`` is read (see above).
    :return: tuple ``(accepted exercise, seen_keys)``.
    """

    async def regenerate():
        # Swap the candidate for a new question and run the whole check on it.
        replacement = await self._generate_single_multiple_choice(mc_variant)
        return await self._replace_exercise_if_exists(
            all_exams, replacement, current_exam, seen_keys, mc_variant, utas
        )

    def matches(stored):
        if stored["prompt"] != current_exercise["prompt"]:
            return False
        first_option_text = stored["options"][0]["text"]
        return any(first_option_text == option["text"] for option in current_exercise["options"])

    # Extracting relevant fields for comparison
    option_texts = sorted(option['text'] for option in current_exercise['options'])
    key = (current_exercise['prompt'], tuple(option_texts))
    if key in seen_keys:
        return await regenerate()
    seen_keys.add(key)

    for exam in all_exams:
        if utas:
            stored_questions = exam.get("questions", [])
        else:
            # Stored documents: only the first exercise of the first part is inspected.
            parts = exam.to_dict().get("parts", [])
            if len(parts) == 0:
                continue
            exercises = parts[0].get("exercises", [])
            if len(exercises) == 0:
                continue
            stored_questions = exercises[0]["questions"]

        if any(matches(stored) for stored in stored_questions):
            return await regenerate()

    return current_exercise, seen_keys
|
||||
|
||||
async def gen_blank_space_text_utas(
        self, quantity: int, start_id: int, size: int, topic=None
):
    """Generate a text with ``quantity`` words replaced by numbered blanks.

    :param quantity: number of words to blank out.
    :param start_id: id of the first blank; following blanks increment it.
    :param size: minimum number of words of the generated text.
    :param topic: topic of the text; when omitted a random entry of
        ``EducationalContent.MTI_TOPICS`` is picked on every call.
    :return: the ``question`` object of the LLM prediction.
    """
    # A default evaluated in the signature would be drawn only once, at definition
    # time, so every call without a topic would reuse the same one.
    if topic is None:
        topic = random.choice(EducationalContent.MTI_TOPICS)

    json_template = self._mc_variants["blank_space_text"]
    messages = [
        {
            "role": "system",
            "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
        },
        {
            "role": "user",
            "content": f'Generate a text of at least {size} words about the topic {topic}.'
        },
        {
            "role": "user",
            "content": (
                f'From the generated text choose {quantity} words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is incremented for each word. '
                'The ids must be ordered throughout the text and the words must be replaced only once. '
                'Put the removed words and respective ids on the words array of the json in the correct order.'
            )
        }
    ]

    question = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["question"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )

    return question["question"]
|
||||
|
||||
async def gen_reading_passage_utas(
        self, start_id, sa_quantity: int, mc_quantity: int, topic=None
):
    """Generate a reading passage with short answer and multiple choice questions.

    Short answer questions start at ``start_id``; multiple choice questions
    start at ``start_id + sa_quantity``.

    :param start_id: id of the first question.
    :param sa_quantity: number of short answer questions.
    :param mc_quantity: number of multiple choice questions.
    :param topic: topic of the passage; when omitted a random entry of
        ``EducationalContent.MTI_TOPICS`` is picked on every call.
    :return: dictionary with the ``exercises`` and the passage ``text`` (content and title).
    """
    # A default evaluated in the signature would be drawn only once, at definition
    # time, so every call without a topic would reuse the same one.
    if topic is None:
        topic = random.choice(EducationalContent.MTI_TOPICS)

    passage = await self._reading_service.generate_reading_passage(1, topic)
    short_answer = await self._gen_short_answer_utas(passage["text"], start_id, sa_quantity)
    mc_exercises = await self._gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
    return {
        "exercises": {
            "shortAnswer": short_answer,
            "multipleChoice": mc_exercises,
        },
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        }
    }
|
||||
|
||||
async def _gen_short_answer_utas(self, text: str, start_id: int, sa_quantity: int):
    """Ask the LLM for short answer questions (answers of at most 3 words) about ``text``.

    :param text: passage the questions are about.
    :param start_id: id the LLM is told to start numbering from.
    :param sa_quantity: number of questions requested.
    :return: the ``questions`` list of the LLM prediction.
    """
    json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

    system_prompt = f'You are a helpful assistant designed to output JSON on this format: {json_format}'
    request_prompt = (
        f'Generate {sa_quantity} short answer questions, and the possible answers, must have '
        f'maximum 3 words per answer, about this text:\n"{text}"'
    )
    numbering_prompt = f'The id starts at {start_id}.'

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": request_prompt},
        {"role": "user", "content": numbering_prompt},
    ]

    prediction = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    return prediction["questions"]
|
||||
|
||||
async def _gen_text_multiple_choice_utas(self, text: str, start_id: int, mc_quantity: int):
    """Generate ``mc_quantity`` four-option multiple choice questions about ``text``.

    The LLM is asked again whenever it returns a different number of
    questions than requested.

    :param text: passage the questions are about.
    :param start_id: id given to the first question.
    :param mc_quantity: number of questions to generate.
    :return: dictionary with the renumbered ``questions``, options shuffled.
    """
    json_template = self._mc_variants["text_mc_utas"]

    messages = [
        {
            "role": "system",
            "content": f'You are a helpful assistant designed to output JSON on this format: {json_template}'
        },
        {
            "role": "user",
            "content": f'Generate {mc_quantity} multiple choice questions of 4 options for this text:\n{text}'
        },
        {
            "role": "user",
            "content": 'Make sure every question only has 1 correct answer.'
        }
    ]

    question = await self._llm.prediction(
        GPTModels.GPT_4_O, messages, ["questions"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )

    if len(question["questions"]) != mc_quantity:
        # Retry with the arguments in signature order (text, start_id, mc_quantity);
        # swapping the last two would change both the requested count and the ids.
        return await self._gen_text_multiple_choice_utas(text, start_id, mc_quantity)

    response = ExercisesHelper.fix_exercise_ids(question, start_id)
    response["questions"] = ExercisesHelper.randomize_mc_options_order(response["questions"])
    return response
|
||||
137
app/services/impl/level/mc_variants.json
Normal file
137
app/services/impl/level/mc_variants.json
Normal file
@@ -0,0 +1,137 @@
|
||||
{
|
||||
"normal": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "And"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "Cat"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "Happy"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "Jump"
|
||||
}
|
||||
],
|
||||
"prompt": "Which of the following is a conjunction?",
|
||||
"solution": "A",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
"blank_space": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "And"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "Cat"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "Happy"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "Jump"
|
||||
}
|
||||
],
|
||||
"prompt": "Which of the following is a conjunction?",
|
||||
"solution": "A",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
"underline": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "b"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "c"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "d"
|
||||
}
|
||||
],
|
||||
"prompt": "prompt",
|
||||
"solution": "A",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
"blank_space_text": {
|
||||
"question": {
|
||||
"words": [
|
||||
{
|
||||
"id": "1",
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"text": "b"
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"text": "c"
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"text": "d"
|
||||
}
|
||||
],
|
||||
"text": "text"
|
||||
}
|
||||
},
|
||||
"text_mc_utas": {
|
||||
"questions": [
|
||||
{
|
||||
"id": "9",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "a"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "b"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "c"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "d"
|
||||
}
|
||||
],
|
||||
"prompt": "prompt",
|
||||
"solution": "A",
|
||||
"variant": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
404
app/services/impl/level/upload.py
Normal file
404
app/services/impl/level/upload.py
Normal file
@@ -0,0 +1,404 @@
|
||||
import aiofiles
|
||||
import os
|
||||
import uuid
|
||||
from logging import getLogger
|
||||
|
||||
from typing import Dict, Any, Tuple, Coroutine
|
||||
|
||||
import pdfplumber
|
||||
from fastapi import UploadFile
|
||||
|
||||
from app.services.abc import ILLMService
|
||||
from app.helpers import LoggerHelper, FileHelper
|
||||
from app.mappers import ExamMapper
|
||||
|
||||
from app.dtos.exam import Exam
|
||||
from app.dtos.sheet import Sheet
|
||||
|
||||
|
||||
class UploadLevelModule:
|
||||
def __init__(self, openai: ILLMService):
    """Keep the LLM service used for every completion and create a logger named after this module."""
    self._llm = openai
    self._logger = getLogger(__name__)
|
||||
|
||||
# TODO: create a doc in firestore with a status and get its id, run this in a thread and modify the doc in
|
||||
# firestore, return the id right away, in generation view poll for the id
|
||||
async def generate_level_from_file(self, file: UploadFile) -> Dict[str, Any] | None:
    """Turn an uploaded exercise sheet into a level exam dictionary.

    The upload is stored under ``./tmp/<id>/`` by ``_save_upload``, handed to
    ``FileHelper.convert_file_to_pdf`` and checked for embedded images. Sheets
    with images go through ``_png_completion``; all others are converted with
    ``FileHelper.convert_file_to_html`` and go through ``_html_completion``.

    :param file: the uploaded sheet.
    :return: the exam dumped with ``exclude_none=True`` and renumbered by
        ``fix_ids``, or ``None`` when the completion returned nothing.
    """
    ext, path_id = await self._save_upload(file)
    work_dir = f'./tmp/{path_id}'
    uploaded_path = f'{work_dir}/uploaded.{ext}'
    pdf_path = f'{work_dir}/exercises.pdf'

    # The working directory is removed even when a conversion or the LLM call
    # raises, so failed uploads do not accumulate under ./tmp.
    try:
        FileHelper.convert_file_to_pdf(uploaded_path, pdf_path)
        file_has_images = self._check_pdf_for_images(pdf_path)

        if file_has_images:
            response = await self._png_completion(path_id)
        else:
            FileHelper.convert_file_to_html(uploaded_path, f'{work_dir}/exercises.html')
            response = await self._html_completion(path_id)
    finally:
        FileHelper.remove_directory(work_dir)

    if response:
        return self.fix_ids(response.dict(exclude_none=True))
    return None
|
||||
|
||||
@staticmethod
@LoggerHelper.suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool:
    """Return True when at least one page of the PDF at ``pdf_path`` reports images."""
    with pdfplumber.open(pdf_path) as document:
        return any(page.images for page in document.pages)
|
||||
|
||||
@staticmethod
async def _save_upload(file: UploadFile) -> Tuple[str, str]:
    """Write the upload to ``./tmp/<uuid>/uploaded.<ext>``.

    :param file: the uploaded file; its client-supplied name is only used to
        derive the extension.
    :return: ``(ext, path_id)`` where ``path_id`` is the generated uuid that
        names the working directory.
    """
    # The filename is client input and may be absent. Reducing it to its base
    # name first guarantees the extension cannot carry path separators into
    # the path built below.
    base_name = os.path.basename(file.filename or '')
    ext = base_name.split('.')[-1]

    path_id = str(uuid.uuid4())
    work_dir = f'./tmp/{path_id}'
    os.makedirs(work_dir, exist_ok=True)

    tmp_filename = f'{work_dir}/uploaded.{ext}'
    file_bytes: bytes = await file.read()

    # Distinct name for the output handle so the upload parameter is not shadowed.
    async with aiofiles.open(tmp_filename, 'wb') as out:
        await out.write(file_bytes)

    return ext, path_id
|
||||
|
||||
def _level_json_schema(self):
|
||||
return {
|
||||
"parts": [
|
||||
{
|
||||
"context": "<this attribute is optional you may exclude it if not required>",
|
||||
"exercises": [
|
||||
self._multiple_choice_html(),
|
||||
self._passage_blank_space_html()
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
async def _html_completion(self, path_id: str) -> Exam:
    """Read ``./tmp/<path_id>/exercises.html`` and ask the LLM to map it onto the exam model."""
    html_path = f'./tmp/{path_id}/exercises.html'
    async with aiofiles.open(html_path, 'r', encoding='utf-8') as html_file:
        html = await html_file.read()

    messages = [
        self._gpt_instructions_html(),
        {"role": "user", "content": html},
    ]
    mapper = ExamMapper.map_to_exam_model
    schema = str(self._level_json_schema())
    return await self._llm.pydantic_prediction(messages, mapper, schema)
|
||||
|
||||
def _gpt_instructions_html(self):
    """Return the system message that tells the model how to scrape an HTML question sheet.

    The prompt lists the four question types, the formatting rules for prompts
    and reading passages, the overall ``{"parts": [...]}`` layout, and embeds
    the templates from ``_multiple_choice_html`` and ``_passage_blank_space_html``.
    """
    # NOTE(review): several adjacent literals below end with '.' and the next one
    # starts without a leading space, so the sentences are glued together in the
    # final prompt (e.g. "...data.Your current task"). Left untouched here because
    # the text is runtime behavior.
    return {
        "role": "system",
        "content": (
            'You are GPT Scraper and your job is to clean dirty html into clean usable JSON formatted data.'
            'Your current task is to scrape html english questions sheets.\n\n'

            # The question types the model should expect.
            'In the question sheet you will only see 4 types of question:\n'
            '- blank space multiple choice\n'
            '- underline multiple choice\n'
            '- reading passage blank space multiple choice\n'
            '- reading passage multiple choice\n\n'

            # How blanks and underlines must appear in the question prompt.
            'For the first two types of questions the template is the same but the question prompts differ, '
            'whilst in the blank space multiple choice you must include in the prompt the blank spaces with '
            'multiple "_", in the underline you must include in the prompt the <u></u> to '
            'indicate the underline and the options a, b, c, d must be the ordered underlines in the prompt.\n\n'

            # Passage formatting. This is a plain (non-f) string, so the doubled
            # braces in "{{question id}}" are sent to the model literally.
            'For the reading passage exercise you must handle the formatting of the passages. If it is a '
            'reading passage with blank spaces you will see blanks represented with (question id) followed by a '
            'line and your job is to replace the brackets with the question id and line with "{{question id}}" '
            'with 2 newlines between paragraphs. For the reading passages without blanks you must remove '
            'any numbers that may be there to specify paragraph numbers or line numbers, and place 2 newlines '
            'between paragraphs.\n\n'

            'IMPORTANT: Note that for the reading passages, the html might not reflect the actual paragraph '
            'structure, don\'t format the reading passages paragraphs only by the <p></p> tags, try to figure '
            'out the best paragraph separation possible.'

            # Overall shape of the expected JSON answer.
            'You will place all the information in a single JSON: '
            '{"parts": [{"exercises": [{...}], "context": ""}]}\n '
            'Where {...} are the exercises templates for each part of a question sheet and the optional field '
            'context.'

            'IMPORTANT: The question sheet may be divided by sections but you need to only consider the parts, '
            'so that you can group the exercises by the parts that are in the html, this is crucial since only '
            'reading passage multiple choice require context and if the context is included in parts where it '
            'is not required the UI will be messed up. Some make sure to correctly group the exercises by parts.\n'

            # The templates are interpolated as the str() of the template dicts.
            'The templates for the exercises are the following:\n'
            '- blank space multiple choice, underline multiple choice and reading passage multiple choice: '
            f'{self._multiple_choice_html()}\n'
            f'- reading passage blank space multiple choice: {self._passage_blank_space_html()}\n'

            'IMPORTANT: For the reading passage multiple choice the context field must be set with the reading '
            'passages without paragraphs or line numbers, with 2 newlines between paragraphs, for the other '
            'exercises exclude the context field.'
        )
    }
|
||||
|
||||
@staticmethod
|
||||
def _multiple_choice_html():
|
||||
return {
|
||||
"type": "multipleChoice",
|
||||
"prompt": "Select the appropriate option.",
|
||||
"questions": [
|
||||
{
|
||||
"id": "<the question id>",
|
||||
"prompt": "<the question>",
|
||||
"solution": "<the option id solution>",
|
||||
"options": [
|
||||
{
|
||||
"id": "A",
|
||||
"text": "<the a option>"
|
||||
},
|
||||
{
|
||||
"id": "B",
|
||||
"text": "<the b option>"
|
||||
},
|
||||
{
|
||||
"id": "C",
|
||||
"text": "<the c option>"
|
||||
},
|
||||
{
|
||||
"id": "D",
|
||||
"text": "<the d option>"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _passage_blank_space_html():
|
||||
return {
|
||||
"type": "fillBlanks",
|
||||
"variant": "mc",
|
||||
"prompt": "Click a blank to select the appropriate word for it.",
|
||||
"text": (
|
||||
"<The whole text for the exercise with replacements for blank spaces and their "
|
||||
"ids with {{<question id>}} with 2 newlines between paragraphs>"
|
||||
),
|
||||
"solutions": [
|
||||
{
|
||||
"id": "<question id>",
|
||||
"solution": "<the option that holds the solution>"
|
||||
}
|
||||
],
|
||||
"words": [
|
||||
{
|
||||
"id": "<question id>",
|
||||
"options": {
|
||||
"A": "<a option>",
|
||||
"B": "<b option>",
|
||||
"C": "<c option>",
|
||||
"D": "<d option>"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
async def _png_completion(self, path_id: str) -> Exam:
    """Build an exam from the page images of an upload.

    Calls ``FileHelper.pdf_to_png``, collects the ``page-<n>.png`` files found in
    ``./tmp/<path_id>`` in numeric order and sends every page together with its
    successor (when there is one) to ``_png_batch``. Each returned sheet is tagged
    with its 1-based batch number and all of them are handed to
    ``_batches_to_exam_completion``.
    """
    FileHelper.pdf_to_png(path_id)

    def page_number(filename: str) -> int:
        # "page-12.png" -> 12
        return int(filename.split('-')[1].split('.')[0])

    pages = sorted(
        (
            name for name in os.listdir(f'./tmp/{path_id}')
            if name.startswith('page-') and name.endswith('.png')
        ),
        key=page_number
    )

    part_component = {"type": "part", "part": "<name or number of the part>"}
    blanks_passage_component = {
        "type": "blanksPassage",
        "text": (
            "<The whole text for the exercise with replacements for blank spaces and their "
            "ids with {{<question id>}} with 2 newlines between paragraphs>"
        ),
    }
    passage_component = {
        "type": "passage",
        "context": "<reading passages without paragraphs or line numbers, with 2 newlines between paragraphs>",
    }
    json_schema = {
        "components": [
            part_component,
            self._multiple_choice_png(),
            blanks_passage_component,
            passage_component,
            self._passage_blank_space_png(),
        ]
    }

    components = []
    for index, _page in enumerate(pages):
        # Window of two consecutive pages; the last window holds a single page.
        sheet = await self._png_batch(path_id, pages[index:index + 2], json_schema)
        sheet.batch = index + 1
        components.append(sheet.dict())

    return await self._batches_to_exam_completion({"batches": components})
|
||||
|
||||
async def _png_batch(self, path_id: str, files: list[str], json_schema) -> Sheet:
    """Send one batch of page images to the LLM and map the answer with ``ExamMapper.map_to_sheet``."""
    system_message = self._gpt_instructions_png()
    user_message = {
        "role": "user",
        "content": list(FileHelper.b64_pngs(path_id, files)),
    }
    return await self._llm.pydantic_prediction(
        [system_message, user_message],
        ExamMapper.map_to_sheet,
        str(json_schema)
    )
|
||||
|
||||
def _gpt_instructions_png(self):
    """Return the system message that tells the model how to transcribe sheet images.

    The prompt asks for a single ``{"components": [...]}`` JSON and describes each
    component (part, multiple choice question, the two reading passage kinds and
    blanks options), embedding the templates from ``_multiple_choice_png`` and
    ``_passage_blank_space_png``.
    """
    return {
        "role": "system",
        "content": (
            'You are GPT OCR and your job is to scan image text data and format it to JSON format.'
            'Your current task is to scan english questions sheets.\n\n'

            'You will place all the information in a single JSON: {"components": [{...}]} where {...} is a set of '
            'sheet components you will retrieve from the images, the components and their corresponding JSON '
            'templates are as follows:\n'

            # Component: part marker.
            '- Part, a standalone part or part of a section of the question sheet: '
            '{"type": "part", "part": "<name or number of the part>"}\n'

            # Component: multiple choice question and its three prompt variants.
            '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
            'the prompt field of the template: blanks, underlines and normal. '

            'In the blanks prompt you must leave 5 underscores to represent the blank space. '
            'In the underlines questions the objective is to pick the words that are incorrect in the given '
            'sentence, for these questions you must wrap the answer to the question with the html tag <u></u>, '
            'choose 3 other words to wrap in <u></u>, place them in the prompt field and use the underlined words '
            'in the order they appear in the question for the options A to D, disreguard options that might be '
            'included underneath the underlines question and use the ones you wrapped in <u></u>.'
            'In normal you just leave the question as is. '

            f'The template for multiple choice questions is the following: {self._multiple_choice_png()}.\n'

            # Component: reading passages. These are plain (non-f) strings, so the
            # doubled braces are sent to the model literally.
            '- Reading Passages, there are two types of reading passages. Reading passages where you will see '
            'blanks represented by a (question id) followed by a line, you must format these types of reading '
            'passages to be only the text with the brackets that have the question id and line replaced with '
            '"{{question id}}", also place 2 newlines between paragraphs. For the reading passages without blanks '
            'you must remove any numbers that may be there to specify paragraph numbers or line numbers, '
            'and place 2 newlines between paragraphs. '

            'For the reading passages with blanks the template is: {"type": "blanksPassage", '
            '"text": "<The whole text for the exercise with replacements for blank spaces and their '
            'ids that are enclosed in brackets with {{<question id>}} also place 2 newlines between paragraphs>"}. '

            'For the reading passage without blanks is: {"type": "passage", "context": "<reading passages without '
            'paragraphs or line numbers, with 2 newlines between paragraphs>"}\n'

            # Component: the option group belonging to one blank of a blanks passage.
            '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
            'options with the question id and the options from a to d. The template is: '
            f'{self._passage_blank_space_png()}\n'

            'IMPORTANT: You must place the components in the order that they were given to you. If an exercise or '
            'reading passages are cut off don\'t include them in the JSON.'
        )
    }
|
||||
|
||||
def _multiple_choice_png(self):
|
||||
multiple_choice = self._multiple_choice_html()["questions"][0]
|
||||
multiple_choice["type"] = "multipleChoice"
|
||||
multiple_choice.pop("solution")
|
||||
return multiple_choice
|
||||
|
||||
def _passage_blank_space_png(self):
|
||||
passage_blank_space = self._passage_blank_space_html()["words"][0]
|
||||
passage_blank_space["type"] = "fillBlanks"
|
||||
return passage_blank_space
|
||||
|
||||
async def _batches_to_exam_completion(self, batches: Dict[str, Any]) -> Exam:
    """Ask the LLM to merge the scanned batches into a single exam.

    :param batches: ``{"batches": [...]}`` with one dumped sheet per page batch.
    :return: the result of ``pydantic_prediction`` mapped with
        ``ExamMapper.map_to_exam_model``.
    """
    # The input here is batch JSON, not HTML, so the system prompt must be the
    # merge/deduplicate/solve instructions rather than the HTML scraper ones.
    messages = [
        self._gpt_instructions_batches(),
        {
            "role": "user",
            "content": str(batches)
        }
    ]
    return await self._llm.pydantic_prediction(
        messages,
        ExamMapper.map_to_exam_model,
        str(self._level_json_schema())
    )
|
||||
|
||||
def _gpt_instructions_batches(self):
    """Return the system message that tells the model how to merge scanned batches into an exam.

    The prompt describes the batch components, then asks the model to remove
    duplicates between batches (later batches win), solve the exercises and
    restructure everything to the template from ``_level_json_schema``.
    """
    return {
        "role": "system",
        "content": (
            'You are helpfull assistant. Your task is to merge multiple batches of english question sheet '
            'components and solve the questions. Each batch may contain overlapping content with the previous '
            'batch, or close enough content which needs to be excluded. The components are as follows:'

            # Component: part marker.
            '- Part, a standalone part or part of a section of the question sheet: '
            '{"type": "part", "part": "<name or number of the part>"}\n'

            # Component: multiple choice question and how to solve each variant.
            '- Multiple Choice Question, there are three types of multiple choice questions that differ on '
            'the prompt field of the template: blanks, underlines and normal. '

            'In a blanks question, the prompt has underscores to represent the blank space, you must select the '
            'appropriate option to solve it.'

            'In a underlines question, the prompt has 4 underlines represented by the html tags <u></u>, you must '
            'select the option that makes the prompt incorrect to solve it. If the options order doesn\'t reflect '
            'the order in which the underlines appear in the prompt you will need to fix it.'

            'In a normal question there isn\'t either blanks or underlines in the prompt, you should just '
            'select the appropriate solution.'

            f'The template for these questions is the same: {self._multiple_choice_png()}\n'

            # Component: reading passages. Plain (non-f) strings, so the doubled
            # braces are sent to the model literally.
            '- Reading Passages, there are two types of reading passages with different templates. The one with '
            'type "blanksPassage" where the text field holds the passage and a blank is represented by '
            '{{<some number>}} and the other one with type "passage" that has the context field with just '
            'reading passages. For both of these components you will have to remove any additional data that might '
            'be related to a question description and also remove some "(<question id>)" and "_" from blanksPassage'
            ' if there are any. These components are used in conjunction with other ones.'

            # Component: the option group belonging to one blank of a blanks passage.
            '- Blanks Options, options for a blanks reading passage exercise, this type of component is a group of '
            'options with the question id and the options from a to d. The template is: '
            f'{self._passage_blank_space_png()}\n\n'

            # The three steps the model has to perform on the batches.
            'Now that you know the possible components here\'s what I want you to do:\n'
            '1. Remove duplicates. A batch will have duplicates of other batches and the components of '
            'the next batch should always take precedence over the previous one batch, what I mean by this is that '
            'if batch 1 has, for example, multiple choice question with id 10 and the next one also has id 10, '
            'you pick the next one.\n'
            '2. Solve the exercises. There are 4 types of exercises, the 3 multipleChoice variants + a fill blanks '
            'exercise. For the multiple choice question follow the previous instruction to solve them and place '
            f'them in this format: {self._multiple_choice_html()}. For the fill blanks exercises you need to match '
            'the correct blanksPassage to the correct fillBlanks options and then pick the correct option. Here is '
            f'the template for this exercise: {self._passage_blank_space_html()}.\n'
            f'3. Restructure the JSON to match this template: {self._level_json_schema()}. '
            f'You must group the exercises by the parts in the order they appear in the batches components. '
            f'The context field of a part is the context of a passage component that has text relevant to normal '
            f'multiple choice questions.\n'

            'Do your utmost to fullfill the requisites, make sure you include all non-duplicate questions'
            'in your response and correctly structure the JSON.'
        )
    }
|
||||
|
||||
@staticmethod
|
||||
def fix_ids(response):
|
||||
counter = 1
|
||||
for part in response["parts"]:
|
||||
for exercise in part["exercises"]:
|
||||
if exercise["type"] == "multipleChoice":
|
||||
for question in exercise["questions"]:
|
||||
question["id"] = counter
|
||||
counter += 1
|
||||
if exercise["type"] == "fillBlanks":
|
||||
for i in range(len(exercise["words"])):
|
||||
exercise["words"][i]["id"] = counter
|
||||
exercise["solutions"][i]["id"] = counter
|
||||
counter += 1
|
||||
return response
|
||||
@@ -1,15 +1,18 @@
|
||||
import queue
|
||||
import uuid
|
||||
from logging import getLogger
|
||||
from queue import Queue
|
||||
import random
|
||||
from typing import Dict
|
||||
from typing import Dict, List
|
||||
|
||||
from app.repositories.abc import IFileStorage, IDocumentStore
|
||||
from app.services.abc import IListeningService, ILLMService, ITextToSpeechService
|
||||
from app.configs.question_templates import getListeningTemplate, getListeningPartTemplate
|
||||
from app.configs.constants import (
|
||||
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant
|
||||
NeuralVoices, GPTModels, TemperatureSettings, FilePaths, MinTimers, ExamVariant, EducationalContent,
|
||||
FieldsAndExercises
|
||||
)
|
||||
from app.helpers import ExercisesHelper
|
||||
from app.helpers import ExercisesHelper, FileHelper
|
||||
|
||||
|
||||
class ListeningService(IListeningService):
|
||||
@@ -33,25 +36,83 @@ class ListeningService(IListeningService):
|
||||
self._tts = tts
|
||||
self._file_storage = file_storage
|
||||
self._document_store = document_store
|
||||
self._logger = getLogger(__name__)
|
||||
self._sections = {
|
||||
"section_1": {
|
||||
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
|
||||
"exercise_types": FieldsAndExercises.LISTENING_1_EXERCISE_TYPES,
|
||||
"exercise_sample_size": 1,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_1_EXERCISES,
|
||||
"start_id": 1,
|
||||
"generate_dialogue": self._generate_listening_conversation,
|
||||
"type": "conversation"
|
||||
"type": "conversation",
|
||||
},
|
||||
"section_2": {
|
||||
"topic": EducationalContent.SOCIAL_MONOLOGUE_CONTEXTS,
|
||||
"exercise_types": FieldsAndExercises.LISTENING_2_EXERCISE_TYPES,
|
||||
"exercise_sample_size": 2,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_2_EXERCISES,
|
||||
"start_id": 11,
|
||||
"generate_dialogue": self._generate_listening_monologue,
|
||||
"type": "monologue"
|
||||
"type": "monologue",
|
||||
},
|
||||
"section_3": {
|
||||
"topic": EducationalContent.FOUR_PEOPLE_SCENARIOS,
|
||||
"exercise_types": FieldsAndExercises.LISTENING_3_EXERCISE_TYPES,
|
||||
"exercise_sample_size": 1,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_3_EXERCISES,
|
||||
"start_id": 21,
|
||||
"generate_dialogue": self._generate_listening_conversation,
|
||||
"type": "conversation"
|
||||
"type": "conversation",
|
||||
},
|
||||
"section_4": {
|
||||
"topic": EducationalContent.ACADEMIC_SUBJECTS,
|
||||
"exercise_types": FieldsAndExercises.LISTENING_EXERCISE_TYPES,
|
||||
"exercise_sample_size": 2,
|
||||
"total_exercises": FieldsAndExercises.TOTAL_LISTENING_SECTION_4_EXERCISES,
|
||||
"start_id": 31,
|
||||
"generate_dialogue": self._generate_listening_monologue,
|
||||
"type": "monologue"
|
||||
}
|
||||
}
|
||||
|
||||
async def get_listening_question(
        self, section_id: int, topic: str, req_exercises: List[str], difficulty: str,
        number_of_exercises_q=None, start_id=-1
):
    """Generate the dialogue and the exercises for one listening section.

    Missing inputs are filled from the section configuration in
    ``self._sections``: a random topic, a random sample of exercise types, an
    exercise count split from ``ExercisesHelper.divide_number_into_parts`` and
    the section's ``start_id``.

    :param section_id: number used to look up ``section_<id>`` in ``self._sections``.
    :param topic: dialogue topic; a random section topic is used when falsy.
    :param req_exercises: requested exercise types; sampled when empty.
    :param difficulty: passed through to the exercise generation and the result.
    :param number_of_exercises_q: queue with the number of questions per exercise;
        computed when omitted or empty.
    :param start_id: first question id; ``-1`` means "use the section's start id".
    :return: dict with ``exercises``, ``text`` and ``difficulty``.
    """
    FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH)
    section = self._sections[f"section_{section_id}"]
    if not topic:
        topic = random.choice(section["topic"])

    if not req_exercises:
        req_exercises = random.sample(section["exercise_types"], section["exercise_sample_size"])

    # None replaces the former queue.Queue() default, which was created once at
    # definition time and shared by every call.
    if number_of_exercises_q is None or number_of_exercises_q.empty():
        number_of_exercises_q = ExercisesHelper.divide_number_into_parts(
            section["total_exercises"], len(req_exercises)
        )

    if start_id == -1:
        start_id = section["start_id"]

    dialog = await self.generate_listening_question(section_id, topic)

    # Sections 1 and 3 are the conversation sections; their dialogue is
    # flattened to text before it is logged and used for the exercises.
    if section_id in {1, 3}:
        dialog = self.parse_conversation(dialog)

    self._logger.info(f'Generated {section["type"]}: {dialog}')

    exercises = await self.generate_listening_exercises(
        section_id, str(dialog), req_exercises, number_of_exercises_q, start_id, difficulty
    )

    return {
        "exercises": exercises,
        "text": dialog,
        "difficulty": difficulty
    }
|
||||
|
||||
async def generate_listening_question(self, section: int, topic: str):
    """Generate the dialogue for ``section`` by awaiting that section's ``generate_dialogue`` callable."""
    section_config = self._sections[f'section_{section}']
    generate_dialogue = section_config["generate_dialogue"]
    return await generate_dialogue(section, topic)
|
||||
|
||||
@@ -67,9 +128,10 @@ class ListeningService(IListeningService):
|
||||
for req_exercise in req_exercises:
|
||||
number_of_exercises = number_of_exercises_q.get()
|
||||
|
||||
if req_exercise == "multipleChoice":
|
||||
if req_exercise == "multipleChoice" or req_exercise == "multipleChoice3Options":
|
||||
# Compare against the requested type: a bare "multipleChoice" literal is always
# truthy, which made every multipleChoice3Options exercise ask for 4 options.
n_options = 4 if req_exercise == "multipleChoice" else 3
|
||||
question = await self._gen_multiple_choice_exercise_listening(
|
||||
dialog_type, dialog, number_of_exercises, start_id, difficulty
|
||||
dialog_type, dialog, number_of_exercises, start_id, difficulty, n_options
|
||||
)
|
||||
|
||||
exercises.append(question)
|
||||
@@ -100,10 +162,9 @@ class ListeningService(IListeningService):
|
||||
|
||||
return exercises
|
||||
|
||||
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str):
|
||||
async def save_listening(self, parts: list[dict], min_timer: int, difficulty: str, listening_id: str):
|
||||
template = getListeningTemplate()
|
||||
template['difficulty'] = difficulty
|
||||
listening_id = str(uuid.uuid4())
|
||||
for i, part in enumerate(parts, start=0):
|
||||
part_template = getListeningPartTemplate()
|
||||
|
||||
@@ -127,8 +188,8 @@ class ListeningService(IListeningService):
|
||||
else:
|
||||
template["variant"] = ExamVariant.FULL.value
|
||||
|
||||
(result, listening_id) = await self._document_store.save_to_db_with_id("listening", template, listening_id)
|
||||
if result:
|
||||
listening_id = await self._document_store.save_to_db_with_id("listening", template, listening_id)
|
||||
if listening_id:
|
||||
return {**template, "id": listening_id}
|
||||
else:
|
||||
raise Exception("Failed to save question: " + str(parts))
|
||||
@@ -160,6 +221,20 @@ class ListeningService(IListeningService):
|
||||
}
|
||||
]
|
||||
|
||||
if section == 1:
|
||||
messages.extend([
|
||||
{
|
||||
"role": "user",
|
||||
"content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
|
||||
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": 'Try to have spelling of names (cities, people, etc)'
|
||||
|
||||
}
|
||||
])
|
||||
|
||||
response = await self._llm.prediction(
|
||||
GPTModels.GPT_4_O,
|
||||
messages,
|
||||
@@ -170,7 +245,11 @@ class ListeningService(IListeningService):
|
||||
return self._get_conversation_voices(response, True)
|
||||
|
||||
async def _generate_listening_monologue(self, section: int, topic: str) -> Dict:
|
||||
context = 'social context' if section == 2 else 'academic subject'
|
||||
head = (
|
||||
'Generate a comprehensive monologue set in the social context of'
|
||||
if section == 2 else
|
||||
'Generate a comprehensive and complex monologue on the academic subject of'
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
@@ -182,7 +261,7 @@ class ListeningService(IListeningService):
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate a comprehensive monologue set in the {context} of "{topic}". {self.MONOLOGUE_TAIL}'
|
||||
f'{head}: "{topic}". {self.MONOLOGUE_TAIL}'
|
||||
)
|
||||
}
|
||||
]
|
||||
@@ -233,7 +312,7 @@ class ListeningService(IListeningService):
|
||||
# ==================================================================================================================
|
||||
|
||||
async def _gen_multiple_choice_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str, n_options: int = 4
|
||||
):
|
||||
messages = [
|
||||
{
|
||||
@@ -248,8 +327,8 @@ class ListeningService(IListeningService):
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate {str(quantity)} {difficulty} difficulty multiple choice questions of 4 options '
|
||||
f'for this {dialog_type}:\n"' + text + '"')
|
||||
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
|
||||
f'options for this {dialog_type}:\n"' + text + '"')
|
||||
|
||||
}
|
||||
]
|
||||
@@ -268,7 +347,7 @@ class ListeningService(IListeningService):
|
||||
}
|
||||
|
||||
async def _gen_write_blanks_questions_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
|
||||
):
|
||||
messages = [
|
||||
{
|
||||
@@ -280,7 +359,7 @@ class ListeningService(IListeningService):
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
|
||||
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
|
||||
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
|
||||
}
|
||||
]
|
||||
@@ -300,7 +379,7 @@ class ListeningService(IListeningService):
|
||||
}
|
||||
|
||||
async def _gen_write_blanks_notes_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
|
||||
):
|
||||
messages = [
|
||||
{
|
||||
@@ -312,7 +391,7 @@ class ListeningService(IListeningService):
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate {str(quantity)} {difficulty} difficulty notes taken from this '
|
||||
f'Generate {quantity} {difficulty} difficulty notes taken from this '
|
||||
f'{dialog_type}:\n"{text}"'
|
||||
)
|
||||
|
||||
@@ -357,7 +436,7 @@ class ListeningService(IListeningService):
|
||||
}
|
||||
|
||||
async def _gen_write_blanks_form_exercise_listening(
|
||||
self, dialog_type: str, text: str, quantity: int, start_id, difficulty
|
||||
self, dialog_type: str, text: str, quantity: int, start_id: int, difficulty: str
|
||||
):
|
||||
messages = [
|
||||
{
|
||||
@@ -369,12 +448,21 @@ class ListeningService(IListeningService):
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate a form with {str(quantity)} {difficulty} difficulty key-value pairs '
|
||||
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
|
||||
f'about this {dialog_type}:\n"{text}"'
|
||||
)
|
||||
}
|
||||
]
|
||||
|
||||
if dialog_type == "conversation":
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": (
|
||||
'It must be a form and not questions. '
|
||||
'Example: {"form": ["Color of car": "blue", "Brand of car": "toyota"]}'
|
||||
)
|
||||
})
|
||||
|
||||
parsed_form = await self._llm.prediction(
|
||||
GPTModels.GPT_4_O, messages, ["form"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
@@ -391,3 +479,14 @@ class ListeningService(IListeningService):
|
||||
"type": "writeBlanks"
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def parse_conversation(conversation_data):
|
||||
conversation_list = conversation_data.get('conversation', [])
|
||||
readable_text = []
|
||||
|
||||
for message in conversation_list:
|
||||
name = message.get('name', 'Unknown')
|
||||
text = message.get('text', '')
|
||||
readable_text.append(f"{name}: {text}")
|
||||
|
||||
return "\n".join(readable_text)
|
||||
@@ -12,42 +12,25 @@ class ReadingService(IReadingService):
|
||||
|
||||
def __init__(self, llm: ILLMService):
|
||||
self._llm = llm
|
||||
self._passages = {
|
||||
"passage_1": {
|
||||
"question_type": QuestionType.READING_PASSAGE_1,
|
||||
"start_id": 1
|
||||
},
|
||||
"passage_2": {
|
||||
"question_type": QuestionType.READING_PASSAGE_2,
|
||||
"start_id": 14
|
||||
},
|
||||
"passage_3": {
|
||||
"question_type": QuestionType.READING_PASSAGE_3,
|
||||
"start_id": 27
|
||||
}
|
||||
}
|
||||
|
||||
async def gen_reading_passage(
|
||||
self,
|
||||
passage_id: int,
|
||||
part: int,
|
||||
topic: str,
|
||||
req_exercises: List[str],
|
||||
number_of_exercises_q: Queue,
|
||||
difficulty: str
|
||||
difficulty: str,
|
||||
start_id: int
|
||||
):
|
||||
_passage = self._passages[f'passage_{str(passage_id)}']
|
||||
|
||||
passage = await self.generate_reading_passage(_passage["question_type"], topic)
|
||||
|
||||
if passage == "":
|
||||
return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
|
||||
|
||||
start_id = _passage["start_id"]
|
||||
passage = await self.generate_reading_passage(part, topic)
|
||||
exercises = await self._generate_reading_exercises(
|
||||
passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty
|
||||
)
|
||||
|
||||
if ExercisesHelper.contains_empty_dict(exercises):
|
||||
return await self.gen_reading_passage(passage_id, topic, req_exercises, number_of_exercises_q, difficulty)
|
||||
return await self.gen_reading_passage(
|
||||
part, topic, req_exercises, number_of_exercises_q, difficulty, start_id
|
||||
)
|
||||
|
||||
return {
|
||||
"exercises": exercises,
|
||||
@@ -58,7 +41,17 @@ class ReadingService(IReadingService):
|
||||
"difficulty": difficulty
|
||||
}
|
||||
|
||||
async def generate_reading_passage(self, q_type: QuestionType, topic: str):
|
||||
async def generate_reading_passage(self, part: int, topic: str, word_count: int = 800):
|
||||
part_system_message = {
|
||||
"1": 'The generated text should be fairly easy to understand and have multiple paragraphs.',
|
||||
"2": 'The generated text should be fairly hard to understand and have multiple paragraphs.',
|
||||
"3": (
|
||||
'The generated text should be very hard to understand and include different points, theories, '
|
||||
'subtle differences of opinions from people, correctly sourced to the person who said it, '
|
||||
'over the specified topic and have multiple paragraphs.'
|
||||
)
|
||||
}
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -69,17 +62,26 @@ class ReadingService(IReadingService):
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Generate an extensive text for IELTS {q_type.value}, of at least 1500 words, '
|
||||
f'on the topic of "{topic}". The passage should offer a substantial amount of '
|
||||
'information, analysis, or narrative relevant to the chosen subject matter. This text '
|
||||
'passage aims to serve as the primary reading section of an IELTS test, providing an '
|
||||
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
|
||||
'does not contain forbidden subjects in muslim countries.'
|
||||
f'Generate an extensive text for IELTS Reading Passage {part}, of at least {word_count} words, '
|
||||
f'on the topic of "{topic}". The passage should offer a substantial amount of '
|
||||
'information, analysis, or narrative relevant to the chosen subject matter. This text '
|
||||
'passage aims to serve as the primary reading section of an IELTS test, providing an '
|
||||
'in-depth and comprehensive exploration of the topic. Make sure that the generated text '
|
||||
'does not contain forbidden subjects in muslim countries.'
|
||||
)
|
||||
|
||||
},
|
||||
{
|
||||
"role": "system",
|
||||
"content": part_system_message[str(part)]
|
||||
}
|
||||
]
|
||||
|
||||
if part == 3:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": "Use real text excerpts on you generated passage and cite the sources."
|
||||
})
|
||||
|
||||
return await self._llm.prediction(
|
||||
GPTModels.GPT_4_O,
|
||||
messages,
|
||||
@@ -95,11 +97,15 @@ class ReadingService(IReadingService):
|
||||
number_of_exercises = number_of_exercises_q.get()
|
||||
|
||||
if req_exercise == "fillBlanks":
|
||||
question = await self._gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
|
||||
question = await self._gen_summary_fill_blanks_exercise(
|
||||
passage, number_of_exercises, start_id, difficulty
|
||||
)
|
||||
exercises.append(question)
|
||||
print("Added fill blanks: " + str(question))
|
||||
elif req_exercise == "trueFalse":
|
||||
question = await self._gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty)
|
||||
question = await self._gen_true_false_not_given_exercise(
|
||||
passage, number_of_exercises, start_id, difficulty
|
||||
)
|
||||
exercises.append(question)
|
||||
print("Added trueFalse: " + str(question))
|
||||
elif req_exercise == "writeBlanks":
|
||||
@@ -114,32 +120,28 @@ class ReadingService(IReadingService):
|
||||
question = await self._gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
|
||||
exercises.append(question)
|
||||
print("Added paragraph match: " + str(question))
|
||||
elif req_exercise == "ideaMatch":
|
||||
question = await self._gen_idea_match_exercise(passage, number_of_exercises, start_id)
|
||||
exercises.append(question)
|
||||
print("Added idea match: " + str(question))
|
||||
|
||||
start_id = start_id + number_of_exercises
|
||||
|
||||
return exercises
|
||||
|
||||
async def _gen_summary_fill_blanks_exercise(self, text: str, quantity: int, start_id, difficulty):
|
||||
async def _gen_summary_fill_blanks_exercise(
|
||||
self, text: str, quantity: int, start_id, difficulty, num_random_words: int = 1
|
||||
):
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{ "summary": "summary", "words": ["word_1", "word_2"] }')
|
||||
'You are a helpful assistant designed to output JSON on this format: { "summary": "summary" }'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Summarize this text: "{text}"'
|
||||
)
|
||||
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Select {str(quantity)} {difficulty} difficulty words, it must be words and not '
|
||||
'expressions, from the summary.'
|
||||
)
|
||||
"content": f'Summarize this text: "{text}"'
|
||||
|
||||
}
|
||||
]
|
||||
@@ -148,22 +150,45 @@ class ReadingService(IReadingService):
|
||||
GPTModels.GPT_4_O, messages, ["summary"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
|
||||
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], 5)
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"words": ["word_1", "word_2"] }'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
|
||||
f'from this:\n{response["summary"]}'
|
||||
)
|
||||
}
|
||||
]
|
||||
|
||||
words_response = await self._llm.prediction(
|
||||
GPTModels.GPT_4_O, messages, ["words"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
response["words"] = words_response["words"]
|
||||
replaced_summary = ExercisesHelper.replace_first_occurrences_with_placeholders(
|
||||
response["summary"], response["words"], start_id
|
||||
)
|
||||
options_words = ExercisesHelper.add_random_words_and_shuffle(response["words"], num_random_words)
|
||||
solutions = ExercisesHelper.fillblanks_build_solutions_array(response["words"], start_id)
|
||||
|
||||
return {
|
||||
"allowRepetition": True,
|
||||
"id": str(uuid.uuid4()),
|
||||
"prompt": (
|
||||
"Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
|
||||
"Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
|
||||
"more words than spaces so you will not use them all. You may use any of the words more than once."
|
||||
),
|
||||
"solutions": solutions,
|
||||
"text": replaced_summary,
|
||||
"type": "fillBlanks",
|
||||
"words": options_words
|
||||
|
||||
}
|
||||
|
||||
async def _gen_true_false_not_given_exercise(self, text: str, quantity: int, start_id, difficulty):
|
||||
@@ -210,7 +235,8 @@ class ReadingService(IReadingService):
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
|
||||
'{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
@@ -243,7 +269,8 @@ class ReadingService(IReadingService):
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
|
||||
'{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
@@ -262,7 +289,7 @@ class ReadingService(IReadingService):
|
||||
|
||||
options = []
|
||||
for i, paragraph in enumerate(paragraphs, start=0):
|
||||
paragraph["heading"] = headings[i]
|
||||
paragraph["heading"] = headings[i]["heading"]
|
||||
options.append({
|
||||
"id": paragraph["letter"],
|
||||
"sentence": paragraph["paragraph"]
|
||||
@@ -285,3 +312,38 @@ class ReadingService(IReadingService):
|
||||
"sentences": sentences[:quantity],
|
||||
"type": "matchSentences"
|
||||
}
|
||||
|
||||
async def _gen_idea_match_exercise(self, text: str, quantity: int, start_id):
    """Build an "idea to author" matching exercise from a reading passage.

    The LLM is asked to extract ``quantity`` ideas, theories or opinions from
    ``text`` along with who each one is attributed to. The extracted list is
    then handed to ``ExercisesHelper`` to build the options and sentences.

    :param text: passage the ideas are extracted from.
    :param quantity: number of ideas requested in the prompt.
    :param start_id: forwarded to ``ExercisesHelper.build_sentences``.
    :return: exercise dict with ``type`` set to ``"matchSentences"``.
    """
    json_format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"ideas": [ '
        '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
        '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
        ']}'
    )
    extraction_request = (
        f'From the text extract {quantity} ideas, theories, opinions and who they are from. '
        f'The text: {text}'
    )

    llm_response = await self._llm.prediction(
        GPTModels.GPT_4_O,
        [
            {"role": "system", "content": json_format_instruction},
            {"role": "user", "content": extraction_request},
        ],
        ["ideas"],
        TemperatureSettings.GEN_QUESTION_TEMPERATURE,
    )
    extracted_ideas = llm_response["ideas"]

    # Keys are filled in the same order as the literal they replace.
    exercise = {"id": str(uuid.uuid4()), "allowRepetition": False}
    exercise["options"] = ExercisesHelper.build_options(extracted_ideas)
    exercise["prompt"] = "Choose the correct author for the ideas/opinions from the list of authors below."
    exercise["sentences"] = ExercisesHelper.build_sentences(extracted_ideas, start_id)
    exercise["type"] = "matchSentences"
    return exercise
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
import re
|
||||
import uuid
|
||||
import random
|
||||
from typing import Dict, List
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from app.repositories.abc import IFileStorage, IDocumentStore
|
||||
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
|
||||
@@ -27,29 +27,49 @@ class SpeakingService(ISpeakingService):
|
||||
self._document_store = document_store
|
||||
self._stt = stt
|
||||
self._logger = logging.getLogger(__name__)
|
||||
|
||||
# TODO: Is the difficulty in the prompts supposed to be hardcoded? The response is set with
|
||||
# either the difficulty in the request or a random one yet the prompt doesn't change
|
||||
self._tasks = {
|
||||
"task_1": {
|
||||
"get": {
|
||||
"json_template": (
|
||||
'{"topic": "topic", "question": "question"}'
|
||||
),
|
||||
"json_template": {
|
||||
"first_topic": "topic 1",
|
||||
"second_topic": "topic 2",
|
||||
"questions": [
|
||||
(
|
||||
"Introductory question about the first topic, starting the topic with "
|
||||
"'Let's talk about x' and then the question."
|
||||
),
|
||||
"Follow up question about the first topic",
|
||||
"Follow up question about the first topic",
|
||||
"Question about second topic",
|
||||
"Follow up question about the second topic",
|
||||
]
|
||||
},
|
||||
"prompt": (
|
||||
'Craft a thought-provoking question of {difficulty} difficulty for IELTS Speaking Part 1 '
|
||||
'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
|
||||
'that encourages candidates to delve deeply into personal experiences, preferences, or '
|
||||
'insights on the topic of "{topic}". Instruct the candidate to offer not only detailed '
|
||||
'descriptions but also provide nuanced explanations, examples, or anecdotes to enrich '
|
||||
'their response. Make sure that the generated question does not contain forbidden subjects in '
|
||||
'insights on the topic of "{first_topic}" and the topic of "{second_topic}". '
|
||||
'Make sure that the generated question does not contain forbidden subjects in '
|
||||
'muslim countries.'
|
||||
)
|
||||
}
|
||||
},
|
||||
"task_2": {
|
||||
"get": {
|
||||
"json_template": (
|
||||
'{"topic": "topic", "question": "question", "prompts": ["prompt_1", "prompt_2", "prompt_3"]}'
|
||||
),
|
||||
"json_template": {
|
||||
"topic": "topic",
|
||||
"question": "question",
|
||||
"prompts": [
|
||||
"prompt_1",
|
||||
"prompt_2",
|
||||
"prompt_3"
|
||||
],
|
||||
"suffix": "And explain why..."
|
||||
},
|
||||
"prompt": (
|
||||
'Create a question of {difficulty} difficulty for IELTS Speaking Part 2 '
|
||||
'Create a question of medium difficulty for IELTS Speaking Part 2 '
|
||||
'that encourages candidates to narrate a personal experience or story related to the topic '
|
||||
'of "{topic}". Include 3 prompts that guide the candidate to describe '
|
||||
'specific aspects of the experience, such as details about the situation, '
|
||||
@@ -60,11 +80,18 @@ class SpeakingService(ISpeakingService):
|
||||
},
|
||||
"task_3": {
|
||||
"get": {
|
||||
"json_template": (
|
||||
'{"topic": "topic", "questions": ["question", "question", "question"]}'
|
||||
),
|
||||
"json_template": {
|
||||
"topic": "topic",
|
||||
"questions": [
|
||||
"Introductory question about the topic.",
|
||||
"Follow up question about the topic",
|
||||
"Follow up question about the topic",
|
||||
"Follow up question about the topic",
|
||||
"Follow up question about the topic"
|
||||
]
|
||||
},
|
||||
"prompt": (
|
||||
'Formulate a set of 3 questions of {difficulty} difficulty for IELTS Speaking Part 3 '
|
||||
'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3'
|
||||
'that encourage candidates to engage in a meaningful discussion on the topic of "{topic}". '
|
||||
'Provide inquiries, ensuring they explore various aspects, perspectives, and implications '
|
||||
'related to the topic. Make sure that the generated question does not contain forbidden '
|
||||
@@ -74,28 +101,57 @@ class SpeakingService(ISpeakingService):
|
||||
},
|
||||
}
|
||||
|
||||
async def get_speaking_task(self, task_id: int, topic: str, difficulty: str):
|
||||
task_values = self._tasks[f'task_{task_id}']['get']
|
||||
async def get_speaking_part(
|
||||
self, part: int, topic: str, difficulty: str, second_topic: Optional[str] = None
|
||||
) -> Dict:
|
||||
task_values = self._tasks[f'task_{part}']['get']
|
||||
|
||||
if part == 1:
|
||||
task_prompt = task_values["prompt"].format(first_topic=topic, second_topic=second_topic)
|
||||
else:
|
||||
task_prompt = task_values["prompt"].format(topic=topic)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: ' +
|
||||
task_values["json_template"]
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
f'{task_values["json_template"]}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": str(task_values["prompt"]).format(topic=topic, difficulty=difficulty)
|
||||
"content": task_prompt
|
||||
}
|
||||
]
|
||||
|
||||
part_specific = {
|
||||
"1": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, past and future).',
|
||||
"2": (
|
||||
'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
|
||||
'that start with "And explain why".'
|
||||
)
|
||||
}
|
||||
|
||||
if part in {1, 2}:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": part_specific[str(part)]
|
||||
})
|
||||
|
||||
if part in {1, 3}:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": 'They must be 1 single question each and not be double-barreled questions.'
|
||||
})
|
||||
|
||||
fields_to_check = ["first_topic"] if part == 1 else FieldsAndExercises.GEN_FIELDS
|
||||
|
||||
response = await self._llm.prediction(
|
||||
GPTModels.GPT_4_O, messages, FieldsAndExercises.GEN_FIELDS, TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
GPTModels.GPT_4_O, messages, fields_to_check, TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
# TODO: this was on GET /speaking_task_3 don't know if it is intentional only for 3
|
||||
if task_id == 3:
|
||||
if part == 3:
|
||||
# Remove the numbers from the questions only if the string starts with a number
|
||||
response["questions"] = [
|
||||
re.sub(r"^\d+\.\s*", "", question)
|
||||
@@ -103,117 +159,15 @@ class SpeakingService(ISpeakingService):
|
||||
for question in response["questions"]
|
||||
]
|
||||
|
||||
response["type"] = task_id
|
||||
response["type"] = part
|
||||
response["difficulty"] = difficulty
|
||||
response["topic"] = topic
|
||||
|
||||
if part in {2, 3}:
|
||||
response["topic"] = topic
|
||||
|
||||
return response
|
||||
|
||||
async def grade_speaking_task_1_and_2(
    self, task: int, question: str, answer_firebase_path: str, sound_file_name: str
):
    """Grade a single recorded answer for Speaking Part 1 or 2.

    Downloads the recording, transcribes it, and, when the transcript passes
    the 20-word check, asks the LLM for a grading and for a model answer.
    The model answer, the transcript and a corrected text are attached to the
    grading response.

    The model answer is kept in its own variable: storing it in ``response``
    would discard the grading and make the later ``response["overall"]`` and
    ``response["task_response"]`` reads fail.

    :param task: speaking part number, used in prompts and log lines.
    :param question: question the candidate answered.
    :param answer_firebase_path: storage path of the recorded answer.
    :param sound_file_name: local path the recording is downloaded to.
    :return: grading dict with ``perfect_answer``, ``transcript`` and
        ``fixed_text`` added, or the zero rating when the transcript fails
        the 20-word check.
    """
    request_id = uuid.uuid4()
    req_data = {
        "question": question,
        "answer": answer_firebase_path
    }
    self._logger.info(
        f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
        f'Use this id to track the logs: {str(request_id)} - Request data: {str(req_data)}'
    )

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {answer_firebase_path}')

    await self._file_storage.download_firebase_file(answer_firebase_path, sound_file_name)

    self._logger.info(
        f'POST - speaking_task_{task} - {str(request_id)} - '
        f'Downloaded file {answer_firebase_path} to {sound_file_name}'
    )

    answer = await self._stt.speech_to_text(sound_file_name)

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer}')

    # Guard clause: a transcript that fails the word check is not sent to the LLM.
    if not TextHelper.has_x_words(answer, 20):
        self._logger.info(
            f'POST - speaking_task_{task} - {str(request_id)} - '
            f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
        )
        return self._zero_rating("The audio recorded does not contain enough english words to be graded.")

    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"comment": "comment about answer quality", "overall": 0.0, '
                '"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
                '"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}'
            )
        },
        {
            "role": "user",
            "content": (
                f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
                'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
                'assign a score of 0 if the response fails to address the question. Additionally, provide '
                'detailed commentary highlighting both strengths and weaknesses in the response.'
                f'\n Question: "{question}" \n Answer: "{answer}"'
            )
        }
    ]

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answer.')

    response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        messages,
        ["comment"],
        TemperatureSettings.GRADING_TEMPERATURE
    )

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answer graded: {str(response)}')

    perfect_answer_messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"answer": "perfect answer"}'
            )
        },
        {
            "role": "user",
            "content": (
                'Provide a perfect answer according to ielts grading system to the following '
                f'Speaking Part {task} question: "{question}"'
            )
        }
    ]

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting perfect answer.')

    # Separate variable on purpose: the grading in `response` must survive this call.
    perfect_answer_response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        perfect_answer_messages,
        ["answer"],
        TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
    response['perfect_answer'] = perfect_answer_response["answer"]

    self._logger.info(
        f'POST - speaking_task_{task} - {str(request_id)} - Perfect answer: {response["perfect_answer"]}'
    )

    response['transcript'] = answer

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting fixed text.')

    response['fixed_text'] = await self._get_speaking_corrections(answer)

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Fixed text: {response["fixed_text"]}')

    # An overall of zero (number or string) is replaced by the value
    # `_calculate_overall` derives from the four criteria.
    if response["overall"] == "0.0" or response["overall"] == 0.0:
        response["overall"] = self._calculate_overall(response)

    self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
    return response
|
||||
|
||||
# TODO: When there's more time, grade_speaking_task_1_and_2 can be merged with this
|
||||
async def grade_speaking_task_3(self, answers: Dict, task: int = 3):
|
||||
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict:
|
||||
request_id = uuid.uuid4()
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
|
||||
@@ -222,157 +176,219 @@ class SpeakingService(ISpeakingService):
|
||||
|
||||
text_answers = []
|
||||
perfect_answers = []
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
|
||||
)
|
||||
|
||||
if task != 2:
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
|
||||
)
|
||||
|
||||
for item in answers:
|
||||
sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
|
||||
|
||||
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Downloading file {item["answer"]}')
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Downloading file {item["answer"]}')
|
||||
|
||||
await self._file_storage.download_firebase_file(item["answer"], sound_file_name)
|
||||
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - '
|
||||
'Downloaded file ' + item["answer"] + f' to {sound_file_name}'
|
||||
f'POST - speaking_task_{task} - {request_id} - '
|
||||
f'Downloaded file {item["answer"]} to {sound_file_name}'
|
||||
)
|
||||
|
||||
answer_text = await self._stt.speech_to_text(sound_file_name)
|
||||
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Transcripted answer: {answer_text}')
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Transcripted answer: {answer_text}')
|
||||
|
||||
text_answers.append(answer_text)
|
||||
item["answer"] = answer_text
|
||||
os.remove(sound_file_name)
|
||||
|
||||
# TODO: This will end the grading of all answers if a single one does not have enough words
|
||||
# don't know if this is intended
|
||||
if not TextHelper.has_x_words(answer_text, 20):
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - '
|
||||
f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}')
|
||||
f'POST - speaking_task_{task} - {request_id} - '
|
||||
f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}'
|
||||
)
|
||||
return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
|
||||
|
||||
perfect_answer_messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"answer": "perfect answer"}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
'Provide a perfect answer according to ielts grading system to the following '
|
||||
f'Speaking Part {task} question: "{item["question"]}"'
|
||||
)
|
||||
}
|
||||
]
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - '
|
||||
f'POST - speaking_task_{task} - {request_id} - '
|
||||
f'Requesting perfect answer for question: {item["question"]}'
|
||||
)
|
||||
perfect_answers.append(await self._get_perfect_answer(task, item["question"]))
|
||||
|
||||
perfect_answers.append(
|
||||
await self._llm.prediction(
|
||||
GPTModels.GPT_3_5_TURBO,
|
||||
perfect_answer_messages,
|
||||
["answer"],
|
||||
TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
if task in {1, 3}:
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {request_id} - Formatting answers and questions for prompt.'
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"comment": "comment about answer quality", "overall": 0.0, '
|
||||
'"task_response": {"Fluency and Coherence": 0.0, "Lexical Resource": 0.0, '
|
||||
'"Grammatical Range and Accuracy": 0.0, "Pronunciation": 0.0}}')
|
||||
}
|
||||
]
|
||||
message = (
|
||||
f"Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a "
|
||||
"strict assessment that penalizes errors. Deduct points for deviations from the task, and "
|
||||
"assign a score of 0 if the response fails to address the question. Additionally, provide detailed "
|
||||
"commentary highlighting both strengths and weaknesses in the response."
|
||||
"\n\n The questions and answers are: \n\n'")
|
||||
formatted_text = ""
|
||||
for i, entry in enumerate(answers, start=1):
|
||||
formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
|
||||
formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
|
||||
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - Formatting answers and questions for prompt.'
|
||||
)
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {request_id} - '
|
||||
f'Formatted answers and questions for prompt: {formatted_text}'
|
||||
)
|
||||
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
|
||||
else:
|
||||
questions_and_answers = f'\n Question: "{answers[0]["question"]}" \n Answer: "{answers[0]["answer"]}"'
|
||||
|
||||
formatted_text = ""
|
||||
for i, entry in enumerate(answers, start=1):
|
||||
formatted_text += f"**Question {i}:**\n{entry['question']}\n\n"
|
||||
formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n"
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting grading of the answer(s).')
|
||||
response = await self._grade_task(task, questions_and_answers)
|
||||
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - Formatted answers and questions for prompt: {formatted_text}'
|
||||
)
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Answer(s) graded: {response}')
|
||||
|
||||
message += formatted_text
|
||||
if task in {1, 3}:
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {request_id} - Adding perfect answer(s) to response.')
|
||||
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": message
|
||||
})
|
||||
# TODO: check if it is answer["answer"] instead
|
||||
for i, answer in enumerate(perfect_answers, start=1):
|
||||
response['perfect_answer_' + str(i)] = answer
|
||||
|
||||
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Requesting grading of the answers.')
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {request_id} - Adding transcript and fixed texts to response.'
|
||||
)
|
||||
|
||||
response = await self._llm.prediction(
|
||||
GPTModels.GPT_3_5_TURBO, messages, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
|
||||
)
|
||||
for i, answer in enumerate(text_answers, start=1):
|
||||
response['transcript_' + str(i)] = answer
|
||||
response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
|
||||
else:
|
||||
response['transcript'] = answers[0]["answer"]
|
||||
|
||||
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Answers graded: {str(response)}')
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting fixed text.')
|
||||
response['fixed_text'] = await self._get_speaking_corrections(answers[0]["answer"])
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Fixed text: {response["fixed_text"]}')
|
||||
|
||||
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Adding perfect answers to response.')
|
||||
|
||||
for i, answer in enumerate(perfect_answers, start=1):
|
||||
response['perfect_answer_' + str(i)] = answer
|
||||
|
||||
self._logger.info(
|
||||
f'POST - speaking_task_{task} - {str(request_id)} - Adding transcript and fixed texts to response.'
|
||||
)
|
||||
|
||||
for i, answer in enumerate(text_answers, start=1):
|
||||
response['transcript_' + str(i)] = answer
|
||||
response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer)
|
||||
|
||||
if response["overall"] == "0.0" or response["overall"] == 0.0:
|
||||
response["overall"] = self._calculate_overall(response)
|
||||
|
||||
self._logger.info(f'POST - speaking_task_{task} - {str(request_id)} - Final response: {str(response)}')
|
||||
response['perfect_answer'] = perfect_answers[0]["answer"]
|
||||
|
||||
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
|
||||
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Final response: {response}')
|
||||
return response
|
||||
|
||||
# ==================================================================================================================
|
||||
# grade_speaking_task helpers
|
||||
# ==================================================================================================================
|
||||
|
||||
async def _get_perfect_answer(self, task: int, question: str):
    """Request a model answer for one speaking question from the LLM.

    For part 1 an extra instruction limits the answer to 2 or 3 sentences
    and GPT-4o is used; every other part uses GPT-3.5 Turbo.

    :param task: speaking part number, interpolated into the prompt.
    :param question: question to answer.
    :return: the result of ``self._llm.prediction`` called with ``["answer"]``.
    """
    is_part_one = task == 1

    conversation = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: {"answer": "perfect answer"}'
            )
        },
        {
            "role": "user",
            "content": (
                'Provide a perfect answer according to ielts grading system to the following '
                f'Speaking Part {task} question: "{question}"'
            )
        }
    ]
    if is_part_one:
        conversation += [{"role": "user", "content": 'The answer must be 2 or 3 sentences long.'}]

    if is_part_one:
        chosen_model = GPTModels.GPT_4_O
    else:
        chosen_model = GPTModels.GPT_3_5_TURBO

    return await self._llm.prediction(
        chosen_model, conversation, ["answer"], TemperatureSettings.GRADING_TEMPERATURE
    )
|
||||
|
||||
async def _grade_task(self, task: int, questions_and_answers: str) -> Dict:
    """Ask the LLM to grade the given speaking questions and answers.

    The conversation holds the JSON format instruction (built from
    ``self._grade_template()``), the evaluation request with
    ``questions_and_answers`` appended, one part-specific instruction and,
    for parts 1 and 3, two further instructions about pronunciation and
    comment length.

    :param task: speaking part number; must be 1, 2 or 3 (other values raise
        ``KeyError`` on the part-specific lookup).
    :param questions_and_answers: pre-formatted text appended to the prompt.
    :return: the result of ``self._llm.prediction`` called with ``["comment"]``.
    """
    def as_user(content: str) -> Dict:
        # Every instruction after the system message is sent with the "user" role.
        return {"role": "user", "content": content}

    grade_template = self._grade_template()
    evaluation_request = (
        f'Evaluate the given Speaking Part {task} response based on the IELTS grading system, ensuring a '
        'strict assessment that penalizes errors. Deduct points for deviations from the task, and '
        'assign a score of 0 if the response fails to address the question. Additionally, provide '
        'detailed commentary highlighting both strengths and weaknesses in the response.'
    )

    conversation = [
        {
            "role": "system",
            "content": (
                f'You are a helpful assistant designed to output JSON on this format: {grade_template}'
            )
        },
        as_user(evaluation_request + questions_and_answers),
    ]

    per_part_instruction = {
        "1": (
            'Address the student as "you". If the answers are not 2 or 3 sentences long, warn the '
            'student that they should be.'
        ),
        "2": 'Address the student as "you"',
        "3": 'Address the student as "you" and pay special attention to coherence between the answers.'
    }
    conversation.append(as_user(per_part_instruction[str(task)]))

    if task in {1, 3}:
        conversation.append(as_user(
            'For pronunciations act as if you heard the answers and they were transcripted '
            'as you heard them.'
        ))
        conversation.append(as_user(
            'The comments must be long, detailed, justify the grading and suggest improvements.'
        ))

    return await self._llm.prediction(
        GPTModels.GPT_4_O, conversation, ["comment"], TemperatureSettings.GRADING_TEMPERATURE
    )
|
||||
|
||||
@staticmethod
|
||||
def _fix_speaking_overall(overall: float, task_response: dict):
|
||||
grades = [category["grade"] for category in task_response.values()]
|
||||
|
||||
if overall > max(grades) or overall < min(grades):
|
||||
total_sum = sum(grades)
|
||||
average = total_sum / len(grades)
|
||||
rounded_average = round(average, 0)
|
||||
return rounded_average
|
||||
|
||||
return overall
|
||||
|
||||
@staticmethod
|
||||
def _zero_rating(comment: str):
|
||||
return {
|
||||
"comment": comment,
|
||||
"overall": 0,
|
||||
"task_response": {
|
||||
"Fluency and Coherence": 0,
|
||||
"Lexical Resource": 0,
|
||||
"Grammatical Range and Accuracy": 0,
|
||||
"Pronunciation": 0
|
||||
"Fluency and Coherence": {
|
||||
"grade": 0.0,
|
||||
"comment": ""
|
||||
},
|
||||
"Lexical Resource": {
|
||||
"grade": 0.0,
|
||||
"comment": ""
|
||||
},
|
||||
"Grammatical Range and Accuracy": {
|
||||
"grade": 0.0,
|
||||
"comment": ""
|
||||
},
|
||||
"Pronunciation": {
|
||||
"grade": 0.0,
|
||||
"comment": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _calculate_overall(response: Dict):
|
||||
return round(
|
||||
(
|
||||
response["task_response"]["Fluency and Coherence"] +
|
||||
response["task_response"]["Lexical Resource"] +
|
||||
response["task_response"]["Grammatical Range and Accuracy"] +
|
||||
response["task_response"]["Pronunciation"]
|
||||
) / 4, 1
|
||||
)
|
||||
|
||||
async def _get_speaking_corrections(self, text):
|
||||
messages = [
|
||||
{
|
||||
@@ -409,6 +425,7 @@ class SpeakingService(ISpeakingService):
|
||||
self._logger.info(f'Saved speaking to DB with id {req_id} : {str(template)}')
|
||||
|
||||
async def _create_video_per_part(self, exercises: List[Dict], template: Dict, part: int):
|
||||
avatar = (random.choice(list(AvatarEnum))).value
|
||||
template_index = part - 1
|
||||
|
||||
# Using list comprehension to find the element with the desired value in the 'type' field
|
||||
@@ -418,26 +435,12 @@ class SpeakingService(ISpeakingService):
|
||||
if found_exercises:
|
||||
exercise = found_exercises[0]
|
||||
self._logger.info(f'Creating video for speaking part {part}')
|
||||
if part in {1, 2}:
|
||||
result = await self._create_video(
|
||||
exercise["question"],
|
||||
(random.choice(list(AvatarEnum))).value,
|
||||
f'Failed to create video for part {part} question: {str(exercise["question"])}'
|
||||
)
|
||||
if result is not None:
|
||||
if part == 2:
|
||||
template["exercises"][template_index]["prompts"] = exercise["prompts"]
|
||||
|
||||
template["exercises"][template_index]["text"] = exercise["question"]
|
||||
template["exercises"][template_index]["title"] = exercise["topic"]
|
||||
template["exercises"][template_index]["video_url"] = result["video_url"]
|
||||
template["exercises"][template_index]["video_path"] = result["video_path"]
|
||||
else:
|
||||
if part in {1, 3}:
|
||||
questions = []
|
||||
for question in exercise["questions"]:
|
||||
result = await self._create_video(
|
||||
question,
|
||||
(random.choice(list(AvatarEnum))).value,
|
||||
avatar,
|
||||
f'Failed to create video for part {part} question: {str(exercise["question"])}'
|
||||
)
|
||||
if result is not None:
|
||||
@@ -449,63 +452,139 @@ class SpeakingService(ISpeakingService):
|
||||
questions.append(video)
|
||||
|
||||
template["exercises"][template_index]["prompts"] = questions
|
||||
template["exercises"][template_index]["title"] = exercise["topic"]
|
||||
if part == 1:
|
||||
template["exercises"][template_index]["first_title"] = exercise["first_topic"]
|
||||
template["exercises"][template_index]["second_title"] = exercise["second_topic"]
|
||||
else:
|
||||
template["exercises"][template_index]["title"] = exercise["topic"]
|
||||
else:
|
||||
result = await self._create_video(
|
||||
exercise["question"],
|
||||
avatar,
|
||||
f'Failed to create video for part {part} question: {str(exercise["question"])}'
|
||||
)
|
||||
if result is not None:
|
||||
template["exercises"][template_index]["prompts"] = exercise["prompts"]
|
||||
template["exercises"][template_index]["text"] = exercise["question"]
|
||||
template["exercises"][template_index]["title"] = exercise["topic"]
|
||||
template["exercises"][template_index]["video_url"] = result["video_url"]
|
||||
template["exercises"][template_index]["video_path"] = result["video_path"]
|
||||
|
||||
if not found_exercises:
|
||||
template["exercises"].pop(template_index)
|
||||
|
||||
return template
|
||||
|
||||
# TODO: Check if it is intended to log the original question
|
||||
async def generate_speaking_video(self, original_question: str, topic: str, avatar: str, prompts: List[str]):
|
||||
if len(prompts) > 0:
|
||||
question = original_question + " In your answer you should consider: " + " ".join(prompts)
|
||||
else:
|
||||
question = original_question
|
||||
|
||||
error_msg = f'Failed to create video for part 1 question: {original_question}'
|
||||
|
||||
result = await self._create_video(
|
||||
question,
|
||||
avatar,
|
||||
error_msg
|
||||
async def generate_video(
|
||||
self, part: int, avatar: str, topic: str, questions: list[str],
|
||||
*,
|
||||
second_topic: Optional[str] = None,
|
||||
prompts: Optional[list[str]] = None,
|
||||
suffix: Optional[str] = None,
|
||||
):
|
||||
request_id = str(uuid.uuid4())
|
||||
# TODO: request data
|
||||
self._logger.info(
|
||||
f'POST - generate_video_{part} - Received request to generate video {part}. '
|
||||
f'Use this id to track the logs: {request_id} - Request data: " + str(request.get_json())'
|
||||
)
|
||||
|
||||
if result is not None:
|
||||
return {
|
||||
"text": original_question,
|
||||
"prompts": prompts,
|
||||
"title": topic,
|
||||
**result,
|
||||
"type": "speaking",
|
||||
"id": uuid.uuid4()
|
||||
}
|
||||
else:
|
||||
return str(error_msg)
|
||||
part_questions = self._get_part_questions(part, questions, avatar)
|
||||
videos = []
|
||||
|
||||
async def generate_interactive_video(self, questions: List[str], avatar: str, topic: str):
|
||||
sp_questions = []
|
||||
self._logger.info('Creating videos for speaking part 3')
|
||||
for question in questions:
|
||||
self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating videos for speaking part {part}.')
|
||||
for question in part_questions:
|
||||
self._logger.info(f'POST - generate_video_{part} - {request_id} - Creating video for question: {question}')
|
||||
result = await self._create_video(
|
||||
question,
|
||||
avatar,
|
||||
f'Failed to create video for part 3 question: {question}'
|
||||
'POST - generate_video_{p} - {r} - Failed to create video for part {p} question: {q}'.format(
|
||||
p=part, r=request_id, q=question
|
||||
)
|
||||
)
|
||||
|
||||
if result is not None:
|
||||
self._logger.info(f'POST - generate_video_{part} - {request_id} - Video created')
|
||||
self._logger.info(
|
||||
f'POST - generate_video_{part} - {request_id} - Uploaded video to firebase: {result["video_url"]}'
|
||||
)
|
||||
video = {
|
||||
"text": question,
|
||||
**result
|
||||
"video_path": result["video_path"],
|
||||
"video_url": result["video_url"]
|
||||
}
|
||||
sp_questions.append(video)
|
||||
videos.append(video)
|
||||
|
||||
return {
|
||||
"prompts": sp_questions,
|
||||
"title": topic,
|
||||
"type": "interactiveSpeaking",
|
||||
"id": uuid.uuid4()
|
||||
}
|
||||
if part == 2 and len(videos) == 0:
|
||||
raise Exception(f'Failed to create video for part 2 question: {questions[0]}')
|
||||
|
||||
return self._get_part_response(part, topic, videos, second_topic, prompts, suffix)
|
||||
|
||||
@staticmethod
|
||||
def _get_part_questions(part: int, questions: list[str], avatar: str):
|
||||
part_questions: list[str] = []
|
||||
|
||||
if part == 1:
|
||||
id_to_name = {
|
||||
"5912afa7c77c47d3883af3d874047aaf": "MATTHEW",
|
||||
"9e58d96a383e4568a7f1e49df549e0e4": "VERA",
|
||||
"d2cdd9c0379a4d06ae2afb6e5039bd0c": "EDWARD",
|
||||
"045cb5dcd00042b3a1e4f3bc1c12176b": "TANYA",
|
||||
"1ae1e5396cc444bfad332155fdb7a934": "KAYLA",
|
||||
"0ee6aa7cc1084063a630ae514fccaa31": "JEROME",
|
||||
"5772cff935844516ad7eeff21f839e43": "TYLER",
|
||||
|
||||
}
|
||||
part_questions.extend(
|
||||
[
|
||||
"Hello my name is " + id_to_name.get(avatar) + ", what is yours?",
|
||||
"Do you work or do you study?",
|
||||
*questions
|
||||
]
|
||||
)
|
||||
elif part == 2:
|
||||
# Removed as the examiner should not say what is on the card.
|
||||
# question = question + " In your answer you should consider: " + " ".join(prompts) + suffix
|
||||
part_questions.append(f'{questions[0]}\nYou have 1 minute to take notes.')
|
||||
elif part == 3:
|
||||
part_questions = questions
|
||||
|
||||
return part_questions
|
||||
|
||||
@staticmethod
|
||||
def _get_part_response(
|
||||
part: int,
|
||||
topic: str,
|
||||
videos: list[dict],
|
||||
second_topic: Optional[str],
|
||||
prompts: Optional[list[str]],
|
||||
suffix: Optional[str]
|
||||
):
|
||||
response = {}
|
||||
if part == 1:
|
||||
response = {
|
||||
"prompts": videos,
|
||||
"first_title": topic,
|
||||
"second_title": second_topic,
|
||||
"type": "interactiveSpeaking"
|
||||
}
|
||||
if part == 2:
|
||||
response = {
|
||||
"prompts": prompts,
|
||||
"title": topic,
|
||||
"suffix": suffix,
|
||||
"type": "speaking",
|
||||
# includes text, video_url and video_path
|
||||
**videos[0]
|
||||
}
|
||||
if part == 3:
|
||||
response = {
|
||||
"prompts": videos,
|
||||
"title": topic,
|
||||
"type": "interactiveSpeaking",
|
||||
}
|
||||
|
||||
response["id"] = str(uuid.uuid4())
|
||||
return response
|
||||
|
||||
async def _create_video(self, question: str, avatar: str, error_message: str):
|
||||
result = await self._vid_gen.create_video(question, avatar)
|
||||
@@ -519,3 +598,36 @@ class SpeakingService(ISpeakingService):
|
||||
}
|
||||
self._logger.error(error_message)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _grade_template():
|
||||
return {
|
||||
"comment": "extensive comment about answer quality",
|
||||
"overall": 0.0,
|
||||
"task_response": {
|
||||
"Fluency and Coherence": {
|
||||
"grade": 0.0,
|
||||
"comment": (
|
||||
"extensive comment about fluency and coherence, use examples to justify the grade awarded."
|
||||
)
|
||||
},
|
||||
"Lexical Resource": {
|
||||
"grade": 0.0,
|
||||
"comment": "extensive comment about lexical resource, use examples to justify the grade awarded."
|
||||
},
|
||||
"Grammatical Range and Accuracy": {
|
||||
"grade": 0.0,
|
||||
"comment": (
|
||||
"extensive comment about grammatical range and accuracy, use examples to justify the "
|
||||
"grade awarded."
|
||||
)
|
||||
},
|
||||
"Pronunciation": {
|
||||
"grade": 0.0,
|
||||
"comment": (
|
||||
"extensive comment about pronunciation on the transcribed answer, use examples to justify the "
|
||||
"grade awarded."
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Callable, TypeVar
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
|
||||
from app.services.abc import ILLMService
|
||||
from app.helpers import count_tokens
|
||||
from app.configs.constants import BLACKLISTED_WORDS
|
||||
from pydantic import BaseModel
|
||||
|
||||
T = TypeVar('T', bound=BaseModel)
|
||||
|
||||
|
||||
class OpenAI(ILLMService):
|
||||
@@ -18,6 +21,7 @@ class OpenAI(ILLMService):
|
||||
def __init__(self, client: AsyncOpenAI):
|
||||
self._client = client
|
||||
self._logger = logging.getLogger(__name__)
|
||||
self._default_model = "gpt-4o-2024-08-06"
|
||||
|
||||
async def prediction(
|
||||
self,
|
||||
@@ -94,4 +98,53 @@ class OpenAI(ILLMService):
|
||||
|
||||
@staticmethod
|
||||
def _check_fields(obj, fields):
|
||||
return all(field in obj for field in fields)
|
||||
return all(field in obj for field in fields)
|
||||
|
||||
async def pydantic_prediction(
    self,
    messages: List[ChatCompletionMessageParam],
    map_to_model: Callable,
    json_scheme: str,
    *,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    max_retries: int = 3
) -> List[T] | T | None:
    """Request a JSON completion and convert it with ``map_to_model``.

    When the reply cannot be parsed as JSON, or ``map_to_model`` raises, the
    request is repeated with a correction prompt that carries the previous
    reply and ``json_scheme``; the original ``messages`` are not resent.

    :param messages: conversation sent on the first attempt.
    :param map_to_model: callable turning the decoded JSON into the result.
    :param json_scheme: textual description of the expected JSON.
    :param model: model name; ``self._default_model`` when omitted.
    :param temperature: sampling temperature; left to the API default when ``None``.
    :param max_retries: maximum number of failed attempts before giving up.
    :return: the mapped result, or ``None`` once the attempts are exhausted.
    """
    params = {
        "messages": messages,
        "response_format": {"type": "json_object"},
        "model": model if model else self._default_model
    }

    # Compare against None: 0.0 is a legitimate temperature and a truthiness
    # test would silently drop it.
    if temperature is not None:
        params["temperature"] = temperature

    attempt = 0
    while attempt < max_retries:
        result = await self._client.chat.completions.create(**params)
        result_content = result.choices[0].message.content
        try:
            result_json = json.loads(result_content)
            return map_to_model(result_json)
        except Exception as e:
            # Both decoding and mapping failures count as a malformed reply.
            attempt += 1
            self._logger.info(f"GPT returned malformed response: {result_content}\n {str(e)}")
            params["messages"] = [
                {
                    "role": "user",
                    "content": (
                        "Your previous response wasn't in the json format I've explicitly told you to output. "
                        "In your next response, you will fix it and return me just the json I've asked."
                    )
                },
                {
                    "role": "user",
                    "content": (
                        f"Previous response: {result_content}\n"
                        f"JSON format: {json_scheme}"
                    )
                }
            ]
            if attempt >= max_retries:
                self._logger.error("Max retries exceeded!")
                return None

    # Reached only when max_retries allows no attempt at all.
    return None
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
import re
|
||||
from functools import reduce
|
||||
|
||||
from app.configs.constants import TemperatureSettings, GPTModels
|
||||
from app.helpers import count_tokens
|
||||
from app.services.abc import ILLMService, ITrainingService
|
||||
|
||||
|
||||
class TrainingService(ITrainingService):
|
||||
|
||||
def __init__(self, llm: ILLMService):
|
||||
self._llm = llm
|
||||
|
||||
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
|
||||
messages = self._get_question_tips(question, answer, correct_answer, context)
|
||||
|
||||
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
|
||||
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
|
||||
|
||||
response = await self._llm.prediction(
|
||||
GPTModels.GPT_3_5_TURBO,
|
||||
messages,
|
||||
None,
|
||||
TemperatureSettings.TIPS_TEMPERATURE,
|
||||
token_count=token_count
|
||||
)
|
||||
|
||||
if isinstance(response, str):
|
||||
response = re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
|
||||
|
||||
return response
|
||||
|
||||
@staticmethod
|
||||
def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
"You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
|
||||
"help students understand why it was a wrong answer and gives helpful insight for the future. "
|
||||
"The tip should refer to the context and question."
|
||||
),
|
||||
}
|
||||
]
|
||||
|
||||
if not (context is None or context == ""):
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": f"This is the context for the question: {context}",
|
||||
})
|
||||
|
||||
messages.extend([
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"This is the question: {question}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"This is the answer: {answer}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"This is the correct answer: {correct_answer}",
|
||||
}
|
||||
])
|
||||
|
||||
return messages
|
||||
|
||||
7
app/services/impl/training/__init__.py
Normal file
7
app/services/impl/training/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from .training import TrainingService
|
||||
from .kb import TrainingContentKnowledgeBase
|
||||
|
||||
__all__ = [
|
||||
"TrainingService",
|
||||
"TrainingContentKnowledgeBase"
|
||||
]
|
||||
88
app/services/impl/training/kb.py
Normal file
88
app/services/impl/training/kb.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import json
|
||||
import os
|
||||
from logging import getLogger
|
||||
from typing import Dict, List
|
||||
|
||||
import faiss
|
||||
import pickle
|
||||
|
||||
from app.services.abc import IKnowledgeBase
|
||||
|
||||
|
||||
class TrainingContentKnowledgeBase(IKnowledgeBase):
    """Tip lookup backed by one FAISS index per tip category.

    Indices and the tip metadata are read from disk when the instance is
    created. The source tips JSON is only read when a method that walks the
    tips is called.
    """

    def __init__(self, embeddings, path: str = 'pathways_2_rw_with_ids.json'):
        """
        :param embeddings: model exposing ``encode``; used for tips and queries.
        :param path: location of the tips JSON, read lazily.
        """
        # The logger has to exist before load_indices_and_metadata() runs, because that method logs.
        self._logger = getLogger(__name__)
        self._embedding_model = embeddings
        self._tips_path = path
        self._tips = None  # filled on first use by _iter_tips()
        self._category_metadata = None
        self._indices = None
        self.load_indices_and_metadata()

    @staticmethod
    def _read_json(path: str) -> Dict[str, any]:
        """Read and decode the UTF-8 JSON file at ``path``."""
        with open(path, 'r', encoding="utf-8") as json_file:
            return json.loads(json_file.read())

    def _iter_tips(self):
        """Yield ``(category, tip)`` for every tip, loading the tips JSON on first use."""
        if self._tips is None:
            self._tips = self._read_json(self._tips_path)
        for unit in self._tips['units']:
            for page in unit['pages']:
                for tip in page['tips']:
                    yield tip['category'].lower().replace(" ", "_"), tip

    def print_category_count(self):
        """Print how many tips each category holds."""
        category_tips = {}
        for category, _tip in self._iter_tips():
            # Count the first occurrence too (it used to be recorded as 0).
            category_tips[category] = category_tips.get(category, 0) + 1
        print(category_tips)

    def create_embeddings_and_save_them(self) -> None:
        """Build one FAISS index per category and write indices and metadata under ``./faiss``."""
        category_embeddings = {}
        category_metadata = {}

        for category, tip in self._iter_tips():
            category_embeddings.setdefault(category, []).append(tip['embedding'])
            category_metadata.setdefault(category, []).append({"id": tip['id'], "text": tip['text']})

        # The output directory may not exist on a fresh checkout.
        os.makedirs("./faiss", exist_ok=True)

        category_indices = {}
        for category, embeddings in category_embeddings.items():
            embeddings_array = self._embedding_model.encode(embeddings)
            index = faiss.IndexFlatL2(embeddings_array.shape[1])
            index.add(embeddings_array)
            category_indices[category] = index

            faiss.write_index(index, f"./faiss/{category}_tips_index.faiss")

        with open("./faiss/tips_metadata.pkl", "wb") as f:
            pickle.dump(category_metadata, f)

        # Keep the in-memory state in line with what was just written.
        self._indices = category_indices
        self._category_metadata = category_metadata

    def load_indices_and_metadata(
        self,
        directory: str = './faiss',
        suffix: str = '_tips_index.faiss',
        metadata_path: str = './faiss/tips_metadata.pkl'
    ):
        """Load every ``*<suffix>`` index found in ``directory`` plus the pickled metadata.

        The category name is the file name with ``suffix`` removed.
        """
        self._indices = {}
        for file in os.listdir(directory):
            if file.endswith(suffix):
                category = file[:-len(suffix)]
                self._indices[category] = faiss.read_index(f'{directory}/{file}')
                self._logger.info(f'Loaded embeddings for {category} category.')

        # NOTE(review): pickle.load runs arbitrary code from the file; this is only
        # safe while the metadata file is produced by this application itself.
        with open(metadata_path, 'rb') as f:
            self._category_metadata = pickle.load(f)
        self._logger.info("Loaded tips metadata")

    def query_knowledge_base(self, query: str, category: str, top_k: int = 5) -> List[Dict[str, str]]:
        """Return the metadata of the ``top_k`` tips closest to ``query`` in ``category``.

        :raises KeyError: when no index was loaded for ``category``.
        """
        query_embedding = self._embedding_model.encode([query])
        index = self._indices[category]
        _distances, neighbours = index.search(query_embedding, top_k)
        metadata = self._category_metadata[category]
        # FAISS pads the labels with -1 when fewer than top_k vectors exist; a
        # negative label would otherwise pick an entry from the end of the list.
        return [metadata[i] for i in neighbours[0] if i >= 0]
|
||||
459
app/services/impl/training/training.py
Normal file
459
app/services/impl/training/training.py
Normal file
@@ -0,0 +1,459 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
from functools import reduce
|
||||
from logging import getLogger
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from app.configs.constants import TemperatureSettings, GPTModels
|
||||
from app.helpers import count_tokens
|
||||
from app.repositories.abc import IDocumentStore
|
||||
from app.services.abc import ILLMService, ITrainingService, IKnowledgeBase
|
||||
from app.dtos.training import *
|
||||
|
||||
|
||||
class TrainingService(ITrainingService):
|
||||
TOOLS = [
|
||||
'critical_thinking',
|
||||
'language_for_writing',
|
||||
'reading_skills',
|
||||
'strategy',
|
||||
'words',
|
||||
'writing_skills'
|
||||
]
|
||||
# strategy word_link ct_focus reading_skill word_partners writing_skill language_for_writing
|
||||
|
||||
def __init__(self, llm: ILLMService, firestore: IDocumentStore, training_kb: IKnowledgeBase):
    """Keep references to the collaborators used by the training service.

    :param llm: language-model service used for predictions.
    :param firestore: document store used to read exams and save training docs.
    :param training_kb: knowledge base queried for tips.
    """
    self._logger = getLogger(__name__)
    self._llm, self._db, self._kb = llm, firestore, training_kb
|
||||
|
||||
async def fetch_tips(self, context: str, question: str, answer: str, correct_answer: str):
    """Ask the LLM for a tip explaining why ``answer`` was wrong.

    The token count of every message content is summed and handed to the
    prediction call. When the reply is a string, a leading ``label:`` prefix
    (letters, digits or underscores followed by a colon) is removed.

    :return: the cleaned tip text, or the raw reply when it is not a string.
    """
    messages = self._get_question_tips(question, answer, correct_answer, context)

    token_count = 0
    for message in messages:
        if "content" in message:
            token_count += count_tokens(message["content"])['n_tokens']

    response = await self._llm.prediction(
        GPTModels.GPT_3_5_TURBO,
        messages,
        None,
        TemperatureSettings.TIPS_TEMPERATURE,
        token_count=token_count
    )

    if not isinstance(response, str):
        return response
    return re.sub(r"^[a-zA-Z0-9_]+\:\s*", "", response)
|
||||
|
||||
@staticmethod
|
||||
def _get_question_tips(question: str, answer: str, correct_answer: str, context: str = None):
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
"You are a IELTS exam program that analyzes incorrect answers to questions and gives tips to "
|
||||
"help students understand why it was a wrong answer and gives helpful insight for the future. "
|
||||
"The tip should refer to the context and question."
|
||||
),
|
||||
}
|
||||
]
|
||||
|
||||
if not (context is None or context == ""):
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": f"This is the context for the question: {context}",
|
||||
})
|
||||
|
||||
messages.extend([
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"This is the question: {question}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"This is the answer: {answer}",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"This is the correct answer: {correct_answer}",
|
||||
}
|
||||
])
|
||||
|
||||
return messages
|
||||
|
||||
async def get_training_content(self, training_content: Dict) -> Dict:
    """Generate a training document for a user and store it.

    Steps: group the submitted stats into exams, let the LLM describe them
    and propose knowledge-base queries, run those queries, let the LLM pick
    the useful tips, then save everything in the ``training`` collection.

    :param training_content: request payload with ``"userID"`` and ``"stats"``.
    :return: ``{"id": <identifier returned by the document store>}``.
    """
    user = training_content["userID"]
    stats = training_content["stats"]

    exam_data, exam_map = await self._sort_out_solutions(stats)
    generated = await self._get_exam_details_and_tips(exam_data)
    candidate_tips = self._query_kb(generated.queries)
    selected_tips = await self._get_usefull_tips(exam_data, candidate_tips)
    exams = self._merge_exam_map_with_details(exam_map, generated.details)

    weak_areas = [area.dict() for area in generated.weak_areas]

    # Creation time in milliseconds since the epoch.
    training_doc = {'created_at': int(datetime.now().timestamp() * 1000)}
    training_doc.update(exams)
    training_doc.update(selected_tips.dict())
    training_doc["weak_areas"] = weak_areas
    training_doc["user"] = user

    doc_id = await self._db.save_to_db('training', training_doc)
    return {"id": doc_id}
|
||||
|
||||
@staticmethod
def _merge_exam_map_with_details(exam_map: Dict[str, any], details: List[DetailsDTO]):
    """Combine each generated exam description with the data collected for that exam.

    :param exam_map: per-exam data keyed by the identifier used in ``details``.
    :param details: generated descriptions, one per exam.
    :return: ``{"exams": [...]}`` with one merged entry per detail, in order.
    :raises KeyError: when a detail refers to an identifier missing from ``exam_map``.
    """
    merged = []
    for detail in details:
        entry = {
            "id": detail.exam_id,
            "date": detail.date,
            "performance_comment": detail.performance_comment,
            "detailed_summary": detail.detailed_summary,
        }
        # Collected data wins when a key exists on both sides.
        entry.update(exam_map[detail.exam_id])
        merged.append(entry)
    return {"exams": merged}
|
||||
|
||||
def _query_kb(self, queries: List[QueryDTO]):
    """Run every generated query against the knowledge base and gather the tips.

    The ``words`` category is searched in two indices (``word_link`` and
    ``word_partners``); the other known categories map to a single index.
    Unknown categories are logged and skipped.

    :return: ``{"tips": [...]}`` with the results in query order.
    """
    index_by_category = {
        "critical_thinking": "ct_focus",
        "language_for_writing": "language_for_writing",
        "reading_skills": "reading_skill",
        "strategy": "strategy",
        "writing_skills": "writing_skill"
    }

    found = []
    for query in queries:
        if query.category == "words":
            index_names = ("word_link", "word_partners")
        elif query.category in index_by_category:
            index_names = (index_by_category[query.category],)
        else:
            self._logger.info(f"GTP tried to query knowledge base for {query.category} and it doesn't exist.")
            continue

        for index_name in index_names:
            found.extend(self._kb.query_knowledge_base(query.text, index_name))

    return {"tips": found}
|
||||
|
||||
async def _get_exam_details_and_tips(self, exam_data: Dict[str, any]) -> TrainingContentDTO:
    """Ask the LLM to describe the exams, list weak areas and propose tip queries.

    The reply is requested in the JSON layout held in ``json_schema`` and is
    converted with ``_map_gpt_response``.

    :param exam_data: grouped exam data; sent to the model as ``str(exam_data)``.
    :return: whatever ``pydantic_prediction`` returns for this conversation.
    """
    json_schema = (
        '{ "details": [{"exam_id": "", "date": 0, "performance_comment": "", "detailed_summary": ""}],'
        ' "weak_areas": [{"area": "", "comment": ""}], "queries": [{"text": "", "category": ""}] }'
    )
    # Categories the model may use when writing knowledge-base queries.
    categories = ", ".join(self.TOOLS)

    instructions = (
        "I'm going to provide you with exam data, you will take the exam data and fill this json "
        f'schema : {json_schema}. "performance_comment" is a short sentence that describes the '
        'students\'s performance and main mistakes in a single exam, "detailed_summary" is a detailed '
        'summary of the student\'s performance, "weak_areas" are identified areas'
        ' across all exams which need to be improved upon, for example, area "Grammar and Syntax" comment "Issues'
        ' with sentence structure and punctuation.", the "queries" field is where you will write queries '
        'for tips that will be displayed to the student, the category attribute is a collection of '
        'embeddings and the text will be the text used to query the knowledge base. The categories are '
        f'the following [{categories}]. The exam data will be a json where the key of the field '
        '"exams" is the exam id, an exam can be composed of multiple modules or single modules. The student'
        ' will see your response so refrain from using phrasing like "The student" did x, y and z. If the '
        'field "answer" in a question is an empty array "[]", then the student didn\'t answer any question '
        'and you must address that in your response. Also questions aren\'t modules, the only modules are: '
        'level, speaking, writing, reading and listening. The details array needs to be tailored to the '
        'exam attempt, even if you receive the same exam you must treat as different exams by their id.'
        'Don\'t make references to an exam by it\'s id, the GUI will handle that so the student knows '
        'which is the exam your comments and summary are referencing too. Even if the student hasn\'t '
        'submitted no answers for an exam, you must still fill the details structure addressing that fact.'
    )

    messages = [
        {"role": "user", "content": instructions},
        {"role": "user", "content": f'Exam Data: {str(exam_data)}'},
    ]
    return await self._llm.pydantic_prediction(messages, self._map_gpt_response, json_schema)
|
||||
|
||||
async def _get_usefull_tips(self, exam_data: Dict[str, any], tips: Dict[str, any]) -> TipsDTO:
    """Ask the LLM which of the candidate tips are useful for the given exams.

    :param exam_data: grouped exam data; sent as ``str(exam_data)``.
    :param tips: candidate tips; sent as ``str(tips)``.
    :return: whatever ``pydantic_prediction`` returns, mapping the reply to ``TipsDTO``.
    """
    json_schema = '{ "tip_ids": [] }'

    request = (
        "I'm going to provide you with tips and I want you to return to me the tips that "
        "can be usefull for the student that made the exam that I'm going to send you, return "
        f"me the tip ids in this json format {json_schema}."
    )
    contents = (request, f'Exam Data: {str(exam_data)}', f'Tips: {str(tips)}')
    messages = [{"role": "user", "content": text} for text in contents]

    def to_tips(response):
        return TipsDTO(**response)

    return await self._llm.pydantic_prediction(messages, to_tips, json_schema)
|
||||
|
||||
@staticmethod
def _map_gpt_response(response: Dict[str, any]) -> TrainingContentDTO:
    """Convert the decoded LLM reply into a ``TrainingContentDTO``.

    :param response: decoded JSON with ``details``, ``weak_areas`` and ``queries`` lists.
    :raises KeyError: when one of the three lists is missing.
    """
    details = [DetailsDTO(**item) for item in response["details"]]
    weak_areas = [WeakAreaDTO(**item) for item in response["weak_areas"]]
    queries = [QueryDTO(**item) for item in response["queries"]]
    return TrainingContentDTO(details=details, weak_areas=weak_areas, queries=queries)
|
||||
|
||||
async def _sort_out_solutions(self, stats):
|
||||
grouped_stats = {}
|
||||
for stat in stats:
|
||||
session_key = f'{str(stat["date"])}-{stat["user"]}'
|
||||
module = stat["module"]
|
||||
exam_id = stat["exam"]
|
||||
|
||||
if session_key not in grouped_stats:
|
||||
grouped_stats[session_key] = {}
|
||||
if module not in grouped_stats[session_key]:
|
||||
grouped_stats[session_key][module] = {
|
||||
"stats": [],
|
||||
"exam_id": exam_id
|
||||
}
|
||||
grouped_stats[session_key][module]["stats"].append(stat)
|
||||
|
||||
exercises = {}
|
||||
exam_map = {}
|
||||
for session_key, modules in grouped_stats.items():
|
||||
exercises[session_key] = {}
|
||||
for module, module_stats in modules.items():
|
||||
exercises[session_key][module] = {}
|
||||
|
||||
exam_id = module_stats["exam_id"]
|
||||
if exam_id not in exercises[session_key][module]:
|
||||
exercises[session_key][module][exam_id] = {"date": None, "exercises": []}
|
||||
|
||||
exam_total_questions = 0
|
||||
exam_total_correct = 0
|
||||
|
||||
for stat in module_stats["stats"]:
|
||||
exam_total_questions += stat["score"]["total"]
|
||||
exam_total_correct += stat["score"]["correct"]
|
||||
exercises[session_key][module][exam_id]["date"] = stat["date"]
|
||||
|
||||
if session_key not in exam_map:
|
||||
exam_map[session_key] = {"stat_ids": [], "score": 0}
|
||||
exam_map[session_key]["stat_ids"].append(stat["id"])
|
||||
|
||||
exam = await self._db.get_doc_by_id(module, exam_id)
|
||||
if module == "listening":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_listening_solutions(stat, exam))
|
||||
elif module == "reading":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_reading_solutions(stat, exam))
|
||||
elif module == "writing":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_writing_prompts_and_answers(stat, exam)
|
||||
)
|
||||
elif module == "speaking":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_speaking_solutions(stat, exam)
|
||||
)
|
||||
elif module == "level":
|
||||
exercises[session_key][module][exam_id]["exercises"].extend(
|
||||
self._get_level_solutions(stat, exam)
|
||||
)
|
||||
|
||||
exam_map[session_key]["score"] = round((exam_total_correct / exam_total_questions) * 100)
|
||||
exam_map[session_key]["module"] = module
|
||||
|
||||
return {"exams": exercises}, exam_map
|
||||
|
||||
def _get_writing_prompts_and_answers(self, stat, exam):
|
||||
result = []
|
||||
try:
|
||||
exercises = []
|
||||
for solution in stat['solutions']:
|
||||
answer = solution['solution']
|
||||
exercise_id = solution['id']
|
||||
exercises.append({
|
||||
"exercise_id": exercise_id,
|
||||
"answer": answer
|
||||
})
|
||||
for exercise in exercises:
|
||||
for exam_exercise in exam["exercises"]:
|
||||
if exam_exercise["id"] == exercise["exercise_id"]:
|
||||
result.append({
|
||||
"exercise": exam_exercise["prompt"],
|
||||
"answer": exercise["answer"]
|
||||
})
|
||||
|
||||
except KeyError as e:
|
||||
self._logger.warning(f"Malformed stat object: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _get_mc_question(exercise, stat):
|
||||
shuffle_maps = stat.get("shuffleMaps", [])
|
||||
answer = stat["solutions"] if len(shuffle_maps) == 0 else []
|
||||
if len(shuffle_maps) != 0:
|
||||
for solution in stat["solutions"]:
|
||||
shuffle_map = [
|
||||
item["map"] for item in shuffle_maps
|
||||
if item["questionID"] == solution["question"]
|
||||
]
|
||||
answer.append({
|
||||
"question": solution["question"],
|
||||
"option": shuffle_map[solution["option"]]
|
||||
})
|
||||
return {
|
||||
"question": exercise["prompt"],
|
||||
"exercise": exercise["questions"],
|
||||
"answer": stat["solutions"]
|
||||
}
|
||||
|
||||
@staticmethod
def _swap_key_name(d, original_key, new_key):
    """Rename ``original_key`` to ``new_key`` inside ``d`` and return ``d``.

    The dict is modified in place; a missing ``original_key`` raises
    ``KeyError``.
    """
    moved_value = d.pop(original_key)
    d[new_key] = moved_value
    return d
|
||||
|
||||
def _get_level_solutions(self, stat, exam):
    """Collect review entries for a level-test stat.

    Walks every exercise of every part in ``exam`` and, for each one whose id
    equals ``stat["exercise"]``, appends an entry according to
    ``stat["type"]``:

    * ``"fillBlanks"``: prompt, template text, word bank, expected solutions
      and the submitted answers, with each answer's ``'solution'`` key
      renamed to ``'option'``.
    * ``"multipleChoice"``: whatever ``_get_mc_question`` returns.

    Other types contribute nothing. A missing key is logged as a warning and
    the entries gathered so far are returned.
    """
    result = []
    try:
        for part in exam["parts"]:
            for exercise in part["exercises"]:
                if exercise["id"] != stat["exercise"]:
                    continue
                if stat["type"] == "fillBlanks":
                    result.append({
                        "prompt": exercise["prompt"],
                        "template": exercise["text"],
                        "words": exercise["words"],
                        "solutions": exercise["solutions"],
                        "answer": [
                            # Rename on a copy: renaming the caller's dicts in
                            # place made a second matching exercise (or a
                            # second call) fail with KeyError on 'solution'.
                            self._swap_key_name(dict(item), 'solution', 'option')
                            for item in stat["solutions"]
                        ]
                    })
                elif stat["type"] == "multipleChoice":
                    result.append(self._get_mc_question(exercise, stat))
    except KeyError as e:
        self._logger.warning(f"Malformed stat object: {str(e)}")
    return result
|
||||
|
||||
def _get_listening_solutions(self, stat, exam):
    """Collect review entries for a listening stat.

    Every exercise of every part in ``exam`` whose id equals
    ``stat["exercise"]`` contributes one entry, shaped by ``stat["type"]``
    (``"writeBlanks"``, ``"fillBlanks"`` or ``"multipleChoice"``); any other
    type contributes nothing.

    A missing key is logged as a warning and the entries gathered so far are
    returned.
    """
    collected = []
    try:
        # Lazily flatten parts -> exercises, preserving document order.
        candidates = (item for part in exam["parts"] for item in part["exercises"])
        for item in candidates:
            if item["id"] != stat["exercise"]:
                continue
            kind = stat["type"]
            if kind == "multipleChoice":
                collected.append(self._get_mc_question(item, stat))
            elif kind == "writeBlanks":
                entry = {
                    "question": item["prompt"],
                    "template": item["text"],
                    "solution": item["solutions"],
                    "answer": stat["solutions"]
                }
                collected.append(entry)
            elif kind == "fillBlanks":
                entry = {
                    "question": item["prompt"],
                    "template": item["text"],
                    "words": item["words"],
                    "solutions": item["solutions"],
                    "answer": stat["solutions"]
                }
                collected.append(entry)
    except KeyError as err:
        self._logger.warning(f"Malformed stat object: {str(err)}")
    return collected
|
||||
|
||||
@staticmethod
def _find_shuffle_map(shuffle_maps, question_id):
    """Return the ``"map"`` of the first entry whose ``"questionID"`` equals
    ``question_id``, or ``None`` when no entry matches."""
    for entry in shuffle_maps:
        if entry["questionID"] == question_id:
            return entry["map"]
    return None
|
||||
|
||||
def _get_speaking_solutions(self, stat, exam):
    """Collect the review entry for a speaking stat.

    The entry holds the per-criterion comments taken from the first
    solution's ``evaluation['task_response']`` plus, for every exam exercise
    whose id equals ``stat["exercise"]``, the question text and transcript:

    * ``"interactiveSpeaking"``: one ``exercise_<n>`` item per prompt, with
      the answer read from ``transcript_<n>`` (empty string when absent).
    * ``"speaking"``: a single ``exercise_1`` item, with the answer read from
      ``transcript`` (empty string when absent).

    Returns a one-element list containing that entry. When the stat is
    malformed a warning is logged and the list contains whatever had been
    built so far (an empty dict if nothing).
    """
    result = {}
    try:
        evaluation = stat['solutions'][0]['evaluation']
        result = {
            "comments": {
                key: value['comment'] for key, value in evaluation['task_response'].items()
            },
            "exercises": {}
        }

        for exercise in exam["exercises"]:
            if exercise["id"] != stat["exercise"]:
                continue
            if stat["type"] == "interactiveSpeaking":
                for number, prompt in enumerate(exercise["prompts"], start=1):
                    result["exercises"][f"exercise_{number}"] = {
                        "question": prompt["text"],
                        "answer": evaluation.get(f'transcript_{number}', '')
                    }
            elif stat["type"] == "speaking":
                result["exercises"]["exercise_1"] = {
                    "question": exercise["text"],
                    "answer": evaluation.get('transcript', '')
                }
    except (KeyError, IndexError, TypeError) as e:
        # IndexError: empty 'solutions' list. TypeError: a criterion value
        # that is not a mapping (e.g. a bare number), so value['comment']
        # fails. Both are malformed stats and used to escape this handler.
        self._logger.warning(f"Malformed stat object: {str(e)}")
    return [result]
|
||||
|
||||
def _get_reading_solutions(self, stat, exam):
    """Collect review entries for a reading stat.

    Every exercise of every part in ``exam`` whose id equals
    ``stat["exercise"]`` contributes one entry containing the part's passage
    text, the exercise fields relevant to ``stat["type"]`` and the submitted
    answers. Supported types are ``"fillBlanks"``, ``"writeBlanks"``,
    ``"trueFalse"`` and ``"matchSentences"``; other types contribute nothing.

    A missing key is logged as a warning and the entries gathered so far are
    returned.
    """
    # Per stat type: (output key, exercise key) pairs, in output order.
    layouts = {
        "fillBlanks": (
            ("question", "prompt"),
            ("template", "text"),
            ("words", "words"),
            ("solutions", "solutions"),
        ),
        "writeBlanks": (
            ("question", "prompt"),
            ("template", "text"),
            ("solutions", "solutions"),
        ),
        "trueFalse": (
            ("questions", "questions"),
        ),
        "matchSentences": (
            ("question", "prompt"),
            ("sentences", "sentences"),
            ("options", "options"),
        ),
    }
    collected = []
    try:
        for part in exam["parts"]:
            passage = part["text"]
            for exercise in part["exercises"]:
                if exercise["id"] != stat["exercise"]:
                    continue
                layout = layouts.get(stat["type"])
                if layout is None:
                    continue
                entry = {"text": passage}
                for out_key, source_key in layout:
                    entry[out_key] = exercise[source_key]
                entry["answer"] = stat["solutions"]
                collected.append(entry)
    except KeyError as err:
        self._logger.warning(f"Malformed stat object: {str(err)}")
    return collected
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from typing import List, Dict
|
||||
|
||||
from app.services.abc import IWritingService, ILLMService, IAIDetectorService
|
||||
from app.configs.constants import GPTModels, TemperatureSettings
|
||||
from app.configs.constants import GPTModels, TemperatureSettings, FieldsAndExercises
|
||||
from app.helpers import TextHelper, ExercisesHelper
|
||||
|
||||
|
||||
@@ -17,10 +19,7 @@ class WritingService(IWritingService):
|
||||
'You are a helpful assistant designed to output JSON on this format: {"prompt": "prompt content"}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": self._get_writing_prompt(task, topic, difficulty)
|
||||
}
|
||||
*self._get_writing_messages(task, topic, difficulty)
|
||||
]
|
||||
|
||||
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
|
||||
@@ -32,15 +31,18 @@ class WritingService(IWritingService):
|
||||
TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
question = response["prompt"].strip()
|
||||
|
||||
return {
|
||||
"question": response["prompt"].strip(),
|
||||
"question": self._add_newline_before_hyphen(question) if task == 1 else question,
|
||||
"difficulty": difficulty,
|
||||
"topic": topic
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _get_writing_prompt(task: int, topic: str, difficulty: str):
|
||||
return (
|
||||
def _get_writing_messages(task: int, topic: str, difficulty: str) -> List[Dict]:
|
||||
# TODO: Should the muslim disclaimer be added to task 2?
|
||||
task_prompt = (
|
||||
'Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
|
||||
'student to compose a letter. The prompt should present a specific scenario or situation, '
|
||||
f'based on the topic of "{topic}", requiring the student to provide information, '
|
||||
@@ -52,32 +54,41 @@ class WritingService(IWritingService):
|
||||
f'analysis of contrasting perspectives on the topic of "{topic}".'
|
||||
)
|
||||
|
||||
task_instructions = (
|
||||
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
|
||||
'the answer should include.'
|
||||
) if task == 1 else (
|
||||
'The question should lead to an answer with either "theories", "complicated information" or '
|
||||
'be "very descriptive" on the topic.'
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": task_prompt
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": task_instructions
|
||||
}
|
||||
]
|
||||
|
||||
return messages
|
||||
|
||||
async def grade_writing_task(self, task: int, question: str, answer: str):
|
||||
bare_minimum = 100 if task == 1 else 180
|
||||
minimum = 150 if task == 1 else 250
|
||||
|
||||
# TODO: left as is, don't know if this is intended or not
|
||||
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
|
||||
temperature = (
|
||||
TemperatureSettings.GRADING_TEMPERATURE
|
||||
if task == 1 else
|
||||
TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
if not TextHelper.has_words(answer):
|
||||
return self._zero_rating("The answer does not contain enough english words.")
|
||||
elif not TextHelper.has_x_words(answer, bare_minimum):
|
||||
return self._zero_rating("The answer is insufficient and too small to be graded.")
|
||||
else:
|
||||
template = self._get_writing_template()
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"perfect_answer": "example perfect answer", "comment": '
|
||||
'"comment about answer quality", "overall": 0.0, "task_response": '
|
||||
'{"Task Achievement": 0.0, "Coherence and Cohesion": 0.0, '
|
||||
'"Lexical Resource": 0.0, "Grammatical Range and Accuracy": 0.0 }'
|
||||
f'You are a helpful assistant designed to output JSON on this format: {template}'
|
||||
)
|
||||
},
|
||||
{
|
||||
@@ -86,16 +97,28 @@ class WritingService(IWritingService):
|
||||
f'Evaluate the given Writing Task {task} response based on the IELTS grading system, '
|
||||
'ensuring a strict assessment that penalizes errors. Deduct points for deviations '
|
||||
'from the task, and assign a score of 0 if the response fails to address the question. '
|
||||
f'Additionally, provide an exemplary answer with a minimum of {minimum} words, along with a '
|
||||
'detailed commentary highlighting both strengths and weaknesses in the response. '
|
||||
'Additionally, provide a detailed commentary highlighting both strengths and '
|
||||
'weaknesses in the response. '
|
||||
f'\n Question: "{question}" \n Answer: "{answer}"')
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f'The perfect answer must have at least {minimum} words.'
|
||||
}
|
||||
]
|
||||
|
||||
if task == 1:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": (
|
||||
'Refer to the parts of the letter as: "Greeting Opener", "bullet 1", "bullet 2", '
|
||||
'"bullet 3", "closer (restate the purpose of the letter)", "closing greeting"'
|
||||
)
|
||||
})
|
||||
|
||||
llm_model = GPTModels.GPT_3_5_TURBO if task == 1 else GPTModels.GPT_4_O
|
||||
temperature = (
|
||||
TemperatureSettings.GRADING_TEMPERATURE
|
||||
if task == 1 else
|
||||
TemperatureSettings.GEN_QUESTION_TEMPERATURE
|
||||
)
|
||||
|
||||
response = await self._llm.prediction(
|
||||
llm_model,
|
||||
messages,
|
||||
@@ -103,6 +126,10 @@ class WritingService(IWritingService):
|
||||
temperature
|
||||
)
|
||||
|
||||
perfect_answer_minimum = 150 if task == 1 else 250
|
||||
perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum)
|
||||
|
||||
response["perfect_answer"] = perfect_answer["perfect_answer"]
|
||||
response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"])
|
||||
response['fixed_text'] = await self._get_fixed_text(answer)
|
||||
|
||||
@@ -114,13 +141,20 @@ class WritingService(IWritingService):
|
||||
|
||||
async def _get_fixed_text(self, text):
|
||||
messages = [
|
||||
{"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"fixed_text": "fixed test with no misspelling errors"}')
|
||||
},
|
||||
{"role": "user", "content": (
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
'You are a helpful assistant designed to output JSON on this format: '
|
||||
'{"fixed_text": "fixed test with no misspelling errors"}'
|
||||
)
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
'Fix the errors in the given text and put it in a JSON. '
|
||||
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"')
|
||||
}
|
||||
f'Do not complete the answer, only replace what is wrong. \n The text: "{text}"'
|
||||
)
|
||||
}
|
||||
]
|
||||
|
||||
response = await self._llm.prediction(
|
||||
@@ -132,16 +166,83 @@ class WritingService(IWritingService):
|
||||
)
|
||||
return response["fixed_text"]
|
||||
|
||||
async def _get_perfect_answer(self, question: str, size: int) -> Dict:
    """Ask the LLM for a model answer to a writing question.

    Sends a system message fixing the JSON output format, the question
    itself, and a minimum length of ``size`` words, then returns the
    prediction produced by ``self._llm.prediction`` with ``"perfect_answer"``
    passed as the expected field.
    """
    format_message = {
        "role": "system",
        "content": (
            'You are a helpful assistant designed to output JSON on this format: '
            '{"perfect_answer": "perfect answer for the question"}'
        )
    }
    exercise_message = {
        "role": "user",
        "content": f'Write a perfect answer for this writing exercise of a IELTS exam. Question: {question}'
    }
    length_message = {
        "role": "user",
        "content": f'The answer must have at least {size} words'
    }
    conversation = [format_message, exercise_message, length_message]
    return await self._llm.prediction(
        GPTModels.GPT_4_O,
        conversation,
        ["perfect_answer"],
        TemperatureSettings.GEN_QUESTION_TEMPERATURE
    )
|
||||
|
||||
@staticmethod
def _zero_rating(comment: str):
    """Return a grading result with every score set to zero.

    The result carries ``comment`` as the overall comment, an ``overall``
    score of 0 and, for each of the four writing criteria, a
    ``{"grade": 0.0, "comment": ""}`` entry -- the same nested shape as the
    template returned by ``_get_writing_template``.
    """
    # Stale scalar entries ('<criterion>': 0) that duplicated these keys were
    # dropped; only the nested grade/comment form is kept.
    criteria = (
        'Task Achievement',
        'Coherence and Cohesion',
        'Lexical Resource',
        'Grammatical Range and Accuracy',
    )
    return {
        'comment': comment,
        'overall': 0,
        'task_response': {
            criterion: {"grade": 0.0, "comment": ""}
            for criterion in criteria
        }
    }
|
||||
|
||||
@staticmethod
def _get_writing_template():
    """Return the JSON skeleton the grading model is asked to fill in.

    It holds an overall comment, an overall score and, under
    ``"task_response"``, a grade/comment pair for each of the four writing
    criteria.
    """
    criteria = (
        "Task Achievement",
        "Coherence and Cohesion",
        "Lexical Resource",
        "Grammatical Range and Accuracy",
    )
    per_criterion = {}
    for criterion in criteria:
        per_criterion[criterion] = {
            "grade": 0.0,
            "comment": f"comment about {criterion} of the student's response"
        }
    return {
        "comment": "comment about student's response quality",
        "overall": 0.0,
        "task_response": per_criterion
    }
|
||||
|
||||
@staticmethod
def _add_newline_before_hyphen(s):
    """Turn every space-hyphen pair in ``s`` into a newline followed by the
    hyphen, so inline bullet points each start on their own line."""
    pieces = s.split(" -")
    return "\n-".join(pieces)
|
||||
|
||||
|
||||
5
app/utils/__init__.py
Normal file
5
app/utils/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .handle_exception import handle_exception
|
||||
|
||||
__all__ = [
|
||||
"handle_exception"
|
||||
]
|
||||
15
app/utils/handle_exception.py
Normal file
15
app/utils/handle_exception.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import functools
|
||||
from typing import Callable, Any
|
||||
from fastapi import Response
|
||||
|
||||
|
||||
def handle_exception(status_code: int = 500):
    """Build a decorator that turns exceptions of an async callable into a response.

    The decorated coroutine function is awaited as usual. If it raises any
    ``Exception``, the traceback is logged and a ``Response`` is returned
    whose body is the exception message and whose status is ``status_code``.

    Args:
        status_code: HTTP status used for the error response (default 500).

    Returns:
        A decorator for coroutine functions.
    """
    # Function-scope import so the module's import header stays untouched.
    import logging

    logger = logging.getLogger(__name__)

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                # Previously the error vanished without a trace on the server
                # side; record it before converting it into a response.
                logger.exception("Unhandled exception in %s", getattr(func, "__qualname__", func))
                return Response(content=str(e), status_code=status_code)
        return wrapper
    return decorator
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.poetry]
|
||||
name = "encoach-be"
|
||||
name = "ielts-be"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Ecrop Devteam <company@ecrop.dev>"]
|
||||
@@ -17,6 +17,14 @@ firebase-admin = "^6.5.0"
|
||||
wonderwords = "^2.2.0"
|
||||
dependency-injector = "^4.41.0"
|
||||
openai = "^1.37.0"
|
||||
python-multipart = "0.0.9"
|
||||
faiss-cpu = "1.8.0.post1"
|
||||
pypandoc = "1.13"
|
||||
pdfplumber = "0.11.3"
|
||||
numpy = "1.26.4"
|
||||
pillow = "10.4.0"
|
||||
sentence-transformers = "3.0.1"
|
||||
openai-whisper = "20231117"
|
||||
|
||||
|
||||
[build-system]
|
||||
|
||||
1
tmp/placeholder.txt
Normal file
1
tmp/placeholder.txt
Normal file
@@ -0,0 +1 @@
|
||||
THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO
|
||||
Reference in New Issue
Block a user