Writing and speaking rework, some changes to module upload

This commit is contained in:
Carlos-Mesquita
2024-11-25 16:41:38 +00:00
parent a54dfad43a
commit a7da187ec6
20 changed files with 495 additions and 195 deletions

View File

@@ -1,9 +1,8 @@
from dependency_injector.wiring import inject, Provide from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Depends, Path, Request from fastapi import APIRouter, Depends, Path, Request, BackgroundTasks
from app.controllers.abc import IGradeController from app.controllers.abc import IGradeController
from app.dtos.writing import WritingGradeTaskDTO from app.dtos.writing import WritingGradeTaskDTO
from app.dtos.speaking import GradeSpeakingAnswersDTO, GradeSpeakingDTO
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
controller = "grade_controller" controller = "grade_controller"
@@ -17,35 +16,51 @@ grade_router = APIRouter()
@inject @inject
async def grade_writing_task( async def grade_writing_task(
data: WritingGradeTaskDTO, data: WritingGradeTaskDTO,
background_tasks: BackgroundTasks,
task: int = Path(..., ge=1, le=2), task: int = Path(..., ge=1, le=2),
grade_controller: IGradeController = Depends(Provide[controller]) grade_controller: IGradeController = Depends(Provide[controller])
): ):
return await grade_controller.grade_writing_task(task, data) return await grade_controller.grade_writing_task(task, data, background_tasks)
@grade_router.post(
'/speaking/2',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def grade_speaking_task_2(
data: GradeSpeakingDTO,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.grade_speaking_task(2, [data.dict()])
@grade_router.post( @grade_router.post(
'/speaking/{task}', '/speaking/{task}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
) )
@inject @inject
async def grade_speaking_task_1_and_3( async def grade_speaking_task(
data: GradeSpeakingAnswersDTO, request: Request,
background_tasks: BackgroundTasks,
task: int = Path(..., ge=1, le=3), task: int = Path(..., ge=1, le=3),
grade_controller: IGradeController = Depends(Provide[controller]) grade_controller: IGradeController = Depends(Provide[controller])
): ):
return await grade_controller.grade_speaking_task(task, data.answers) form = await request.form()
return await grade_controller.grade_speaking_task(task, form, background_tasks)
@grade_router.get(
'/pending/{sessionId}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_pending_evaluations(
session_id: str,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.get_evaluations(session_id, "pending")
@grade_router.get(
'/completed/{sessionId}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_completed_evaluations(
session_id: str,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.get_evaluations(session_id, "completed")
@grade_router.post( @grade_router.post(

View File

@@ -18,7 +18,6 @@ async def generate_exercises(
dto: LevelExercisesDTO, dto: LevelExercisesDTO,
level_controller: ILevelController = Depends(Provide[controller]) level_controller: ILevelController = Depends(Provide[controller])
): ):
print(dto.dict())
return await level_controller.generate_exercises(dto) return await level_controller.generate_exercises(dto)
@level_router.get( @level_router.get(

View File

@@ -3,9 +3,8 @@ from typing import Optional
from dependency_injector.wiring import inject, Provide from dependency_injector.wiring import inject, Provide
from fastapi import APIRouter, Path, Query, Depends from fastapi import APIRouter, Path, Query, Depends
from pydantic import BaseModel
from app.dtos.video import Task, TaskStatus from app.dtos.speaking import Video
from app.middlewares import Authorized, IsAuthenticatedViaBearerToken from app.middlewares import Authorized, IsAuthenticatedViaBearerToken
from app.configs.constants import EducationalContent from app.configs.constants import EducationalContent
from app.controllers.abc import ISpeakingController from app.controllers.abc import ISpeakingController
@@ -13,11 +12,6 @@ from app.controllers.abc import ISpeakingController
controller = "speaking_controller" controller = "speaking_controller"
speaking_router = APIRouter() speaking_router = APIRouter()
class Video(BaseModel):
text: str
avatar: str
@speaking_router.post( @speaking_router.post(
'/media', '/media',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]

View File

@@ -18,11 +18,11 @@ load_dotenv()
class DependencyInjector: class DependencyInjector:
def __init__(self, polly_client: any, http_client: HTTPClient, whisper_model: any): def __init__(self, polly_client: any, http_client: HTTPClient, stt: OpenAIWhisper):
self._container = containers.DynamicContainer() self._container = containers.DynamicContainer()
self._polly_client = polly_client self._polly_client = polly_client
self._http_client = http_client self._http_client = http_client
self._whisper_model = whisper_model self._stt = stt
def inject(self): def inject(self):
self._setup_clients() self._setup_clients()
@@ -33,22 +33,25 @@ class DependencyInjector:
self._container.wire( self._container.wire(
packages=["app"] packages=["app"]
) )
return self
def _setup_clients(self): def _setup_clients(self):
self._container.openai_client = providers.Singleton(AsyncOpenAI) self._container.openai_client = providers.Singleton(AsyncOpenAI)
self._container.polly_client = providers.Object(self._polly_client) self._container.polly_client = providers.Object(self._polly_client)
self._container.http_client = providers.Object(self._http_client) self._container.http_client = providers.Object(self._http_client)
self._container.whisper_model = providers.Object(self._whisper_model) self._container.stt = providers.Object(self._stt)
def _setup_third_parties(self): def _setup_third_parties(self):
self._container.llm = providers.Factory(OpenAI, client=self._container.openai_client) self._container.llm = providers.Factory(OpenAI, client=self._container.openai_client)
self._container.stt = providers.Factory(OpenAIWhisper, model=self._container.whisper_model)
self._container.tts = providers.Factory(AWSPolly, client=self._container.polly_client) self._container.tts = providers.Factory(AWSPolly, client=self._container.polly_client)
"""
with open('app/services/impl/third_parties/elai/conf.json', 'r') as file: with open('app/services/impl/third_parties/elai/conf.json', 'r') as file:
elai_conf = json.load(file) elai_conf = json.load(file)
with open('app/services/impl/third_parties/elai/avatars.json', 'r') as file: with open('app/services/impl/third_parties/elai/avatars.json', 'r') as file:
elai_avatars = json.load(file) elai_avatars = json.load(file)
"""
with open('app/services/impl/third_parties/heygen/avatars.json', 'r') as file: with open('app/services/impl/third_parties/heygen/avatars.json', 'r') as file:
heygen_avatars = json.load(file) heygen_avatars = json.load(file)

View File

@@ -1,15 +1,27 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, List from typing import Dict, List, Union
from fastapi import BackgroundTasks
from fastapi.datastructures import FormData
class IGradeController(ABC): class IGradeController(ABC):
@abstractmethod @abstractmethod
async def grade_writing_task(self, task: int, data): async def grade_writing_task(
self, session_id: str, exercise_id: str,
task: int, dto: any,
background_tasks: BackgroundTasks
):
pass pass
@abstractmethod @abstractmethod
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: async def grade_speaking_task(
self, task: int, form: FormData, background_tasks: BackgroundTasks
):
pass
@abstractmethod
async def get_evaluations(self, session_id: str, status: str):
pass pass
@abstractmethod @abstractmethod

View File

@@ -1,34 +1,96 @@
import logging import logging
from typing import Dict, List from typing import Dict, List, Union
from uuid import uuid4
from fastapi import BackgroundTasks, Response, HTTPException
from fastapi.datastructures import FormData
from app.configs.constants import FilePaths
from app.controllers.abc import IGradeController from app.controllers.abc import IGradeController
from app.dtos.evaluation import EvaluationType
from app.dtos.speaking import GradeSpeakingItem
from app.dtos.writing import WritingGradeTaskDTO from app.dtos.writing import WritingGradeTaskDTO
from app.helpers import FileHelper from app.services.abc import IGradeService, IEvaluationService
from app.services.abc import ISpeakingService, IWritingService, IGradeService
from app.utils import handle_exception
class GradeController(IGradeController): class GradeController(IGradeController):
def __init__( def __init__(
self, self,
grade_service: IGradeService, grade_service: IGradeService,
speaking_service: ISpeakingService, evaluation_service: IEvaluationService,
writing_service: IWritingService
): ):
self._service = grade_service self._service = grade_service
self._speaking_service = speaking_service self._evaluation_service = evaluation_service
self._writing_service = writing_service
self._logger = logging.getLogger(__name__) self._logger = logging.getLogger(__name__)
async def grade_writing_task(self, task: int, data: WritingGradeTaskDTO): async def grade_writing_task(
return await self._writing_service.grade_writing_task(task, data.question, data.answer) self, session_id: str, exercise_id: str,
task: int, dto: WritingGradeTaskDTO, background_tasks: BackgroundTasks
):
await self._evaluation_service.create_or_update_evaluation(
dto.sessionId, dto.exercise_id, EvaluationType.WRITING, task
)
@handle_exception(400) await self._evaluation_service.begin_evaluation(
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: session_id, task, exercise_id, EvaluationType.WRITING, dto, background_tasks
FileHelper.delete_files_older_than_one_day(FilePaths.AUDIO_FILES_PATH) )
return await self._speaking_service.grade_speaking_task(task, answers)
return Response(status_code=200)
async def grade_speaking_task(self, task: int, form: FormData, background_tasks: BackgroundTasks):
    """Validate a multipart speaking submission and queue it for grading.

    The form must contain ``sessionId`` and ``exerciseId`` plus one or more
    ``question_<n>`` / ``audio_<n>`` pairs, where ``<n>`` is an integer used
    to pair the two fields and to order the answers.

    Args:
        task: Speaking task number; task 2 is recorded as ``SPEAKING``, any
            other value as ``SPEAKING_INTERACTIVE``.
        form: Parsed form data of the request.
        background_tasks: Passed through to the evaluation service, which
            schedules the grading on it.

    Returns:
        An empty 200 response; the grading result is not part of it.

    Raises:
        HTTPException: 400 when the ids are missing, when no answer was
            submitted, or when an answer lacks its question or audio.
    """
    session_id = form.get("sessionId")
    exercise_id = form.get("exerciseId")
    if not session_id or not exercise_id:
        raise HTTPException(
            status_code=400,
            detail="Fields sessionId and exerciseId are required!"
        )

    answers: Dict[int, Dict] = {}
    for key, value in form.items():
        field_name, separator, suffix = key.rpartition('_')
        if not separator:
            continue
        try:
            index = int(suffix)
        except ValueError:
            # Keys such as "some_field" are not indexed answer fields; ignore
            # them instead of failing the whole request with a 500.
            continue

        entry = answers.setdefault(index, {})
        if field_name == 'question':
            entry['question'] = value
        elif field_name == 'audio':
            entry['answer'] = value

    if not answers:
        # Without this check an empty item list would be queued and only fail
        # later, inside the background task.
        raise HTTPException(
            status_code=400,
            detail="At least one question and audio pair is required."
        )

    for i, answer in answers.items():
        if 'question' not in answer or 'answer' not in answer:
            raise HTTPException(
                status_code=400,
                detail=f"Incomplete data for answer {i}. Both question and audio required."
            )

    items = [
        GradeSpeakingItem(
            question=answers[i]['question'],
            answer=answers[i]['answer']
        )
        for i in sorted(answers)
    ]

    ex_type = EvaluationType.SPEAKING if task == 2 else EvaluationType.SPEAKING_INTERACTIVE
    await self._evaluation_service.create_or_update_evaluation(
        session_id, exercise_id, ex_type, task
    )
    await self._evaluation_service.begin_evaluation(
        session_id, task, exercise_id, ex_type, items, background_tasks
    )

    return Response(status_code=200)
async def get_evaluations(self, session_id: str, status: str):
    """Return the evaluation service's records for ``session_id`` with ``status``.

    The grade routes call this with ``"pending"`` and ``"completed"``.
    """
    return await self._evaluation_service.get_evaluations(session_id, status)
async def grade_short_answers(self, data: Dict): async def grade_short_answers(self, data: Dict):
return await self._service.grade_short_answers(data) return await self._service.grade_short_answers(data)

18
app/dtos/evaluation.py Normal file
View File

@@ -0,0 +1,18 @@
from enum import Enum
from typing import Dict, Optional
from pydantic import BaseModel
class EvaluationType(str, Enum):
    """Kind of exercise being evaluated.

    Inherits from ``str``, so every member compares equal to its string value.
    """
    WRITING = "writing"
    # Used by the grade controller for every speaking task other than task 2.
    SPEAKING_INTERACTIVE = "speaking_interactive"
    # Used by the grade controller for speaking task 2.
    SPEAKING = "speaking"
class EvaluationRecord(BaseModel):
    """Pydantic model describing one evaluation of an exercise within a session."""
    id: str
    session_id: str
    exercise_id: str
    type: EvaluationType
    task: int
    # Defaults to "pending"; EvaluationService also writes "completed" and "error".
    status: str = "pending"
    # Absent until a result is available.
    result: Optional[Dict] = None

View File

@@ -1,27 +1,13 @@
import random from typing import List, Dict
from typing import List, Dict, Optional
from fastapi import UploadFile
from pydantic import BaseModel from pydantic import BaseModel
from app.configs.constants import MinTimers
class Video(BaseModel):
text: str
avatar: str
class SaveSpeakingDTO(BaseModel): class GradeSpeakingItem(BaseModel):
exercises: List[Dict]
minTimer: int = MinTimers.SPEAKING_MIN_TIMER_DEFAULT
class GradeSpeakingDTO(BaseModel):
question: str question: str
answer: str answer: UploadFile
class GradeSpeakingAnswersDTO(BaseModel):
answers: List[Dict]
class GenerateVideo1DTO(BaseModel):
avatar: str = Optional[str]
questions: List[str]
first_topic: str
second_topic: str

View File

@@ -2,5 +2,7 @@ from pydantic import BaseModel
class WritingGradeTaskDTO(BaseModel): class WritingGradeTaskDTO(BaseModel):
sessionId: str
question: str question: str
answer: str answer: str
exercise_id: str

View File

@@ -35,7 +35,7 @@ class MongoDB(IDocumentStore):
return [document async for document in cursor] return [document async for document in cursor]
async def update(self, collection: str, filter_query: Dict, update: Dict) -> Optional[str]: async def update(self, collection: str, filter_query: Dict, update: Dict) -> Optional[str]:
return (await self._mongo_db[collection].update_one(filter_query, update)).upserted_id return (await self._mongo_db[collection].update_one(filter_query, update, upsert=True)).upserted_id
async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]: async def get_doc_by_id(self, collection: str, doc_id: str) -> Optional[Dict]:
return await self._mongo_db[collection].find_one({"id": doc_id}) return await self._mongo_db[collection].find_one({"id": doc_id})

View File

@@ -12,7 +12,6 @@ from typing import List
from http import HTTPStatus from http import HTTPStatus
import httpx import httpx
import whisper
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.encoders import jsonable_encoder from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import RequestValidationError from fastapi.exceptions import RequestValidationError
@@ -27,6 +26,7 @@ from app.api import router
from app.configs import DependencyInjector from app.configs import DependencyInjector
from app.exceptions import CustomException from app.exceptions import CustomException
from app.middlewares import AuthenticationMiddleware, AuthBackend from app.middlewares import AuthenticationMiddleware, AuthBackend
from app.services.impl import OpenAIWhisper
@asynccontextmanager @asynccontextmanager
@@ -36,8 +36,6 @@ async def lifespan(_app: FastAPI):
https://fastapi.tiangolo.com/advanced/events/ https://fastapi.tiangolo.com/advanced/events/
""" """
# Whisper model
whisper_model = whisper.load_model("base")
# NLTK required datasets download # NLTK required datasets download
nltk.download('words') nltk.download('words')
@@ -56,11 +54,12 @@ async def lifespan(_app: FastAPI):
) )
http_client = httpx.AsyncClient() http_client = httpx.AsyncClient()
stt = OpenAIWhisper()
DependencyInjector( DependencyInjector(
polly_client, polly_client,
http_client, http_client,
whisper_model stt
).inject() ).inject()
# Setup logging # Setup logging
@@ -72,6 +71,7 @@ async def lifespan(_app: FastAPI):
yield yield
stt.close()
await http_client.aclose() await http_client.aclose()
await polly_client.close() await polly_client.close()
await context_stack.aclose() await context_stack.aclose()

View File

@@ -2,9 +2,11 @@ from .third_parties import *
from .exam import * from .exam import *
from .training import * from .training import *
from .user import IUserService from .user import IUserService
from .evaluation import IEvaluationService
__all__ = [ __all__ = [
"IUserService" "IUserService",
"IEvaluationService"
] ]
__all__.extend(third_parties.__all__) __all__.extend(third_parties.__all__)
__all__.extend(exam.__all__) __all__.extend(exam.__all__)

View File

@@ -0,0 +1,33 @@
from abc import abstractmethod, ABC
from typing import Union, List, Dict
from fastapi import BackgroundTasks
from app.dtos.evaluation import EvaluationType
class IEvaluationService(ABC):
    """Interface for creating, running and querying exercise evaluations."""

    @abstractmethod
    async def create_evaluation(
            self,
            session_id: str,
            exercise_id: str,
            eval_type: EvaluationType,
            task: int
    ):
        """Create the evaluation entry for ``exercise_id`` within ``session_id``."""
        pass

    async def create_or_update_evaluation(
            self,
            session_id: str,
            exercise_id: str,
            eval_type: EvaluationType,
            task: int
    ):
        """Create the evaluation entry, or refresh it when it already exists.

        The grade controller calls this method. It is deliberately not
        abstract so existing implementations keep working: by default it
        delegates to :meth:`create_evaluation`. Implementations whose store
        can upsert should override it so a re-submission does not create a
        second entry.
        """
        return await self.create_evaluation(session_id, exercise_id, eval_type, task)

    # NOTE(review): ``any`` below is the builtin function, not ``typing.Any``;
    # it is harmless at runtime but rejected by type checkers.
    @abstractmethod
    async def begin_evaluation(
            self,
            session_id: str, task: int,
            exercise_id: str, exercise_type: str,
            solution: any,
            background_tasks: BackgroundTasks
    ):
        """Start evaluating ``solution``; ``background_tasks`` is supplied by the request handler."""
        pass

    @abstractmethod
    async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
        """Return the evaluations of ``session_id`` that are in ``status``."""
        pass

View File

@@ -11,6 +11,6 @@ class ISpeakingService(ABC):
pass pass
@abstractmethod @abstractmethod
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: async def grade_speaking_task(self, task: int, items: any) -> Dict:
pass pass

View File

@@ -0,0 +1,108 @@
import logging
from typing import Union, Dict, List
from fastapi import BackgroundTasks
from app.dtos.evaluation import EvaluationType
from app.dtos.speaking import GradeSpeakingItem
from app.dtos.writing import WritingGradeTaskDTO
from app.repositories.abc import IDocumentStore
from app.services.abc import IWritingService, ISpeakingService, IEvaluationService
class EvaluationService(IEvaluationService):
    """Stores evaluation records and runs writing/speaking grading in the background.

    Records live in the ``"evaluation"`` collection and are identified by the
    ``(session_id, exercise_id)`` pair. Their ``status`` is ``"pending"``
    until grading finishes, then ``"completed"`` or ``"error"``.
    """

    _COLLECTION = "evaluation"

    def __init__(self, db: IDocumentStore, writing_service: IWritingService, speaking_service: ISpeakingService):
        self._db = db
        self._writing_service = writing_service
        self._speaking_service = speaking_service
        self._logger = logging.getLogger(__name__)

    async def create_evaluation(
            self,
            session_id: str,
            exercise_id: str,
            eval_type: EvaluationType,
            task: int
    ):
        """Insert a new pending evaluation record."""
        await self._db.save_to_db(
            self._COLLECTION,
            {
                "session_id": session_id,
                "exercise_id": exercise_id,
                "type": eval_type,
                "task": task,
                "status": "pending"
            }
        )

    async def create_or_update_evaluation(
            self,
            session_id: str,
            exercise_id: str,
            eval_type: EvaluationType,
            task: int
    ):
        """Create the pending record, or reset an existing one to pending.

        The grade controller calls this method. It relies on the document
        store's ``update`` performing an upsert (the MongoDB store passes
        ``upsert=True``), so a re-submission reuses the existing record
        instead of inserting a duplicate.
        """
        await self._db.update(
            self._COLLECTION,
            self._record_filter(session_id, exercise_id),
            {
                "$set": {
                    "type": eval_type,
                    "task": task,
                    "status": "pending",
                }
            }
        )

    async def begin_evaluation(
            self,
            session_id: str, task: int,
            exercise_id: str, exercise_type: str,
            solution: Union[WritingGradeTaskDTO, List[GradeSpeakingItem]],
            background_tasks: BackgroundTasks
    ):
        """Schedule the grading of ``solution`` on ``background_tasks``."""
        background_tasks.add_task(
            self._begin_evaluation,
            session_id, task,
            exercise_id, exercise_type,
            solution
        )

    async def _begin_evaluation(
            self, session_id: str, task: int,
            exercise_id: str, exercise_type: str,
            solution: Union[WritingGradeTaskDTO, List[GradeSpeakingItem]]
    ):
        """Grade ``solution`` and persist the outcome on the evaluation record.

        Any exception is logged and recorded as ``status="error"`` rather
        than propagated.
        """
        record_filter = self._record_filter(session_id, exercise_id)
        try:
            if exercise_type == EvaluationType.WRITING:
                result = await self._writing_service.grade_writing_task(
                    task,
                    solution.question,
                    solution.answer
                )
            else:
                result = await self._speaking_service.grade_speaking_task(
                    task,
                    solution
                )

            await self._db.update(
                self._COLLECTION,
                record_filter,
                {
                    "$set": {
                        "status": "completed",
                        "result": result,
                    }
                }
            )

        except Exception as e:
            # Top-level boundary of the background task: keep the traceback in
            # the log and expose the failure through the record.
            self._logger.exception(
                "Error processing evaluation %s - %s: %s", session_id, exercise_id, e
            )
            await self._db.update(
                self._COLLECTION,
                record_filter,
                {
                    "$set": {
                        "status": "error",
                        "error": str(e),
                    }
                }
            )

    async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
        """Return the records of ``session_id`` whose status equals ``status``."""
        return await self._db.find(
            self._COLLECTION,
            {
                "session_id": session_id,
                "status": status
            }
        )

    @staticmethod
    def _record_filter(session_id: str, exercise_id: str) -> Dict:
        """Build the query that identifies one evaluation record."""
        return {
            "exercise_id": exercise_id,
            "session_id": session_id,
        }

View File

@@ -1,8 +1,10 @@
from uuid import uuid4
import aiofiles import aiofiles
import os import os
from logging import getLogger from logging import getLogger
from typing import Dict, Any, Coroutine, Optional from typing import Dict, Any, Optional
import pdfplumber import pdfplumber
from fastapi import UploadFile from fastapi import UploadFile
@@ -21,20 +23,19 @@ class UploadLevelModule:
self._logger = getLogger(__name__) self._logger = getLogger(__name__)
self._llm = openai self._llm = openai
async def generate_level_from_file(self, file: UploadFile, solutions: Optional[UploadFile]) -> Dict[str, Any] | None: async def generate_level_from_file(self, exercises: UploadFile, solutions: Optional[UploadFile]) -> Dict[str, Any] | None:
ext, path_id = await FileHelper.save_upload(file) path_id = str(uuid4())
FileHelper.convert_file_to_pdf( ext, _ = await FileHelper.save_upload(exercises, "exercises", path_id)
f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.pdf' FileHelper.convert_file_to_html(f'./tmp/{path_id}/exercises.{ext}', f'./tmp/{path_id}/exercises.html')
)
file_has_images = self._check_pdf_for_images(f'./tmp/{path_id}/exercises.pdf')
if not file_has_images: if solutions:
FileHelper.convert_file_to_html(f'./tmp/{path_id}/upload.{ext}', f'./tmp/{path_id}/exercises.html') ext, _ = await FileHelper.save_upload(solutions, "solutions", path_id)
FileHelper.convert_file_to_html(f'./tmp/{path_id}/solutions.{ext}', f'./tmp/{path_id}/solutions.html')
completion: Coroutine[Any, Any, Exam] = ( #completion: Coroutine[Any, Any, Exam] = (
self._png_completion(path_id) if file_has_images else self._html_completion(path_id) # self._png_completion(path_id) if file_has_images else self._html_completion(path_id)
) #)
response = await completion response = await self._html_completion(path_id)
FileHelper.remove_directory(f'./tmp/{path_id}') FileHelper.remove_directory(f'./tmp/{path_id}')
@@ -42,6 +43,7 @@ class UploadLevelModule:
return self.fix_ids(response.model_dump(exclude_none=True)) return self.fix_ids(response.model_dump(exclude_none=True))
return None return None
@staticmethod @staticmethod
@suppress_loggers() @suppress_loggers()
def _check_pdf_for_images(pdf_path: str) -> bool: def _check_pdf_for_images(pdf_path: str) -> bool:

View File

@@ -98,7 +98,7 @@ class ImportReadingModule:
] ]
} }
], ],
"text": "<numbered questions with format: <question text>{{<question number>}}\\n>", "text": "<numbered questions with format in square brackets: [<question text>{{<question number>}}\\\\n] notice how there is a double backslash before the n -> I want an escaped newline in your output> ",
"type": "writeBlanks", "type": "writeBlanks",
"prompt": "<specific instructions for this exercise section>" "prompt": "<specific instructions for this exercise section>"
} }

View File

@@ -1,6 +1,7 @@
import asyncio
import logging import logging
import os import os
import random import aiofiles
import re import re
import uuid import uuid
from typing import Dict, List, Optional from typing import Dict, List, Optional
@@ -9,6 +10,7 @@ from app.configs.constants import (
FieldsAndExercises, GPTModels, TemperatureSettings, FieldsAndExercises, GPTModels, TemperatureSettings,
FilePaths FilePaths
) )
from app.dtos.speaking import GradeSpeakingItem
from app.helpers import TextHelper from app.helpers import TextHelper
from app.repositories.abc import IFileStorage, IDocumentStore from app.repositories.abc import IFileStorage, IDocumentStore
from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService from app.services.abc import ISpeakingService, ILLMService, IVideoGeneratorService, ISpeechToTextService
@@ -165,106 +167,111 @@ class SpeakingService(ISpeakingService):
return response return response
async def grade_speaking_task(self, task: int, answers: List[Dict]) -> Dict: async def grade_speaking_task(self, task: int, items: List[GradeSpeakingItem]) -> Dict:
request_id = uuid.uuid4() request_id = str(uuid.uuid4())
self._logger.info( self._log(task, request_id, f"Received request to grade speaking task {task}.")
f'POST - speaking_task_{task} - Received request to grade speaking task {task}. '
f'Use this id to track the logs: {str(request_id)} - Request data: {str(answers)}'
)
text_answers = []
perfect_answers = []
if task != 2: if task != 2:
self._logger.info( self._log(task, request_id, f'Received {len(items)} total answers.')
f'POST - speaking_task_{task} - {str(request_id)} - Received {str(len(answers))} total answers.'
)
for item in answers: temp_files = []
sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4()) try:
# Save all files first
temp_files = await asyncio.gather(*[
self.save_file(item) for item in items
])
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Downloading file {item["answer"]}') # Process all transcriptions concurrently (up to 4)
self._log(task, request_id, 'Starting batch transcription')
text_answers = await asyncio.gather(*[
self._stt.speech_to_text(file_path)
for file_path in temp_files
])
await self._file_storage.download_firebase_file(item["answer"], sound_file_name) for answer in text_answers:
self._log(task, request_id, f'Transcribed answer: {answer}')
self._logger.info( if not TextHelper.has_x_words(answer, 20):
f'POST - speaking_task_{task} - {request_id} - ' self._log(
f'Downloaded file {item["answer"]} to {sound_file_name}' task, request_id,
) f'The answer had less words than threshold 20 to be graded. Answer: {answer}'
answer_text = await self._stt.speech_to_text(sound_file_name)
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Transcripted answer: {answer_text}')
text_answers.append(answer_text)
item["answer"] = answer_text
os.remove(sound_file_name)
# TODO: This will end the grading of all answers if a single one does not have enough words
# don't know if this is intended
if not TextHelper.has_x_words(answer_text, 20):
self._logger.info(
f'POST - speaking_task_{task} - {request_id} - '
f'The answer had less words than threshold 20 to be graded. Answer: {answer_text}'
) )
return self._zero_rating("The audio recorded does not contain enough english words to be graded.") return self._zero_rating("The audio recorded does not contain enough english words to be graded.")
self._logger.info( # Get perfect answers
f'POST - speaking_task_{task} - {request_id} - ' self._log(task, request_id, 'Requesting perfect answers')
f'Requesting perfect answer for question: {item["question"]}' perfect_answers = await asyncio.gather(*[
) self._get_perfect_answer(task, item.question)
perfect_answers.append(await self._get_perfect_answer(task, item["question"])) for item in items
])
# Format the responses
if task in {1, 3}: if task in {1, 3}:
self._logger.info( self._log(task, request_id, 'Formatting answers and questions for prompt.')
f'POST - speaking_task_{task} - {request_id} - Formatting answers and questions for prompt.'
)
formatted_text = "" formatted_text = ""
for i, entry in enumerate(answers, start=1): for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
formatted_text += f"**Question {i}:**\n{entry['question']}\n\n" formatted_text += f"**Question {i}:**\n{item.question}\n\n"
formatted_text += f"**Answer {i}:**\n{entry['answer']}\n\n" formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
self._logger.info( self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
f'POST - speaking_task_{task} - {request_id} - '
f'Formatted answers and questions for prompt: {formatted_text}'
)
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}' questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
else: else:
questions_and_answers = f'\n Question: "{answers[0]["question"]}" \n Answer: "{answers[0]["answer"]}"' questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting grading of the answer(s).') self._log(task, request_id, 'Requesting grading of the answer(s).')
response = await self._grade_task(task, questions_and_answers) response = await self._grade_task(task, questions_and_answers)
self._log(task, request_id, f'Answer(s) graded: {response}')
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Answer(s) graded: {response}')
if task in {1, 3}: if task in {1, 3}:
self._logger.info( self._log(task, request_id, 'Adding perfect answer(s) to response.')
f'POST - speaking_task_{task} - {request_id} - Adding perfect answer(s) to response.')
# TODO: check if it is answer["answer"] instead # TODO: check if it is answer["answer"] instead
for i, answer in enumerate(perfect_answers, start=1): for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer response['perfect_answer_' + str(i)] = answer
self._logger.info( self._log(task, request_id, 'Getting speaking corrections in parallel')
f'POST - speaking_task_{task} - {request_id} - Adding transcript and fixed texts to response.' # Get all corrections in parallel
) fixed_texts = await asyncio.gather(*[
self._get_speaking_corrections(answer)
for answer in text_answers
])
for i, answer in enumerate(text_answers, start=1): self._log(task, request_id, 'Adding transcript and fixed texts to response.')
for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
response['transcript_' + str(i)] = answer response['transcript_' + str(i)] = answer
response['fixed_text_' + str(i)] = await self._get_speaking_corrections(answer) response['fixed_text_' + str(i)] = fixed
else: else:
response['transcript'] = answers[0]["answer"] response['transcript'] = text_answers[0]
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Requesting fixed text.') self._log(task, request_id, 'Requesting fixed text.')
response['fixed_text'] = await self._get_speaking_corrections(answers[0]["answer"]) response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Fixed text: {response["fixed_text"]}') self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
response['perfect_answer'] = perfect_answers[0]["answer"] response['perfect_answer'] = perfect_answers[0]["answer"]
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"]) response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
self._logger.info(f'POST - speaking_task_{task} - {request_id} - Final response: {response}') self._log(task, request_id, f'Final response: {response}')
return response return response
finally:
for file_path in temp_files:
try:
if os.path.exists(file_path):
os.remove(file_path)
except Exception as e:
self._log(task, request_id, f'Error cleaning up temp file {file_path}: {str(e)}')
def _log(self, task: int, request_id: str, message: str) -> None:
    """Log ``message`` at INFO level, prefixed with the speaking task number and request id."""
    self._logger.info(f'POST - speaking_task_{task} - {request_id} - {message}')
@staticmethod
async def save_file(item: GradeSpeakingItem) -> str:
    """Write the uploaded audio of ``item`` to a new file and return its path.

    The caller is responsible for deleting the file afterwards.
    """
    # Random name under the audio directory; no file extension is appended.
    sound_file_name = FilePaths.AUDIO_FILES_PATH + str(uuid.uuid4())
    # The whole upload is read into memory before it is written out.
    content = await item.answer.read()
    async with aiofiles.open(sound_file_name, 'wb') as f:
        await f.write(content)
    return sound_file_name
# ================================================================================================================== # ==================================================================================================================
# grade_speaking_task helpers # grade_speaking_task helpers
# ================================================================================================================== # ==================================================================================================================
@@ -336,7 +343,7 @@ class SpeakingService(ISpeakingService):
{ {
"role": "user", "role": "user",
"content": ( "content": (
'For pronunciations act as if you heard the answers and they were transcripted ' 'For pronunciations act as if you heard the answers and they were transcribed '
'as you heard them.' 'as you heard them.'
) )
}, },

View File

@@ -1,3 +1,4 @@
import asyncio
from typing import List, Dict from typing import List, Dict
from app.services.abc import IWritingService, ILLMService, IAIDetectorService from app.services.abc import IWritingService, ILLMService, IAIDetectorService
@@ -126,7 +127,7 @@ class WritingService(IWritingService):
TemperatureSettings.GEN_QUESTION_TEMPERATURE TemperatureSettings.GEN_QUESTION_TEMPERATURE
) )
response = await self._llm.prediction( evaluation_promise = self._llm.prediction(
llm_model, llm_model,
messages, messages,
["comment"], ["comment"],
@@ -134,15 +135,27 @@ class WritingService(IWritingService):
) )
perfect_answer_minimum = 150 if task == 1 else 250 perfect_answer_minimum = 150 if task == 1 else 250
perfect_answer = await self._get_perfect_answer(question, perfect_answer_minimum) perfect_answer_promise = self._get_perfect_answer(question, perfect_answer_minimum)
fixed_text_promise = self._get_fixed_text(answer)
ai_detection_promise = self._ai_detector.run_detection(answer)
response["perfect_answer"] = perfect_answer["perfect_answer"] prediction_result, perfect_answer_result, fixed_text_result, ai_detection_result = await asyncio.gather(
response["overall"] = ExercisesHelper.fix_writing_overall(response["overall"], response["task_response"]) evaluation_promise,
response['fixed_text'] = await self._get_fixed_text(answer) perfect_answer_promise,
fixed_text_promise,
ai_detection_promise
)
ai_detection = await self._ai_detector.run_detection(answer) response = prediction_result
if ai_detection is not None: response["perfect_answer"] = perfect_answer_result["perfect_answer"]
response['ai_detection'] = ai_detection response["overall"] = ExercisesHelper.fix_writing_overall(
response["overall"],
response["task_response"]
)
response['fixed_text'] = fixed_text_result
if ai_detection_result is not None:
response['ai_detection'] = ai_detection_result
return response return response

View File

@@ -1,22 +1,66 @@
import os import os
import threading
from fastapi.concurrency import run_in_threadpool import whisper
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Dict
from whisper import Whisper from whisper import Whisper
from app.services.abc import ISpeechToTextService from app.services.abc import ISpeechToTextService
"""
The whisper model is not thread safe, so a thread pool
backed by 4 separate whisper models is created, which lets
the service process up to 4 transcriptions at a time.
The base model requires ~1GB, so 4 instances is a safe default:
https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages
"""
class OpenAIWhisper(ISpeechToTextService): class OpenAIWhisper(ISpeechToTextService):
def __init__(self, model_name: str = "base", num_models: int = 4):
self._model_name = model_name
self._num_models = num_models
self._models: Dict[int, 'Whisper'] = {}
self._lock = threading.Lock()
self._next_model_id = 0
self._is_closed = False
def __init__(self, model: Whisper): for i in range(num_models):
self._model = model self._models[i] = whisper.load_model(self._model_name)
async def speech_to_text(self, file_path): self._executor = ThreadPoolExecutor(
if os.path.exists(file_path): max_workers=num_models,
result = await run_in_threadpool( thread_name_prefix="whisper_worker"
self._model.transcribe, file_path, fp16=False, language='English', verbose=False
) )
return result["text"]
else: def get_model(self) -> 'Whisper':
print("File not found:", file_path) with self._lock:
raise Exception("File " + file_path + " not found.") model_id = self._next_model_id
self._next_model_id = (self._next_model_id + 1) % self._num_models
return self._models[model_id]
async def speech_to_text(self, file_path: str) -> str:
    """Transcribe the audio file at ``file_path`` and return the text.

    The transcription runs on the service's own thread pool, so the event
    loop is not blocked while the model works.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} not found.")

    def transcribe():
        # NOTE(review): get_model() hands models out round-robin, so a model
        # can be given to a second worker while an earlier, longer
        # transcription is still using it — confirm whether that is acceptable.
        model = self.get_model()
        return model.transcribe(
            file_path,
            fp16=False,
            language='English',
            verbose=False
        )["text"]

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(self._executor, transcribe)
def close(self):
    """Shut down the transcription thread pool; safe to call more than once.

    Pending transcriptions are cancelled and running ones are awaited.
    """
    with self._lock:
        if self._is_closed:
            return
        self._is_closed = True

    # Shut down outside the lock: a worker inside get_model() needs the same
    # lock, so waiting for the workers while holding it could deadlock.
    # getattr() covers instances whose __init__ failed before the executor
    # was created (close() is also reached through __del__).
    executor = getattr(self, "_executor", None)
    if executor is not None:
        executor.shutdown(wait=True, cancel_futures=True)
def __del__(self):
    """Call close() when the instance is finalized."""
    self.close()