ENCOA-276, ENCOA-277
This commit is contained in:
@@ -51,6 +51,20 @@ async def generate_mp3(
|
|||||||
return await listening_controller.generate_mp3(dto)
|
return await listening_controller.generate_mp3(dto)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Audio transcription endpoint: forwards the uploaded file to the listening
# controller and returns its result unchanged. Protected by the same
# bearer-token dependency used by the neighbouring listening routes.
@listening_router.post(
    '/transcribe',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))],
)
@inject
async def transcribe_dialog(
        audio: UploadFile,
        listening_controller: IListeningController = Depends(Provide[controller]),
):
    transcription_result = await listening_controller.transcribe_dialog(audio)
    return transcription_result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@listening_router.post(
|
@listening_router.post(
|
||||||
'/',
|
'/',
|
||||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||||
@@ -61,3 +75,4 @@ async def generate_listening_exercise(
|
|||||||
listening_controller: IListeningController = Depends(Provide[controller])
|
listening_controller: IListeningController = Depends(Provide[controller])
|
||||||
):
|
):
|
||||||
return await listening_controller.get_listening_question(dto)
|
return await listening_controller.get_listening_question(dto)
|
||||||
|
|
||||||
|
|||||||
@@ -106,6 +106,7 @@ class FilePaths:
|
|||||||
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'
|
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'
|
||||||
VIDEO_FILES_PATH = 'download-video/'
|
VIDEO_FILES_PATH = 'download-video/'
|
||||||
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'
|
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'
|
||||||
|
FIREBASE_FAILED_TRANSCRIPTION_FILES_PATH = 'failed_transcriptions/'
|
||||||
WRITING_ATTACHMENTS = 'writing_attachments/'
|
WRITING_ATTACHMENTS = 'writing_attachments/'
|
||||||
|
|
||||||
|
|
||||||
@@ -232,7 +233,7 @@ class NeuralVoices:
|
|||||||
|
|
||||||
|
|
||||||
class EducationalContent:
|
class EducationalContent:
|
||||||
DIFFICULTIES = ["easy", "medium", "hard"]
|
DIFFICULTIES = ["A1", "A2", "B1", "B2", "C1", "C2"]
|
||||||
|
|
||||||
MTI_TOPICS = [
|
MTI_TOPICS = [
|
||||||
"Education",
|
"Education",
|
||||||
|
|||||||
@@ -20,3 +20,7 @@ class IListeningController(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def generate_mp3(self, dto):
|
async def generate_mp3(self, dto):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
async def transcribe_dialog(self, audio: UploadFile):
    """Handle a transcription request for the uploaded ``audio`` file.

    Concrete controllers decide what is returned to the route.
    """
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import io
|
import io
|
||||||
|
|
||||||
from fastapi import UploadFile
|
from fastapi import UploadFile
|
||||||
from starlette.responses import StreamingResponse, Response
|
from fastapi.responses import StreamingResponse, Response
|
||||||
|
|
||||||
from ielts_be.controllers import IListeningController
|
from ielts_be.controllers import IListeningController
|
||||||
from ielts_be.services import IListeningService
|
from ielts_be.services import IListeningService
|
||||||
@@ -37,3 +37,10 @@ class ListeningController(IListeningController):
|
|||||||
"Content-Disposition": "attachment;filename=speech.mp3"
|
"Content-Disposition": "attachment;filename=speech.mp3"
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def transcribe_dialog(self, audio: UploadFile):
    """Transcribe an uploaded audio file through the listening service.

    Returns the service result as-is, or an empty HTTP 500 response when the
    service yields ``None``.
    """
    transcribed = await self._service.transcribe_dialog(audio)
    return Response(status_code=500) if transcribed is None else transcribed
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import List, Union, Optional, Literal
|
from typing import List, Union, Optional, Literal, Any
|
||||||
from uuid import uuid4, UUID
|
from uuid import uuid4, UUID
|
||||||
|
|
||||||
|
from ielts_be.dtos.listening import Dialog
|
||||||
|
|
||||||
|
|
||||||
class ExerciseBase(BaseModel):
|
class ExerciseBase(BaseModel):
|
||||||
id: UUID = Field(default_factory=uuid4)
|
id: UUID = Field(default_factory=uuid4)
|
||||||
@@ -81,6 +83,7 @@ ListeningExercise = Union[
|
|||||||
|
|
||||||
class ListeningSection(BaseModel):
    # Exercises that make up this section of the listening exam.
    exercises: List[ListeningExercise]
    # Optional script attached to the section: either a list of entries or a
    # plain string. NOTE(review): presumably conversation turns vs. monologue
    # text - confirm against the code that fills this field.
    script: Optional[Union[List[Any], str]] = None
|
||||||
|
|
||||||
|
|
||||||
class ListeningExam(BaseModel):
|
class ListeningExam(BaseModel):
|
||||||
|
|||||||
@@ -15,3 +15,7 @@ class UnauthorizedException(CustomException):
|
|||||||
code = HTTPStatus.UNAUTHORIZED
|
code = HTTPStatus.UNAUTHORIZED
|
||||||
error_code = HTTPStatus.UNAUTHORIZED
|
error_code = HTTPStatus.UNAUTHORIZED
|
||||||
message = HTTPStatus.UNAUTHORIZED.description
|
message = HTTPStatus.UNAUTHORIZED.description
|
||||||
|
|
||||||
|
class TranscriptionException(CustomException):
    # Error type for a failed audio transcription, reported as HTTP 500.
    code = HTTPStatus.INTERNAL_SERVER_ERROR
    error_code = HTTPStatus.INTERNAL_SERVER_ERROR
    # Declare the message explicitly, as UnauthorizedException does, so the
    # text matches the 500 status rather than whatever the base class defaults
    # to. NOTE(review): confirm CustomException reads ``message`` this way.
    message = HTTPStatus.INTERNAL_SERVER_ERROR.description
|
||||||
@@ -8,7 +8,7 @@ from ielts_be.dtos.exams.listening import (
|
|||||||
WriteBlanksExercise,
|
WriteBlanksExercise,
|
||||||
ListeningExam,
|
ListeningExam,
|
||||||
ListeningSection,
|
ListeningSection,
|
||||||
WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion
|
WriteBlanksVariant, WriteBlankSolution, WriteBlanksQuestionExercise, WriteBlankQuestion, Dialog
|
||||||
)
|
)
|
||||||
|
|
||||||
class ListeningQuestionSection(BaseModel):
|
class ListeningQuestionSection(BaseModel):
|
||||||
@@ -110,3 +110,73 @@ class ListeningMapper:
|
|||||||
minTimer=response.get('minTimer'),
|
minTimer=response.get('minTimer'),
|
||||||
module="listening"
|
module="listening"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def validate_speaker(participant: Dict[str, str]) -> None:
|
||||||
|
required_fields = ["name", "gender", "text"]
|
||||||
|
for field in required_fields:
|
||||||
|
if field not in participant:
|
||||||
|
raise ValueError(f"Missing required field '{field}' in speaker")
|
||||||
|
if not isinstance(participant[field], str):
|
||||||
|
raise ValueError(f"Field '{field}' must be a string")
|
||||||
|
|
||||||
|
@classmethod
def validate_conversation(cls, conversation: List[Dict[str, str]]) -> None:
    """Ensure a conversation is a non-empty list of valid speaker entries.

    Raises:
        ValueError: If ``conversation`` is not a list, is empty, or contains
            an entry rejected by ``validate_speaker``.
    """
    if not isinstance(conversation, list):
        raise ValueError("Conversation must be a list")
    if len(conversation) == 0:
        raise ValueError("Conversation cannot be empty")

    for turn in conversation:
        cls.validate_speaker(turn)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def validate_monologue(monologue: str) -> None:
|
||||||
|
if not isinstance(monologue, str):
|
||||||
|
raise ValueError("Monologue must be a string")
|
||||||
|
if not monologue.strip():
|
||||||
|
raise ValueError("Monologue cannot be empty")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def extract_section_number(section_key: str) -> str:
|
||||||
|
return ''.join([char for char in section_key if char.isdigit()])
|
||||||
|
|
||||||
|
@classmethod
def map_to_dialog_model(cls, response: Dict[str, Any]) -> Dict[str, Optional[Union[List[Dict[str, str]], str]]]:
    """Validate a sections payload and flatten it into a script per section.

    ``response`` must look like ``{"sections": [{"<key>": {...}}, ...]}`` where
    each section dict has exactly one key and its content is either empty or
    starts with a ``conversation`` or ``monologue`` entry.

    Returns:
        A dict keyed by the digits found in each section key. The value is the
        conversation list, the monologue string, or ``None`` for a section
        whose content dict is empty.

    Raises:
        ValueError: If the payload or any section does not have the expected
            structure, or a conversation/monologue fails validation.
    """
    if not isinstance(response, dict):
        raise ValueError("Response must be a dictionary")
    if "sections" not in response:
        raise ValueError("Response must contain 'sections' key")

    sections = response["sections"]
    if not isinstance(sections, list):
        raise ValueError("Sections must be a list")

    # Each supported dialog type maps to the validator for its payload.
    validators = {
        "conversation": cls.validate_conversation,
        "monologue": cls.validate_monologue,
    }

    scripts = {}
    for entry in sections:
        if not (isinstance(entry, dict) and len(entry) == 1):
            raise ValueError("Each section must be a dictionary with exactly one key")

        (key, body), = entry.items()
        number = cls.extract_section_number(key)

        if not isinstance(body, dict):
            raise ValueError(f"Content for section {key} must be a dictionary")

        if not body:
            scripts[number] = None
            continue

        # Only the first key of the content decides the dialog type.
        kind = next(iter(body))
        if kind not in validators:
            raise ValueError(f"Invalid dialog type '{kind}' in section {key}")

        validators[kind](body[kind])
        scripts[number] = body[kind]

    return scripts
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import aiofiles
|
import aiofiles
|
||||||
@@ -40,6 +41,7 @@ class FirebaseStorage(IFileStorage):
|
|||||||
async with aiofiles.open(source_file_name, 'rb') as file:
|
async with aiofiles.open(source_file_name, 'rb') as file:
|
||||||
file_bytes = await file.read()
|
file_bytes = await file.read()
|
||||||
|
|
||||||
|
created = datetime.now().isoformat()
|
||||||
response = await self._httpx_client.post(
|
response = await self._httpx_client.post(
|
||||||
upload_url,
|
upload_url,
|
||||||
headers={
|
headers={
|
||||||
@@ -47,7 +49,7 @@ class FirebaseStorage(IFileStorage):
|
|||||||
"X-Goog-Upload-Protocol": "multipart"
|
"X-Goog-Upload-Protocol": "multipart"
|
||||||
},
|
},
|
||||||
files={
|
files={
|
||||||
'metadata': (None, '{"metadata":{"test":"testMetadata"}}', 'application/json'),
|
'metadata': (None, '{"metadata":{"created":"'+ created + '"}}', 'application/json'),
|
||||||
'file': file_bytes
|
'file': file_bytes
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -70,7 +72,7 @@ class FirebaseStorage(IFileStorage):
|
|||||||
response = await self._httpx_client.post(
|
response = await self._httpx_client.post(
|
||||||
acl_url,
|
acl_url,
|
||||||
headers={
|
headers={
|
||||||
'Authorization': f'Bearer {self._token}',
|
'Authorization': f'Firebase {self._token}',
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
},
|
},
|
||||||
json=acl
|
json=acl
|
||||||
|
|||||||
@@ -20,12 +20,12 @@ class IListeningService(ABC):
|
|||||||
async def generate_mp3(self, dto) -> bytes:
|
async def generate_mp3(self, dto) -> bytes:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
async def get_dialog_from_audio(self, upload: UploadFile):
|
|
||||||
pass
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def import_exam(
|
async def import_exam(
|
||||||
self, exercises: UploadFile, solutions: UploadFile = None
|
self, exercises: UploadFile, solutions: UploadFile = None
|
||||||
) -> Dict[str, Any] | None:
|
) -> Dict[str, Any] | None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
async def transcribe_dialog(self, audio: UploadFile):
    """Transcribe the uploaded ``audio`` file.

    Concrete services decide the shape of the returned value.
    """
|
||||||
|
|||||||
@@ -1,8 +1,14 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
class ISpeechToTextService(ABC):
    """Interface that speech-to-text backends must implement."""

    @abstractmethod
    async def speech_to_text(self, file: str):
        """Transcribe the audio referenced by ``file``.

        NOTE(review): callers appear to pass a file path and to treat a list
        result as transcription segments - confirm against the implementation.
        """

    @staticmethod
    @abstractmethod
    async def fix_overlap(llm, segments: List[str]):
        """Post-process transcription ``segments`` with the help of ``llm``.

        NOTE(review): presumably returns a single cleaned-up transcription
        text - confirm against the implementation.
        """
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ from logging import getLogger
|
|||||||
import random
|
import random
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
import aiofiles
|
||||||
from starlette.datastructures import UploadFile
|
from starlette.datastructures import UploadFile
|
||||||
|
|
||||||
from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises
|
from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises
|
||||||
|
from ielts_be.exceptions.exceptions import TranscriptionException
|
||||||
from ielts_be.repositories import IFileStorage, IDocumentStore
|
from ielts_be.repositories import IFileStorage, IDocumentStore
|
||||||
from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService
|
from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService
|
||||||
from ielts_be.configs.constants import (
|
from ielts_be.configs.constants import (
|
||||||
@@ -13,6 +15,7 @@ from ielts_be.configs.constants import (
|
|||||||
FieldsAndExercises
|
FieldsAndExercises
|
||||||
)
|
)
|
||||||
from ielts_be.helpers import FileHelper
|
from ielts_be.helpers import FileHelper
|
||||||
|
from .audio_to_dialog import AudioToDialog
|
||||||
from .import_listening import ImportListeningModule
|
from .import_listening import ImportListeningModule
|
||||||
from .write_blank_forms import WriteBlankForms
|
from .write_blank_forms import WriteBlankForms
|
||||||
from .write_blanks import WriteBlanks
|
from .write_blanks import WriteBlanks
|
||||||
@@ -50,6 +53,7 @@ class ListeningService(IListeningService):
|
|||||||
self._write_blanks_notes = WriteBlankNotes(llm)
|
self._write_blanks_notes = WriteBlankNotes(llm)
|
||||||
self._import = ImportListeningModule(llm)
|
self._import = ImportListeningModule(llm)
|
||||||
self._true_false = TrueFalse(llm)
|
self._true_false = TrueFalse(llm)
|
||||||
|
self._audio_to_dialog = AudioToDialog(llm)
|
||||||
self._sections = {
|
self._sections = {
|
||||||
"section_1": {
|
"section_1": {
|
||||||
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
|
"topic": EducationalContent.TWO_PEOPLE_SCENARIOS,
|
||||||
@@ -94,11 +98,18 @@ class ListeningService(IListeningService):
|
|||||||
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
|
async def generate_listening_dialog(self, section: int, topic: str, difficulty: str):
|
||||||
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
|
return await self._sections[f'section_{section}']["generate_dialogue"](section, topic)
|
||||||
|
|
||||||
# TODO: When mp3 editor
|
async def transcribe_dialog(self, audio: UploadFile):
    """Turn an uploaded audio file into a dialog.

    The upload is saved under ``./tmp/<path_id>``, transcribed into segments,
    the segments are passed through ``fix_overlap``, and the resulting
    transcription is handed to the audio-to-dialog helper.

    Args:
        audio: The uploaded audio file.

    Returns:
        The value produced by ``get_dialog``, or ``None`` when a
        ``TranscriptionException`` is raised along the way (the error is
        logged).
    """
    ext, path_id = await FileHelper.save_upload(audio)
    upload_dir = f'./tmp/{path_id}'
    try:
        transcription_segments = await self._stt.speech_to_text(f'{upload_dir}/upload.{ext}')
        transcription = await self._stt.fix_overlap(self._llm, transcription_segments)
        return await self._audio_to_dialog.get_dialog(transcription)
    except TranscriptionException as e:
        self._logger.error(str(e))
        return None
    finally:
        # Always remove the temporary upload. Previously the directory was
        # only deleted on the success path, so every failed transcription
        # left its files behind.
        FileHelper.remove_directory(upload_dir)
|
||||||
|
|
||||||
async def generate_mp3(self, dto: Dialog) -> bytes:
|
async def generate_mp3(self, dto: Dialog) -> bytes:
|
||||||
return await self._tts.text_to_speech(dto)
|
return await self._tts.text_to_speech(dto)
|
||||||
|
|||||||
37
ielts_be/services/impl/exam/listening/audio_to_dialog.py
Normal file
37
ielts_be/services/impl/exam/listening/audio_to_dialog.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
from logging import getLogger
|
||||||
|
|
||||||
|
from ielts_be.configs.constants import TemperatureSettings, GPTModels
|
||||||
|
from ielts_be.services import ILLMService
|
||||||
|
|
||||||
|
|
||||||
|
class AudioToDialog:
    """Asks the LLM to structure an audio transcription as a dialog."""

    def __init__(self, llm_service: ILLMService):
        self._logger = getLogger(__name__)
        self._llm = llm_service

    async def get_dialog(self, transcription: str):
        """Request a structured dialog for ``transcription`` from the LLM.

        The system prompt asks for JSON with a ``dialog`` field holding either
        a list of ``{"name", "gender", "text"}`` entries (conversation) or a
        single string (monologue).

        Args:
            transcription: The transcribed text of the audio file.

        Returns:
            Whatever ``ILLMService.prediction`` returns for the request.
        """
        messages = [
            {
                "role": "system",
                "content": (
                    'You are a helpful assistant designed to output JSON on either one of these formats:\n'
                    '1 - {"dialog": [{"name": "name", "gender": "gender", "text": "text"}]}\n'
                    '2 - {"dialog": "text"}\n\n'
                    # Trailing space added: the literals used to join as "you willneed".
                    'A transcription of an audio file will be provided to you. Based on that transcription you will '
                    'need to determine whether the transcription is a conversation or a monologue. If the transcription '
                    'is a dialog you will have to determine the interlocutors names and genders and place each excerpt of '
                    'dialog in a sequential manner using the json array structure previously given (1). In the case of being '
                    'a monologue just place all the text in the field "dialog" (2). If the transcription is a conversation '
                    'and you can\'t ascertain the names of the interlocutors from the transcription give a single common name '
                    'to each interlocutor. Also gender must be male or female, if you can\'t ascertain then use male.'
                )
            },
            {
                "role": "user",
                "content": f"Transcription: {transcription}"
            }
        ]

        return await self._llm.prediction(
            GPTModels.GPT_4_O, messages, ["dialog"], TemperatureSettings.GEN_QUESTION_TEMPERATURE
        )
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import json
|
import asyncio
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
@@ -36,28 +36,47 @@ class ImportListeningModule:
|
|||||||
f'./tmp/{path_id}/solutions.html'
|
f'./tmp/{path_id}/solutions.html'
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await self._get_listening_sections(path_id, solutions is not None)
|
|
||||||
|
|
||||||
FileHelper.remove_directory(f'./tmp/{path_id}')
|
|
||||||
if response:
|
|
||||||
return response.model_dump(exclude_none=True)
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def _get_listening_sections(
|
|
||||||
self,
|
|
||||||
path_id: str,
|
|
||||||
has_solutions: bool = False
|
|
||||||
) -> ListeningExam:
|
|
||||||
async with aiofiles.open(
|
async with aiofiles.open(
|
||||||
f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
|
f'./tmp/{path_id}/exercises.html', 'r', encoding='utf-8'
|
||||||
) as f:
|
) as f:
|
||||||
exercises_html = await f.read()
|
exercises_html = await f.read()
|
||||||
|
|
||||||
|
dialog_promise = self._llm.pydantic_prediction(
|
||||||
|
[
|
||||||
|
self._dialog_instructions(),
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"Listening exercise sheet:\n\n{exercises_html}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
ListeningMapper.map_to_dialog_model,
|
||||||
|
str(self._dialog_schema())
|
||||||
|
)
|
||||||
|
response_promise = self._get_listening_sections(path_id, exercises_html, solutions is not None)
|
||||||
|
|
||||||
|
tasks = await asyncio.gather(dialog_promise, response_promise)
|
||||||
|
dialog: Dict = tasks[0]
|
||||||
|
response = tasks[1]
|
||||||
|
|
||||||
|
FileHelper.remove_directory(f'./tmp/{path_id}')
|
||||||
|
if response:
|
||||||
|
response = response.model_dump(exclude_none=True)
|
||||||
|
for i in range(len(response["parts"])):
|
||||||
|
response["parts"][i]["script"] = dialog[str(i + 1)]
|
||||||
|
return response
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _get_listening_sections(
|
||||||
|
self,
|
||||||
|
path_id: str,
|
||||||
|
html: str,
|
||||||
|
has_solutions: bool = False
|
||||||
|
) -> ListeningExam:
|
||||||
messages = [
|
messages = [
|
||||||
self._instructions(has_solutions),
|
self._instructions(has_solutions),
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"Listening exercise sheet:\n\n{exercises_html}"
|
"content": f"Listening exercise sheet:\n\n{html}"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -181,3 +200,37 @@ class ImportListeningModule:
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _dialog_instructions() -> Dict[str, str]:
|
||||||
|
return {
|
||||||
|
"role": "system",
|
||||||
|
"content": (
|
||||||
|
f"You are processing a listening test exercise sheet. Your objective is to ascertain if "
|
||||||
|
'there is a monologue or a conversation for parts/sections of the test. If there is you '
|
||||||
|
'must either use the following JSON: {"monologue": "monologue_text"} for monologues or '
|
||||||
|
'{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]} for conversations. \n\n'
|
||||||
|
|
||||||
|
'First identify all sections/parts by looking for \'SECTION n\' headers or similar ones, '
|
||||||
|
'then for each section identify and structure its dialog type of the section iff there is one in a single '
|
||||||
|
'JSON format like so {"sections": [{"section_1": {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}}, '
|
||||||
|
'{"section_2": {"monologue": "monologue_text"}} ]}'
|
||||||
|
|
||||||
|
'Each section might not have a conversation or monologue in those cases omit the section, for instance section 1 '
|
||||||
|
'might have a conversation, section 2 might have nothing, section 3 might have a monologue. In that case: '
|
||||||
|
'{"sections": [{"section_1": {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}},'
|
||||||
|
'{"section_3": {"monologue": "monologue_text"}} ]}. Keep in mind that gender most likely won\'t be included '
|
||||||
|
', try to figure out by the name of the speaker, when in doubt use male. The gender MUST BE ONLY "male" or "female".'
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _dialog_schema():
|
||||||
|
return {
|
||||||
|
"sections": [
|
||||||
|
{"section_1": {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}},
|
||||||
|
{"section_2": {"monologue": "monologue_text"}},
|
||||||
|
{"section_3": {"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}},
|
||||||
|
{"section_4": {"monologue": "monologue_text"}},
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -23,7 +23,7 @@ class WriteBlankForms:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Generate a form with {quantity} {difficulty} difficulty key-value pairs '
|
f'Generate a form with {quantity} of {difficulty} CEFR level difficulty key-value pairs '
|
||||||
f'about this {dialog_type}:\n"{text}"'
|
f'about this {dialog_type}:\n"{text}"'
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class WriteBlankNotes:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Generate {quantity} {difficulty} difficulty notes taken from this '
|
f'Generate {quantity} {difficulty} CEFR level difficulty notes taken from this '
|
||||||
f'{dialog_type}:\n"{text}"'
|
f'{dialog_type}:\n"{text}"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class WriteBlanks:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Generate {quantity} {difficulty} difficulty short answer questions, and the '
|
f'Generate {quantity} {difficulty} CEFR level difficulty short answer questions, and the '
|
||||||
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
|
f'possible answers (max 3 words per answer), about this {dialog_type}:\n"{text}"')
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ class FillBlanks:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Select {quantity} {difficulty} difficulty words, it must be words and not expressions, '
|
f'Select {quantity} {difficulty} CEFR level difficulty words, it must be words and not expressions, '
|
||||||
f'from this:\n{response["summary"]}'
|
f'from this:\n{response["summary"]}'
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class WriteBlanks:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Generate {str(quantity)} {difficulty} difficulty short answer questions, and the '
|
f'Generate {str(quantity)} {difficulty} CEFR level difficulty short answer questions, and the '
|
||||||
f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
|
f'possible answers, must have maximum {max_words} words per answer, about this text:\n"{text}"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ class MultipleChoice:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Generate {quantity} {difficulty} difficulty multiple choice questions of {n_options} '
|
f'Generate {quantity} {difficulty} CEFR level difficulty multiple choice questions of {n_options} '
|
||||||
f'options for this text:\n"' + text + '"')
|
f'options for this text:\n"' + text + '"')
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ class TrueFalse:
|
|||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": (
|
"content": (
|
||||||
f'Generate {str(quantity)} {difficulty} difficulty statements based on the provided text. '
|
f'Generate {str(quantity)} {difficulty} CEFR level difficulty statements based on the provided text. '
|
||||||
'Ensure that your statements accurately represent information or inferences from the text, and '
|
'Ensure that your statements accurately represent information or inferences from the text, and '
|
||||||
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
|
'provide a variety of responses, including, at least one of each True, False, and Not Given, '
|
||||||
f'as appropriate.\n\nReference text:\n\n {text}'
|
f'as appropriate.\n\nReference text:\n\n {text}'
|
||||||
|
|||||||
@@ -37,9 +37,25 @@ class GradeSpeaking:
|
|||||||
|
|
||||||
# Process all transcriptions concurrently (up to 4)
|
# Process all transcriptions concurrently (up to 4)
|
||||||
self._log(task, request_id, 'Starting batch transcription')
|
self._log(task, request_id, 'Starting batch transcription')
|
||||||
text_answers = await asyncio.gather(*[
|
text_transcription_segments = await asyncio.gather(*[
|
||||||
self._stt.speech_to_text(file_path)
|
self._stt.speech_to_text(file_path)
|
||||||
for file_path in temp_files
|
for file_path in temp_files
|
||||||
|
], return_exceptions=True)
|
||||||
|
|
||||||
|
successful_transcriptions = []
|
||||||
|
failed_indices = []
|
||||||
|
successful_indices = []
|
||||||
|
for i, result in enumerate(text_transcription_segments):
|
||||||
|
if isinstance(result, Exception):
|
||||||
|
self._log(task, request_id, f'Transcription failed for exercise {i + 1}: {str(result)}')
|
||||||
|
failed_indices.append(i)
|
||||||
|
elif isinstance(result, list):
|
||||||
|
successful_transcriptions.append(result)
|
||||||
|
successful_indices.append(i)
|
||||||
|
|
||||||
|
text_answers = await asyncio.gather(*[
|
||||||
|
self._stt.fix_overlap(self._llm, answer_segments)
|
||||||
|
for answer_segments in successful_transcriptions
|
||||||
])
|
])
|
||||||
|
|
||||||
for answer in text_answers:
|
for answer in text_answers:
|
||||||
@@ -63,14 +79,17 @@ class GradeSpeaking:
|
|||||||
self._log(task, request_id, 'Formatting answers and questions for prompt.')
|
self._log(task, request_id, 'Formatting answers and questions for prompt.')
|
||||||
|
|
||||||
formatted_text = ""
|
formatted_text = ""
|
||||||
for i, (item, transcribed_answer) in enumerate(zip(items, text_answers), start=1):
|
for success_idx, orig_idx in enumerate(successful_indices):
|
||||||
formatted_text += f"**Question {i}:**\n{item.question}\n\n"
|
formatted_text += f"**Question {orig_idx + 1}:**\n{items[orig_idx].question}\n\n"
|
||||||
formatted_text += f"**Answer {i}:**\n{transcribed_answer}\n\n"
|
formatted_text += f"**Answer {orig_idx + 1}:**\n{text_answers[success_idx]}\n\n"
|
||||||
|
|
||||||
self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
|
self._log(task, request_id, f'Formatted answers and questions for prompt: {formatted_text}')
|
||||||
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
|
questions_and_answers = f'\n\n The questions and answers are: \n\n{formatted_text}'
|
||||||
else:
|
else:
|
||||||
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
|
if len(text_answers) > 0:
|
||||||
|
questions_and_answers = f'\n Question: "{items[0].question}" \n Answer: "{text_answers[0]}"'
|
||||||
|
else:
|
||||||
|
return self._zero_rating("The audio recording failed to be transcribed.")
|
||||||
|
|
||||||
self._log(task, request_id, 'Requesting grading of the answer(s).')
|
self._log(task, request_id, 'Requesting grading of the answer(s).')
|
||||||
response = await self._grade_task(task, questions_and_answers)
|
response = await self._grade_task(task, questions_and_answers)
|
||||||
@@ -79,37 +98,43 @@ class GradeSpeaking:
|
|||||||
if task in {1, 3}:
|
if task in {1, 3}:
|
||||||
self._log(task, request_id, 'Adding perfect answer(s) to response.')
|
self._log(task, request_id, 'Adding perfect answer(s) to response.')
|
||||||
|
|
||||||
# TODO: check if it is answer["answer"] instead
|
# Add responses for successful transcriptions
|
||||||
for i, answer in enumerate(perfect_answers, start=1):
|
for success_idx, orig_idx in enumerate(successful_indices):
|
||||||
response['perfect_answer_' + str(i)] = answer
|
response['perfect_answer_' + str(orig_idx + 1)] = perfect_answers[
|
||||||
|
orig_idx] # Changed from success_idx
|
||||||
|
response['transcript_' + str(orig_idx + 1)] = text_answers[success_idx]
|
||||||
|
response['fixed_text_' + str(orig_idx + 1)] = await self._get_speaking_corrections(
|
||||||
|
text_answers[success_idx])
|
||||||
|
|
||||||
self._log(task, request_id, 'Getting speaking corrections in parallel')
|
# Add empty strings for failed transcriptions but keep perfect answers
|
||||||
# Get all corrections in parallel
|
for failed_idx in failed_indices:
|
||||||
fixed_texts = await asyncio.gather(*[
|
response['perfect_answer_' + str(failed_idx + 1)] = perfect_answers[
|
||||||
self._get_speaking_corrections(answer)
|
failed_idx] # Keep perfect answer
|
||||||
for answer in text_answers
|
response['transcript_' + str(failed_idx + 1)] = ""
|
||||||
])
|
response['fixed_text_' + str(failed_idx + 1)] = ""
|
||||||
|
response[f'error_{failed_idx + 1}'] = f"Transcription failed for exercise {failed_idx + 1}"
|
||||||
self._log(task, request_id, 'Adding transcript and fixed texts to response.')
|
|
||||||
for i, (answer, fixed) in enumerate(zip(text_answers, fixed_texts), start=1):
|
|
||||||
response['transcript_' + str(i)] = answer
|
|
||||||
response['fixed_text_' + str(i)] = fixed
|
|
||||||
else:
|
else:
|
||||||
response['transcript'] = text_answers[0]
|
response['transcript'] = text_answers[0] if text_answers else ""
|
||||||
|
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0]) if text_answers else ""
|
||||||
self._log(task, request_id, 'Requesting fixed text.')
|
response['perfect_answer'] = perfect_answers[0]["answer"] if perfect_answers else ""
|
||||||
response['fixed_text'] = await self._get_speaking_corrections(text_answers[0])
|
|
||||||
self._log(task, request_id, f'Fixed text: {response["fixed_text"]}')
|
|
||||||
|
|
||||||
response['perfect_answer'] = perfect_answers[0]["answer"]
|
|
||||||
|
|
||||||
solutions = []
|
solutions = []
|
||||||
for file_name in temp_files:
|
for i, file_name in enumerate(temp_files):
|
||||||
solutions.append(await self._file_storage.upload_file_firebase_get_url(f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav', file_name))
|
try:
|
||||||
|
if i not in failed_indices:
|
||||||
|
path = f'{FilePaths.FIREBASE_SPEAKING_VIDEO_FILES_PATH}{uuid.uuid4()}.wav'
|
||||||
|
else:
|
||||||
|
path = f'{FilePaths.FIREBASE_FAILED_TRANSCRIPTION_FILES_PATH}_grading_{request_id}_ex_{i + 1}.wav'
|
||||||
|
|
||||||
|
solution_url = await self._file_storage.upload_file_firebase_get_url(path, file_name)
|
||||||
|
solutions.append(solution_url)
|
||||||
|
except Exception as e:
|
||||||
|
self._log(task, request_id, f'Failed to upload file {i + 1}: {str(e)}')
|
||||||
|
solutions.append("")
|
||||||
|
|
||||||
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
|
response["overall"] = self._fix_speaking_overall(response["overall"], response["task_response"])
|
||||||
response["solutions"] = solutions
|
response["solutions"] = solutions
|
||||||
if task in {1,3}:
|
if task in {1, 3}:
|
||||||
response["answer"] = solutions
|
response["answer"] = solutions
|
||||||
else:
|
else:
|
||||||
response["fullPath"] = solutions[0]
|
response["fullPath"] = solutions[0]
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dic
|
|||||||
'student to compose a letter. The prompt should present a specific scenario or situation, '
|
'student to compose a letter. The prompt should present a specific scenario or situation, '
|
||||||
f'based on the topic of "{topic}", requiring the student to provide information, '
|
f'based on the topic of "{topic}", requiring the student to provide information, '
|
||||||
'advice, or instructions within the letter. Make sure that the generated prompt is '
|
'advice, or instructions within the letter. Make sure that the generated prompt is '
|
||||||
f'of {difficulty} difficulty and does not contain forbidden subjects in muslim countries.'
|
f'of {difficulty} CEFR level difficulty and does not contain forbidden subjects in muslim countries.'
|
||||||
),
|
),
|
||||||
"instructions": (
|
"instructions": (
|
||||||
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
|
'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
|
||||||
@@ -19,7 +19,7 @@ def get_writing_args_general(task: int, topic: str, difficulty: str) -> List[Dic
|
|||||||
"2": {
|
"2": {
|
||||||
# TODO: Should the muslim disclaimer be here as well?
|
# TODO: Should the muslim disclaimer be here as well?
|
||||||
"prompt": (
|
"prompt": (
|
||||||
f'Craft a comprehensive question of {difficulty} difficulty like the ones for IELTS '
|
f'Craft a comprehensive question of {difficulty} CEFR level difficulty like the ones for IELTS '
|
||||||
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
|
'Writing Task 2 General Training that directs the candidate to delve into an in-depth '
|
||||||
f'analysis of contrasting perspectives on the topic of "{topic}".'
|
f'analysis of contrasting perspectives on the topic of "{topic}".'
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -5,12 +5,16 @@ import numpy as np
|
|||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
import librosa
|
import librosa
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from typing import Dict
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
|
|
||||||
|
from tenacity import retry, stop_after_attempt, retry_if_exception_type
|
||||||
from whisper import Whisper
|
from whisper import Whisper
|
||||||
|
|
||||||
from ielts_be.services import ISpeechToTextService
|
from ielts_be.configs.constants import GPTModels, TemperatureSettings
|
||||||
|
from ielts_be.exceptions.exceptions import TranscriptionException
|
||||||
|
from ielts_be.services import ISpeechToTextService, ILLMService
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The whisper model is not thread safe, a thread pool
|
The whisper model is not thread safe, a thread pool
|
||||||
@@ -44,34 +48,37 @@ class OpenAIWhisper(ISpeechToTextService):
|
|||||||
self._next_model_id = (self._next_model_id + 1) % self._num_models
|
self._next_model_id = (self._next_model_id + 1) % self._num_models
|
||||||
return self._models[model_id]
|
return self._models[model_id]
|
||||||
|
|
||||||
async def speech_to_text(self, path: str) -> str:
|
@retry(
|
||||||
|
stop=stop_after_attempt(3),
|
||||||
|
retry=retry_if_exception_type(Exception),
|
||||||
|
reraise=True
|
||||||
|
)
|
||||||
|
async def speech_to_text(self, path: str, *, index: Optional[int] = None) -> str:
|
||||||
def transcribe():
|
def transcribe():
|
||||||
try:
|
try:
|
||||||
audio, sr = sf.read(path)
|
audio, sr = sf.read(path)
|
||||||
|
|
||||||
# Convert to mono first to reduce memory usage
|
|
||||||
if len(audio.shape) > 1:
|
if len(audio.shape) > 1:
|
||||||
audio = audio.mean(axis=1)
|
audio = audio.mean(axis=1)
|
||||||
|
|
||||||
# Resample from 48kHz to 16kHz
|
|
||||||
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
|
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
|
||||||
|
|
||||||
# Normalize to [-1, 1] range
|
|
||||||
audio = audio.astype(np.float32)
|
audio = audio.astype(np.float32)
|
||||||
if np.max(np.abs(audio)) > 0:
|
if np.max(np.abs(audio)) > 0:
|
||||||
audio = audio / np.max(np.abs(audio))
|
audio = audio / np.max(np.abs(audio))
|
||||||
|
|
||||||
# Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
|
max_samples = 480000 # 30 seconds at 16kHz
|
||||||
max_samples = 480000
|
overlap = max_samples // 4 # 1/4 overlap
|
||||||
|
|
||||||
|
# Greater than 30 secs
|
||||||
if len(audio) > max_samples:
|
if len(audio) > max_samples:
|
||||||
chunks = []
|
chunks = []
|
||||||
for i in range(0, len(audio), max_samples):
|
texts = []
|
||||||
|
model = self.get_model()
|
||||||
|
|
||||||
|
# i + 1 gets 1/4 overlap
|
||||||
|
for i in range(0, len(audio) - overlap, max_samples - overlap):
|
||||||
chunk = audio[i:i + max_samples]
|
chunk = audio[i:i + max_samples]
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
|
|
||||||
model = self.get_model()
|
|
||||||
texts = []
|
|
||||||
for chunk in chunks:
|
|
||||||
result = model.transcribe(
|
result = model.transcribe(
|
||||||
chunk,
|
chunk,
|
||||||
fp16=False,
|
fp16=False,
|
||||||
@@ -79,7 +86,7 @@ class OpenAIWhisper(ISpeechToTextService):
|
|||||||
verbose=False
|
verbose=False
|
||||||
)["text"]
|
)["text"]
|
||||||
texts.append(result)
|
texts.append(result)
|
||||||
return " ".join(texts)
|
return texts
|
||||||
else:
|
else:
|
||||||
model = self.get_model()
|
model = self.get_model()
|
||||||
return model.transcribe(
|
return model.transcribe(
|
||||||
@@ -90,8 +97,12 @@ class OpenAIWhisper(ISpeechToTextService):
|
|||||||
)["text"]
|
)["text"]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise
|
msg = (
|
||||||
|
f"Failed to transcribe exercise {index+1} after 3 attempts: {str(e)}"
|
||||||
|
if index else
|
||||||
|
f"Transcription failed after 3 attempts: {str(e)}"
|
||||||
|
)
|
||||||
|
raise TranscriptionException(msg)
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
return await loop.run_in_executor(self._executor, transcribe)
|
return await loop.run_in_executor(self._executor, transcribe)
|
||||||
|
|
||||||
@@ -104,3 +115,27 @@ class OpenAIWhisper(ISpeechToTextService):
|
|||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def fix_overlap(llm: ILLMService, segments: List[str]):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": (
|
||||||
|
'You are a helpful assistant designed to fix transcription segments. You will receive '
|
||||||
|
'a string array with transcriptions segments that have overlap, your job is to only '
|
||||||
|
'remove duplicated words between segments and join them into one single text. You cannot '
|
||||||
|
'correct phrasing or wording, your job is to simply make sure that there is no repeated words '
|
||||||
|
'between the end of a segment and at the start of the next segment. Your response must be formatted '
|
||||||
|
'as JSON in the following format: {"fixed_text": ""}'
|
||||||
|
)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"[\n" + ",\n".join(f' "{segment}"' for segment in segments) + "\n]"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
response = await llm.prediction(
|
||||||
|
GPTModels.GPT_4_O, messages, ["fixed_text"], 0.1
|
||||||
|
)
|
||||||
|
return response["fixed_text"]
|
||||||
|
|||||||
17
poetry.lock
generated
17
poetry.lock
generated
@@ -3660,6 +3660,21 @@ files = [
|
|||||||
{file = "tbb-2021.13.1-py3-none-win_amd64.whl", hash = "sha256:cbf024b2463fdab3ebe3fa6ff453026358e6b903839c80d647e08ad6d0796ee9"},
|
{file = "tbb-2021.13.1-py3-none-win_amd64.whl", hash = "sha256:cbf024b2463fdab3ebe3fa6ff453026358e6b903839c80d647e08ad6d0796ee9"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tenacity"
|
||||||
|
version = "9.0.0"
|
||||||
|
description = "Retry code until it succeeds"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"},
|
||||||
|
{file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
doc = ["reno", "sphinx"]
|
||||||
|
test = ["pytest", "tornado (>=4.5)", "typeguard"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "threadpoolctl"
|
name = "threadpoolctl"
|
||||||
version = "3.5.0"
|
version = "3.5.0"
|
||||||
@@ -4533,4 +4548,4 @@ multidict = ">=4.0"
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0"
|
content-hash = "87621bcf9b5e2914b151dd2352141d26e6afbe012f0fb7a30ebcaa8bea0beab0"
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ tiktoken = "0.7.0"
|
|||||||
gunicorn = "^23.0.0"
|
gunicorn = "^23.0.0"
|
||||||
librosa = "^0.10.2.post1"
|
librosa = "^0.10.2.post1"
|
||||||
soundfile = "^0.12.1"
|
soundfile = "^0.12.1"
|
||||||
|
tenacity = "^9.0.0"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|||||||
Reference in New Issue
Block a user