Merged in release/async (pull request #41)
Now grading is partitioned into smaller chunks so that whisper doesnt struggle Approved-by: Tiago Ribeiro
This commit is contained in:
@@ -39,30 +39,6 @@ async def grade_speaking_task(
|
||||
return await grade_controller.grade_speaking_task(task, form, background_tasks)
|
||||
|
||||
|
||||
@grade_router.get(
|
||||
'/pending/{sessionId}',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
)
|
||||
@inject
|
||||
async def get_pending_evaluations(
|
||||
session_id: str,
|
||||
grade_controller: IGradeController = Depends(Provide[controller])
|
||||
):
|
||||
return await grade_controller.get_evaluations(session_id, "pending")
|
||||
|
||||
|
||||
@grade_router.get(
|
||||
'/completed/{sessionId}',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
)
|
||||
@inject
|
||||
async def get_completed_evaluations(
|
||||
session_id: str,
|
||||
grade_controller: IGradeController = Depends(Provide[controller])
|
||||
):
|
||||
return await grade_controller.get_evaluations(session_id, "completed")
|
||||
|
||||
|
||||
@grade_router.post(
|
||||
'/summary',
|
||||
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
|
||||
|
||||
@@ -20,10 +20,6 @@ class IGradeController(ABC):
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_evaluations(self, session_id: str, status: str):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def grade_short_answers(self, data: Dict):
|
||||
pass
|
||||
|
||||
@@ -90,9 +90,6 @@ class GradeController(IGradeController):
|
||||
|
||||
return Response(status_code=200)
|
||||
|
||||
async def get_evaluations(self, session_id: str, status: str):
|
||||
return await self._evaluation_service.get_evaluations(session_id, status)
|
||||
|
||||
async def grade_short_answers(self, data: Dict):
|
||||
return await self._service.grade_short_answers(data)
|
||||
|
||||
|
||||
@@ -27,8 +27,3 @@ class IEvaluationService(ABC):
|
||||
background_tasks: BackgroundTasks
|
||||
):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
|
||||
pass
|
||||
|
||||
|
||||
@@ -4,5 +4,5 @@ from abc import ABC, abstractmethod
|
||||
class ISpeechToTextService(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def speech_to_text(self, file_path):
|
||||
async def speech_to_text(self, file: bytes):
|
||||
pass
|
||||
|
||||
@@ -101,12 +101,3 @@ class EvaluationService(IEvaluationService):
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
|
||||
return await self._db.find(
|
||||
"evaluation",
|
||||
{
|
||||
"session_id": session_id,
|
||||
"status": status
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
import os
|
||||
import threading
|
||||
import whisper
|
||||
import asyncio
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import librosa
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Dict
|
||||
|
||||
from logging import getLogger
|
||||
from whisper import Whisper
|
||||
|
||||
from app.services.abc import ISpeechToTextService
|
||||
@@ -24,6 +28,7 @@ class OpenAIWhisper(ISpeechToTextService):
|
||||
self._lock = threading.Lock()
|
||||
self._next_model_id = 0
|
||||
self._is_closed = False
|
||||
self._logger = getLogger(__name__)
|
||||
|
||||
for i in range(num_models):
|
||||
self._models[i] = whisper.load_model(self._model_name, in_memory=True)
|
||||
@@ -39,18 +44,53 @@ class OpenAIWhisper(ISpeechToTextService):
|
||||
self._next_model_id = (self._next_model_id + 1) % self._num_models
|
||||
return self._models[model_id]
|
||||
|
||||
async def speech_to_text(self, file_path: str) -> str:
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File {file_path} not found.")
|
||||
|
||||
async def speech_to_text(self, path: str) -> str:
|
||||
def transcribe():
|
||||
model = self.get_model()
|
||||
return model.transcribe(
|
||||
file_path,
|
||||
fp16=False,
|
||||
language='English',
|
||||
verbose=False
|
||||
)["text"]
|
||||
try:
|
||||
audio, sr = sf.read(path)
|
||||
|
||||
# Convert to mono first to reduce memory usage
|
||||
if len(audio.shape) > 1:
|
||||
audio = audio.mean(axis=1)
|
||||
|
||||
# Resample from 48kHz to 16kHz
|
||||
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
|
||||
|
||||
# Normalize to [-1, 1] range
|
||||
audio = audio.astype(np.float32)
|
||||
if np.max(np.abs(audio)) > 0:
|
||||
audio = audio / np.max(np.abs(audio))
|
||||
|
||||
# Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
|
||||
max_samples = 480000
|
||||
if len(audio) > max_samples:
|
||||
chunks = []
|
||||
for i in range(0, len(audio), max_samples):
|
||||
chunk = audio[i:i + max_samples]
|
||||
chunks.append(chunk)
|
||||
|
||||
model = self.get_model()
|
||||
texts = []
|
||||
for chunk in chunks:
|
||||
result = model.transcribe(
|
||||
chunk,
|
||||
fp16=False,
|
||||
language='English',
|
||||
verbose=False
|
||||
)["text"]
|
||||
texts.append(result)
|
||||
return " ".join(texts)
|
||||
else:
|
||||
model = self.get_model()
|
||||
return model.transcribe(
|
||||
audio,
|
||||
fp16=False,
|
||||
language='English',
|
||||
verbose=False
|
||||
)["text"]
|
||||
|
||||
except Exception as e:
|
||||
raise
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
return await loop.run_in_executor(self._executor, transcribe)
|
||||
|
||||
174
poetry.lock
generated
174
poetry.lock
generated
@@ -253,6 +253,20 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
|
||||
tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||
tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
|
||||
|
||||
[[package]]
|
||||
name = "audioread"
|
||||
version = "3.0.1"
|
||||
description = "Multi-library, cross-platform audio decoding."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"},
|
||||
{file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["tox"]
|
||||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.34.131"
|
||||
@@ -585,6 +599,17 @@ ssh = ["bcrypt (>=3.1.5)"]
|
||||
test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
|
||||
test-randomorder = ["pytest-randomly"]
|
||||
|
||||
[[package]]
|
||||
name = "decorator"
|
||||
version = "5.1.1"
|
||||
description = "Decorators for Humans"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
files = [
|
||||
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
|
||||
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dependency-injector"
|
||||
version = "4.42.0"
|
||||
@@ -1580,6 +1605,56 @@ files = [
|
||||
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-loader"
|
||||
version = "0.4"
|
||||
description = "Makes it easy to load subpackages and functions on demand."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
|
||||
{file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
packaging = "*"
|
||||
|
||||
[package.extras]
|
||||
dev = ["changelist (==0.5)"]
|
||||
lint = ["pre-commit (==3.7.0)"]
|
||||
test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "librosa"
|
||||
version = "0.10.2.post1"
|
||||
description = "Python module for audio and music processing"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"},
|
||||
{file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
audioread = ">=2.1.9"
|
||||
decorator = ">=4.3.0"
|
||||
joblib = ">=0.14"
|
||||
lazy-loader = ">=0.1"
|
||||
msgpack = ">=1.0"
|
||||
numba = ">=0.51.0"
|
||||
numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2"
|
||||
pooch = ">=1.1"
|
||||
scikit-learn = ">=0.20.0"
|
||||
scipy = ">=1.2.0"
|
||||
soundfile = ">=0.12.1"
|
||||
soxr = ">=0.3.2"
|
||||
typing-extensions = ">=4.1.1"
|
||||
|
||||
[package.extras]
|
||||
display = ["matplotlib (>=3.5.0)"]
|
||||
docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"]
|
||||
tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"]
|
||||
|
||||
[[package]]
|
||||
name = "llvmlite"
|
||||
version = "0.43.0"
|
||||
@@ -2498,6 +2573,43 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
|
||||
typing = ["typing-extensions"]
|
||||
xmp = ["defusedxml"]
|
||||
|
||||
[[package]]
|
||||
name = "platformdirs"
|
||||
version = "4.3.6"
|
||||
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
|
||||
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
|
||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
|
||||
type = ["mypy (>=1.11.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "pooch"
|
||||
version = "1.8.2"
|
||||
description = "A friend to fetch your data files"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"},
|
||||
{file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
packaging = ">=20.0"
|
||||
platformdirs = ">=2.5.0"
|
||||
requests = ">=2.19.0"
|
||||
|
||||
[package.extras]
|
||||
progress = ["tqdm (>=4.41.0,<5.0.0)"]
|
||||
sftp = ["paramiko (>=2.7.0)"]
|
||||
xxhash = ["xxhash (>=1.4.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "proto-plus"
|
||||
version = "1.24.0"
|
||||
@@ -3441,6 +3553,66 @@ files = [
|
||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soundfile"
|
||||
version = "0.12.1"
|
||||
description = "An audio library based on libsndfile, CFFI and NumPy"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"},
|
||||
{file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"},
|
||||
{file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"},
|
||||
{file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"},
|
||||
{file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"},
|
||||
{file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"},
|
||||
{file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"},
|
||||
{file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
cffi = ">=1.0"
|
||||
|
||||
[package.extras]
|
||||
numpy = ["numpy"]
|
||||
|
||||
[[package]]
|
||||
name = "soxr"
|
||||
version = "0.5.0.post1"
|
||||
description = "High quality, one-dimensional sample-rate conversion library"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"},
|
||||
{file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"},
|
||||
{file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"},
|
||||
{file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"},
|
||||
{file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"},
|
||||
{file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"},
|
||||
{file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"},
|
||||
{file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"},
|
||||
{file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"},
|
||||
{file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"},
|
||||
{file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"},
|
||||
{file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"},
|
||||
{file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"},
|
||||
{file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"},
|
||||
{file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"},
|
||||
{file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"},
|
||||
{file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"},
|
||||
{file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"},
|
||||
{file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"},
|
||||
{file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"},
|
||||
{file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = "*"
|
||||
|
||||
[package.extras]
|
||||
docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
|
||||
test = ["pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "starlette"
|
||||
version = "0.37.2"
|
||||
@@ -4361,4 +4533,4 @@ multidict = ">=4.0"
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "d3f779a2521db9bb040d9c2e76f30e7c6cb584119b3bd50454f391f8c7ef368f"
|
||||
content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0"
|
||||
|
||||
@@ -30,6 +30,8 @@ shortuuid = "1.0.13"
|
||||
pandas = "2.2.3"
|
||||
tiktoken = "0.7.0"
|
||||
gunicorn = "^23.0.0"
|
||||
librosa = "^0.10.2.post1"
|
||||
soundfile = "^0.12.1"
|
||||
|
||||
|
||||
[build-system]
|
||||
|
||||
Reference in New Issue
Block a user