Merged in release/async (pull request #41)

Now grading is partitioned into smaller chunks so that Whisper doesn't struggle

Approved-by: Tiago Ribeiro
This commit is contained in:
carlos.mesquita
2024-11-27 08:25:52 +00:00
committed by Tiago Ribeiro
9 changed files with 228 additions and 59 deletions

View File

@@ -39,30 +39,6 @@ async def grade_speaking_task(
return await grade_controller.grade_speaking_task(task, form, background_tasks)
@grade_router.get(
'/pending/{sessionId}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_pending_evaluations(
session_id: str,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.get_evaluations(session_id, "pending")
@grade_router.get(
'/completed/{sessionId}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_completed_evaluations(
session_id: str,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.get_evaluations(session_id, "completed")
@grade_router.post(
'/summary',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]

View File

@@ -20,10 +20,6 @@ class IGradeController(ABC):
):
pass
@abstractmethod
async def get_evaluations(self, session_id: str, status: str):
pass
@abstractmethod
async def grade_short_answers(self, data: Dict):
pass

View File

@@ -90,9 +90,6 @@ class GradeController(IGradeController):
return Response(status_code=200)
async def get_evaluations(self, session_id: str, status: str):
return await self._evaluation_service.get_evaluations(session_id, status)
async def grade_short_answers(self, data: Dict):
return await self._service.grade_short_answers(data)

View File

@@ -27,8 +27,3 @@ class IEvaluationService(ABC):
background_tasks: BackgroundTasks
):
pass
@abstractmethod
async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
pass

View File

@@ -4,5 +4,5 @@ from abc import ABC, abstractmethod
class ISpeechToTextService(ABC):
@abstractmethod
async def speech_to_text(self, file_path):
async def speech_to_text(self, file: bytes):
pass

View File

@@ -101,12 +101,3 @@ class EvaluationService(IEvaluationService):
}
}
)
async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
return await self._db.find(
"evaluation",
{
"session_id": session_id,
"status": status
}
)

View File

@@ -1,9 +1,13 @@
import os
import threading
import whisper
import asyncio
import numpy as np
import soundfile as sf
import librosa
from concurrent.futures import ThreadPoolExecutor
from typing import Dict
from logging import getLogger
from whisper import Whisper
from app.services.abc import ISpeechToTextService
@@ -24,6 +28,7 @@ class OpenAIWhisper(ISpeechToTextService):
self._lock = threading.Lock()
self._next_model_id = 0
self._is_closed = False
self._logger = getLogger(__name__)
for i in range(num_models):
self._models[i] = whisper.load_model(self._model_name, in_memory=True)
@@ -39,18 +44,53 @@ class OpenAIWhisper(ISpeechToTextService):
self._next_model_id = (self._next_model_id + 1) % self._num_models
return self._models[model_id]
async def speech_to_text(self, file_path: str) -> str:
if not os.path.exists(file_path):
raise FileNotFoundError(f"File {file_path} not found.")
async def speech_to_text(self, path: str) -> str:
def transcribe():
model = self.get_model()
return model.transcribe(
file_path,
fp16=False,
language='English',
verbose=False
)["text"]
try:
audio, sr = sf.read(path)
# Convert to mono first to reduce memory usage
if len(audio.shape) > 1:
audio = audio.mean(axis=1)
# Resample from the file's native sample rate to 16kHz
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
# Normalize to [-1, 1] range
audio = audio.astype(np.float32)
if np.max(np.abs(audio)) > 0:
audio = audio / np.max(np.abs(audio))
# Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
max_samples = 480000
if len(audio) > max_samples:
chunks = []
for i in range(0, len(audio), max_samples):
chunk = audio[i:i + max_samples]
chunks.append(chunk)
model = self.get_model()
texts = []
for chunk in chunks:
result = model.transcribe(
chunk,
fp16=False,
language='English',
verbose=False
)["text"]
texts.append(result)
return " ".join(texts)
else:
model = self.get_model()
return model.transcribe(
audio,
fp16=False,
language='English',
verbose=False
)["text"]
except Exception as e:
raise
loop = asyncio.get_running_loop()
return await loop.run_in_executor(self._executor, transcribe)

174
poetry.lock generated
View File

@@ -253,6 +253,20 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
[[package]]
name = "audioread"
version = "3.0.1"
description = "Multi-library, cross-platform audio decoding."
optional = false
python-versions = ">=3.6"
files = [
{file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"},
{file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"},
]
[package.extras]
test = ["tox"]
[[package]]
name = "boto3"
version = "1.34.131"
@@ -585,6 +599,17 @@ ssh = ["bcrypt (>=3.1.5)"]
test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
test-randomorder = ["pytest-randomly"]
[[package]]
name = "decorator"
version = "5.1.1"
description = "Decorators for Humans"
optional = false
python-versions = ">=3.5"
files = [
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]
[[package]]
name = "dependency-injector"
version = "4.42.0"
@@ -1580,6 +1605,56 @@ files = [
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
]
[[package]]
name = "lazy-loader"
version = "0.4"
description = "Makes it easy to load subpackages and functions on demand."
optional = false
python-versions = ">=3.7"
files = [
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
{file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
]
[package.dependencies]
packaging = "*"
[package.extras]
dev = ["changelist (==0.5)"]
lint = ["pre-commit (==3.7.0)"]
test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
[[package]]
name = "librosa"
version = "0.10.2.post1"
description = "Python module for audio and music processing"
optional = false
python-versions = ">=3.7"
files = [
{file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"},
{file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"},
]
[package.dependencies]
audioread = ">=2.1.9"
decorator = ">=4.3.0"
joblib = ">=0.14"
lazy-loader = ">=0.1"
msgpack = ">=1.0"
numba = ">=0.51.0"
numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2"
pooch = ">=1.1"
scikit-learn = ">=0.20.0"
scipy = ">=1.2.0"
soundfile = ">=0.12.1"
soxr = ">=0.3.2"
typing-extensions = ">=4.1.1"
[package.extras]
display = ["matplotlib (>=3.5.0)"]
docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"]
tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"]
[[package]]
name = "llvmlite"
version = "0.43.0"
@@ -2498,6 +2573,43 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
typing = ["typing-extensions"]
xmp = ["defusedxml"]
[[package]]
name = "platformdirs"
version = "4.3.6"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
files = [
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
]
[package.extras]
docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
type = ["mypy (>=1.11.2)"]
[[package]]
name = "pooch"
version = "1.8.2"
description = "A friend to fetch your data files"
optional = false
python-versions = ">=3.7"
files = [
{file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"},
{file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"},
]
[package.dependencies]
packaging = ">=20.0"
platformdirs = ">=2.5.0"
requests = ">=2.19.0"
[package.extras]
progress = ["tqdm (>=4.41.0,<5.0.0)"]
sftp = ["paramiko (>=2.7.0)"]
xxhash = ["xxhash (>=1.4.3)"]
[[package]]
name = "proto-plus"
version = "1.24.0"
@@ -3441,6 +3553,66 @@ files = [
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "soundfile"
version = "0.12.1"
description = "An audio library based on libsndfile, CFFI and NumPy"
optional = false
python-versions = "*"
files = [
{file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"},
{file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"},
{file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"},
{file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"},
{file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"},
{file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"},
{file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"},
{file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"},
]
[package.dependencies]
cffi = ">=1.0"
[package.extras]
numpy = ["numpy"]
[[package]]
name = "soxr"
version = "0.5.0.post1"
description = "High quality, one-dimensional sample-rate conversion library"
optional = false
python-versions = ">=3.9"
files = [
{file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"},
{file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"},
{file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"},
{file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"},
{file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"},
{file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"},
{file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"},
{file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"},
{file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"},
{file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"},
{file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"},
{file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"},
{file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"},
{file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"},
{file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"},
{file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"},
{file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"},
{file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"},
{file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"},
{file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"},
{file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"},
]
[package.dependencies]
numpy = "*"
[package.extras]
docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
test = ["pytest"]
[[package]]
name = "starlette"
version = "0.37.2"
@@ -4361,4 +4533,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "d3f779a2521db9bb040d9c2e76f30e7c6cb584119b3bd50454f391f8c7ef368f"
content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0"

View File

@@ -30,6 +30,8 @@ shortuuid = "1.0.13"
pandas = "2.2.3"
tiktoken = "0.7.0"
gunicorn = "^23.0.0"
librosa = "^0.10.2.post1"
soundfile = "^0.12.1"
[build-system]