Merged in release/async (pull request #41)

Now grading is partitioned into smaller chunks so that whisper doesnt struggle

Approved-by: Tiago Ribeiro
This commit is contained in:
carlos.mesquita
2024-11-27 08:25:52 +00:00
committed by Tiago Ribeiro
9 changed files with 228 additions and 59 deletions

View File

@@ -39,30 +39,6 @@ async def grade_speaking_task(
return await grade_controller.grade_speaking_task(task, form, background_tasks) return await grade_controller.grade_speaking_task(task, form, background_tasks)
@grade_router.get(
'/pending/{sessionId}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_pending_evaluations(
session_id: str,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.get_evaluations(session_id, "pending")
@grade_router.get(
'/completed/{sessionId}',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_completed_evaluations(
session_id: str,
grade_controller: IGradeController = Depends(Provide[controller])
):
return await grade_controller.get_evaluations(session_id, "completed")
@grade_router.post( @grade_router.post(
'/summary', '/summary',
dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]

View File

@@ -20,10 +20,6 @@ class IGradeController(ABC):
): ):
pass pass
@abstractmethod
async def get_evaluations(self, session_id: str, status: str):
pass
@abstractmethod @abstractmethod
async def grade_short_answers(self, data: Dict): async def grade_short_answers(self, data: Dict):
pass pass

View File

@@ -90,9 +90,6 @@ class GradeController(IGradeController):
return Response(status_code=200) return Response(status_code=200)
async def get_evaluations(self, session_id: str, status: str):
return await self._evaluation_service.get_evaluations(session_id, status)
async def grade_short_answers(self, data: Dict): async def grade_short_answers(self, data: Dict):
return await self._service.grade_short_answers(data) return await self._service.grade_short_answers(data)

View File

@@ -27,8 +27,3 @@ class IEvaluationService(ABC):
background_tasks: BackgroundTasks background_tasks: BackgroundTasks
): ):
pass pass
@abstractmethod
async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
pass

View File

@@ -4,5 +4,5 @@ from abc import ABC, abstractmethod
class ISpeechToTextService(ABC): class ISpeechToTextService(ABC):
@abstractmethod @abstractmethod
async def speech_to_text(self, file_path): async def speech_to_text(self, file: bytes):
pass pass

View File

@@ -101,12 +101,3 @@ class EvaluationService(IEvaluationService):
} }
} }
) )
async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
return await self._db.find(
"evaluation",
{
"session_id": session_id,
"status": status
}
)

View File

@@ -1,9 +1,13 @@
import os
import threading import threading
import whisper import whisper
import asyncio import asyncio
import numpy as np
import soundfile as sf
import librosa
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import Dict from typing import Dict
from logging import getLogger
from whisper import Whisper from whisper import Whisper
from app.services.abc import ISpeechToTextService from app.services.abc import ISpeechToTextService
@@ -24,6 +28,7 @@ class OpenAIWhisper(ISpeechToTextService):
self._lock = threading.Lock() self._lock = threading.Lock()
self._next_model_id = 0 self._next_model_id = 0
self._is_closed = False self._is_closed = False
self._logger = getLogger(__name__)
for i in range(num_models): for i in range(num_models):
self._models[i] = whisper.load_model(self._model_name, in_memory=True) self._models[i] = whisper.load_model(self._model_name, in_memory=True)
@@ -39,18 +44,53 @@ class OpenAIWhisper(ISpeechToTextService):
self._next_model_id = (self._next_model_id + 1) % self._num_models self._next_model_id = (self._next_model_id + 1) % self._num_models
return self._models[model_id] return self._models[model_id]
async def speech_to_text(self, file_path: str) -> str: async def speech_to_text(self, path: str) -> str:
if not os.path.exists(file_path):
raise FileNotFoundError(f"File {file_path} not found.")
def transcribe(): def transcribe():
model = self.get_model() try:
return model.transcribe( audio, sr = sf.read(path)
file_path,
fp16=False, # Convert to mono first to reduce memory usage
language='English', if len(audio.shape) > 1:
verbose=False audio = audio.mean(axis=1)
)["text"]
# Resample from 48kHz to 16kHz
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
# Normalize to [-1, 1] range
audio = audio.astype(np.float32)
if np.max(np.abs(audio)) > 0:
audio = audio / np.max(np.abs(audio))
# Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
max_samples = 480000
if len(audio) > max_samples:
chunks = []
for i in range(0, len(audio), max_samples):
chunk = audio[i:i + max_samples]
chunks.append(chunk)
model = self.get_model()
texts = []
for chunk in chunks:
result = model.transcribe(
chunk,
fp16=False,
language='English',
verbose=False
)["text"]
texts.append(result)
return " ".join(texts)
else:
model = self.get_model()
return model.transcribe(
audio,
fp16=False,
language='English',
verbose=False
)["text"]
except Exception as e:
raise
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
return await loop.run_in_executor(self._executor, transcribe) return await loop.run_in_executor(self._executor, transcribe)

174
poetry.lock generated
View File

@@ -253,6 +253,20 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
[[package]]
name = "audioread"
version = "3.0.1"
description = "Multi-library, cross-platform audio decoding."
optional = false
python-versions = ">=3.6"
files = [
{file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"},
{file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"},
]
[package.extras]
test = ["tox"]
[[package]] [[package]]
name = "boto3" name = "boto3"
version = "1.34.131" version = "1.34.131"
@@ -585,6 +599,17 @@ ssh = ["bcrypt (>=3.1.5)"]
test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
test-randomorder = ["pytest-randomly"] test-randomorder = ["pytest-randomly"]
[[package]]
name = "decorator"
version = "5.1.1"
description = "Decorators for Humans"
optional = false
python-versions = ">=3.5"
files = [
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]
[[package]] [[package]]
name = "dependency-injector" name = "dependency-injector"
version = "4.42.0" version = "4.42.0"
@@ -1580,6 +1605,56 @@ files = [
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
] ]
[[package]]
name = "lazy-loader"
version = "0.4"
description = "Makes it easy to load subpackages and functions on demand."
optional = false
python-versions = ">=3.7"
files = [
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
{file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
]
[package.dependencies]
packaging = "*"
[package.extras]
dev = ["changelist (==0.5)"]
lint = ["pre-commit (==3.7.0)"]
test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
[[package]]
name = "librosa"
version = "0.10.2.post1"
description = "Python module for audio and music processing"
optional = false
python-versions = ">=3.7"
files = [
{file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"},
{file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"},
]
[package.dependencies]
audioread = ">=2.1.9"
decorator = ">=4.3.0"
joblib = ">=0.14"
lazy-loader = ">=0.1"
msgpack = ">=1.0"
numba = ">=0.51.0"
numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2"
pooch = ">=1.1"
scikit-learn = ">=0.20.0"
scipy = ">=1.2.0"
soundfile = ">=0.12.1"
soxr = ">=0.3.2"
typing-extensions = ">=4.1.1"
[package.extras]
display = ["matplotlib (>=3.5.0)"]
docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"]
tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"]
[[package]] [[package]]
name = "llvmlite" name = "llvmlite"
version = "0.43.0" version = "0.43.0"
@@ -2498,6 +2573,43 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
typing = ["typing-extensions"] typing = ["typing-extensions"]
xmp = ["defusedxml"] xmp = ["defusedxml"]
[[package]]
name = "platformdirs"
version = "4.3.6"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
files = [
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
]
[package.extras]
docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
type = ["mypy (>=1.11.2)"]
[[package]]
name = "pooch"
version = "1.8.2"
description = "A friend to fetch your data files"
optional = false
python-versions = ">=3.7"
files = [
{file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"},
{file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"},
]
[package.dependencies]
packaging = ">=20.0"
platformdirs = ">=2.5.0"
requests = ">=2.19.0"
[package.extras]
progress = ["tqdm (>=4.41.0,<5.0.0)"]
sftp = ["paramiko (>=2.7.0)"]
xxhash = ["xxhash (>=1.4.3)"]
[[package]] [[package]]
name = "proto-plus" name = "proto-plus"
version = "1.24.0" version = "1.24.0"
@@ -3441,6 +3553,66 @@ files = [
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
] ]
[[package]]
name = "soundfile"
version = "0.12.1"
description = "An audio library based on libsndfile, CFFI and NumPy"
optional = false
python-versions = "*"
files = [
{file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"},
{file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"},
{file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"},
{file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"},
{file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"},
{file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"},
{file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"},
{file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"},
]
[package.dependencies]
cffi = ">=1.0"
[package.extras]
numpy = ["numpy"]
[[package]]
name = "soxr"
version = "0.5.0.post1"
description = "High quality, one-dimensional sample-rate conversion library"
optional = false
python-versions = ">=3.9"
files = [
{file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"},
{file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"},
{file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"},
{file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"},
{file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"},
{file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"},
{file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"},
{file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"},
{file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"},
{file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"},
{file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"},
{file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"},
{file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"},
{file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"},
{file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"},
{file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"},
{file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"},
{file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"},
{file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"},
{file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"},
{file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"},
]
[package.dependencies]
numpy = "*"
[package.extras]
docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
test = ["pytest"]
[[package]] [[package]]
name = "starlette" name = "starlette"
version = "0.37.2" version = "0.37.2"
@@ -4361,4 +4533,4 @@ multidict = ">=4.0"
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "d3f779a2521db9bb040d9c2e76f30e7c6cb584119b3bd50454f391f8c7ef368f" content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0"

View File

@@ -30,6 +30,8 @@ shortuuid = "1.0.13"
pandas = "2.2.3" pandas = "2.2.3"
tiktoken = "0.7.0" tiktoken = "0.7.0"
gunicorn = "^23.0.0" gunicorn = "^23.0.0"
librosa = "^0.10.2.post1"
soundfile = "^0.12.1"
[build-system] [build-system]