Merged in release/async (pull request #41)

Grading is now partitioned into smaller chunks so that Whisper doesn't struggle.

Approved-by: Tiago Ribeiro
2024-11-27 08:25:52 +00:00
parent 54a01f9631 a2d1133915
commit 93d9b700fd
9 changed files with 228 additions and 59 deletions
--- a/app/api/grade.py
+++ b/app/api/grade.py
@@ -39,30 +39,6 @@ async def grade_speaking_task(
    return await grade_controller.grade_speaking_task(task, form, background_tasks)
@grade_router.get(
    # The placeholder has to be spelled exactly like the handler argument:
    # path segments are bound by name, and an argument with no matching
    # placeholder is treated as a required query parameter instead.
    '/pending/{session_id}',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_pending_evaluations(
    session_id: str,
    grade_controller: IGradeController = Depends(Provide[controller])
):
    """Return the evaluations of a session whose status is "pending".

    Args:
        session_id: Identifier taken from the last segment of the URL.
        grade_controller: Controller supplied by the dependency container.

    Returns:
        Whatever ``grade_controller.get_evaluations`` yields for the
        session and the "pending" status.
    """
    return await grade_controller.get_evaluations(session_id, "pending")
@grade_router.get(
    # The placeholder has to be spelled exactly like the handler argument:
    # path segments are bound by name, and an argument with no matching
    # placeholder is treated as a required query parameter instead.
    '/completed/{session_id}',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
)
@inject
async def get_completed_evaluations(
    session_id: str,
    grade_controller: IGradeController = Depends(Provide[controller])
):
    """Return the evaluations of a session whose status is "completed".

    Args:
        session_id: Identifier taken from the last segment of the URL.
        grade_controller: Controller supplied by the dependency container.

    Returns:
        Whatever ``grade_controller.get_evaluations`` yields for the
        session and the "completed" status.
    """
    return await grade_controller.get_evaluations(session_id, "completed")
@grade_router.post(
    '/summary',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
--- a/app/controllers/abc/grade.py
+++ b/app/controllers/abc/grade.py
@@ -20,10 +20,6 @@ class IGradeController(ABC):
    ):
        pass
    @abstractmethod
    async def get_evaluations(self, session_id: str, status: str):
        """Fetch the evaluations of ``session_id`` that are in ``status``.

        Implementations decide where the evaluations come from; this
        declaration only fixes the call shape.
        """
        return None
    @abstractmethod
    async def grade_short_answers(self, data: Dict):
        """Grade the short answers described by ``data``.

        The layout of ``data`` is defined by the implementations and is
        not visible from this declaration.
        """
        return None
--- a/app/controllers/impl/grade.py
+++ b/app/controllers/impl/grade.py
@@ -90,9 +90,6 @@ class GradeController(IGradeController):
        return Response(status_code=200)
    async def get_evaluations(self, session_id: str, status: str):
        """Hand the lookup to the evaluation service and relay its answer."""
        evaluations = await self._evaluation_service.get_evaluations(
            session_id,
            status,
        )
        return evaluations
    async def grade_short_answers(self, data: Dict):
        """Pass ``data`` to the grading service untouched and relay its result."""
        graded = await self._service.grade_short_answers(data)
        return graded
--- a/app/services/abc/evaluation.py
+++ b/app/services/abc/evaluation.py
@@ -27,8 +27,3 @@ class IEvaluationService(ABC):
        background_tasks: BackgroundTasks
    ):
        pass
    @abstractmethod
    async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
        """Fetch the evaluations of ``session_id`` that are in ``status``.

        Implementations are expected to answer with a list of dictionaries,
        one per evaluation.
        """
        return None
--- a/app/services/abc/third_parties/stt.py
+++ b/app/services/abc/third_parties/stt.py
@@ -4,5 +4,5 @@ from abc import ABC, abstractmethod
 class ISpeechToTextService(ABC):
    @abstractmethod
-    async def speech_to_text(self, file_path):
+    async def speech_to_text(self, file: bytes):
        pass
--- a/app/services/impl/exam/evaluation.py
+++ b/app/services/impl/exam/evaluation.py
@@ -101,12 +101,3 @@ class EvaluationService(IEvaluationService):
                    }
                }
            )
    async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
        """Query the "evaluation" collection for one session and one status."""
        criteria = {"session_id": session_id, "status": status}
        return await self._db.find("evaluation", criteria)
--- a/app/services/impl/third_parties/whisper.py
+++ b/app/services/impl/third_parties/whisper.py
@@ -1,9 +1,13 @@
 import os
 import threading
 import whisper
 import asyncio
 import numpy as np
 import soundfile as sf
 import librosa
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict
 from logging import getLogger
 from whisper import Whisper
 from app.services.abc import ISpeechToTextService
@@ -24,6 +28,7 @@ class OpenAIWhisper(ISpeechToTextService):
        self._lock = threading.Lock()
        self._next_model_id = 0
        self._is_closed = False
        self._logger = getLogger(__name__)
        for i in range(num_models):
            self._models[i] = whisper.load_model(self._model_name, in_memory=True)
@@ -39,18 +44,53 @@ class OpenAIWhisper(ISpeechToTextService):
            self._next_model_id = (self._next_model_id + 1) % self._num_models
            return self._models[model_id]
-    async def speech_to_text(self, file_path: str) -> str:
+    async def speech_to_text(self, path: str) -> str:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File {file_path} not found.")
        def transcribe():
-            model = self.get_model()
+            try:
-            return model.transcribe(
+                audio, sr = sf.read(path)
-                file_path,
+
-                fp16=False,
+                # Convert to mono first to reduce memory usage
-                language='English',
+                if len(audio.shape) > 1:
-                verbose=False
+                    audio = audio.mean(axis=1)
-            )["text"]
+
                # Resample from 48kHz to 16kHz
                audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
                # Normalize to [-1, 1] range
                audio = audio.astype(np.float32)
                if np.max(np.abs(audio)) > 0:
                    audio = audio / np.max(np.abs(audio))
                # Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
                max_samples = 480000
                if len(audio) > max_samples:
                    chunks = []
                    for i in range(0, len(audio), max_samples):
                        chunk = audio[i:i + max_samples]
                        chunks.append(chunk)
                    model = self.get_model()
                    texts = []
                    for chunk in chunks:
                        result = model.transcribe(
                            chunk,
                            fp16=False,
                            language='English',
                            verbose=False
                        )["text"]
                        texts.append(result)
                    return " ".join(texts)
                else:
                    model = self.get_model()
                    return model.transcribe(
                        audio,
                        fp16=False,
                        language='English',
                        verbose=False
                    )["text"]
            except Exception as e:
                raise
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, transcribe)
--- a/poetry.lock
+++ b/poetry.lock
@@ -253,6 +253,20 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
 tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
 [[package]]
 name = "audioread"
 version = "3.0.1"
 description = "Multi-library, cross-platform audio decoding."
 optional = false
 python-versions = ">=3.6"
 files = [
    {file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"},
    {file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"},
 ]
 [package.extras]
 test = ["tox"]
 [[package]]
 name = "boto3"
 version = "1.34.131"
@@ -585,6 +599,17 @@ ssh = ["bcrypt (>=3.1.5)"]
 test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
 test-randomorder = ["pytest-randomly"]
 [[package]]
 name = "decorator"
 version = "5.1.1"
 description = "Decorators for Humans"
 optional = false
 python-versions = ">=3.5"
 files = [
    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
 ]
 [[package]]
 name = "dependency-injector"
 version = "4.42.0"
@@ -1580,6 +1605,56 @@ files = [
    {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
 ]
 [[package]]
 name = "lazy-loader"
 version = "0.4"
 description = "Makes it easy to load subpackages and functions on demand."
 optional = false
 python-versions = ">=3.7"
 files = [
    {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
    {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
 ]
 [package.dependencies]
 packaging = "*"
 [package.extras]
 dev = ["changelist (==0.5)"]
 lint = ["pre-commit (==3.7.0)"]
 test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
 [[package]]
 name = "librosa"
 version = "0.10.2.post1"
 description = "Python module for audio and music processing"
 optional = false
 python-versions = ">=3.7"
 files = [
    {file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"},
    {file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"},
 ]
 [package.dependencies]
 audioread = ">=2.1.9"
 decorator = ">=4.3.0"
 joblib = ">=0.14"
 lazy-loader = ">=0.1"
 msgpack = ">=1.0"
 numba = ">=0.51.0"
 numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2"
 pooch = ">=1.1"
 scikit-learn = ">=0.20.0"
 scipy = ">=1.2.0"
 soundfile = ">=0.12.1"
 soxr = ">=0.3.2"
 typing-extensions = ">=4.1.1"
 [package.extras]
 display = ["matplotlib (>=3.5.0)"]
 docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"]
 tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"]
 [[package]]
 name = "llvmlite"
 version = "0.43.0"
@@ -2498,6 +2573,43 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
 typing = ["typing-extensions"]
 xmp = ["defusedxml"]
 [[package]]
 name = "platformdirs"
 version = "4.3.6"
 description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
 optional = false
 python-versions = ">=3.8"
 files = [
    {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
    {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
 ]
 [package.extras]
 docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
 test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
 type = ["mypy (>=1.11.2)"]
 [[package]]
 name = "pooch"
 version = "1.8.2"
 description = "A friend to fetch your data files"
 optional = false
 python-versions = ">=3.7"
 files = [
    {file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"},
    {file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"},
 ]
 [package.dependencies]
 packaging = ">=20.0"
 platformdirs = ">=2.5.0"
 requests = ">=2.19.0"
 [package.extras]
 progress = ["tqdm (>=4.41.0,<5.0.0)"]
 sftp = ["paramiko (>=2.7.0)"]
 xxhash = ["xxhash (>=1.4.3)"]
 [[package]]
 name = "proto-plus"
 version = "1.24.0"
@@ -3441,6 +3553,66 @@ files = [
    {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
 ]
 [[package]]
 name = "soundfile"
 version = "0.12.1"
 description = "An audio library based on libsndfile, CFFI and NumPy"
 optional = false
 python-versions = "*"
 files = [
    {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"},
    {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"},
    {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"},
    {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"},
    {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"},
    {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"},
    {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"},
    {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"},
 ]
 [package.dependencies]
 cffi = ">=1.0"
 [package.extras]
 numpy = ["numpy"]
 [[package]]
 name = "soxr"
 version = "0.5.0.post1"
 description = "High quality, one-dimensional sample-rate conversion library"
 optional = false
 python-versions = ">=3.9"
 files = [
    {file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"},
    {file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"},
    {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"},
    {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"},
    {file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"},
    {file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"},
    {file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"},
    {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"},
    {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"},
    {file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"},
    {file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"},
    {file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"},
    {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"},
    {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"},
    {file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"},
    {file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"},
    {file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"},
    {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"},
    {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"},
    {file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"},
    {file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"},
 ]
 [package.dependencies]
 numpy = "*"
 [package.extras]
 docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
 test = ["pytest"]
 [[package]]
 name = "starlette"
 version = "0.37.2"
@@ -4361,4 +4533,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "d3f779a2521db9bb040d9c2e76f30e7c6cb584119b3bd50454f391f8c7ef368f"
+content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,8 @@ shortuuid = "1.0.13"
 pandas = "2.2.3"
 tiktoken = "0.7.0"
 gunicorn = "^23.0.0"
 librosa = "^0.10.2.post1"
 soundfile = "^0.12.1"
 [build-system]