Merged in release/async (pull request #41)

Now grading is partitioned into smaller chunks so that Whisper doesn't struggle. Approved-by: Tiago Ribeiro
2024-11-27 08:25:52 +00:00
parent 54a01f9631 a2d1133915
commit 93d9b700fd
9 changed files with 228 additions and 59 deletions
--- a/app/api/grade.py
+++ b/app/api/grade.py
@@ -39,30 +39,6 @@ async def grade_speaking_task(
    return await grade_controller.grade_speaking_task(task, form, background_tasks)


-@grade_router.get(
-    '/pending/{sessionId}',
-    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
-)
-@inject
-async def get_pending_evaluations(
-    session_id: str,
-    grade_controller: IGradeController = Depends(Provide[controller])
-):
-    return await grade_controller.get_evaluations(session_id, "pending")
-
-
-@grade_router.get(
-    '/completed/{sessionId}',
-    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
-)
-@inject
-async def get_completed_evaluations(
-    session_id: str,
-    grade_controller: IGradeController = Depends(Provide[controller])
-):
-    return await grade_controller.get_evaluations(session_id, "completed")
-
-
@grade_router.post(
    '/summary',
    dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))]
--- a/app/controllers/abc/grade.py
+++ b/app/controllers/abc/grade.py
@@ -20,10 +20,6 @@ class IGradeController(ABC):
    ):
        pass

-    @abstractmethod
-    async def get_evaluations(self, session_id: str, status: str):
-        pass
-
    @abstractmethod
    async def grade_short_answers(self, data: Dict):
        pass
--- a/app/controllers/impl/grade.py
+++ b/app/controllers/impl/grade.py
@@ -90,9 +90,6 @@ class GradeController(IGradeController):

        return Response(status_code=200)

-    async def get_evaluations(self, session_id: str, status: str):
-        return await self._evaluation_service.get_evaluations(session_id, status)
-
    async def grade_short_answers(self, data: Dict):
        return await self._service.grade_short_answers(data)

--- a/app/services/abc/evaluation.py
+++ b/app/services/abc/evaluation.py
@@ -27,8 +27,3 @@ class IEvaluationService(ABC):
        background_tasks: BackgroundTasks
    ):
        pass
-
-    @abstractmethod
-    async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
-        pass
-
--- a/app/services/abc/third_parties/stt.py
+++ b/app/services/abc/third_parties/stt.py
@@ -4,5 +4,5 @@ from abc import ABC, abstractmethod
 class ISpeechToTextService(ABC):

    @abstractmethod
-    async def speech_to_text(self, file_path):
+    async def speech_to_text(self, file: bytes):
        pass
--- a/app/services/impl/exam/evaluation.py
+++ b/app/services/impl/exam/evaluation.py
@@ -101,12 +101,3 @@ class EvaluationService(IEvaluationService):
                    }
                }
            )
-
-    async def get_evaluations(self, session_id: str, status: str) -> List[Dict]:
-        return await self._db.find(
-            "evaluation",
-            {
-                "session_id": session_id,
-                "status": status
-            }
-        )
--- a/app/services/impl/third_parties/whisper.py
+++ b/app/services/impl/third_parties/whisper.py
@@ -1,9 +1,13 @@
-import os
 import threading
 import whisper
 import asyncio
+import numpy as np
+import soundfile as sf
+import librosa
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict
+
+from logging import getLogger
 from whisper import Whisper

 from app.services.abc import ISpeechToTextService
@@ -24,6 +28,7 @@ class OpenAIWhisper(ISpeechToTextService):
        self._lock = threading.Lock()
        self._next_model_id = 0
        self._is_closed = False
+        self._logger = getLogger(__name__)

        for i in range(num_models):
            self._models[i] = whisper.load_model(self._model_name, in_memory=True)
@@ -39,18 +44,53 @@ class OpenAIWhisper(ISpeechToTextService):
            self._next_model_id = (self._next_model_id + 1) % self._num_models
            return self._models[model_id]

-    async def speech_to_text(self, file_path: str) -> str:
-        if not os.path.exists(file_path):
-            raise FileNotFoundError(f"File {file_path} not found.")
-
+    async def speech_to_text(self, path: str) -> str:
        def transcribe():
-            model = self.get_model()
-            return model.transcribe(
-                file_path,
-                fp16=False,
-                language='English',
-                verbose=False
-            )["text"]
+            try:
+                audio, sr = sf.read(path)
+
+                # Convert to mono first to reduce memory usage
+                if len(audio.shape) > 1:
+                    audio = audio.mean(axis=1)
+
+                # Resample from the file's native sample rate to 16 kHz
+                audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+
+                # Normalize to [-1, 1] range
+                audio = audio.astype(np.float32)
+                if np.max(np.abs(audio)) > 0:
+                    audio = audio / np.max(np.abs(audio))
+
+                # Break up long audio into chunks (30 seconds at 16kHz = 480000 samples)
+                max_samples = 480000
+                if len(audio) > max_samples:
+                    chunks = []
+                    for i in range(0, len(audio), max_samples):
+                        chunk = audio[i:i + max_samples]
+                        chunks.append(chunk)
+
+                    model = self.get_model()
+                    texts = []
+                    for chunk in chunks:
+                        result = model.transcribe(
+                            chunk,
+                            fp16=False,
+                            language='English',
+                            verbose=False
+                        )["text"]
+                        texts.append(result)
+                    return " ".join(texts)
+                else:
+                    model = self.get_model()
+                    return model.transcribe(
+                        audio,
+                        fp16=False,
+                        language='English',
+                        verbose=False
+                    )["text"]
+
+            except Exception as e:
+                raise

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, transcribe)
--- a/poetry.lock
+++ b/poetry.lock
@@ -253,6 +253,20 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
 tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]

+[[package]]
+name = "audioread"
+version = "3.0.1"
+description = "Multi-library, cross-platform audio decoding."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"},
+    {file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"},
+]
+
+[package.extras]
+test = ["tox"]
+
 [[package]]
 name = "boto3"
 version = "1.34.131"
@@ -585,6 +599,17 @@ ssh = ["bcrypt (>=3.1.5)"]
 test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
 test-randomorder = ["pytest-randomly"]

+[[package]]
+name = "decorator"
+version = "5.1.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
+    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
+]
+
 [[package]]
 name = "dependency-injector"
 version = "4.42.0"
@@ -1580,6 +1605,56 @@ files = [
    {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
 ]

+[[package]]
+name = "lazy-loader"
+version = "0.4"
+description = "Makes it easy to load subpackages and functions on demand."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
+    {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
+]
+
+[package.dependencies]
+packaging = "*"
+
+[package.extras]
+dev = ["changelist (==0.5)"]
+lint = ["pre-commit (==3.7.0)"]
+test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
+
+[[package]]
+name = "librosa"
+version = "0.10.2.post1"
+description = "Python module for audio and music processing"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"},
+    {file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"},
+]
+
+[package.dependencies]
+audioread = ">=2.1.9"
+decorator = ">=4.3.0"
+joblib = ">=0.14"
+lazy-loader = ">=0.1"
+msgpack = ">=1.0"
+numba = ">=0.51.0"
+numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2"
+pooch = ">=1.1"
+scikit-learn = ">=0.20.0"
+scipy = ">=1.2.0"
+soundfile = ">=0.12.1"
+soxr = ">=0.3.2"
+typing-extensions = ">=4.1.1"
+
+[package.extras]
+display = ["matplotlib (>=3.5.0)"]
+docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"]
+tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"]
+
 [[package]]
 name = "llvmlite"
 version = "0.43.0"
@@ -2498,6 +2573,43 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
 typing = ["typing-extensions"]
 xmp = ["defusedxml"]

+[[package]]
+name = "platformdirs"
+version = "4.3.6"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
+    {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
+]
+
+[package.extras]
+docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
+type = ["mypy (>=1.11.2)"]
+
+[[package]]
+name = "pooch"
+version = "1.8.2"
+description = "A friend to fetch your data files"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"},
+    {file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"},
+]
+
+[package.dependencies]
+packaging = ">=20.0"
+platformdirs = ">=2.5.0"
+requests = ">=2.19.0"
+
+[package.extras]
+progress = ["tqdm (>=4.41.0,<5.0.0)"]
+sftp = ["paramiko (>=2.7.0)"]
+xxhash = ["xxhash (>=1.4.3)"]
+
 [[package]]
 name = "proto-plus"
 version = "1.24.0"
@@ -3441,6 +3553,66 @@ files = [
    {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
 ]

+[[package]]
+name = "soundfile"
+version = "0.12.1"
+description = "An audio library based on libsndfile, CFFI and NumPy"
+optional = false
+python-versions = "*"
+files = [
+    {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"},
+    {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"},
+    {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"},
+    {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"},
+    {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"},
+    {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"},
+    {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"},
+    {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"},
+]
+
+[package.dependencies]
+cffi = ">=1.0"
+
+[package.extras]
+numpy = ["numpy"]
+
+[[package]]
+name = "soxr"
+version = "0.5.0.post1"
+description = "High quality, one-dimensional sample-rate conversion library"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"},
+    {file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"},
+]
+
+[package.dependencies]
+numpy = "*"
+
+[package.extras]
+docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
+test = ["pytest"]
+
 [[package]]
 name = "starlette"
 version = "0.37.2"
@@ -4361,4 +4533,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "d3f779a2521db9bb040d9c2e76f30e7c6cb584119b3bd50454f391f8c7ef368f"
+content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,8 @@ shortuuid = "1.0.13"
 pandas = "2.2.3"
 tiktoken = "0.7.0"
 gunicorn = "^23.0.0"
+librosa = "^0.10.2.post1"
+soundfile = "^0.12.1"


 [build-system]