diff --git a/app/api/grade.py b/app/api/grade.py index b3032ad..268142b 100644 --- a/app/api/grade.py +++ b/app/api/grade.py @@ -39,30 +39,6 @@ async def grade_speaking_task( return await grade_controller.grade_speaking_task(task, form, background_tasks) -@grade_router.get( - '/pending/{sessionId}', - dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] -) -@inject -async def get_pending_evaluations( - session_id: str, - grade_controller: IGradeController = Depends(Provide[controller]) -): - return await grade_controller.get_evaluations(session_id, "pending") - - -@grade_router.get( - '/completed/{sessionId}', - dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] -) -@inject -async def get_completed_evaluations( - session_id: str, - grade_controller: IGradeController = Depends(Provide[controller]) -): - return await grade_controller.get_evaluations(session_id, "completed") - - @grade_router.post( '/summary', dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] diff --git a/app/controllers/abc/grade.py b/app/controllers/abc/grade.py index b767c6e..f0c883a 100644 --- a/app/controllers/abc/grade.py +++ b/app/controllers/abc/grade.py @@ -20,10 +20,6 @@ class IGradeController(ABC): ): pass - @abstractmethod - async def get_evaluations(self, session_id: str, status: str): - pass - @abstractmethod async def grade_short_answers(self, data: Dict): pass diff --git a/app/controllers/impl/grade.py b/app/controllers/impl/grade.py index f80dbca..cae2087 100644 --- a/app/controllers/impl/grade.py +++ b/app/controllers/impl/grade.py @@ -90,9 +90,6 @@ class GradeController(IGradeController): return Response(status_code=200) - async def get_evaluations(self, session_id: str, status: str): - return await self._evaluation_service.get_evaluations(session_id, status) - async def grade_short_answers(self, data: Dict): return await self._service.grade_short_answers(data) diff --git a/app/services/abc/evaluation.py b/app/services/abc/evaluation.py index c7ee386..0347859 100644 --- a/app/services/abc/evaluation.py +++ b/app/services/abc/evaluation.py @@ -27,8 +27,3 @@ class IEvaluationService(ABC): background_tasks: BackgroundTasks ): pass - - @abstractmethod - async def get_evaluations(self, session_id: str, status: str) -> List[Dict]: - pass - diff --git a/app/services/abc/third_parties/stt.py b/app/services/abc/third_parties/stt.py index 7fce30a..6d5de59 100644 --- a/app/services/abc/third_parties/stt.py +++ b/app/services/abc/third_parties/stt.py @@ -4,5 +4,5 @@ from abc import ABC, abstractmethod class ISpeechToTextService(ABC): @abstractmethod - async def speech_to_text(self, file_path): + async def speech_to_text(self, file: bytes): pass diff --git a/app/services/impl/exam/evaluation.py b/app/services/impl/exam/evaluation.py index 12ee185..2d0a94c 100644 --- a/app/services/impl/exam/evaluation.py +++ b/app/services/impl/exam/evaluation.py @@ -101,12 +101,3 @@ class EvaluationService(IEvaluationService): } } ) - - async def get_evaluations(self, session_id: str, status: str) -> List[Dict]: - return await self._db.find( - "evaluation", - { - "session_id": session_id, - "status": status - } - ) diff --git a/app/services/impl/third_parties/whisper.py b/app/services/impl/third_parties/whisper.py index 3ef4a08..8a7a8d4 100644 --- a/app/services/impl/third_parties/whisper.py +++ b/app/services/impl/third_parties/whisper.py @@ -1,9 +1,13 @@ -import os import threading import whisper import asyncio +import numpy as np +import soundfile as sf +import librosa from concurrent.futures import ThreadPoolExecutor from typing import Dict + +from logging import getLogger from whisper import Whisper from app.services.abc import ISpeechToTextService @@ -24,6 +28,7 @@ class OpenAIWhisper(ISpeechToTextService): self._lock = threading.Lock() self._next_model_id = 0 self._is_closed = False + self._logger = getLogger(__name__) for i in range(num_models): self._models[i] = whisper.load_model(self._model_name, in_memory=True) @@ -39,18 +44,53 @@ class OpenAIWhisper(ISpeechToTextService): self._next_model_id = (self._next_model_id + 1) % self._num_models return self._models[model_id] - async def speech_to_text(self, file_path: str) -> str: - if not os.path.exists(file_path): - raise FileNotFoundError(f"File {file_path} not found.") - + async def speech_to_text(self, path: str) -> str: def transcribe(): - model = self.get_model() - return model.transcribe( - file_path, - fp16=False, - language='English', - verbose=False - )["text"] + try: + audio, sr = sf.read(path) + + # Convert to mono first to reduce memory usage + if len(audio.shape) > 1: + audio = audio.mean(axis=1) + + # Resample from 48kHz to 16kHz + audio = librosa.resample(audio, orig_sr=sr, target_sr=16000) + + # Normalize to [-1, 1] range + audio = audio.astype(np.float32) + if np.max(np.abs(audio)) > 0: + audio = audio / np.max(np.abs(audio)) + + # Break up long audio into chunks (30 seconds at 16kHz = 480000 samples) + max_samples = 480000 + if len(audio) > max_samples: + chunks = [] + for i in range(0, len(audio), max_samples): + chunk = audio[i:i + max_samples] + chunks.append(chunk) + + model = self.get_model() + texts = [] + for chunk in chunks: + result = model.transcribe( + chunk, + fp16=False, + language='English', + verbose=False + )["text"] + texts.append(result) + return " ".join(texts) + else: + model = self.get_model() + return model.transcribe( + audio, + fp16=False, + language='English', + verbose=False + )["text"] + + except Exception as e: + raise loop = asyncio.get_running_loop() return await loop.run_in_executor(self._executor, transcribe) diff --git a/poetry.lock b/poetry.lock index 6070eb1..5e6f60e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -253,6 +253,20 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +[[package]] +name = "audioread" +version = "3.0.1" +description = "Multi-library, cross-platform audio decoding." +optional = false +python-versions = ">=3.6" +files = [ + {file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"}, + {file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"}, +] + +[package.extras] +test = ["tox"] + [[package]] name = "boto3" version = "1.34.131" @@ -585,6 +599,17 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + [[package]] name = "dependency-injector" version = "4.42.0" @@ -1580,6 +1605,56 @@ files = [ {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] +[[package]] +name = "lazy-loader" +version = "0.4" +description = "Makes it easy to load subpackages and functions on demand." +optional = false +python-versions = ">=3.7" +files = [ + {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"}, + {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +dev = ["changelist (==0.5)"] +lint = ["pre-commit (==3.7.0)"] +test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"] + +[[package]] +name = "librosa" +version = "0.10.2.post1" +description = "Python module for audio and music processing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"}, + {file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"}, +] + +[package.dependencies] +audioread = ">=2.1.9" +decorator = ">=4.3.0" +joblib = ">=0.14" +lazy-loader = ">=0.1" +msgpack = ">=1.0" +numba = ">=0.51.0" +numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2" +pooch = ">=1.1" +scikit-learn = ">=0.20.0" +scipy = ">=1.2.0" +soundfile = ">=0.12.1" +soxr = ">=0.3.2" +typing-extensions = ">=4.1.1" + +[package.extras] +display = ["matplotlib (>=3.5.0)"] +docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"] +tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"] + [[package]] name = "llvmlite" version = "0.43.0" @@ -2498,6 +2573,43 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa typing = ["typing-extensions"] xmp = ["defusedxml"] +[[package]] +name = "platformdirs" +version = "4.3.6" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." +optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, + {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.11.2)"] + +[[package]] +name = "pooch" +version = "1.8.2" +description = "A friend to fetch your data files" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"}, + {file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"}, +] + +[package.dependencies] +packaging = ">=20.0" +platformdirs = ">=2.5.0" +requests = ">=2.19.0" + +[package.extras] +progress = ["tqdm (>=4.41.0,<5.0.0)"] +sftp = ["paramiko (>=2.7.0)"] +xxhash = ["xxhash (>=1.4.3)"] + [[package]] name = "proto-plus" version = "1.24.0" @@ -3441,6 +3553,66 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soundfile" +version = "0.12.1" +description = "An audio library based on libsndfile, CFFI and NumPy" +optional = false +python-versions = "*" +files = [ + {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"}, + {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"}, + {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"}, + {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"}, +] + +[package.dependencies] +cffi = ">=1.0" + +[package.extras] +numpy = ["numpy"] + +[[package]] +name = "soxr" +version = "0.5.0.post1" +description = "High quality, one-dimensional sample-rate conversion library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"}, + {file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"}, + {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"}, + {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"}, + {file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"}, + {file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"}, + {file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"}, + {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"}, + {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"}, + {file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"}, + {file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"}, + {file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"}, + {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"}, + {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"}, + {file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"}, + {file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"}, + {file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"}, + {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"}, + {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"}, + {file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"}, + {file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"] +test = ["pytest"] + [[package]] name = "starlette" version = "0.37.2" @@ -4361,4 +4533,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "d3f779a2521db9bb040d9c2e76f30e7c6cb584119b3bd50454f391f8c7ef368f" +content-hash = "8137ea241f80674fe65910e0f00ecdbfa21792b101f7793d992e8016f8dce1e0" diff --git a/pyproject.toml b/pyproject.toml index 51b5a36..b3da2f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ shortuuid = "1.0.13" pandas = "2.2.3" tiktoken = "0.7.0" gunicorn = "^23.0.0" +librosa = "^0.10.2.post1" +soundfile = "^0.12.1" [build-system]