Brushed up the backend, added writing task 1 academic prompt gen and grading ENCOA-274
This commit is contained in:
106
ielts_be/services/impl/third_parties/whisper.py
Normal file
106
ielts_be/services/impl/third_parties/whisper.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import threading
|
||||
import whisper
|
||||
import asyncio
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import librosa
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Dict
|
||||
|
||||
from logging import getLogger
|
||||
from whisper import Whisper
|
||||
|
||||
from ielts_be.services import ISpeechToTextService
|
||||
|
||||
class OpenAIWhisper(ISpeechToTextService):
    """Speech-to-text service backed by locally loaded OpenAI Whisper models.

    The whisper model is not thread safe, so a pool of ``num_models`` worker
    threads is created and each worker thread is pinned to its own dedicated
    model instance, allowing up to ``num_models`` concurrent transcriptions.

    The base model requires ~1GB, so 4 instances is the safe bet:
    https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages
    """

    # Whisper expects 16 kHz mono float32 input; 30 s at 16 kHz = 480000 samples.
    _TARGET_SR: int = 16000
    _MAX_SAMPLES: int = 30 * _TARGET_SR

    def __init__(self, model_name: str = "base", num_models: int = 4):
        """Load ``num_models`` Whisper models and start a matching worker pool.

        :param model_name: Whisper model size to load (e.g. "base").
        :param num_models: Number of model instances / worker threads.
        """
        self._model_name = model_name
        self._num_models = num_models
        self._models: Dict[int, 'Whisper'] = {}
        self._lock = threading.Lock()
        self._next_model_id = 0
        self._is_closed = False
        self._logger = getLogger(__name__)
        # Maps a worker thread id to the model reserved for that thread so no
        # two threads ever share one (non-thread-safe) model instance.
        self._thread_models: Dict[int, 'Whisper'] = {}

        for i in range(num_models):
            self._models[i] = whisper.load_model(self._model_name, in_memory=True)

        self._executor = ThreadPoolExecutor(
            max_workers=num_models,
            thread_name_prefix="whisper_worker"
        )

    def get_model(self) -> 'Whisper':
        """Return the Whisper model reserved for the calling thread.

        Fix: the previous round-robin handout could give the same model to
        two concurrently running worker threads even though the model is not
        thread safe. Each worker thread now lazily claims its own dedicated
        model; this is sound because the executor has exactly
        ``num_models`` workers.
        """
        thread_id = threading.get_ident()
        with self._lock:
            model = self._thread_models.get(thread_id)
            if model is None:
                model_id = self._next_model_id
                self._next_model_id = (self._next_model_id + 1) % self._num_models
                model = self._models[model_id]
                self._thread_models[thread_id] = model
            return model

    async def speech_to_text(self, path: str) -> str:
        """Transcribe the audio file at ``path`` and return the text.

        The blocking work runs on the worker pool. Long recordings are split
        into 30-second chunks (to bound per-call memory) whose transcripts
        are joined with a single space.

        :raises Exception: re-raises any failure after logging it.
        """
        def transcribe() -> str:
            try:
                audio, sr = sf.read(path)

                # Convert to mono first to reduce memory usage.
                if audio.ndim > 1:
                    audio = audio.mean(axis=1)

                # Resample to the 16 kHz rate Whisper expects
                # (no-op when sr already matches).
                audio = librosa.resample(
                    audio, orig_sr=sr, target_sr=self._TARGET_SR
                )

                # Normalize to [-1, 1]; peak hoisted (was computed twice)
                # and guarded so silent/empty audio does not divide by zero.
                audio = audio.astype(np.float32)
                peak = float(np.max(np.abs(audio))) if audio.size else 0.0
                if peak > 0:
                    audio = audio / peak

                model = self.get_model()

                def run(segment: 'np.ndarray') -> str:
                    # Single place for the transcribe call (was duplicated
                    # across the chunked and unchunked branches).
                    return model.transcribe(
                        segment,
                        fp16=False,
                        language='English',
                        verbose=False
                    )["text"]

                if len(audio) <= self._MAX_SAMPLES:
                    return run(audio)
                return " ".join(
                    run(audio[i:i + self._MAX_SAMPLES])
                    for i in range(0, len(audio), self._MAX_SAMPLES)
                )
            except Exception:
                # The previous handler re-raised without logging; keep the
                # propagation but record the traceback first.
                self._logger.exception("Transcription failed for %s", path)
                raise

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, transcribe)

    def close(self):
        """Shut down the worker pool. Idempotent and thread safe.

        Fix: the lock is released *before* ``shutdown(wait=True)``. Holding
        it while waiting deadlocked with in-flight workers, whose
        ``get_model()`` call needs the same lock.
        """
        with self._lock:
            if self._is_closed:
                return
            self._is_closed = True
        if self._executor:
            self._executor.shutdown(wait=True, cancel_futures=True)

    def __del__(self):
        # Best-effort cleanup: __init__ may have failed partway (attributes
        # missing) and __del__ must never raise during interpreter shutdown.
        try:
            self.close()
        except Exception:
            pass
|
||||
Reference in New Issue
Block a user