diff --git a/Dockerfile b/Dockerfile index 8e0a4da..cd075fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,8 @@ FROM python:3.11-slim as requirements-stage WORKDIR /tmp RUN pip install poetry COPY pyproject.toml ./poetry.lock* /tmp/ +# https://python-poetry.org/docs/cli#export +RUN poetry self add poetry-plugin-export RUN poetry export -f requirements.txt --output requirements.txt --without-hashes diff --git a/ielts_be/api/exam/listening.py b/ielts_be/api/exam/listening.py index 5358dc0..b969cc3 100644 --- a/ielts_be/api/exam/listening.py +++ b/ielts_be/api/exam/listening.py @@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, Path, Query, UploadFile from ielts_be.middlewares import Authorized, IsAuthenticatedViaBearerToken from ielts_be.controllers import IListeningController from ielts_be.configs.constants import EducationalContent -from ielts_be.dtos.listening import GenerateListeningExercises, Dialog +from ielts_be.dtos.listening import ListeningExercisesDTO, Dialog, InstructionsDTO controller = "listening_controller" listening_router = APIRouter() @@ -64,6 +64,17 @@ async def transcribe_dialog( return await listening_controller.transcribe_dialog(audio) +@listening_router.post( + '/instructions', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def create_instructions( + dto: InstructionsDTO, + listening_controller: IListeningController = Depends(Provide[controller]) +): + return await listening_controller.create_instructions(dto.text) + @listening_router.post( '/', @@ -71,7 +82,7 @@ async def transcribe_dialog( ) @inject async def generate_listening_exercise( - dto: GenerateListeningExercises, + dto: ListeningExercisesDTO, listening_controller: IListeningController = Depends(Provide[controller]) ): return await listening_controller.get_listening_question(dto) diff --git a/ielts_be/api/exam/speaking.py b/ielts_be/api/exam/speaking.py index 4a18a0b..2b955cd 100644 --- a/ielts_be/api/exam/speaking.py +++ 
b/ielts_be/api/exam/speaking.py @@ -1,5 +1,5 @@ import random -from typing import Optional +from typing import Optional, List from dependency_injector.wiring import inject, Provide from fastapi import APIRouter, Path, Query, Depends @@ -59,7 +59,7 @@ async def get_speaking_task( topic: Optional[str] = Query(None), first_topic: Optional[str] = Query(None), second_topic: Optional[str] = Query(None), - difficulty: str = Query(default=random.choice(EducationalContent.DIFFICULTIES)), + difficulty: Optional[str] = None, speaking_controller: ISpeakingController = Depends(Provide[controller]) ): if not second_topic: @@ -67,5 +67,8 @@ async def get_speaking_task( else: topic_or_first_topic = first_topic if first_topic else random.choice(EducationalContent.MTI_TOPICS) + if not difficulty: + difficulty = random.choice(EducationalContent.DIFFICULTIES) + second_topic = second_topic if second_topic else random.choice(EducationalContent.MTI_TOPICS) return await speaking_controller.get_speaking_part(task, topic_or_first_topic, second_topic, difficulty) diff --git a/ielts_be/api/exam/writing.py b/ielts_be/api/exam/writing.py index ed304dd..7988acb 100644 --- a/ielts_be/api/exam/writing.py +++ b/ielts_be/api/exam/writing.py @@ -1,4 +1,5 @@ import random +from typing import Optional, List from dependency_injector.wiring import inject, Provide from fastapi import APIRouter, Path, Query, Depends, UploadFile, File @@ -19,7 +20,7 @@ writing_router = APIRouter() async def generate_writing_academic( task: int = Path(..., ge=1, le=2), file: UploadFile = File(...), - difficulty: str = Query(default=None), + difficulty: Optional[List[str]] = None, writing_controller: IWritingController = Depends(Provide[controller]) ): difficulty = random.choice(EducationalContent.DIFFICULTIES) if not difficulty else difficulty @@ -33,7 +34,7 @@ async def generate_writing_academic( @inject async def generate_writing( task: int = Path(..., ge=1, le=2), - difficulty: str = Query(default=None), + 
difficulty: Optional[str] = None, topic: str = Query(default=None), writing_controller: IWritingController = Depends(Provide[controller]) ): diff --git a/ielts_be/controllers/abc/exam/listening.py b/ielts_be/controllers/abc/exam/listening.py index 8aa8428..8dbed28 100644 --- a/ielts_be/controllers/abc/exam/listening.py +++ b/ielts_be/controllers/abc/exam/listening.py @@ -24,3 +24,7 @@ class IListeningController(ABC): @abstractmethod async def transcribe_dialog(self, audio: UploadFile): pass + + @abstractmethod + async def create_instructions(self, text: str): + pass diff --git a/ielts_be/controllers/impl/exam/listening.py b/ielts_be/controllers/impl/exam/listening.py index 2be5cfb..fbc3e1e 100644 --- a/ielts_be/controllers/impl/exam/listening.py +++ b/ielts_be/controllers/impl/exam/listening.py @@ -5,7 +5,7 @@ from fastapi.responses import StreamingResponse, Response from ielts_be.controllers import IListeningController from ielts_be.services import IListeningService -from ielts_be.dtos.listening import GenerateListeningExercises, Dialog +from ielts_be.dtos.listening import ListeningExercisesDTO, Dialog class ListeningController(IListeningController): @@ -23,12 +23,26 @@ class ListeningController(IListeningController): async def generate_listening_dialog(self, section_id: int, topic: str, difficulty: str): return await self._service.generate_listening_dialog(section_id, topic, difficulty) - async def get_listening_question(self, dto: GenerateListeningExercises): + async def get_listening_question(self, dto: ListeningExercisesDTO): return await self._service.get_listening_question(dto) async def generate_mp3(self, dto: Dialog): mp3 = await self._service.generate_mp3(dto) + return self._mp3_response(mp3) + async def create_instructions(self, text: str): + mp3 = await self._service.create_instructions(text) + return self._mp3_response(mp3) + + async def transcribe_dialog(self, audio: UploadFile): + dialog = await self._service.transcribe_dialog(audio) + if dialog is 
None: + return Response(status_code=500) + + return dialog + + @staticmethod + def _mp3_response(mp3: bytes): return StreamingResponse( content=io.BytesIO(mp3), media_type="audio/mpeg", @@ -37,10 +51,3 @@ class ListeningController(IListeningController): "Content-Disposition": "attachment;filename=speech.mp3" } ) - - async def transcribe_dialog(self, audio: UploadFile): - dialog = await self._service.transcribe_dialog(audio) - if dialog is None: - return Response(status_code=500) - - return dialog diff --git a/ielts_be/controllers/impl/grade.py b/ielts_be/controllers/impl/grade.py index f4c70b6..dec3973 100644 --- a/ielts_be/controllers/impl/grade.py +++ b/ielts_be/controllers/impl/grade.py @@ -25,10 +25,6 @@ class GradeController(IGradeController): self, task: int, dto: WritingGradeTaskDTO, background_tasks: BackgroundTasks ): - await self._evaluation_service.create_evaluation( - dto.userId, dto.sessionId, dto.exerciseId, EvaluationType.WRITING, task - ) - await self._evaluation_service.begin_evaluation( dto.userId, dto.sessionId, task, dto.exerciseId, EvaluationType.WRITING, dto, background_tasks ) @@ -79,10 +75,6 @@ class GradeController(IGradeController): ex_type = EvaluationType.SPEAKING if task == 2 else EvaluationType.SPEAKING_INTERACTIVE - await self._evaluation_service.create_evaluation( - user_id, session_id, exercise_id, ex_type, task - ) - await self._evaluation_service.begin_evaluation( user_id, session_id, task, exercise_id, ex_type, items, background_tasks ) diff --git a/ielts_be/dtos/listening.py b/ielts_be/dtos/listening.py index 8577d3f..09dbefa 100644 --- a/ielts_be/dtos/listening.py +++ b/ielts_be/dtos/listening.py @@ -18,16 +18,19 @@ class ListeningExercises(BaseModel): type: ListeningExerciseType quantity: int -class GenerateListeningExercises(BaseModel): +class ListeningExercisesDTO(BaseModel): text: str exercises: List[ListeningExercises] difficulty: Optional[str] +class InstructionsDTO(BaseModel): + text: str + class 
ConversationPayload(BaseModel): name: str gender: str text: str - voice: str + voice: Optional[str] = None class Dialog(BaseModel): conversation: Optional[List[ConversationPayload]] = Field(default_factory=list) diff --git a/ielts_be/dtos/reading.py b/ielts_be/dtos/reading.py index 2de9085..192f8b5 100644 --- a/ielts_be/dtos/reading.py +++ b/ielts_be/dtos/reading.py @@ -14,4 +14,4 @@ class ReadingExercise(BaseModel): class ReadingDTO(BaseModel): text: str = Field(...) exercises: List[ReadingExercise] = Field(...) - difficulty: str = Field(random.choice(EducationalContent.DIFFICULTIES)) + difficulty: Optional[str] = None diff --git a/ielts_be/dtos/user_batch.py b/ielts_be/dtos/user_batch.py index 2198f5f..c5cdb06 100644 --- a/ielts_be/dtos/user_batch.py +++ b/ielts_be/dtos/user_batch.py @@ -15,7 +15,7 @@ class Entity(BaseModel): class UserDTO(BaseModel): - id: uuid.UUID = Field(default_factory=uuid.uuid4) + id: str email: str name: str type: str diff --git a/ielts_be/services/abc/evaluation.py b/ielts_be/services/abc/evaluation.py index 9dbb48a..6048a79 100644 --- a/ielts_be/services/abc/evaluation.py +++ b/ielts_be/services/abc/evaluation.py @@ -6,17 +6,6 @@ from ielts_be.dtos.evaluation import EvaluationType class IEvaluationService(ABC): - @abstractmethod - async def create_evaluation( - self, - user_id: str, - session_id: str, - exercise_id: str, - eval_type: EvaluationType, - task: int - ): - pass - @abstractmethod async def begin_evaluation( self, diff --git a/ielts_be/services/abc/exam/listening.py b/ielts_be/services/abc/exam/listening.py index 85f3d57..723bf45 100644 --- a/ielts_be/services/abc/exam/listening.py +++ b/ielts_be/services/abc/exam/listening.py @@ -20,6 +20,10 @@ class IListeningService(ABC): async def generate_mp3(self, dto) -> bytes: pass + @abstractmethod + async def create_instructions(self, text: str) -> bytes: + pass + @abstractmethod async def import_exam( self, exercises: UploadFile, solutions: UploadFile = None diff --git 
a/ielts_be/services/abc/third_parties/tts.py b/ielts_be/services/abc/third_parties/tts.py index 018cc26..6f67c62 100644 --- a/ielts_be/services/abc/third_parties/tts.py +++ b/ielts_be/services/abc/third_parties/tts.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Union +from typing import Union, Optional class ITextToSpeechService(ABC): @@ -9,14 +9,5 @@ class ITextToSpeechService(ABC): pass @abstractmethod - async def text_to_speech(self, dialog) -> bytes: + async def text_to_speech(self, dialog, include_final_cue = True) -> bytes: pass - - @abstractmethod - async def _conversation_to_speech(self, conversation: list): - pass - - @abstractmethod - async def _text_to_speech(self, text: str): - pass - diff --git a/ielts_be/services/impl/exam/evaluation.py b/ielts_be/services/impl/exam/evaluation.py index 013f252..ccf6a0e 100644 --- a/ielts_be/services/impl/exam/evaluation.py +++ b/ielts_be/services/impl/exam/evaluation.py @@ -1,5 +1,5 @@ import logging -from typing import Union, List +from typing import Union, List, Dict from fastapi import BackgroundTasks @@ -18,26 +18,6 @@ class EvaluationService(IEvaluationService): self._speaking_service = speaking_service self._logger = logging.getLogger(__name__) - async def create_evaluation( - self, - user_id: str, - session_id: str, - exercise_id: str, - eval_type: EvaluationType, - task: int - ): - await self._db.save_to_db( - "evaluation", - { - "user": user_id, - "session_id": session_id, - "exercise_id": exercise_id, - "type": eval_type, - "task": task, - "status": "pending" - } - ) - async def begin_evaluation( self, user_id: str, session_id: str, task: int, @@ -71,34 +51,58 @@ class EvaluationService(IEvaluationService): solution ) - await self._db.update( + eval_res = await self._db.find( "evaluation", { "user": user_id, "exercise_id": exercise_id, "session_id": session_id, - }, - { - "$set": { - "status": "completed", - "result": result, - } } ) + if len(eval_res) > 0: + await 
self._db.update( + "evaluation", + { + "user": user_id, + "exercise_id": exercise_id, + "session_id": session_id, + }, + { + "$set": { + "status": "completed", + "result": result, + } + } + ) + else: + self._logger.info("Skipping evaluation write to db since the record was removed.") + except Exception as e: - self._logger.error(f"Error processing evaluation {session_id} - {exercise_id}: {str(e)}") - await self._db.update( + eval_res = await self._db.find( "evaluation", { "user": user_id, "exercise_id": exercise_id, - "session_id": session_id - }, - { - "$set": { - "status": "error", - "error": str(e), - } + "session_id": session_id, } ) + + if len(eval_res) > 0: + self._logger.error(f"Error processing evaluation {session_id} - {exercise_id}: {str(e)}") + await self._db.update( + "evaluation", + { + "user": user_id, + "exercise_id": exercise_id, + "session_id": session_id + }, + { + "$set": { + "status": "error", + "error": str(e), + } + } + ) + else: + self._logger.info("Skipping evaluation write to db since the record was removed.") diff --git a/ielts_be/services/impl/exam/listening/__init__.py b/ielts_be/services/impl/exam/listening/__init__.py index 876a3a0..a9a3345 100644 --- a/ielts_be/services/impl/exam/listening/__init__.py +++ b/ielts_be/services/impl/exam/listening/__init__.py @@ -1,12 +1,11 @@ import asyncio from logging import getLogger import random -from typing import Dict, Any +from typing import Dict, Any, Union -import aiofiles from starlette.datastructures import UploadFile -from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises +from ielts_be.dtos.listening import ListeningExercisesDTO, Dialog, ListeningExercises, ConversationPayload from ielts_be.exceptions.exceptions import TranscriptionException from ielts_be.repositories import IFileStorage, IDocumentStore from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService @@ -112,9 +111,27 @@ class 
ListeningService(IListeningService): return dialog async def generate_mp3(self, dto: Dialog) -> bytes: + convo = dto.conversation or [] + voices_assigned = True + for segment in convo: + if segment.voice is None: + voices_assigned = False + + if not voices_assigned: + dto = self._get_conversation_voices(dto, True) + return await self._tts.text_to_speech(dto) - async def get_listening_question(self, dto: GenerateListeningExercises): + async def create_instructions(self, text: str) -> bytes: + script = Dialog(conversation=[ConversationPayload(**{ + "text": text, + "voice": "Matthew", + "name": "", + "gender": "" + })]) + return await self._tts.text_to_speech(script, False) + + async def get_listening_question(self, dto: ListeningExercisesDTO): start_id = 1 exercise_tasks = [] @@ -255,7 +272,13 @@ class ListeningService(IListeningService): ) return {"dialog": response["monologue"]} - def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool): + # TODO: This was a refactor from the previous ielts-be, don't know why there is a distinction between + # section 1 and 3, I think it would make sense to keep only the section 1 logic, only bringing this up since + # there would need to be a refactor of the POST /api/listening/media endpoint which imo is pointless + # https://bitbucket.org/ecropdev/ielts-be/src/676f660f3e80220e3db0418dbeef0b1c0f257edb/helper/exercises.py?at=release%2Fmongodb-migration + """ + def generate_listening_1_conversation(topic: str): + ... 
chosen_voices = [] name_to_voice = {} for segment in response['conversation']: @@ -265,18 +288,70 @@ class ListeningService(IListeningService): voice = name_to_voice[name] else: voice = None + while voice is None: + if segment['gender'].lower() == 'male': + available_voices = MALE_NEURAL_VOICES + else: + available_voices = FEMALE_NEURAL_VOICES + + chosen_voice = random.choice(available_voices)['Id'] + if chosen_voice not in chosen_voices: + voice = chosen_voice + chosen_voices.append(voice) + name_to_voice[name] = voice + segment['voice'] = voice + return response + + + def generate_listening_3_conversation(topic: str): + ... + name_to_voice = {} + for segment in response['conversation']: + if 'voice' not in segment: + name = segment['name'] + if name in name_to_voice: + voice = name_to_voice[name] + else: + if segment['gender'].lower() == 'male': + voice = random.choice(MALE_NEURAL_VOICES)['Id'] + else: + voice = random.choice(FEMALE_NEURAL_VOICES)['Id'] + name_to_voice[name] = voice + segment['voice'] = voice + return response + """ + def _get_conversation_voices(self, response: Union[Dict, Dialog], unique_voices_across_segments: bool): + chosen_voices = [] + name_to_voice = {} + + is_model = isinstance(response, Dialog) + conversation = response.conversation if is_model else response['conversation'] + + for segment in conversation: + voice_check = (segment.voice is None) if is_model else ('voice' not in segment) + if voice_check: + name = segment.name if is_model else segment['name'] + if name in name_to_voice: + voice = name_to_voice[name] + else: + voice = None + gender = segment.gender if is_model else segment['gender'] # section 1 if unique_voices_across_segments: while voice is None: - chosen_voice = self._get_random_voice(segment['gender']) + chosen_voice = self._get_random_voice(gender) if chosen_voice not in chosen_voices: voice = chosen_voice chosen_voices.append(voice) # section 3 else: - voice = self._get_random_voice(segment['gender']) + voice = 
self._get_random_voice(gender) name_to_voice[name] = voice - segment['voice'] = voice + + if is_model: + segment.voice = voice + else: + segment['voice'] = voice return response @staticmethod diff --git a/ielts_be/services/impl/third_parties/aws_polly.py b/ielts_be/services/impl/third_parties/aws_polly.py index 87f13ca..381c8f3 100644 --- a/ielts_be/services/impl/third_parties/aws_polly.py +++ b/ielts_be/services/impl/third_parties/aws_polly.py @@ -1,4 +1,5 @@ import random +from typing import Optional from aiobotocore.client import BaseClient @@ -21,7 +22,7 @@ class AWSPolly(ITextToSpeechService): ) return await tts_response['AudioStream'].read() - async def text_to_speech(self, dialog: Dialog) -> bytes: + async def text_to_speech(self, dialog: Dialog, include_final_cue: bool = True) -> bytes: if not dialog.conversation and not dialog.monologue: raise ValueError("Unsupported argument for text_to_speech") @@ -30,13 +31,14 @@ class AWSPolly(ITextToSpeechService): else: audio_segments = await self._conversation_to_speech(dialog) - final_message = await self.synthesize_speech( - "This audio recording, for the listening exercise, has finished.", - "Stephen" - ) + if include_final_cue: + final_message = await self.synthesize_speech( + "This audio recording, for the listening exercise, has finished.", + "Stephen" + ) - # Add finish message - audio_segments.append(final_message) + # Add finish message + audio_segments.append(final_message) # Combine the audio segments into a single audio file combined_audio = b"".join(audio_segments) diff --git a/ielts_be/services/impl/user.py b/ielts_be/services/impl/user.py index b150876..deb6fd0 100644 --- a/ielts_be/services/impl/user.py +++ b/ielts_be/services/impl/user.py @@ -45,7 +45,6 @@ class UserService(IUserService): error_msg = f"Couldn't upload users. 
Failed to run command firebase auth import -> ```cmd {result.stdout}```" self._logger.error(error_msg) return error_msg - await self._init_users(batch_dto) FileHelper.remove_file(path) @@ -68,7 +67,7 @@ class UserService(IUserService): for user in batch_dto.users: user_data = { - 'UID': str(user.id), + 'UID': user.id, 'Email': user.email, 'Email Verified': False, 'Password Hash': user.passwordHash, @@ -142,7 +141,7 @@ class UserService(IUserService): 'subscriptionExpirationDate': user.expiryDate, 'entities': user.entities } - await self._db.save_to_db("users", new_user, str(user.id)) + await self._db.save_to_db("users", new_user, user.id) async def _create_code(self, user: UserDTO, maker_id: str) -> str: code = shortuuid.ShortUUID().random(length=6) @@ -174,6 +173,7 @@ class UserService(IUserService): 'name': user.groupName.strip(), 'participants': [user_id], 'disableEditing': False, + 'entity': user.entities[0]['id'] } await self._db.save_to_db("groups", new_group, str(uuid.uuid4())) else: diff --git a/scripts/listening_instructions/gen_listening_instructions.py b/scripts/listening_instructions/gen_listening_instructions.py new file mode 100644 index 0000000..650ae0a --- /dev/null +++ b/scripts/listening_instructions/gen_listening_instructions.py @@ -0,0 +1,98 @@ +import os +import asyncio +import contextlib +import aioboto3 +from itertools import combinations +from dotenv import load_dotenv +import aiofiles + +from ielts_be.dtos.listening import Dialog, ConversationPayload +from ielts_be.services.impl import AWSPolly + +SECTION_DESCRIPTIONS = { + 1: "a conversation between two people in an everyday social context", + 2: "a monologue set in a social context", + 3: "a conversation between up to four individuals in an educational context", + 4: "a monologue about an academic subject" +} + +def generate_preset(selected_sections): + sections = [SECTION_DESCRIPTIONS[i] for i in selected_sections] + + if len(selected_sections) >= 3: + intro = "You will hear a number of 
different recordings and you will have to answer questions on what you hear." + play_text = "All the recordings can be played three times." + final_text = " Pay close attention to the audio recordings and answer the questions accordingly." + else: + num_word = "one" if len(selected_sections) == 1 else "two" + recording_text = "recording" if len(selected_sections) == 1 else "recordings" + intro = f"You will hear {num_word} {recording_text} and you will have to answer questions on what you hear." + play_text = f"The {recording_text} can be played three times." + final_text = f" Pay close attention to the audio {recording_text} and answer the questions accordingly." + + instructions = f" There will be time for you to read the instructions and questions and you will have a chance to check your work. {play_text}" + + if len(sections) == 1: + parts = f"The recording consists of {sections[0]}." + else: + parts = f"The module is in {len(sections)} parts. " + for i, section in enumerate(sections): + ordinal = ["first", "second", "third", "fourth"][i] + parts += f"In the {ordinal} part you will hear {section}. 
" + + return f"{intro}{instructions} {parts}{final_text}" + +async def save_preset_audio(service, combo, preset_text): + script = Dialog(conversation=[ConversationPayload(**{ + "text": preset_text, + "voice": "Matthew", + "name": "", + "gender": "" + })]) + + filename = f"./recordings/presetInstructions_{'_'.join(map(str, combo))}.mp3" + audio_bytes = await service.text_to_speech(script, False) + + os.makedirs("./recordings", exist_ok=True) + async with aiofiles.open(filename, "wb") as f: + await f.write(audio_bytes) + + +async def main(generate = False): + load_dotenv() + sections = [1, 2, 3, 4] + all_combinations = [] + + for length in range(1, len(sections) + 1): + combos = list(combinations(sections, length)) + all_combinations.extend(combos) + + if generate: + async with contextlib.AsyncExitStack() as context_stack: + session = aioboto3.Session() + polly_client = await context_stack.enter_async_context( + session.client( + 'polly', + region_name='eu-west-1', + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID") + ) + ) + service = AWSPolly(polly_client) + + tasks = [] + for combo in all_combinations: + preset_text = generate_preset(combo) + task = save_preset_audio(service, combo, preset_text) + tasks.append(task) + + await asyncio.gather(*tasks) + else: + for combo in all_combinations: + print(combo) + preset_text = generate_preset(combo) + print(f'{preset_text}\n\n') + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/scripts/listening_instructions/recordings/.gitkeep b/scripts/listening_instructions/recordings/.gitkeep new file mode 100644 index 0000000..e69de29