diff --git a/ielts_be/api/exam/listening.py b/ielts_be/api/exam/listening.py index 5358dc0..b969cc3 100644 --- a/ielts_be/api/exam/listening.py +++ b/ielts_be/api/exam/listening.py @@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, Path, Query, UploadFile from ielts_be.middlewares import Authorized, IsAuthenticatedViaBearerToken from ielts_be.controllers import IListeningController from ielts_be.configs.constants import EducationalContent -from ielts_be.dtos.listening import GenerateListeningExercises, Dialog +from ielts_be.dtos.listening import ListeningExercisesDTO, Dialog, InstructionsDTO controller = "listening_controller" listening_router = APIRouter() @@ -64,6 +64,17 @@ async def transcribe_dialog( return await listening_controller.transcribe_dialog(audio) +@listening_router.post( + '/instructions', + dependencies=[Depends(Authorized([IsAuthenticatedViaBearerToken]))] +) +@inject +async def create_instructions( + dto: InstructionsDTO, + listening_controller: IListeningController = Depends(Provide[controller]) +): + return await listening_controller.create_instructions(dto.text) + @listening_router.post( '/', @@ -71,7 +82,7 @@ async def transcribe_dialog( ) @inject async def generate_listening_exercise( - dto: GenerateListeningExercises, + dto: ListeningExercisesDTO, listening_controller: IListeningController = Depends(Provide[controller]) ): return await listening_controller.get_listening_question(dto) diff --git a/ielts_be/api/exam/speaking.py b/ielts_be/api/exam/speaking.py index 4a18a0b..2b955cd 100644 --- a/ielts_be/api/exam/speaking.py +++ b/ielts_be/api/exam/speaking.py @@ -1,5 +1,5 @@ import random -from typing import Optional +from typing import Optional, List from dependency_injector.wiring import inject, Provide from fastapi import APIRouter, Path, Query, Depends @@ -59,7 +59,7 @@ async def get_speaking_task( topic: Optional[str] = Query(None), first_topic: Optional[str] = Query(None), second_topic: Optional[str] = Query(None), - difficulty: 
str = Query(default=random.choice(EducationalContent.DIFFICULTIES)), + difficulty: Optional[str] = None, speaking_controller: ISpeakingController = Depends(Provide[controller]) ): if not second_topic: @@ -67,5 +67,8 @@ async def get_speaking_task( else: topic_or_first_topic = first_topic if first_topic else random.choice(EducationalContent.MTI_TOPICS) + if not difficulty: + difficulty = random.choice(EducationalContent.DIFFICULTIES) + second_topic = second_topic if second_topic else random.choice(EducationalContent.MTI_TOPICS) return await speaking_controller.get_speaking_part(task, topic_or_first_topic, second_topic, difficulty) diff --git a/ielts_be/api/exam/writing.py b/ielts_be/api/exam/writing.py index ed304dd..7988acb 100644 --- a/ielts_be/api/exam/writing.py +++ b/ielts_be/api/exam/writing.py @@ -1,4 +1,5 @@ import random +from typing import Optional, List from dependency_injector.wiring import inject, Provide from fastapi import APIRouter, Path, Query, Depends, UploadFile, File @@ -19,7 +20,7 @@ writing_router = APIRouter() async def generate_writing_academic( task: int = Path(..., ge=1, le=2), file: UploadFile = File(...), - difficulty: str = Query(default=None), + difficulty: Optional[str] = None, writing_controller: IWritingController = Depends(Provide[controller]) ): difficulty = random.choice(EducationalContent.DIFFICULTIES) if not difficulty else difficulty @@ -33,7 +34,7 @@ @inject async def generate_writing( task: int = Path(..., ge=1, le=2), - difficulty: str = Query(default=None), + difficulty: Optional[str] = None, topic: str = Query(default=None), writing_controller: IWritingController = Depends(Provide[controller]) ): diff --git a/ielts_be/controllers/abc/exam/listening.py b/ielts_be/controllers/abc/exam/listening.py index 8aa8428..8dbed28 100644 --- a/ielts_be/controllers/abc/exam/listening.py +++ b/ielts_be/controllers/abc/exam/listening.py @@ -24,3 +24,7 @@ class
IListeningController(ABC): @abstractmethod async def transcribe_dialog(self, audio: UploadFile): pass + + @abstractmethod + async def create_instructions(self, text: str): + pass diff --git a/ielts_be/controllers/impl/exam/listening.py b/ielts_be/controllers/impl/exam/listening.py index 2be5cfb..fbc3e1e 100644 --- a/ielts_be/controllers/impl/exam/listening.py +++ b/ielts_be/controllers/impl/exam/listening.py @@ -5,7 +5,7 @@ from fastapi.responses import StreamingResponse, Response from ielts_be.controllers import IListeningController from ielts_be.services import IListeningService -from ielts_be.dtos.listening import GenerateListeningExercises, Dialog +from ielts_be.dtos.listening import ListeningExercisesDTO, Dialog class ListeningController(IListeningController): @@ -23,12 +23,26 @@ class ListeningController(IListeningController): async def generate_listening_dialog(self, section_id: int, topic: str, difficulty: str): return await self._service.generate_listening_dialog(section_id, topic, difficulty) - async def get_listening_question(self, dto: GenerateListeningExercises): + async def get_listening_question(self, dto: ListeningExercisesDTO): return await self._service.get_listening_question(dto) async def generate_mp3(self, dto: Dialog): mp3 = await self._service.generate_mp3(dto) + return self._mp3_response(mp3) + async def create_instructions(self, text: str): + mp3 = await self._service.create_instructions(text) + return self._mp3_response(mp3) + + async def transcribe_dialog(self, audio: UploadFile): + dialog = await self._service.transcribe_dialog(audio) + if dialog is None: + return Response(status_code=500) + + return dialog + + @staticmethod + def _mp3_response(mp3: bytes): return StreamingResponse( content=io.BytesIO(mp3), media_type="audio/mpeg", @@ -37,10 +51,3 @@ class ListeningController(IListeningController): "Content-Disposition": "attachment;filename=speech.mp3" } ) - - async def transcribe_dialog(self, audio: UploadFile): - dialog = await 
self._service.transcribe_dialog(audio) - if dialog is None: - return Response(status_code=500) - - return dialog diff --git a/ielts_be/dtos/listening.py b/ielts_be/dtos/listening.py index 8577d3f..d0a62da 100644 --- a/ielts_be/dtos/listening.py +++ b/ielts_be/dtos/listening.py @@ -18,11 +18,14 @@ class ListeningExercises(BaseModel): type: ListeningExerciseType quantity: int -class GenerateListeningExercises(BaseModel): +class ListeningExercisesDTO(BaseModel): text: str exercises: List[ListeningExercises] difficulty: Optional[str] +class InstructionsDTO(BaseModel): + text: str + class ConversationPayload(BaseModel): name: str gender: str diff --git a/ielts_be/dtos/reading.py b/ielts_be/dtos/reading.py index 2de9085..192f8b5 100644 --- a/ielts_be/dtos/reading.py +++ b/ielts_be/dtos/reading.py @@ -14,4 +14,4 @@ class ReadingExercise(BaseModel): class ReadingDTO(BaseModel): text: str = Field(...) exercises: List[ReadingExercise] = Field(...) - difficulty: str = Field(random.choice(EducationalContent.DIFFICULTIES)) + difficulty: Optional[str] = None diff --git a/ielts_be/services/abc/exam/listening.py b/ielts_be/services/abc/exam/listening.py index 85f3d57..723bf45 100644 --- a/ielts_be/services/abc/exam/listening.py +++ b/ielts_be/services/abc/exam/listening.py @@ -20,6 +20,10 @@ class IListeningService(ABC): async def generate_mp3(self, dto) -> bytes: pass + @abstractmethod + async def create_instructions(self, text: str) -> bytes: + pass + @abstractmethod async def import_exam( self, exercises: UploadFile, solutions: UploadFile = None diff --git a/ielts_be/services/abc/third_parties/tts.py b/ielts_be/services/abc/third_parties/tts.py index 018cc26..6f67c62 100644 --- a/ielts_be/services/abc/third_parties/tts.py +++ b/ielts_be/services/abc/third_parties/tts.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Union +from typing import Union, Optional class ITextToSpeechService(ABC): @@ -9,14 +9,5 @@ class ITextToSpeechService(ABC): pass 
@abstractmethod - async def text_to_speech(self, dialog) -> bytes: + async def text_to_speech(self, dialog, include_final_cue = True) -> bytes: pass - - @abstractmethod - async def _conversation_to_speech(self, conversation: list): - pass - - @abstractmethod - async def _text_to_speech(self, text: str): - pass - diff --git a/ielts_be/services/impl/exam/listening/__init__.py b/ielts_be/services/impl/exam/listening/__init__.py index 876a3a0..a2a81f0 100644 --- a/ielts_be/services/impl/exam/listening/__init__.py +++ b/ielts_be/services/impl/exam/listening/__init__.py @@ -3,10 +3,9 @@ from logging import getLogger import random from typing import Dict, Any -import aiofiles from starlette.datastructures import UploadFile -from ielts_be.dtos.listening import GenerateListeningExercises, Dialog, ListeningExercises +from ielts_be.dtos.listening import ListeningExercisesDTO, Dialog, ListeningExercises, ConversationPayload from ielts_be.exceptions.exceptions import TranscriptionException from ielts_be.repositories import IFileStorage, IDocumentStore from ielts_be.services import IListeningService, ILLMService, ITextToSpeechService, ISpeechToTextService @@ -114,7 +113,16 @@ class ListeningService(IListeningService): async def generate_mp3(self, dto: Dialog) -> bytes: return await self._tts.text_to_speech(dto) - async def get_listening_question(self, dto: GenerateListeningExercises): + async def create_instructions(self, text: str) -> bytes: + script = Dialog(conversation=[ConversationPayload(**{ + "text": text, + "voice": "Matthew", + "name": "", + "gender": "" + })]) + return await self._tts.text_to_speech(script, False) + + async def get_listening_question(self, dto: ListeningExercisesDTO): start_id = 1 exercise_tasks = [] diff --git a/ielts_be/services/impl/third_parties/aws_polly.py b/ielts_be/services/impl/third_parties/aws_polly.py index 87f13ca..381c8f3 100644 --- a/ielts_be/services/impl/third_parties/aws_polly.py +++ b/ielts_be/services/impl/third_parties/aws_polly.py 
@@ -1,4 +1,5 @@ import random +from typing import Optional from aiobotocore.client import BaseClient @@ -21,7 +22,7 @@ class AWSPolly(ITextToSpeechService): ) return await tts_response['AudioStream'].read() - async def text_to_speech(self, dialog: Dialog) -> bytes: + async def text_to_speech(self, dialog: Dialog, include_final_cue = True) -> bytes: if not dialog.conversation and not dialog.monologue: raise ValueError("Unsupported argument for text_to_speech") @@ -30,13 +31,14 @@ else: audio_segments = await self._conversation_to_speech(dialog) - final_message = await self.synthesize_speech( - "This audio recording, for the listening exercise, has finished.", - "Stephen" - ) + if include_final_cue: + final_message = await self.synthesize_speech( + "This audio recording, for the listening exercise, has finished.", + "Stephen" + ) - # Add finish message - audio_segments.append(final_message) + # Add finish message + audio_segments.append(final_message) # Combine the audio segments into a single audio file combined_audio = b"".join(audio_segments) diff --git a/scripts/listening_instructions/gen_listening_instructions.py b/scripts/listening_instructions/gen_listening_instructions.py new file mode 100644 index 0000000..650ae0a --- /dev/null +++ b/scripts/listening_instructions/gen_listening_instructions.py @@ -0,0 +1,98 @@ +import os +import asyncio +import contextlib +import aioboto3 +from itertools import combinations +from dotenv import load_dotenv +import aiofiles + +from ielts_be.dtos.listening import Dialog, ConversationPayload +from ielts_be.services.impl import AWSPolly + +SECTION_DESCRIPTIONS = { + 1: "a conversation between two people in an everyday social context", + 2: "a monologue set in a social context", + 3: "a conversation between up to four individuals in an educational context", + 4: "a monologue about an academic subject" +} + +def generate_preset(selected_sections): + sections = [SECTION_DESCRIPTIONS[i] for i
in selected_sections] + + if len(selected_sections) >= 3: + intro = "You will hear a number of different recordings and you will have to answer questions on what you hear." + play_text = "All the recordings can be played three times." + final_text = " Pay close attention to the audio recordings and answer the questions accordingly." + else: + num_word = "one" if len(selected_sections) == 1 else "two" + recording_text = "recording" if len(selected_sections) == 1 else "recordings" + intro = f"You will hear {num_word} {recording_text} and you will have to answer questions on what you hear." + play_text = f"The {recording_text} can be played three times." + final_text = f" Pay close attention to the audio {recording_text} and answer the questions accordingly." + + instructions = f" There will be time for you to read the instructions and questions and you will have a chance to check your work. {play_text}" + + if len(sections) == 1: + parts = f"The recording consists of {sections[0]}." + else: + parts = f"The module is in {len(sections)} parts. " + for i, section in enumerate(sections): + ordinal = ["first", "second", "third", "fourth"][i] + parts += f"In the {ordinal} part you will hear {section}. 
" + + return f"{intro}{instructions} {parts}{final_text}" + +async def save_preset_audio(service, combo, preset_text): + script = Dialog(conversation=[ConversationPayload(**{ + "text": preset_text, + "voice": "Matthew", + "name": "", + "gender": "" + })]) + + filename = f"./recordings/presetInstructions_{'_'.join(map(str, combo))}.mp3" + audio_bytes = await service.text_to_speech(script, False) + + os.makedirs("./recordings", exist_ok=True) + async with aiofiles.open(filename, "wb") as f: + await f.write(audio_bytes) + + +async def main(generate = False): + load_dotenv() + sections = [1, 2, 3, 4] + all_combinations = [] + + for length in range(1, len(sections) + 1): + combos = list(combinations(sections, length)) + all_combinations.extend(combos) + + if generate: + async with contextlib.AsyncExitStack() as context_stack: + session = aioboto3.Session() + polly_client = await context_stack.enter_async_context( + session.client( + 'polly', + region_name='eu-west-1', + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID") + ) + ) + service = AWSPolly(polly_client) + + tasks = [] + for combo in all_combinations: + preset_text = generate_preset(combo) + task = save_preset_audio(service, combo, preset_text) + tasks.append(task) + + await asyncio.gather(*tasks) + else: + for combo in all_combinations: + print(combo) + preset_text = generate_preset(combo) + print(f'{preset_text}\n\n') + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/scripts/listening_instructions/recordings/.gitkeep b/scripts/listening_instructions/recordings/.gitkeep new file mode 100644 index 0000000..e69de29