diff --git a/ielts_be/dtos/listening.py b/ielts_be/dtos/listening.py index d0a62da..09dbefa 100644 --- a/ielts_be/dtos/listening.py +++ b/ielts_be/dtos/listening.py @@ -30,7 +30,7 @@ class ConversationPayload(BaseModel): name: str gender: str text: str - voice: str + voice: Optional[str] = None class Dialog(BaseModel): conversation: Optional[List[ConversationPayload]] = Field(default_factory=list) diff --git a/ielts_be/services/impl/exam/listening/__init__.py b/ielts_be/services/impl/exam/listening/__init__.py index a2a81f0..a9a3345 100644 --- a/ielts_be/services/impl/exam/listening/__init__.py +++ b/ielts_be/services/impl/exam/listening/__init__.py @@ -1,7 +1,7 @@ import asyncio from logging import getLogger import random -from typing import Dict, Any +from typing import Dict, Any, Union from starlette.datastructures import UploadFile @@ -111,6 +111,15 @@ class ListeningService(IListeningService): return dialog async def generate_mp3(self, dto: Dialog) -> bytes: + convo = dto.conversation + voices_assigned = True + for segment in convo: + if segment.voice is None: + voices_assigned = False + + if not voices_assigned: + dto = self._get_conversation_voices(dto, True) + return await self._tts.text_to_speech(dto) async def create_instructions(self, text: str) -> bytes: @@ -263,7 +272,13 @@ class ListeningService(IListeningService): ) return {"dialog": response["monologue"]} - def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool): + # TODO: This was a refactor from the previous ielts-be, don't know why there is a distinction between + # section 1 and 3, I think it would make sense to keep only the section 1 logic, only bringing this up since + # there would need to be a refactor of the POST /api/listening/media endpoint which imo is pointless + # https://bitbucket.org/ecropdev/ielts-be/src/676f660f3e80220e3db0418dbeef0b1c0f257edb/helper/exercises.py?at=release%2Fmongodb-migration + """ + def 
generate_listening_1_conversation(topic: str): + ... chosen_voices = [] name_to_voice = {} for segment in response['conversation']: @@ -273,18 +288,70 @@ class ListeningService(IListeningService): voice = name_to_voice[name] else: voice = None + while voice is None: + if segment['gender'].lower() == 'male': + available_voices = MALE_NEURAL_VOICES + else: + available_voices = FEMALE_NEURAL_VOICES + + chosen_voice = random.choice(available_voices)['Id'] + if chosen_voice not in chosen_voices: + voice = chosen_voice + chosen_voices.append(voice) + name_to_voice[name] = voice + segment['voice'] = voice + return response + + + def generate_listening_3_conversation(topic: str): + ... + name_to_voice = {} + for segment in response['conversation']: + if 'voice' not in segment: + name = segment['name'] + if name in name_to_voice: + voice = name_to_voice[name] + else: + if segment['gender'].lower() == 'male': + voice = random.choice(MALE_NEURAL_VOICES)['Id'] + else: + voice = random.choice(FEMALE_NEURAL_VOICES)['Id'] + name_to_voice[name] = voice + segment['voice'] = voice + return response + """ + def _get_conversation_voices(self, response: Union[Dict, Dialog], unique_voices_across_segments: bool): + chosen_voices = [] + name_to_voice = {} + + is_model = isinstance(response, Dialog) + conversation = response.conversation if is_model else response['conversation'] + + for segment in conversation: + voice_check = (segment.voice is None) if is_model else ('voice' not in segment) + if voice_check: + name = segment.name if is_model else segment['name'] + if name in name_to_voice: + voice = name_to_voice[name] + else: + voice = None + gender = segment.gender if is_model else segment['gender'] # section 1 if unique_voices_across_segments: while voice is None: - chosen_voice = self._get_random_voice(segment['gender']) + chosen_voice = self._get_random_voice(gender) if chosen_voice not in chosen_voices: voice = chosen_voice chosen_voices.append(voice) # section 3 else: - voice = 
self._get_random_voice(segment['gender']) + voice = self._get_random_voice(gender) name_to_voice[name] = voice - segment['voice'] = voice + + if is_model: + segment.voice = voice + else: + segment['voice'] = voice return response @staticmethod