ENCOA-305

This commit is contained in:
Carlos-Mesquita
2025-01-05 14:09:49 +00:00
parent 4fc58523bc
commit b4d4afd83a
2 changed files with 73 additions and 6 deletions

View File

@@ -30,7 +30,7 @@ class ConversationPayload(BaseModel):
name: str name: str
gender: str gender: str
text: str text: str
voice: str voice: Optional[str] = None
class Dialog(BaseModel): class Dialog(BaseModel):
conversation: Optional[List[ConversationPayload]] = Field(default_factory=list) conversation: Optional[List[ConversationPayload]] = Field(default_factory=list)

View File

@@ -1,7 +1,7 @@
import asyncio import asyncio
from logging import getLogger from logging import getLogger
import random import random
from typing import Dict, Any from typing import Dict, Any, Union
from starlette.datastructures import UploadFile from starlette.datastructures import UploadFile
@@ -111,6 +111,15 @@ class ListeningService(IListeningService):
return dialog return dialog
async def generate_mp3(self, dto: Dialog) -> bytes:
    """Turn a dialog into audio, filling in missing voices first.

    Args:
        dto: The dialog to speak. If any segment has ``voice`` set to ``None``,
            the dialog is passed through ``_get_conversation_voices`` in
            unique-voice mode before synthesis; segments that already have a
            voice keep it.

    Returns:
        The bytes returned by ``self._tts.text_to_speech`` for the dialog.
    """
    # `conversation` is Optional on the model, so it can arrive as None. Treat that
    # like an empty dialog (nothing to assign) instead of raising TypeError here.
    segments = dto.conversation or []
    if any(segment.voice is None for segment in segments):
        dto = self._get_conversation_voices(dto, True)

    return await self._tts.text_to_speech(dto)
async def create_instructions(self, text: str) -> bytes: async def create_instructions(self, text: str) -> bytes:
@@ -263,7 +272,13 @@ class ListeningService(IListeningService):
) )
return {"dialog": response["monologue"]} return {"dialog": response["monologue"]}
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool): # TODO: This was a refactor from the previous ielts-be, don't know why there is a distinction between
# section 1 and 3, I think it would make sense to keep only the section 1 logic, only bringing this up since
# there would need to be a refactor of the POST /api/listening/media endpoint which imo is pointless
# https://bitbucket.org/ecropdev/ielts-be/src/676f660f3e80220e3db0418dbeef0b1c0f257edb/helper/exercises.py?at=release%2Fmongodb-migration
"""
def generate_listening_1_conversation(topic: str):
...
chosen_voices = [] chosen_voices = []
name_to_voice = {} name_to_voice = {}
for segment in response['conversation']: for segment in response['conversation']:
@@ -273,18 +288,70 @@ class ListeningService(IListeningService):
voice = name_to_voice[name] voice = name_to_voice[name]
else: else:
voice = None voice = None
while voice is None:
if segment['gender'].lower() == 'male':
available_voices = MALE_NEURAL_VOICES
else:
available_voices = FEMALE_NEURAL_VOICES
chosen_voice = random.choice(available_voices)['Id']
if chosen_voice not in chosen_voices:
voice = chosen_voice
chosen_voices.append(voice)
name_to_voice[name] = voice
segment['voice'] = voice
return response
def generate_listening_3_conversation(topic: str):
...
name_to_voice = {}
for segment in response['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
if segment['gender'].lower() == 'male':
voice = random.choice(MALE_NEURAL_VOICES)['Id']
else:
voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
name_to_voice[name] = voice
segment['voice'] = voice
return response
"""
def _get_conversation_voices(self, response: Union[Dict, Dialog], unique_voices_across_segments: bool):
    """Assign a voice, in place, to every conversation segment that lacks one.

    All segments spoken by the same ``name`` end up with the same voice. A voice
    that is already set on a segment is never overwritten; it is reused for that
    speaker's other segments.

    Args:
        response: A ``Dialog`` model (segments are read and written as
            attributes) or a dict holding a ``'conversation'`` list of dict
            segments (read and written by key). ``name`` is read from every
            segment that has no voice; ``gender`` is read the first time a
            speaker needs a voice.
        unique_voices_across_segments: ``True`` ("section 1"): a new speaker
            gets a voice no other speaker in this conversation uses, as long
            as one turns up within the retry cap. ``False`` ("section 3"): a
            new speaker gets a single random draw, so two speakers may share
            a voice.

    Returns:
        The same ``response`` object that was passed in.

    Raises:
        KeyError: Dict input without ``'conversation'``, or a voiceless dict
            segment without ``'name'`` / ``'gender'``.
    """
    # Random draws to try before accepting a voice that is already in use.
    # NOTE(review): the pool behind `_get_random_voice` is not visible from this
    # method. If it can be listed, sample from the unused voices instead of retrying.
    max_draws = 100

    is_model = isinstance(response, Dialog)
    # `Dialog.conversation` is Optional, so it may be None; treat that as "no segments".
    conversation = (response.conversation if is_model else response['conversation']) or []

    name_to_voice = {}
    used_voices = set()

    # Pass 1: learn the voices that are already set, so a speaker keeps the voice
    # they have elsewhere in the conversation and, in unique mode, nobody else gets it.
    for segment in conversation:
        if is_model:
            name, voice = segment.name, segment.voice
        else:
            name, voice = segment.get('name'), segment.get('voice')
        if voice is not None:
            used_voices.add(voice)
            if name is not None:
                name_to_voice.setdefault(name, voice)

    # Pass 2: fill in the segments that have no voice.
    for segment in conversation:
        # NOTE(review): a dict segment with an explicit 'voice': None still counts as
        # assigned here (unchanged from before), unlike a model segment. Confirm
        # whether dict callers can send that.
        has_voice = (segment.voice is not None) if is_model else ('voice' in segment)
        if has_voice:
            continue

        name = segment.name if is_model else segment['name']
        if name in name_to_voice:
            voice = name_to_voice[name]
        else:
            gender = segment.gender if is_model else segment['gender']
            if unique_voices_across_segments:
                # section 1: redraw until the voice is unused. The retry is capped so
                # that running out of unused voices cannot spin forever.
                for _ in range(max_draws):
                    voice = self._get_random_voice(gender)
                    if voice not in used_voices:
                        break
                else:
                    getLogger(__name__).warning(
                        "No unused %s voice found after %d draws; reusing %s for speaker %r",
                        gender, max_draws, voice, name,
                    )
            else:
                # section 3
                voice = self._get_random_voice(gender)
            name_to_voice[name] = voice
            used_voices.add(voice)

        if is_model:
            segment.voice = voice
        else:
            segment['voice'] = voice
    return response
@staticmethod @staticmethod