ENCOA-305

This commit is contained in:
Carlos-Mesquita
2025-01-05 14:09:49 +00:00
parent 4fc58523bc
commit b4d4afd83a
2 changed files with 73 additions and 6 deletions

View File

@@ -30,7 +30,7 @@ class ConversationPayload(BaseModel):
name: str
gender: str
text: str
voice: str
voice: Optional[str] = None
class Dialog(BaseModel):
    # Ordered list of dialog segments. default_factory gives each instance its
    # own empty list; Optional additionally allows an explicit null in payloads.
    conversation: Optional[List[ConversationPayload]] = Field(default_factory=list)

View File

@@ -1,7 +1,7 @@
import asyncio
from logging import getLogger
import random
from typing import Dict, Any
from typing import Dict, Any, Union
from starlette.datastructures import UploadFile
@@ -111,6 +111,15 @@ class ListeningService(IListeningService):
return dialog
async def generate_mp3(self, dto: Dialog) -> bytes:
    """Synthesize the dialog's conversation to MP3 audio.

    If any conversation segment is missing a voice, voices are assigned
    first (with unique voices across segments) before synthesis.

    :param dto: Dialog whose ``conversation`` segments may lack a voice.
    :return: audio bytes produced by the TTS backend.
    """
    # any() short-circuits on the first unvoiced segment, replacing the
    # manual voices_assigned flag loop that always scanned every segment.
    if any(segment.voice is None for segment in dto.conversation):
        dto = self._get_conversation_voices(dto, True)
    return await self._tts.text_to_speech(dto)
async def create_instructions(self, text: str) -> bytes:
@@ -263,7 +272,13 @@ class ListeningService(IListeningService):
)
return {"dialog": response["monologue"]}
def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
    # TODO: This was a refactor from the previous ielts-be. It's unclear why there is a
    # distinction between section 1 and section 3; it would make sense to keep only the
    # section 1 logic. Only bringing this up since that would require a refactor of the
    # POST /api/listening/media endpoint, which imo is pointless.
    # https://bitbucket.org/ecropdev/ielts-be/src/676f660f3e80220e3db0418dbeef0b1c0f257edb/helper/exercises.py?at=release%2Fmongodb-migration
    """
    Legacy voice-assignment logic from the previous ielts-be, kept for reference only
    (not executed). Both variants mutate ``response['conversation']`` in place.

    def generate_listening_1_conversation(topic: str):
        ...
        chosen_voices = []
        name_to_voice = {}
        for segment in response['conversation']:
            ...
                voice = name_to_voice[name]
            else:
                voice = None
                while voice is None:
                    if segment['gender'].lower() == 'male':
                        available_voices = MALE_NEURAL_VOICES
                    else:
                        available_voices = FEMALE_NEURAL_VOICES
                    chosen_voice = random.choice(available_voices)['Id']
                    if chosen_voice not in chosen_voices:
                        voice = chosen_voice
                chosen_voices.append(voice)
                name_to_voice[name] = voice
            segment['voice'] = voice
        return response

    def generate_listening_3_conversation(topic: str):
        ...
        name_to_voice = {}
        for segment in response['conversation']:
            if 'voice' not in segment:
                name = segment['name']
                if name in name_to_voice:
                    voice = name_to_voice[name]
                else:
                    if segment['gender'].lower() == 'male':
                        voice = random.choice(MALE_NEURAL_VOICES)['Id']
                    else:
                        voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
                    name_to_voice[name] = voice
                segment['voice'] = voice
        return response
    """
def _get_conversation_voices(self, response: Union[Dict, Dialog], unique_voices_across_segments: bool):
    """Assign a voice to every conversation segment that lacks one.

    Accepts either a plain dict payload (``response['conversation']`` holding
    segment dicts) or a ``Dialog`` model (segments with attribute access).
    Each speaker, identified by name, keeps a single voice across all of their
    segments; the segment's gender selects which voice pool is drawn from.

    :param response: dict with a ``conversation`` list, or a ``Dialog`` model.
    :param unique_voices_across_segments: when True (section 1 behaviour),
        re-draw until the voice is not used by any other speaker in this
        conversation; when False (section 3), any gender-appropriate voice
        may repeat across speakers.
    :return: the same ``response`` object, with voices filled in, mutated
        in place.
    """
    chosen_voices = []   # voices already claimed (section-1 uniqueness)
    name_to_voice = {}   # speaker name -> assigned voice id
    is_model = isinstance(response, Dialog)
    conversation = response.conversation if is_model else response['conversation']
    for segment in conversation:
        needs_voice = (segment.voice is None) if is_model else ('voice' not in segment)
        if needs_voice:
            name = segment.name if is_model else segment['name']
            if name in name_to_voice:
                voice = name_to_voice[name]
            else:
                voice = None
                gender = segment.gender if is_model else segment['gender']
                # section 1: keep drawing until we get a voice no other
                # speaker holds. NOTE(review): this loops forever if there
                # are more speakers of one gender than voices in its pool —
                # presumably the pools are large enough; confirm upstream.
                if unique_voices_across_segments:
                    while voice is None:
                        candidate = self._get_random_voice(gender)
                        if candidate not in chosen_voices:
                            voice = candidate
                    chosen_voices.append(voice)
                # section 3: duplicates across speakers are acceptable
                else:
                    voice = self._get_random_voice(gender)
                name_to_voice[name] = voice
            if is_model:
                segment.voice = voice
            else:
                segment['voice'] = voice
    return response
@staticmethod