diff --git a/ielts_be/dtos/listening.py b/ielts_be/dtos/listening.py
index d0a62da..09dbefa 100644
--- a/ielts_be/dtos/listening.py
+++ b/ielts_be/dtos/listening.py
@@ -30,7 +30,7 @@ class ConversationPayload(BaseModel):
     name: str
     gender: str
     text: str
-    voice: str
+    voice: Optional[str] = None
 
 class Dialog(BaseModel):
     conversation: Optional[List[ConversationPayload]] = Field(default_factory=list)
diff --git a/ielts_be/services/impl/exam/listening/__init__.py b/ielts_be/services/impl/exam/listening/__init__.py
index a2a81f0..a9a3345 100644
--- a/ielts_be/services/impl/exam/listening/__init__.py
+++ b/ielts_be/services/impl/exam/listening/__init__.py
@@ -1,7 +1,7 @@
 import asyncio
 from logging import getLogger
 import random
-from typing import Dict, Any
+from typing import Dict, Any, Union
 
 from starlette.datastructures import UploadFile
 
@@ -111,6 +111,15 @@ class ListeningService(IListeningService):
         return dialog
 
     async def generate_mp3(self, dto: Dialog) -> bytes:
+        """Return TTS audio for ``dto``, first assigning a voice to any segment that lacks one."""
+        # `conversation` is Optional on the DTO, so an explicit null must not
+        # break the scan below; treat it as "no segments".
+        segments = dto.conversation or []
+
+        if any(segment.voice is None for segment in segments):
+            # True selects the unique-voice branch (one voice per speaker name, no repeats).
+            dto = self._get_conversation_voices(dto, True)
+
         return await self._tts.text_to_speech(dto)
 
     async def create_instructions(self, text: str) -> bytes:
@@ -263,7 +272,13 @@ class ListeningService(IListeningService):
         )
         return {"dialog": response["monologue"]}
 
-    def _get_conversation_voices(self, response: Dict, unique_voices_across_segments: bool):
+    # TODO: This was refactored from the previous ielts-be. It is unclear why there is a distinction between
+    #   section 1 and section 3; it would probably make sense to keep only the section 1 logic. Only raising this
+    #   because it would require a refactor of the POST /api/listening/media endpoint, which IMO is pointless.
+    # https://bitbucket.org/ecropdev/ielts-be/src/676f660f3e80220e3db0418dbeef0b1c0f257edb/helper/exercises.py?at=release%2Fmongodb-migration
+    """
+    def generate_listening_1_conversation(topic: str):
+        ...
         chosen_voices = []
         name_to_voice = {}
         for segment in response['conversation']:
@@ -273,18 +288,70 @@ class ListeningService(IListeningService):
                     voice = name_to_voice[name]
                 else:
                     voice = None
+                    while voice is None:
+                        if segment['gender'].lower() == 'male':
+                            available_voices = MALE_NEURAL_VOICES
+                        else:
+                            available_voices = FEMALE_NEURAL_VOICES
+    
+                        chosen_voice = random.choice(available_voices)['Id']
+                        if chosen_voice not in chosen_voices:
+                            voice = chosen_voice
+                            chosen_voices.append(voice)
+                    name_to_voice[name] = voice
+                segment['voice'] = voice
+        return response
+
+
+    def generate_listening_3_conversation(topic: str):
+        ...
+        name_to_voice = {}
+        for segment in response['conversation']:
+            if 'voice' not in segment:
+                name = segment['name']
+                if name in name_to_voice:
+                    voice = name_to_voice[name]
+                else:
+                    if segment['gender'].lower() == 'male':
+                        voice = random.choice(MALE_NEURAL_VOICES)['Id']
+                    else:
+                        voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
+                    name_to_voice[name] = voice
+                segment['voice'] = voice
+        return response
+    """
+    def _get_conversation_voices(self, response: Union[Dict, Dialog], unique_voices_across_segments: bool) -> Union[Dict, Dialog]:
+        chosen_voices = []  # voices already handed out during this call
+        name_to_voice = {}  # speaker name -> voice picked for that speaker
+        # Accepts either a Dialog model (attribute access) or a plain dict (key access); mutates and returns it.
+        is_model = isinstance(response, Dialog)
+        conversation = response.conversation if is_model else response['conversation']
+        # NOTE(review): the unique-voice retry loop below has no visible exit if every voice for a gender is taken - confirm.
+        for segment in conversation:
+            voice_check = (segment.voice is None) if is_model else ('voice' not in segment)  # model: unset is None; dict: key absent
+            if voice_check:  # segments that already carry a voice are left untouched
+                name = segment.name if is_model else segment['name']
+                if name in name_to_voice:  # same speaker seen earlier in this call -> reuse that pick
+                    voice = name_to_voice[name]
+                else:
+                    voice = None
+                    gender = segment.gender if is_model else segment['gender']
                     # section 1
                     if unique_voices_across_segments:
                         while voice is None:
-                            chosen_voice = self._get_random_voice(segment['gender'])
+                            chosen_voice = self._get_random_voice(gender)
                             if chosen_voice not in chosen_voices:
                                 voice = chosen_voice
                                 chosen_voices.append(voice)
                     # section 3
                     else:
-                        voice = self._get_random_voice(segment['gender'])
+                        voice = self._get_random_voice(gender)
                     name_to_voice[name] = voice
-                segment['voice'] = voice
+                # Write the voice back using the access style that matches the input type.
+                if is_model:
+                    segment.voice = voice
+                else:
+                    segment['voice'] = voice
         return response
 
     @staticmethod