Generate questions endpoints working for all.

2023-11-12 23:40:24 +00:00
parent 274252bf92
commit 695d9b589a
7 changed files with 2143 additions and 312 deletions
--- a/helper/speech_to_text_helper.py
+++ b/helper/speech_to_text_helper.py
@@ -1,9 +1,11 @@
 import whisper
 import os
-import gtts
 import nltk
+import boto3
+import random
 nltk.download('words')
 from nltk.corpus import words
+from helper.constants import *

 def speech_to_text(file_path):
    if os.path.exists(file_path):
@@ -15,8 +17,72 @@ def speech_to_text(file_path):
        raise Exception("File " + file_path + " not found.")

 def text_to_speech(text: str, file_name: str):
-    tts = gtts.gTTS(text)
-    tts.save(file_name)
+    """Synthesize ``text`` with Amazon Polly and write it to ``file_name + ".mp3"``.
+
+    The voice is chosen at random from ``ALL_NEURAL_VOICES``; credentials are
+    read from the ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` env variables.
+    """
+    from contextlib import closing
+    client = boto3.client(
+        'polly',
+        region_name='eu-west-1',
+        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
+    )
+    tts_response = client.synthesize_speech(
+        Engine="neural", Text=text, OutputFormat="mp3",
+        VoiceId=random.choice(ALL_NEURAL_VOICES)['Id'],
+    )
+    # Read the audio first, and always close the Polly response stream.
+    with closing(tts_response['AudioStream']) as stream:
+        audio = stream.read()
+    file_name = file_name + ".mp3"
+    with open(file_name, "wb") as f:
+        f.write(audio)
+
+    print("Speech segments saved to " + file_name)
+
+def conversation_text_to_speech(conversation: list, file_name: str):
+    """Synthesize a multi-speaker conversation with Amazon Polly into one MP3.
+
+    Each segment is a dict with a ``text`` key and either a ``voice`` (used as
+    the Polly ``VoiceId``) or ``name`` and ``gender`` keys. A segment without
+    ``voice`` gets one stored on it in place: a random ``MALE_NEURAL_VOICES`` id
+    if ``gender`` is "male" (any case), else a random ``FEMALE_NEURAL_VOICES``
+    id, reused per ``name``. Audio is joined in order into ``file_name + ".mp3"``.
+    """
+    from contextlib import closing
+    # Remember the voice picked for each speaker name so it stays consistent.
+    name_to_voice = {}
+    for segment in conversation:
+        if 'voice' in segment:
+            continue
+        name = segment['name']
+        if name not in name_to_voice:
+            is_male = segment['gender'].lower() == 'male'
+            voices = MALE_NEURAL_VOICES if is_male else FEMALE_NEURAL_VOICES
+            name_to_voice[name] = random.choice(voices)['Id']
+        segment['voice'] = name_to_voice[name]
+    client = boto3.client(
+        'polly',
+        region_name='eu-west-1',
+        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
+    )
+    audio_segments = []
+    for segment in conversation:
+        response = client.synthesize_speech(
+            Engine="neural", Text=segment["text"],
+            OutputFormat="mp3", VoiceId=segment["voice"],
+        )
+        # Close each response stream as soon as its audio has been read.
+        with closing(response['AudioStream']) as stream:
+            audio_segments.append(stream.read())
+    file_name = file_name + ".mp3"
+    with open(file_name, "wb") as f:
+        f.write(b"".join(audio_segments))
+
+    print("Speech segments saved to " + file_name)

 def has_words(text: str):
    english_words = set(words.words())