Generate questions endpoints working for all.

This commit is contained in:
Cristiano Ferreira
2023-11-12 23:40:24 +00:00
parent 274252bf92
commit 695d9b589a
7 changed files with 2143 additions and 312 deletions

View File

@@ -1,9 +1,11 @@
import whisper
import os
import gtts
import nltk
import boto3
import random
nltk.download('words')
from nltk.corpus import words
from helper.constants import *
def speech_to_text(file_path):
if os.path.exists(file_path):
@@ -15,8 +17,72 @@ def speech_to_text(file_path):
raise Exception("File " + file_path + " not found.")
def text_to_speech(text: str, file_name: str):
    """Synthesize *text* with Amazon Polly and save it as ``<file_name>.mp3``.

    The neural engine is used with a voice picked at random from
    ``ALL_NEURAL_VOICES`` (each entry is indexed by its ``"Id"`` key).
    The Polly client is created for region ``eu-west-1`` using the
    ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` environment variables.

    Args:
        text: The text to convert to speech.
        file_name: Output path WITHOUT extension; ``".mp3"`` is appended
            before the file is written.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from contextlib import closing

    client = boto3.client(
        'polly',
        region_name='eu-west-1',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    )

    tts_response = client.synthesize_speech(
        Engine="neural",
        Text=text,
        OutputFormat="mp3",
        VoiceId=random.choice(ALL_NEURAL_VOICES)['Id'],
    )

    # AudioStream is a streaming HTTP body: close it once read so the
    # underlying connection is released instead of leaking.
    with closing(tts_response['AudioStream']) as stream:
        audio = stream.read()

    file_name = file_name + ".mp3"
    with open(file_name, "wb") as f:
        f.write(audio)

    print("Speech segments saved to " + file_name)
def conversation_text_to_speech(conversation: list, file_name: str):
    """Render a multi-speaker conversation to ``<file_name>.mp3`` via Amazon Polly.

    Every segment is a dict with a ``"text"`` key and either a ``"voice"``
    key (passed to Polly as ``VoiceId``) or ``"name"`` and ``"gender"`` keys.
    A segment without a voice gets one chosen at random: from
    ``MALE_NEURAL_VOICES`` when its gender is "male" (case-insensitive),
    otherwise from ``FEMALE_NEURAL_VOICES``. Within one call, the same
    ``"name"`` is always mapped to the same voice.

    Side effect: the chosen voice is written back to ``segment["voice"]``,
    so the dicts in the caller's list are modified in place.

    Args:
        conversation: List of segment dicts as described above.
        file_name: Output path WITHOUT extension; ``".mp3"`` is appended
            before the file is written.

    Raises:
        KeyError: If a segment lacks ``"text"``, or lacks ``"voice"`` and
            also ``"name"`` (or ``"gender"`` for a not-yet-seen name).
    """
    # Function-scope import so the block does not depend on file-level edits.
    from contextlib import closing

    # Speaker name -> Polly voice id, so a speaker keeps one voice throughout.
    name_to_voice = {}
    for segment in conversation:
        if 'voice' in segment:
            continue
        name = segment['name']
        if name not in name_to_voice:
            # Gender is only consulted the first time a speaker is seen.
            if segment['gender'].lower() == 'male':
                pool = MALE_NEURAL_VOICES
            else:
                pool = FEMALE_NEURAL_VOICES
            name_to_voice[name] = random.choice(pool)['Id']
        segment['voice'] = name_to_voice[name]

    client = boto3.client(
        'polly',
        region_name='eu-west-1',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    )

    # Synthesize each segment in order and collect the raw MP3 bytes.
    audio_segments = []
    for segment in conversation:
        response = client.synthesize_speech(
            Engine="neural",
            Text=segment["text"],
            OutputFormat="mp3",
            VoiceId=segment["voice"],
        )
        # Close every stream after reading; otherwise one HTTP connection
        # per segment is left open.
        with closing(response['AudioStream']) as stream:
            audio_segments.append(stream.read())

    # The per-segment MP3 payloads are concatenated into one file.
    combined_audio = b"".join(audio_segments)

    file_name = file_name + ".mp3"
    with open(file_name, "wb") as f:
        f.write(combined_audio)

    print("Speech segments saved to " + file_name)
def has_words(text: str):
english_words = set(words.words())