ENCOA-295

This commit is contained in:
Carlos-Mesquita
2024-12-26 12:31:22 +00:00
parent 09d6242360
commit 9bfad2d47f
13 changed files with 170 additions and 38 deletions

View File

@@ -0,0 +1,98 @@
import os
import asyncio
import contextlib
import aioboto3
from itertools import combinations
from dotenv import load_dotenv
import aiofiles
from ielts_be.dtos.listening import Dialog, ConversationPayload
from ielts_be.services.impl import AWSPolly
# Spoken description of each listening section, keyed by section number.
# generate_preset() looks sections up here and splices the phrase into
# sentences such as "In the first part you will hear <description>."
SECTION_DESCRIPTIONS = {
    1: "a conversation between two people in an everyday social context",
    2: "a monologue set in a social context",
    3: "a conversation between up to four individuals in an educational context",
    4: "a monologue about an academic subject"
}
def generate_preset(selected_sections):
    """Build the instruction text read out before a listening exam.

    The wording adapts to how many sections are selected: singular or
    plural "recording(s)", a "number of different recordings" intro for
    three or more, and either a one-sentence description (single section)
    or a part-by-part walkthrough (several sections).

    Args:
        selected_sections: Section numbers, in the order they should be
            announced. Each must be a key of ``SECTION_DESCRIPTIONS``.

    Returns:
        The complete instruction text as a single string.

    Raises:
        ValueError: If no section is selected.
        KeyError: If a section number is not in ``SECTION_DESCRIPTIONS``.
        IndexError: If more than four sections are given (only four
            ordinals are defined).
    """
    sections = [SECTION_DESCRIPTIONS[i] for i in selected_sections]
    count = len(sections)

    # An empty selection used to fall through to the "two recordings"
    # wording followed by "The module is in 0 parts."; fail loudly instead.
    if count == 0:
        raise ValueError("selected_sections must contain at least one section")

    if count >= 3:
        intro = "You will hear a number of different recordings and you will have to answer questions on what you hear."
        play_text = "All the recordings can be played three times."
        final_text = " Pay close attention to the audio recordings and answer the questions accordingly."
    else:
        num_word = "one" if count == 1 else "two"
        recording_text = "recording" if count == 1 else "recordings"
        intro = f"You will hear {num_word} {recording_text} and you will have to answer questions on what you hear."
        play_text = f"The {recording_text} can be played three times."
        final_text = f" Pay close attention to the audio {recording_text} and answer the questions accordingly."

    instructions = f" There will be time for you to read the instructions and questions and you will have a chance to check your work. {play_text}"

    if count == 1:
        parts = f"The recording consists of {sections[0]}."
    else:
        ordinals = ("first", "second", "third", "fourth")
        sentences = [f"The module is in {count} parts."]
        # Index (rather than zip) so that a fifth section still raises
        # IndexError instead of being silently dropped.
        sentences.extend(
            f"In the {ordinals[i]} part you will hear {section}."
            for i, section in enumerate(sections)
        )
        # Joining leaves no trailing space, so final_text's own leading
        # space is the only separator (previously a double space appeared).
        parts = " ".join(sentences)

    return f"{intro}{instructions} {parts}{final_text}"
async def save_preset_audio(service, combo, preset_text):
    """Synthesize *preset_text* and write it to an MP3 named after *combo*.

    Args:
        service: Object exposing an awaitable ``text_to_speech(dialog, flag)``
            whose result is written to disk as bytes.
        combo: Section numbers; joined with ``_`` to form the file-name
            suffix (e.g. ``presetInstructions_1_3.mp3``).
        preset_text: The instruction text to be spoken.
    """
    # A one-line "conversation" spoken by the Matthew voice; name and
    # gender are passed as empty strings.
    payload = ConversationPayload(
        text=preset_text,
        voice="Matthew",
        name="",
        gender="",
    )
    dialog = Dialog(conversation=[payload])

    suffix = "_".join(str(number) for number in combo)
    filename = f"./recordings/presetInstructions_{suffix}.mp3"

    # NOTE(review): the meaning of the second positional argument (False)
    # is defined by the service implementation, not visible here.
    audio = await service.text_to_speech(dialog, False)

    # The output directory is created only once synthesis has succeeded.
    os.makedirs("./recordings", exist_ok=True)
    async with aiofiles.open(filename, "wb") as out_file:
        await out_file.write(audio)
async def main(generate=False):
    """Print, or synthesize audio for, every preset instruction text.

    Every non-empty combination of sections 1-4 is enumerated (shortest
    combinations first).

    Args:
        generate: When false (the default), each combination and its
            instruction text are printed. When true, a Polly client is
            opened and one MP3 per combination is produced concurrently.
    """
    load_dotenv()

    sections = [1, 2, 3, 4]
    all_combinations = [
        combo
        for size in range(1, len(sections) + 1)
        for combo in combinations(sections, size)
    ]

    if not generate:
        # Dry run: show what would be spoken for each combination.
        for combo in all_combinations:
            print(combo)
            preset_text = generate_preset(combo)
            print(f'{preset_text}\n\n')
        return

    session = aioboto3.Session()
    client_context = session.client(
        'polly',
        region_name='eu-west-1',
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    )
    # The client must stay open until every synthesis task has finished,
    # so the gather happens inside the context.
    async with client_context as polly_client:
        service = AWSPolly(polly_client)
        await asyncio.gather(*(
            save_preset_audio(service, combo, generate_preset(combo))
            for combo in all_combinations
        ))
# Script entry point. main() is called with its default generate=False, so
# running this file only prints the instruction texts; pass generate=True
# to main() to synthesize and save the audio files instead.
if __name__ == "__main__":
    asyncio.run(main())