ENCOA-295

2024-12-26 12:31:22 +00:00
parent 09d6242360
commit 9bfad2d47f
13 changed files with 170 additions and 38 deletions
--- a/scripts/listening_instructions/gen_listening_instructions.py
+++ b/scripts/listening_instructions/gen_listening_instructions.py
@@ -0,0 +1,98 @@
+import os
+import asyncio
+import contextlib
+import aioboto3
+from itertools import combinations
+from dotenv import load_dotenv
+import aiofiles
+
+from ielts_be.dtos.listening import Dialog, ConversationPayload
+from ielts_be.services.impl import AWSPolly
+
+# Spoken description of each listening section, keyed by section number.
+SECTION_DESCRIPTIONS = {
+    1: "a conversation between two people in an everyday social context",
+    2: "a monologue set in a social context",
+    3: "a conversation between up to four individuals in an educational context",
+    4: "a monologue about an academic subject"
+}
+
+def generate_preset(selected_sections):
+    """Build the spoken introduction for a listening module.
+
+    Args:
+        selected_sections: Section numbers (keys of ``SECTION_DESCRIPTIONS``)
+            in the order they should be announced; between one and four.
+
+    Returns:
+        The instruction text as a single string whose sentences are
+        separated by exactly one space.
+
+    Raises:
+        ValueError: If no section is given, or more sections are given than
+            there are ordinals to announce them with.
+        KeyError: If a section number is not in ``SECTION_DESCRIPTIONS``.
+    """
+    ordinals = ("first", "second", "third", "fourth")
+    descriptions = [SECTION_DESCRIPTIONS[number] for number in selected_sections]
+    count = len(descriptions)
+
+    # An empty selection used to yield "two recordings ... 0 parts"; reject it
+    # (and oversize selections) explicitly instead of emitting nonsense.
+    if not 1 <= count <= len(ordinals):
+        raise ValueError(
+            f"expected between 1 and {len(ordinals)} sections, got {count}"
+        )
+
+    if count >= 3:
+        noun = "recordings"
+        intro = "You will hear a number of different recordings and you will have to answer questions on what you hear."
+        play_text = "All the recordings can be played three times."
+    else:
+        num_word = "one" if count == 1 else "two"
+        noun = "recording" if count == 1 else "recordings"
+        intro = f"You will hear {num_word} {noun} and you will have to answer questions on what you hear."
+        play_text = f"The {noun} can be played three times."
+
+    sentences = [
+        intro,
+        "There will be time for you to read the instructions and questions and you will have a chance to check your work.",
+        play_text,
+    ]
+
+    if count == 1:
+        sentences.append(f"The recording consists of {descriptions[0]}.")
+    else:
+        sentences.append(f"The module is in {count} parts.")
+        sentences.extend(
+            f"In the {ordinal} part you will hear {description}."
+            for ordinal, description in zip(ordinals, descriptions)
+        )
+
+    sentences.append(
+        f"Pay close attention to the audio {noun} and answer the questions accordingly."
+    )
+
+    # Joining with one space avoids the doubled space that string
+    # concatenation previously left before the closing sentence.
+    return " ".join(sentences)
+
+async def save_preset_audio(service, combo, preset_text):
+    """Synthesize ``preset_text`` and store the result under ``./recordings``.
+
+    Args:
+        service: Object exposing an awaitable ``text_to_speech(script, flag)``.
+        combo: Section numbers; joined with underscores to form the file name.
+        preset_text: Instruction text to be spoken by the "Matthew" voice.
+    """
+    suffix = "_".join(str(section) for section in combo)
+    target_path = f"./recordings/presetInstructions_{suffix}.mp3"
+
+    spoken_line = ConversationPayload(
+        text=preset_text,
+        voice="Matthew",
+        name="",
+        gender="",
+    )
+    # NOTE(review): the meaning of the second positional argument (False) is
+    # defined by the service implementation, which is not visible here.
+    audio = await service.text_to_speech(Dialog(conversation=[spoken_line]), False)
+
+    # The output directory is relative to the current working directory.
+    os.makedirs("./recordings", exist_ok=True)
+    async with aiofiles.open(target_path, "wb") as out_file:
+        await out_file.write(audio)
+
+
+async def main(generate = False):
+    """Preview or synthesize the preset for every non-empty section subset.
+
+    Args:
+        generate: When false (the default), print each combination followed
+            by its preset text. When true, open a Polly client and save the
+            audio for all combinations concurrently.
+    """
+    load_dotenv()
+    section_numbers = [1, 2, 3, 4]
+    # Every subset of sizes 1..4, ordered by size and then lexicographically.
+    all_combinations = [
+        combo
+        for size in range(1, len(section_numbers) + 1)
+        for combo in combinations(section_numbers, size)
+    ]
+
+    if not generate:
+        for combo in all_combinations:
+            print(combo)
+            print(f'{generate_preset(combo)}\n\n')
+        return
+
+    client_context = aioboto3.Session().client(
+        'polly',
+        region_name='eu-west-1',
+        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID")
+    )
+    async with client_context as polly_client:
+        service = AWSPolly(polly_client)
+        # All coroutines are created up front, then awaited together while
+        # the client is still open.
+        pending = [
+            save_preset_audio(service, combo, generate_preset(combo))
+            for combo in all_combinations
+        ]
+        await asyncio.gather(*pending)
+
+
+# Script entry point. main() is called with its default generate=False, so a
+# direct run only prints each section combination and its preset text; no
+# audio is synthesized unless main(generate=True) is used instead.
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/scripts/listening_instructions/recordings/.gitkeep
+++ b/scripts/listening_instructions/recordings/.gitkeep