Add script to create videos for speaking questions.

2023-09-03 18:05:13 +01:00
parent 685fde0b77
commit 64776617f2
13 changed files with 27611 additions and 7 deletions
--- a/.env
+++ b/.env
@@ -2,3 +2,4 @@ OPENAI_API_KEY=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
 JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
 GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/mti-ielts-626a2dcf6091.json
 HEY_GEN_TOKEN=NTY0Y2JlY2Q4NWE4NDdhY2E4NTNhMDMwNjQ5ZDcxOGUtMTY5MzUxMjIxMw==
--- a/download-video/placeholder.txt
+++ b/download-video/placeholder.txt
@@ -0,0 +1 @@
 THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO
--- a/generate_base_questions.py
+++ b/generate_base_questions.py
--- a/generate_speaking_questions.py
+++ b/generate_speaking_questions.py
@@ -0,0 +1,71 @@
 from helper.firebase_helper import upload_file_firebase_get_url
 from helper.heygen_api import create_video
 import os
 import uuid
 import firebase_admin
 from firebase_admin import credentials, firestore
 from dotenv import load_dotenv
 # Load environment variables (e.g. GOOGLE_APPLICATION_CREDENTIALS) before they are read below.
 load_dotenv()

 # Initialize Firebase Admin SDK
 cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
 firebase_admin.initialize_app(cred)

 FIREBASE_BUCKET = 'mti-ielts.appspot.com'
 VIDEO_FILES_PATH = 'download-video/'
 FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'

 # Topic and questions to turn into videos.
 questions_json = {
    "topic": "Technology and Society",
    "questions": [
        "How do you think technology has affected the way people communicate with each other in today's society?",
        "In what ways has the use of smartphones and social media platforms changed the dynamics of personal relationships?",
        "Some argue that technology has made communication more convenient, while others worry that it has led to a decline in face-to-face interactions. What's your perspective on this matter, and how do you think it might impact future generations?"
    ]
 }

 # One prompt per question whose video was both generated AND uploaded.
 questions = []
 for question in questions_json["questions"]:
    result = create_video(question)
    if result is None:
        print("Failed to create video for question: " + question)
        continue
    video_file_path = VIDEO_FILES_PATH + result
    firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
    url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, video_file_path)
    if url is None:
        # The upload helper returns None on failure; never store a prompt
        # without a usable video URL.
        print("Failed to upload video for question: " + question)
        continue
    questions.append({
        "text": question,
        "video_path": firebase_file_path,
        "video_url": url
    })

 # Only write the exercise when every question has a video; a partial
 # exercise would be missing prompts.
 if len(questions) == len(questions_json["questions"]):
    speaking_pt3_to_insert = {
        "exercises": [
            {
                "id": str(uuid.uuid4()),
                "prompts": questions,
                "text": "Listen carefully and respond.",
                "title": questions_json["topic"],
                "type": "speakingPart3"
            }
        ],
        "isDiagnostic": True,
        "minTimer": 5,
        "module": "speaking"
    }
    db = firestore.client()
    # Add the JSON data to Firestore
    collection_ref = db.collection('speaking')
    # add() returns an (update_time, document_reference) tuple; keep the
    # reference so the real document ID can be reported.
    document_ref = collection_ref.add(speaking_pt3_to_insert)[1]
    print(f"Document added with ID: {document_ref.id}")
 else:
    print("Array sizes do not match. Video uploading failing is probably the cause.")
--- a/helper/file_helper.py
+++ b/helper/file_helper.py
@@ -9,8 +9,9 @@ def delete_files_older_than_one_day(directory):
    for entry in os.scandir(directory):
        if entry.is_file():
            file_path = Path(entry)
            file_name = file_path.name
            file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
            time_difference = current_time - file_modified_time
-            if time_difference.days > 1:
+            if time_difference.days > 1  and "placeholder" not in file_name:
                file_path.unlink()
                print(f"Deleted file: {file_path}")
--- a/helper/firebase_helper.py
+++ b/helper/firebase_helper.py
@@ -22,5 +22,24 @@ def upload_file_firebase(bucket_name, destination_blob_name, source_file_name):
        print("Error uploading file to Google Cloud Storage:", e)
        return False
 def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file_name):
    """Upload a local file to a storage bucket, make it public and return its URL.

    Args:
        bucket_name: Name of the bucket to upload into.
        destination_blob_name: Object name given to the uploaded file.
        source_file_name: Path of the local file to upload.

    Returns:
        The blob's public URL, or None when any step inside the upload /
        publish sequence raised an exception (the error is printed).
    """
    target_bucket = storage.Client().bucket(bucket_name)
    try:
        uploaded = target_bucket.blob(destination_blob_name)
        uploaded.upload_from_filename(source_file_name)
        print(f"File uploaded to {destination_blob_name}")
        # Publish the object so that its public URL can be handed out.
        uploaded.make_public()
        public_link = uploaded.public_url
    except Exception as err:
        print("Error uploading file to Google Cloud Storage:", err)
        return None
    return public_link
--- a/helper/heygen_api.py
+++ b/helper/heygen_api.py
@@ -0,0 +1,87 @@
 import os
 import requests
 import time
 from dotenv import load_dotenv
 # Load environment variables before the token is read.
 load_dotenv()

 # API key sent with every HeyGen request (None when HEY_GEN_TOKEN is unset).
 TOKEN = os.environ.get("HEY_GEN_TOKEN")

 # Endpoints: one to start a video generation, one to query its status.
 CREATE_VIDEO_URL = 'https://api.heygen.com/v1/video.generate'
 GET_VIDEO_URL = 'https://api.heygen.com/v1/video_status.get'

 # Headers for GET requests, and the same headers plus a JSON content type
 # for the creation POST.
 GET_HEADER = {'X-Api-Key': TOKEN}
 POST_HEADER = {**GET_HEADER, 'Content-Type': 'application/json'}
 def _heygen_data(payload):
    """Return the ``data`` object of a decoded HeyGen response, or ``{}`` if absent."""
    if not isinstance(payload, dict):
        return {}
    data = payload.get("data")
    return data if isinstance(data, dict) else {}


 def create_video(text, poll_interval=5, max_wait=600, request_timeout=30):
    """Generate a HeyGen video that speaks *text* and download it locally.

    The function posts a generation request, polls the status endpoint until
    the video is completed, then saves the result as
    ``download-video/<video_id>.mp4``.

    Args:
        text: The text the avatar should speak.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Maximum number of seconds to wait for the video to complete.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The downloaded file name (``<video_id>.mp4``), or None when the
        request, the generation, or the download failed or timed out.
    """
    # POST TO CREATE VIDEO
    data = {
        "background": "#ffffff",
        "clips": [
            {
                "avatar_id": "Mido-lite-20221128",
                "avatar_style": "normal",
                "input_text": text,
                "offset": {
                    "x": 0,
                    "y": 0
                },
                "scale": 1,
                "voice_id": "ccb30e87c6b34ca8941f88352c71612d"
            }
        ],
        "ratio": "16:9",
        "test": True,
        "version": "v1alpha"
    }
    try:
        response = requests.post(CREATE_VIDEO_URL, headers=POST_HEADER, json=data,
                                 timeout=request_timeout)
        created = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        print(f"Failed to request video creation: {exc}")
        return None
    print(response.status_code)
    print(created)

    # A rejected request may carry no "data"/"video_id"; report failure to the
    # caller (which checks for None) instead of raising KeyError.
    video_id = _heygen_data(created).get("video_id")
    if not video_id:
        print("HeyGen did not return a video_id; aborting.")
        return None

    # GET TO CHECK STATUS AND GET VIDEO WHEN READY
    params = {'video_id': video_id}
    deadline = time.monotonic() + max_wait
    while True:
        try:
            response = requests.get(GET_VIDEO_URL, headers=GET_HEADER, params=params,
                                    timeout=request_timeout)
            status_payload = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Failed to check video status: {exc}")
            return None
        status_data = _heygen_data(status_payload)
        status = status_data.get("status")
        error = status_data.get("error")
        # NOTE(review): "failed" is assumed to be HeyGen's terminal failure
        # status - confirm against the API documentation.
        if not status_data or error is not None or status == "failed":
            print(response.status_code)
            print(status_payload)
            print(f"Video generation failed: {error}")
            return None
        if status == "completed":
            break
        if time.monotonic() >= deadline:
            # Bound the wait so a stuck job cannot block the caller forever.
            print(f"Timed out after {max_wait} seconds waiting for video {video_id}.")
            return None
        print(f"Status: {status}")
        time.sleep(poll_interval)  # Wait before the next status request
    print(response.status_code)
    print(status_payload)

    # DOWNLOAD VIDEO
    download_url = status_data.get('video_url')
    if not download_url:
        print("Completed video has no download URL.")
        return None
    output_directory = 'download-video/'
    output_filename = video_id + '.mp4'
    try:
        download = requests.get(download_url, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Failed to download file: {exc}")
        return None
    if download.status_code != 200:
        print(f"Failed to download file. Status code: {download.status_code}")
        return None
    os.makedirs(output_directory, exist_ok=True)  # Create the directory if it doesn't exist
    output_path = os.path.join(output_directory, output_filename)
    with open(output_path, 'wb') as f:
        f.write(download.content)
    print(f"File '{output_filename}' downloaded successfully.")
    return output_filename
--- a/heygen/avatars.json
+++ b/heygen/avatars.json
--- a/heygen/english_voices.json
+++ b/heygen/english_voices.json
--- a/heygen/filter_json.py
+++ b/heygen/filter_json.py
@@ -0,0 +1,18 @@
 import json
 # Read the voice list from a file (paths are relative to the current
 # working directory).
 input_filename = "english_voices.json"
 output_filename = "free_english_voices.json"
 # JSON text is UTF-8; be explicit instead of relying on the platform default.
 with open(input_filename, "r", encoding="utf-8") as json_file:
    data = json.load(json_file)
 # Keep only the entries that are not marked as paid
 filtered_list = [entry for entry in data["data"]["list"] if not entry["is_paid"]]
 data["data"]["list"] = filtered_list
 # Write filtered JSON to a new file
 with open(output_filename, "w", encoding="utf-8") as json_file:
    json.dump(data, json_file, indent=2)
 print(f"Filtered JSON written to '{output_filename}'.")
--- a/heygen/free_english_voices.json
+++ b/heygen/free_english_voices.json
--- a/heygen/voices.json
+++ b/heygen/voices.json
--- a/run.py
+++ b/run.py
@@ -1,5 +0,0 @@
 from streamlit.web import bootstrap
 # Streamlit scripts known to this launcher; only real_s_script is started
 # by the call below.
 real_s_script = 'sp1_playground.py'
 real_w_script = 'wt2_playground.py'
 # The second argument is the text "run.py" followed by the script name.
 bootstrap.run(real_s_script, 'run.py ' + real_s_script, [], {})
		`@@ -0,0 +1 @@`
							`THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO`