Merged in ft-cf-3-speaking-videos (pull request #4)

Add script to create videos for speaking questions.
2023-09-05 13:35:20 +00:00
parent 685fde0b77 64776617f2
commit eb6e9b4ef7
13 changed files with 27611 additions and 7 deletions
--- a/.env
+++ b/.env
@@ -2,3 +2,4 @@ OPENAI_API_KEY=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
 JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
 GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/mti-ielts-626a2dcf6091.json
+HEY_GEN_TOKEN=NTY0Y2JlY2Q4NWE4NDdhY2E4NTNhMDMwNjQ5ZDcxOGUtMTY5MzUxMjIxMw==
--- a/download-video/placeholder.txt
+++ b/download-video/placeholder.txt
@@ -0,0 +1 @@
+THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO
--- a/generate_base_questions.py
+++ b/generate_base_questions.py
--- a/generate_speaking_questions.py
+++ b/generate_speaking_questions.py
@@ -0,0 +1,71 @@
+from helper.firebase_helper import upload_file_firebase_get_url
+from helper.heygen_api import create_video
+import os
+import uuid
+
+import firebase_admin
+from firebase_admin import credentials, firestore
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Initialize Firebase Admin SDK
+cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
+
+firebase_admin.initialize_app(cred)
+
+FIREBASE_BUCKET = 'mti-ielts.appspot.com'
+VIDEO_FILES_PATH = 'download-video/'
+FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'
+
+# Topic and questions that make up the speaking exercise to create.
+questions_json = {
+    "topic": "Technology and Society",
+    "questions": [
+        "How do you think technology has affected the way people communicate with each other in today's society?",
+        "In what ways has the use of smartphones and social media platforms changed the dynamics of personal relationships?",
+        "Some argue that technology has made communication more convenient, while others worry that it has led to a decline in face-to-face interactions. What's your perspective on this matter, and how do you think it might impact future generations?"
+    ]
+}
+
+# One prompt per question whose video was both generated AND uploaded.
+questions = []
+for question in questions_json["questions"]:
+    video_file_name = create_video(question)
+    if video_file_name is None:
+        print("Failed to create video for question: " + question)
+        continue
+
+    local_file_path = VIDEO_FILES_PATH + video_file_name
+    firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + video_file_name
+    url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, local_file_path)
+    if url is None:
+        # The upload helper returns None on failure; skipping the prompt keeps
+        # a null video_url out of Firestore and makes the size check below fail.
+        print("Failed to upload video for question: " + question)
+        continue
+
+    questions.append({
+        "text": question,
+        "video_path": firebase_file_path,
+        "video_url": url
+    })
+
+# Only store the exercise when every question has a usable video.
+if len(questions) == len(questions_json["questions"]):
+    speaking_pt3_to_insert = {
+        "exercises": [
+            {
+                "id": str(uuid.uuid4()),
+                "prompts": questions,
+                "text": "Listen carefully and respond.",
+                "title": questions_json["topic"],
+                "type": "speakingPart3"
+            }
+        ],
+        "isDiagnostic": True,
+        "minTimer": 5,
+        "module": "speaking"
+    }
+
+    db = firestore.client()
+
+    # Add the exercise to Firestore. add() returns an
+    # (update_time, document_reference) tuple, so unpack it to report the ID.
+    collection_ref = db.collection('speaking')
+    _, document_ref = collection_ref.add(speaking_pt3_to_insert)
+    print(f"Document added with ID: {document_ref.id}")
+else:
+    print("Array sizes do not match. Video uploading failing is probably the cause.")
--- a/helper/file_helper.py
+++ b/helper/file_helper.py
@@ -9,8 +9,9 @@ def delete_files_older_than_one_day(directory):
    for entry in os.scandir(directory):
        if entry.is_file():
            file_path = Path(entry)
+            file_name = file_path.name
            file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
            time_difference = current_time - file_modified_time
-            if time_difference.days > 1:
+            if time_difference.days > 1  and "placeholder" not in file_name:
                file_path.unlink()
                print(f"Deleted file: {file_path}")
--- a/helper/firebase_helper.py
+++ b/helper/firebase_helper.py
@@ -22,5 +22,24 @@ def upload_file_firebase(bucket_name, destination_blob_name, source_file_name):
        print("Error uploading file to Google Cloud Storage:", e)
        return False

+def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file_name):
+    """Upload a local file to Firebase Storage, make it public and return its URL.
+
+    Args:
+        bucket_name: Name of the storage bucket to upload into.
+        destination_blob_name: Object path to create inside the bucket.
+        source_file_name: Path of the local file to upload.
+
+    Returns:
+        The public URL of the uploaded blob, or None if any step failed.
+    """
+    try:
+        # Build the client inside the try so credential/configuration errors
+        # are reported through the None result, exactly like upload errors.
+        storage_client = storage.Client()
+        bucket = storage_client.bucket(bucket_name)
+        blob = bucket.blob(destination_blob_name)
+        blob.upload_from_filename(source_file_name)
+        print(f"File uploaded to {destination_blob_name}")
+
+        # Make the file public before handing out its public URL
+        blob.make_public()
+        return blob.public_url
+    except Exception as e:
+        print("Error uploading file to Google Cloud Storage:", e)
+        return None
+


--- a/helper/heygen_api.py
+++ b/helper/heygen_api.py
@@ -0,0 +1,87 @@
+import os
+import requests
+import time
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# HeyGen API key, read from the HEY_GEN_TOKEN environment variable
+TOKEN = os.getenv("HEY_GEN_TOKEN")
+
+# HeyGen endpoints (create a video / query its status) and the headers sent with each request
+CREATE_VIDEO_URL = 'https://api.heygen.com/v1/video.generate'
+GET_VIDEO_URL = 'https://api.heygen.com/v1/video_status.get'
+POST_HEADER = {
+    'X-Api-Key': TOKEN,
+    'Content-Type': 'application/json'
+}
+GET_HEADER = {
+    'X-Api-Key': TOKEN
+}
+
+# Seconds to wait for a server response before a request is abandoned
+REQUEST_TIMEOUT_SECONDS = 30
+# Seconds to wait between two status checks while the video is being rendered
+STATUS_POLL_INTERVAL_SECONDS = 5
+
+
+def _request_video_generation(text):
+    """Ask HeyGen to render a video speaking `text`; return its video id or None."""
+    data = {
+        "background": "#ffffff",
+        "clips": [
+            {
+                "avatar_id": "Mido-lite-20221128",
+                "avatar_style": "normal",
+                "input_text": text,
+                "offset": {
+                    "x": 0,
+                    "y": 0
+                },
+                "scale": 1,
+                "voice_id": "ccb30e87c6b34ca8941f88352c71612d"
+            }
+        ],
+        "ratio": "16:9",
+        "test": True,
+        "version": "v1alpha"
+    }
+    response = requests.post(CREATE_VIDEO_URL, headers=POST_HEADER, json=data,
+                             timeout=REQUEST_TIMEOUT_SECONDS)
+    response_data = response.json()
+    print(response.status_code)
+    print(response_data)
+
+    # A rejected request may carry no "data" object; report that as None
+    # instead of failing with a KeyError/TypeError.
+    return (response_data.get("data") or {}).get("video_id")
+
+
+def _wait_for_video_url(video_id):
+    """Poll the status endpoint until rendering ends; return the video URL or None."""
+    params = {
+        'video_id': video_id
+    }
+    while True:
+        response = requests.get(GET_VIDEO_URL, headers=GET_HEADER, params=params,
+                                timeout=REQUEST_TIMEOUT_SECONDS)
+        response_data = response.json()
+        data = response_data.get("data") or {}
+        status = data.get("status")
+        error = data.get("error")
+
+        # Stop on success, on a reported error, or when no status comes back at
+        # all, so a rejected request cannot keep the loop polling forever.
+        # NOTE(review): "failed" is assumed to be HeyGen's terminal failure
+        # status - confirm against the API documentation.
+        if error is not None or status in ("completed", "failed", None):
+            break
+
+        print(f"Status: {status}")
+        time.sleep(STATUS_POLL_INTERVAL_SECONDS)
+
+    print(response.status_code)
+    print(response_data)
+
+    if status != "completed":
+        print(f"Video generation failed. Status: {status}, error: {error}")
+        return None
+    return data.get("video_url")
+
+
+def _download_video(download_url, video_id):
+    """Save the video at `download_url` under download-video/; return the file name or None."""
+    output_directory = 'download-video/'
+    output_filename = video_id + '.mp4'
+
+    response = requests.get(download_url, timeout=REQUEST_TIMEOUT_SECONDS)
+    if response.status_code != 200:
+        print(f"Failed to download file. Status code: {response.status_code}")
+        return None
+
+    os.makedirs(output_directory, exist_ok=True)  # Create the directory if it doesn't exist
+    output_path = os.path.join(output_directory, output_filename)
+    with open(output_path, 'wb') as f:
+        f.write(response.content)
+    print(f"File '{output_filename}' downloaded successfully.")
+    return output_filename
+
+
+def create_video(text):
+    """Generate a HeyGen avatar video that speaks `text` and download it locally.
+
+    Args:
+        text: The text the avatar should say.
+
+    Returns:
+        The file name (`<video_id>.mp4`) of the video saved in
+        `download-video/`, or None if the video could not be created,
+        rendered or downloaded.
+    """
+    video_id = _request_video_generation(text)
+    if video_id is None:
+        print("Failed to start video generation.")
+        return None
+
+    download_url = _wait_for_video_url(video_id)
+    if not download_url:
+        # Previously an errored render fell through to the download step and
+        # crashed there; callers expect None on failure.
+        return None
+
+    return _download_video(download_url, video_id)
--- a/heygen/avatars.json
+++ b/heygen/avatars.json
--- a/heygen/english_voices.json
+++ b/heygen/english_voices.json
--- a/heygen/filter_json.py
+++ b/heygen/filter_json.py
@@ -0,0 +1,18 @@
+import json
+
+# Input voice list and the file that receives its free-only subset
+input_filename = "english_voices.json"
+output_filename = "free_english_voices.json"
+
+with open(input_filename, "r") as voices_file:
+    data = json.load(voices_file)
+
+# Keep only the entries that are not flagged as paid
+data["data"]["list"] = [
+    voice for voice in data["data"]["list"] if not voice["is_paid"]
+]
+
+# Write the filtered document back out, pretty-printed
+with open(output_filename, "w") as free_voices_file:
+    json.dump(data, free_voices_file, indent=2)
+
+print(f"Filtered JSON written to '{output_filename}'.")
--- a/heygen/free_english_voices.json
+++ b/heygen/free_english_voices.json
--- a/heygen/voices.json
+++ b/heygen/voices.json
--- a/run.py
+++ b/run.py
@@ -1,5 +0,0 @@
-from streamlit.web import bootstrap
-
-real_s_script = 'sp1_playground.py'
-real_w_script = 'wt2_playground.py'
-bootstrap.run(real_s_script, f'run.py {real_s_script}', [], {})
				`@@ -0,0 +1 @@`
				`THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO`