Add script to create videos for speaking questions.

This commit is contained in:
Cristiano Ferreira
2023-09-03 18:05:13 +01:00
parent 685fde0b77
commit 64776617f2
13 changed files with 27611 additions and 7 deletions

1
.env
View File

@@ -2,3 +2,4 @@ OPENAI_API_KEY=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
JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2 JWT_SECRET_KEY=6e9c124ba92e8814719dcb0f21200c8aa4d0f119a994ac5e06eb90a366c83ab2
JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0 JWT_TEST_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.Emrs2D3BmMP4b3zMjw0fJTPeyMwWEBDbxx2vvaWguO0
GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/mti-ielts-626a2dcf6091.json GOOGLE_APPLICATION_CREDENTIALS=firebase-configs/mti-ielts-626a2dcf6091.json
HEY_GEN_TOKEN=NTY0Y2JlY2Q4NWE4NDdhY2E4NTNhMDMwNjQ5ZDcxOGUtMTY5MzUxMjIxMw==

View File

@@ -0,0 +1 @@
THIS FILE ONLY EXISTS TO KEEP THIS FOLDER IN THE REPO

View File

@@ -0,0 +1,71 @@
from helper.firebase_helper import upload_file_firebase_get_url
from helper.heygen_api import create_video
import os
import uuid
import firebase_admin
from firebase_admin import credentials, firestore
from dotenv import load_dotenv
load_dotenv()
# Initialize the Firebase Admin SDK with the service-account file whose path is
# read from the GOOGLE_APPLICATION_CREDENTIALS environment variable.
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
firebase_admin.initialize_app(cred)

# Storage bucket that receives the videos.
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
# Local directory the generated videos are read from; must match the directory
# create_video() downloads into.
VIDEO_FILES_PATH = 'download-video/'
# Path prefix of the uploaded videos inside the bucket.
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'

# Topic and questions to turn into videos (one video per question).
questions_json = {
    "topic": "Technology and Society",
    "questions": [
        "How do you think technology has affected the way people communicate with each other in today's society?",
        "In what ways has the use of smartphones and social media platforms changed the dynamics of personal relationships?",
        "Some argue that technology has made communication more convenient, while others worry that it has led to a decline in face-to-face interactions. What's your perspective on this matter, and how do you think it might impact future generations?"
    ]
}

# One prompt entry per question whose video was both created and uploaded.
questions = []
for question in questions_json["questions"]:
    result = create_video(question)
    if result is None:
        print("Failed to create video for question: " + question)
        continue

    sound_file_path = VIDEO_FILES_PATH + result
    firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
    url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
    if url is None:
        # upload_file_firebase_get_url returns None when the upload fails; do
        # not record a prompt that has no usable video URL.
        print("Failed to upload video for question: " + question)
        continue

    questions.append({
        "text": question,
        "video_path": firebase_file_path,
        "video_url": url
    })

# Only store the exercise when every question has a video; a partial exercise
# is never written.
if len(questions) == len(questions_json["questions"]):
    speaking_pt3_to_insert = {
        "exercises": [
            {
                "id": str(uuid.uuid4()),
                "prompts": questions,
                "text": "Listen carefully and respond.",
                "title": questions_json["topic"],
                "type": "speakingPart3"
            }
        ],
        "isDiagnostic": True,
        "minTimer": 5,
        "module": "speaking"
    }

    db = firestore.client()
    collection_ref = db.collection('speaking')
    # add() returns an (update_time, document_reference) tuple; unpack it so
    # the message shows the document ID instead of the whole tuple.
    _, document_ref = collection_ref.add(speaking_pt3_to_insert)
    print(f"Document added with ID: {document_ref.id}")
else:
    print("Array sizes do not match. Video uploading failing is probably the cause.")

View File

@@ -9,8 +9,9 @@ def delete_files_older_than_one_day(directory):
for entry in os.scandir(directory): for entry in os.scandir(directory):
if entry.is_file(): if entry.is_file():
file_path = Path(entry) file_path = Path(entry)
file_name = file_path.name
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime) file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
time_difference = current_time - file_modified_time time_difference = current_time - file_modified_time
if time_difference.days > 1: if time_difference.days > 1 and "placeholder" not in file_name:
file_path.unlink() file_path.unlink()
print(f"Deleted file: {file_path}") print(f"Deleted file: {file_path}")

View File

@@ -22,5 +22,24 @@ def upload_file_firebase(bucket_name, destination_blob_name, source_file_name):
print("Error uploading file to Google Cloud Storage:", e) print("Error uploading file to Google Cloud Storage:", e)
return False return False
def upload_file_firebase_get_url(bucket_name, destination_blob_name, source_file_name):
    """Upload a local file to a storage bucket, make it public and return its URL.

    Args:
        bucket_name: Name of the bucket to upload into.
        destination_blob_name: Path of the object to create inside the bucket.
        source_file_name: Path of the local file to upload.

    Returns:
        The public URL of the uploaded object, or None when the upload or the
        publishing step raises (the error is printed, not re-raised).
    """
    target_bucket = storage.Client().bucket(bucket_name)

    try:
        uploaded_blob = target_bucket.blob(destination_blob_name)
        uploaded_blob.upload_from_filename(source_file_name)
        print(f"File uploaded to {destination_blob_name}")

        # The object has to be public before its public URL is handed out.
        uploaded_blob.make_public()
        return uploaded_blob.public_url
    except Exception as e:
        print("Error uploading file to Google Cloud Storage:", e)

    return None

87
helper/heygen_api.py Normal file
View File

@@ -0,0 +1,87 @@
import os
import requests
import time
from dotenv import load_dotenv
load_dotenv()
# HeyGen API token, read from the HEY_GEN_TOKEN environment variable.
TOKEN = os.getenv("HEY_GEN_TOKEN")

# Endpoint that starts a video generation (POST).
CREATE_VIDEO_URL = 'https://api.heygen.com/v1/video.generate'
# Endpoint that reports the status of a generation (GET).
GET_VIDEO_URL = 'https://api.heygen.com/v1/video_status.get'

# Headers for the status GET request: authentication only.
GET_HEADER = {'X-Api-Key': TOKEN}
# Headers for the create POST request: authentication plus a JSON content type.
POST_HEADER = {**GET_HEADER, 'Content-Type': 'application/json'}
def create_video(text, *, poll_interval=5, request_timeout=30,
                 output_directory='download-video/'):
    """Generate a HeyGen avatar video that speaks ``text`` and download it.

    The function asks HeyGen to generate the video, polls the status endpoint
    until the generation finishes, then saves the resulting file as
    ``<video_id>.mp4`` inside ``output_directory``.

    Args:
        text: The text the avatar should speak.
        poll_interval: Seconds to wait between two status requests.
        request_timeout: Timeout, in seconds, applied to every HTTP request.
        output_directory: Directory the video is saved into; created when it
            does not exist.

    Returns:
        The file name (not the full path) of the downloaded video, or None
        when the video could not be created, generated or downloaded.

    Raises:
        requests.exceptions.RequestException: On network-level failures
            (connection errors, timeouts); these are not swallowed.
    """
    # Request payload for the generation endpoint.
    # NOTE(review): "test": True presumably requests HeyGen's test mode -
    # confirm against the HeyGen API documentation.
    data = {
        "background": "#ffffff",
        "clips": [
            {
                "avatar_id": "Mido-lite-20221128",
                "avatar_style": "normal",
                "input_text": text,
                "offset": {
                    "x": 0,
                    "y": 0
                },
                "scale": 1,
                "voice_id": "ccb30e87c6b34ca8941f88352c71612d"
            }
        ],
        "ratio": "16:9",
        "test": True,
        "version": "v1alpha"
    }

    # POST to create the video.
    response = requests.post(CREATE_VIDEO_URL, headers=POST_HEADER, json=data,
                             timeout=request_timeout)
    print(response.status_code)
    if not response.ok:
        # Report the failure through the None return value the caller checks
        # for, instead of failing later on a missing key.
        print(f"Failed to create video. Response: {response.text}")
        return None
    created = response.json()
    print(created)

    video_id = (created.get("data") or {}).get("video_id")
    if not video_id:
        print("Failed to create video: response contains no video_id.")
        return None

    # GET to check the status until the generation reaches a final state.
    params = {'video_id': video_id}
    while True:
        response = requests.get(GET_VIDEO_URL, headers=GET_HEADER, params=params,
                                timeout=request_timeout)
        if not response.ok:
            print(f"Failed to get video status. Status code: {response.status_code}")
            return None
        status_body = response.json()
        video_data = status_body.get("data") or {}
        status = video_data.get("status")
        error = video_data.get("error")
        # Stop on success, on a reported error, or on a failed status, so a
        # failed generation cannot keep the loop polling forever.
        if status in ("completed", "failed") or error is not None:
            break
        print(f"Status: {status}")
        time.sleep(poll_interval)

    print(response.status_code)
    print(status_body)

    download_url = video_data.get("video_url")
    if status != "completed" or not download_url:
        # Without a completed video there is nothing to download.
        print(f"Video generation failed. Status: {status}, error: {error}")
        return None

    # Download the video.
    output_filename = video_id + '.mp4'
    response = requests.get(download_url, timeout=request_timeout)
    if response.status_code != 200:
        print(f"Failed to download file. Status code: {response.status_code}")
        return None

    os.makedirs(output_directory, exist_ok=True)  # Create the directory if it doesn't exist
    output_path = os.path.join(output_directory, output_filename)
    with open(output_path, 'wb') as f:
        f.write(response.content)
    print(f"File '{output_filename}' downloaded successfully.")
    return output_filename

8572
heygen/avatars.json Normal file

File diff suppressed because it is too large Load Diff

3313
heygen/english_voices.json Normal file

File diff suppressed because it is too large Load Diff

18
heygen/filter_json.py Normal file
View File

@@ -0,0 +1,18 @@
import json
# Input file holding the voice list, and output file for the filtered copy.
input_filename = "english_voices.json"
output_filename = "free_english_voices.json"


def filter_free_voices(data):
    """Drop every paid voice from ``data["data"]["list"]`` in place.

    Args:
        data: Parsed JSON document whose ``data["data"]["list"]`` is a list of
            entries that each carry an ``is_paid`` flag.

    Returns:
        The same ``data`` object, with only the entries whose ``is_paid``
        value is falsy left in the list.
    """
    data["data"]["list"] = [
        entry for entry in data["data"]["list"] if not entry["is_paid"]
    ]
    return data


def main():
    """Read the input file, keep the free voices and write the output file."""
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(input_filename, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    filter_free_voices(data)

    with open(output_filename, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, indent=2)

    print(f"Filtered JSON written to '{output_filename}'.")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

13777
heygen/voices.json Normal file

File diff suppressed because it is too large Load Diff

5
run.py
View File

@@ -1,5 +0,0 @@
from streamlit.web import bootstrap
# Scripts this launcher knows about; only real_s_script is started below.
real_s_script = 'sp1_playground.py'
real_w_script = 'wt2_playground.py'

# Start Streamlit programmatically on real_s_script, with no extra arguments
# and no flag options.
command_line = f'run.py {real_s_script}'
bootstrap.run(real_s_script, command_line, [], {})