Add save endpoints but don't actually save.
This commit is contained in:
370
app.py
370
app.py
@@ -17,6 +17,7 @@ import re
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from heygen.AvatarEnum import AvatarEnum
|
||||
from templates.question_templates import *
|
||||
|
||||
load_dotenv()
|
||||
@@ -30,6 +31,7 @@ jwt = JWTManager(app)
|
||||
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
|
||||
firebase_admin.initialize_app(cred)
|
||||
|
||||
|
||||
@app.route('/healthcheck', methods=['GET'])
def healthcheck():
    """Answer GET /healthcheck with a fixed payload reporting the service as healthy."""
    status = {"healthy": True}
    return status
|
||||
@@ -54,7 +56,8 @@ def get_listening_section_1_question():
|
||||
print("Generated conversation: " + str(processed_conversation))
|
||||
|
||||
start_id = 1
|
||||
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises, number_of_exercises_q,
|
||||
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises,
|
||||
number_of_exercises_q,
|
||||
start_id)
|
||||
return {
|
||||
"exercises": exercises,
|
||||
@@ -64,28 +67,6 @@ def get_listening_section_1_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_listening_section_1', methods=['POST'])
@jwt_required()
def save_listening_section_1_question():
    """Synthesize, upload and persist the listening section 1 question.

    The question currently comes from getListening1Template(); reading it
    from the request body is still commented out.

    Returns:
        The question mapping (with ``audio.source`` set to the uploaded file
        URL) on success, otherwise the exception message as a plain string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        question = getListening1Template()
        file_name = str(uuid.uuid4()) + ".mp3"
        sound_file_path = AUDIO_FILES_PATH + file_name
        firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
        # TODO it's the conversation audio, still work to do on text-to-speech
        text_to_speech(question["audio"]["conversation"], sound_file_path)
        file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
        question["audio"]["source"] = file_url
        if not save_to_db("listening", question):
            # str() is required: concatenating the mapping directly raises
            # TypeError and would replace this message with an unrelated one.
            raise Exception("Failed to save question: " + str(question))
        return question
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/listening_section_2', methods=['GET'])
|
||||
@jwt_required()
|
||||
def get_listening_section_2_question():
|
||||
@@ -113,27 +94,6 @@ def get_listening_section_2_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_listening_section_2', methods=['POST'])
@jwt_required()
def save_listening_section_2_question():
    """Synthesize, upload and persist the listening section 2 question.

    The question currently comes from getListening2Template(); reading it
    from the request body is still commented out.

    Returns:
        The question mapping (with ``audio.source`` set to the uploaded file
        URL) on success, otherwise the exception message as a plain string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        question = getListening2Template()
        file_name = str(uuid.uuid4()) + ".mp3"
        sound_file_path = AUDIO_FILES_PATH + file_name
        firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
        text_to_speech(question["audio"]["text"], sound_file_path)
        file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
        question["audio"]["source"] = file_url
        if not save_to_db("listening", question):
            # str() is required: concatenating the mapping directly raises
            # TypeError and would replace this message with an unrelated one.
            raise Exception("Failed to save question: " + str(question))
        return question
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/listening_section_3', methods=['GET'])
|
||||
@jwt_required()
|
||||
def get_listening_section_3_question():
|
||||
@@ -153,7 +113,8 @@ def get_listening_section_3_question():
|
||||
print("Generated conversation: " + str(processed_conversation))
|
||||
|
||||
start_id = 21
|
||||
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises, number_of_exercises_q,
|
||||
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises,
|
||||
number_of_exercises_q,
|
||||
start_id)
|
||||
return {
|
||||
"exercises": exercises,
|
||||
@@ -163,27 +124,6 @@ def get_listening_section_3_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_listening_section_3', methods=['POST'])
@jwt_required()
def save_listening_section_3_question():
    """Synthesize, upload and persist the listening section 3 question.

    NOTE(review): the question is built from getListening2Template(), not a
    section-3 template — looks like a placeholder; confirm before relying on it.

    Returns:
        The question mapping (with ``audio.source`` set to the uploaded file
        URL) on success, otherwise the exception message as a plain string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        question = getListening2Template()
        file_name = str(uuid.uuid4()) + ".mp3"
        sound_file_path = AUDIO_FILES_PATH + file_name
        firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
        text_to_speech(question["audio"]["text"], sound_file_path)
        file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
        question["audio"]["source"] = file_url
        if not save_to_db("listening", question):
            # str() is required: concatenating the mapping directly raises
            # TypeError and would replace this message with an unrelated one.
            raise Exception("Failed to save question: " + str(question))
        return question
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/listening_section_4', methods=['GET'])
|
||||
@jwt_required()
|
||||
def get_listening_section_4_question():
|
||||
@@ -211,23 +151,30 @@ def get_listening_section_4_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_listening_section_4', methods=['POST'])
|
||||
@app.route('/listening', methods=['POST'])
|
||||
@jwt_required()
|
||||
def save_listening_section_4_question():
|
||||
def save_listening():
|
||||
try:
|
||||
# data = request.get_json()
|
||||
# question = data.get('question')
|
||||
question = getListening2Template()
|
||||
file_name = str(uuid.uuid4()) + ".mp3"
|
||||
sound_file_path = AUDIO_FILES_PATH + file_name
|
||||
firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
|
||||
text_to_speech(question["audio"]["text"], sound_file_path)
|
||||
file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||
question["audio"]["source"] = file_url
|
||||
if save_to_db("listening", question):
|
||||
return question
|
||||
else:
|
||||
raise Exception("Failed to save question: " + question)
|
||||
data = request.get_json()
|
||||
parts = data.get('parts')
|
||||
template = getListeningTemplate()
|
||||
for i, part in enumerate(parts, start=0):
|
||||
file_name = str(uuid.uuid4()) + ".mp3"
|
||||
sound_file_path = AUDIO_FILES_PATH + file_name
|
||||
firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
|
||||
if "conversation" in part["text"]:
|
||||
conversation_text_to_speech(part["text"]["conversation"], sound_file_path)
|
||||
else:
|
||||
text_to_speech(part["text"], sound_file_path)
|
||||
# file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||
file_url = "test_url/" + firebase_file_path
|
||||
template["parts"][i]["audio"]["source"] = file_url
|
||||
template["parts"][i]["exercises"].append(part["exercises"])
|
||||
# if save_to_db("listening", template):
|
||||
# return template
|
||||
# else:
|
||||
# raise Exception("Failed to save question: " + parts)
|
||||
return template
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
@@ -278,22 +225,6 @@ def get_writing_task_1_general_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_writing_task_1', methods=['POST'])
@jwt_required()
def save_writing_task_1_question():
    """Persist a writing task 1 question in the "writing" collection.

    NOTE(review): the question is built from getListening2Template(), which
    looks like a placeholder for a writing template — confirm.

    Returns:
        The question on success, otherwise the exception message as a string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        # TODO ADD SAVE IMAGE TO DB
        question = getListening2Template()
        if not save_to_db("writing", question):
            # str() is required: concatenating a non-string question raises
            # TypeError and would replace this message with an unrelated one.
            raise Exception("Failed to save question: " + str(question))
        return question
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/writing_task2', methods=['POST'])
|
||||
@jwt_required()
|
||||
def grade_writing_task_2():
|
||||
@@ -341,17 +272,20 @@ def get_writing_task_2_general_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_writing_task_2', methods=['POST'])
|
||||
@app.route('/writing', methods=['POST'])
|
||||
@jwt_required()
|
||||
def save_writing_task_2_question():
|
||||
def save_writing_task():
|
||||
try:
|
||||
# data = request.get_json()
|
||||
# question = data.get('question')
|
||||
question = getListening2Template()
|
||||
if save_to_db("writing", question):
|
||||
return question
|
||||
else:
|
||||
raise Exception("Failed to save question: " + question)
|
||||
data = request.get_json()
|
||||
exercises = data.get('exercises')
|
||||
template = getWritingTemplate()
|
||||
for i, exercise in enumerate(exercises, start=0):
|
||||
template["exercises"][i]["prompt"] = exercise
|
||||
# if save_to_db("writing", template):
|
||||
# return template
|
||||
# else:
|
||||
# raise Exception("Failed to save writing: " + template)
|
||||
return template
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
@@ -408,54 +342,6 @@ def get_speaking_task_1_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_speaking_task_1', methods=['POST'])
@jwt_required()
def save_speaking_task_1_question():
    """Create one video per speaking part 1 question, upload them and persist the exercise.

    Questions come from getSpeaking1Template(); reading them from the request
    body is still commented out. A ``None`` result from create_video is
    treated as a failure for that question, and the exercise is saved only
    when every question produced a video.

    Returns:
        The inserted document on success, otherwise the exception message as
        a plain string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        questions_json = getSpeaking1Template()
        questions = []
        for question in questions_json["questions"]:
            result = create_video(question)
            if result is None:
                print("Failed to create video for question: " + question)
                continue
            sound_file_path = VIDEO_FILES_PATH + result
            firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
            url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
            questions.append({
                "text": question,
                "video_path": firebase_file_path,
                "video_url": url
            })

        if len(questions) != len(questions_json["questions"]):
            raise Exception("Array sizes do not match. Video uploading failing is probably the cause.")

        speaking_pt1_to_insert = {
            "exercises": [
                {
                    "id": str(uuid.uuid4()),
                    "prompts": questions,
                    "text": "Listen carefully and respond.",
                    "title": questions_json["topic"],
                    "type": "speakingPart1"
                }
            ],
            "isDiagnostic": True,
            "minTimer": 5,
            "module": "speaking"
        }
        if not save_to_db("speaking", speaking_pt1_to_insert):
            # str() is required: concatenating the dict directly raises
            # TypeError and would replace this message with an unrelated one.
            raise Exception("Failed to save question: " + str(speaking_pt1_to_insert))
        return speaking_pt1_to_insert
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/speaking_task_2', methods=['POST'])
|
||||
@jwt_required()
|
||||
def grade_speaking_task_2():
|
||||
@@ -509,54 +395,6 @@ def get_speaking_task_2_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/save_speaking_task_2', methods=['POST'])
@jwt_required()
def save_speaking_task_2_question():
    """Create one video per speaking part 2 question, upload them and persist the exercise.

    Questions come from getSpeaking2Template(). A ``None`` result from
    create_video counts as a failure for that question; the exercise is
    saved only when every question produced a video. Returns the inserted
    document on success, otherwise the exception message as a plain string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        template = getSpeaking2Template()
        prompts = []
        for prompt_text in template["questions"]:
            video_name = create_video(prompt_text)
            if video_name is None:
                print("Failed to create video for question: " + prompt_text)
                continue
            local_path = VIDEO_FILES_PATH + video_name
            remote_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + video_name
            video_url = upload_file_firebase_get_url(FIREBASE_BUCKET, remote_path, local_path)
            prompts.append({
                "text": prompt_text,
                "video_path": remote_path,
                "video_url": video_url
            })

        # Every question must have produced a video before anything is saved.
        if len(prompts) != len(template["questions"]):
            raise Exception("Array sizes do not match. Video uploading failing is probably the cause.")

        exercise = {
            "id": str(uuid.uuid4()),
            "prompts": prompts,
            "text": "Listen carefully and respond.",
            "title": template["topic"],
            "type": "speakingPart2"
        }
        document = {
            "exercises": [exercise],
            "isDiagnostic": True,
            "minTimer": 5,
            "module": "speaking"
        }
        if not save_to_db("speaking", document):
            raise Exception("Failed to save question: " + str(document))
        return document
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/speaking_task_3', methods=['GET'])
|
||||
@jwt_required()
|
||||
def get_speaking_task_3_question():
|
||||
@@ -612,50 +450,76 @@ def grade_speaking_task_3():
|
||||
return str(e), 400
|
||||
|
||||
|
||||
@app.route('/save_speaking_task_3', methods=['POST'])
|
||||
@app.route('/speaking', methods=['POST'])
|
||||
@jwt_required()
|
||||
def save_speaking_task_3_question():
|
||||
def save_speaking():
|
||||
try:
|
||||
# data = request.get_json()
|
||||
# question = data.get('question')
|
||||
questions_json = getSpeaking3Template()
|
||||
questions = []
|
||||
for question in questions_json["questions"]:
|
||||
result = create_video(question)
|
||||
data = request.get_json()
|
||||
exercises = data.get('exercises')
|
||||
template = getSpeakingTemplate()
|
||||
|
||||
# Speaking 1
|
||||
# sp1_result = create_video(exercises[0]["question"], random.choice(list(AvatarEnum)))
|
||||
sp1_result = "speaking_1"
|
||||
if sp1_result is not None:
|
||||
sound_file_path = VIDEO_FILES_PATH + sp1_result
|
||||
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp1_result
|
||||
# url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||
url = "speaking_1_url"
|
||||
sp1_video_path = firebase_file_path
|
||||
sp1_video_url = url
|
||||
template["exercises"][0]["text"] = exercises[0]["question"]
|
||||
template["exercises"][0]["title"] = exercises[0]["topic"]
|
||||
template["exercises"][0]["video_url"] = sp1_video_url
|
||||
template["exercises"][0]["video_path"] = sp1_video_path
|
||||
else:
|
||||
print("Failed to create video for part 1 question: " + exercises[0]["question"])
|
||||
|
||||
# Speaking 2
|
||||
# sp2_result = create_video(exercises[1]["question"], random.choice(list(AvatarEnum)))
|
||||
sp2_result = "speaking_2"
|
||||
if sp2_result is not None:
|
||||
sound_file_path = VIDEO_FILES_PATH + sp2_result
|
||||
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + sp2_result
|
||||
# url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||
url = "speaking_2_url"
|
||||
sp2_video_path = firebase_file_path
|
||||
sp2_video_url = url
|
||||
template["exercises"][1]["prompts"] = exercises[1]["prompts"]
|
||||
template["exercises"][1]["text"] = exercises[1]["question"]
|
||||
template["exercises"][1]["title"] = exercises[1]["topic"]
|
||||
template["exercises"][1]["video_url"] = sp2_video_url
|
||||
template["exercises"][1]["video_path"] = sp2_video_path
|
||||
else:
|
||||
print("Failed to create video for part 2 question: " + exercises[1]["question"])
|
||||
|
||||
# Speaking 3
|
||||
sp3_questions = []
|
||||
avatar = random.choice(list(AvatarEnum))
|
||||
for question in exercises[2]["questions"]:
|
||||
# result = create_video(question, avatar)
|
||||
result = "speaking_3"
|
||||
if result is not None:
|
||||
sound_file_path = VIDEO_FILES_PATH + result
|
||||
firebase_file_path = FIREBASE_SPEAKING_VIDEO_FILES_PATH + result
|
||||
url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||
# url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
|
||||
url = "speaking_3_url"
|
||||
video = {
|
||||
"text": question,
|
||||
"video_path": firebase_file_path,
|
||||
"video_url": url
|
||||
}
|
||||
questions.append(video)
|
||||
sp3_questions.append(video)
|
||||
else:
|
||||
print("Failed to create video for question: " + question)
|
||||
print("Failed to create video for part 3 question: " + question)
|
||||
template["exercises"][2]["prompts"] = sp3_questions
|
||||
template["exercises"][2]["title"] = exercises[2]["topic"]
|
||||
|
||||
if len(questions) == len(questions_json["questions"]):
|
||||
speaking_pt3_to_insert = {
|
||||
"exercises": [
|
||||
{
|
||||
"id": str(uuid.uuid4()),
|
||||
"prompts": questions,
|
||||
"text": "Listen carefully and respond.",
|
||||
"title": questions_json["topic"],
|
||||
"type": "speakingPart3"
|
||||
}
|
||||
],
|
||||
"isDiagnostic": True,
|
||||
"minTimer": 5,
|
||||
"module": "speaking"
|
||||
}
|
||||
if save_to_db("speaking", speaking_pt3_to_insert):
|
||||
return speaking_pt3_to_insert
|
||||
else:
|
||||
raise Exception("Failed to save question: " + str(speaking_pt3_to_insert))
|
||||
else:
|
||||
raise Exception("Array sizes do not match. Video uploading failing is probably the cause.")
|
||||
# if save_to_db("speaking", template):
|
||||
# return template
|
||||
# else:
|
||||
# raise Exception("Failed to save speaking: " + template)
|
||||
return template
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
@@ -688,28 +552,6 @@ def get_reading_passage_1_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/reading_passage_1', methods=['POST'])
@jwt_required()
def save_reading_passage_1_question():
    """Synthesize audio for a templated question, upload it and persist it.

    NOTE(review): despite the route name, this builds the question from
    getListening1Template() and saves it to the "listening" collection —
    looks like copied placeholder code; confirm the intended behaviour.

    Returns:
        The question mapping (with ``audio.source`` set to the uploaded file
        URL) on success, otherwise the exception message as a plain string.
    """
    try:
        # data = request.get_json()
        # question = data.get('question')
        question = getListening1Template()
        file_name = str(uuid.uuid4()) + ".mp3"
        sound_file_path = AUDIO_FILES_PATH + file_name
        firebase_file_path = FIREBASE_LISTENING_AUDIO_FILES_PATH + file_name
        # TODO it's the conversation audio, still work to do on text-to-speech
        text_to_speech(question["audio"]["conversation"], sound_file_path)
        file_url = upload_file_firebase_get_url(FIREBASE_BUCKET, firebase_file_path, sound_file_path)
        question["audio"]["source"] = file_url
        if not save_to_db("listening", question):
            # str() is required: concatenating the mapping directly raises
            # TypeError and would replace this message with an unrelated one.
            raise Exception("Failed to save question: " + str(question))
        return question
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/reading_passage_2', methods=['GET'])
|
||||
@jwt_required()
|
||||
def get_reading_passage_2_question():
|
||||
@@ -765,6 +607,22 @@ def get_reading_passage_3_question():
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
||||
@app.route('/reading', methods=['POST'])
@jwt_required()
def save_reading_passage():
    """Fill the reading template with the ``parts`` sent in the JSON request body.

    Persistence is currently disabled (the save_to_db call is commented out),
    so the filled template is only echoed back. On any exception the message
    is returned as a plain string.
    """
    try:
        payload = request.get_json()
        reading = getReadingTemplate()
        reading["parts"] = payload.get('parts')
        # if save_to_db("reading", template):
        #     return template
        # else:
        #     raise Exception("Failed to save reading: " + template)
        return reading
    except Exception as e:
        return str(e)
|
||||
|
||||
|
||||
@app.route('/level', methods=['GET'])
|
||||
@jwt_required()
|
||||
|
||||
@@ -24,18 +24,19 @@ def text_to_speech(text: str, file_name: str):
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
|
||||
)
|
||||
voice = random.choice(ALL_NEURAL_VOICES)['Id']
|
||||
# Initialize an empty list to store audio segments
|
||||
audio_segments = []
|
||||
tts_response = client.synthesize_speech(
|
||||
Engine="neural",
|
||||
Text=text,
|
||||
OutputFormat="mp3",
|
||||
VoiceId=random.choice(ALL_NEURAL_VOICES)['Id']
|
||||
)
|
||||
audio_segments.append(tts_response['AudioStream'].read())
|
||||
for part in divide_text(text):
|
||||
tts_response = client.synthesize_speech(
|
||||
Engine="neural",
|
||||
Text=part,
|
||||
OutputFormat="mp3",
|
||||
VoiceId=voice
|
||||
)
|
||||
audio_segments.append(tts_response['AudioStream'].read())
|
||||
# Combine the audio segments into a single audio file
|
||||
combined_audio = b"".join(audio_segments)
|
||||
file_name = file_name + ".mp3"
|
||||
# Save the combined audio to a single file
|
||||
with open(file_name, "wb") as f:
|
||||
f.write(combined_audio)
|
||||
@@ -43,20 +44,6 @@ def text_to_speech(text: str, file_name: str):
|
||||
print("Speech segments saved to " + file_name)
|
||||
|
||||
def conversation_text_to_speech(conversation: list, file_name: str):
|
||||
# Create a dictionary to store the mapping of 'name' to 'voice'
|
||||
name_to_voice = {}
|
||||
for segment in conversation:
|
||||
if 'voice' not in segment:
|
||||
name = segment['name']
|
||||
if name in name_to_voice:
|
||||
voice = name_to_voice[name]
|
||||
else:
|
||||
if segment['gender'].lower() == 'male':
|
||||
voice = random.choice(MALE_NEURAL_VOICES)['Id']
|
||||
else:
|
||||
voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
|
||||
name_to_voice[name] = voice
|
||||
segment['voice'] = voice
|
||||
# Initialize the Amazon Polly client
|
||||
client = boto3.client(
|
||||
'polly',
|
||||
@@ -77,7 +64,6 @@ def conversation_text_to_speech(conversation: list, file_name: str):
|
||||
audio_segments.append(response['AudioStream'].read())
|
||||
# Combine the audio segments into a single audio file
|
||||
combined_audio = b"".join(audio_segments)
|
||||
file_name = file_name + ".mp3"
|
||||
# Save the combined audio to a single file
|
||||
with open(file_name, "wb") as f:
|
||||
f.write(combined_audio)
|
||||
@@ -94,3 +80,24 @@ def has_10_words(text: str):
|
||||
words_in_input = text.split()
|
||||
english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
|
||||
return english_word_count >= 10
|
||||
|
||||
def divide_text(text, max_length=3000):
    """Split *text* into consecutive chunks of at most *max_length* characters.

    Each chunk is cut right after the last '.' found inside the current
    window, so sentences stay intact where possible; a window without a
    usable '.' is cut at exactly *max_length* characters. Once the remaining
    text fits in a single chunk it is emitted as-is instead of being split
    again at its last period. Concatenating the returned chunks reproduces
    *text* unchanged.

    Args:
        text: The string to split.
        max_length: Maximum length of each chunk; must be at least 1.

    Returns:
        A list of chunks in original order. Text that already fits
        (including the empty string) is returned as a one-element list.

    Raises:
        ValueError: If *max_length* is smaller than 1. Such a value could
            never make progress through non-empty text.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if len(text) <= max_length:
        return [text]

    divisions = []
    text_length = len(text)
    current_position = 0

    while current_position < text_length:
        # The tail already fits: emit it whole rather than cutting it again.
        if text_length - current_position <= max_length:
            divisions.append(text[current_position:])
            break

        next_position = current_position + max_length
        next_period_position = text.rfind('.', current_position, next_position)

        # rfind returns -1 when nothing is found, which this comparison also
        # rejects; a period sitting exactly at the window start is not used.
        if next_period_position > current_position:
            divisions.append(text[current_position:next_period_position + 1])
            current_position = next_period_position + 1
        else:
            # If no '.' found in the next chunk, split at max_length
            divisions.append(text[current_position:next_position])
            current_position = next_position

    return divisions
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user