Generate questions endpoints working for all.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
__pycache__
|
||||
.idea
|
||||
.env
|
||||
178
app.py
178
app.py
@@ -30,16 +30,32 @@ jwt = JWTManager(app)
|
||||
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
|
||||
firebase_admin.initialize_app(cred)
|
||||
|
||||
|
||||
@app.route('/listening_section_1', methods=['GET'])
@jwt_required()
def get_listening_section_1_question():
    """Generate a listening section 1 conversation and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of
            ``two_people_scenarios`` is used.
        exercises: optional, repeatable; when absent one exercise type is
            drawn at random from ``LISTENING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and the processed
        conversation under ``"text"``. If any step raises, the exception
        message is returned as a plain string instead.
    """
    try:
        delete_files_older_than_one_day(AUDIO_FILES_PATH)

        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(two_people_scenarios))
        req_exercises = request.args.getlist('exercises')

        # No explicit exercise types requested: pick one at random.
        if not req_exercises:
            req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)

        # Split the section's question budget across the requested exercise types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))

        unprocessed_conversation, processed_conversation = generate_listening_1_conversation(topic)

        print("Generated conversation: " + str(processed_conversation))

        # Section 1 is the first section, so question ids start at 1.
        start_id = 1
        exercises = generate_listening_conversation_exercises(
            unprocessed_conversation, req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": processed_conversation
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
|
||||
@@ -71,12 +87,24 @@ def save_listening_section_1_question():
|
||||
def get_listening_section_2_question():
    """Generate a listening section 2 monologue and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of
            ``social_monologue_contexts`` is used.
        exercises: optional, repeatable; when absent two exercise types are
            drawn at random from ``LISTENING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and the monologue under
        ``"text"``. If any step raises, the exception message is returned as
        a plain string instead.
    """
    try:
        delete_files_older_than_one_day(AUDIO_FILES_PATH)

        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(social_monologue_contexts))
        req_exercises = request.args.getlist('exercises')

        # No explicit exercise types requested: pick two at random.
        if not req_exercises:
            req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)

        # Split the section's question budget across the requested exercise types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises))

        monologue = generate_listening_2_monologue(topic)

        print("Generated monologue: " + str(monologue))

        # Question ids for this section start at 11.
        start_id = 11
        exercises = generate_listening_monologue_exercises(
            monologue, req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": monologue
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
|
||||
@@ -107,12 +135,26 @@ def save_listening_section_2_question():
|
||||
def get_listening_section_3_question():
    """Generate a listening section 3 conversation and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of
            ``four_people_scenarios`` is used.
        exercises: optional, repeatable; when absent one exercise type is
            drawn at random from ``LISTENING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and the processed
        conversation under ``"text"``. If any step raises, the exception
        message is returned as a plain string instead.
    """
    try:
        delete_files_older_than_one_day(AUDIO_FILES_PATH)

        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(four_people_scenarios))
        req_exercises = request.args.getlist('exercises')

        # No explicit exercise types requested: pick one at random.
        if not req_exercises:
            req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)

        # Split the section's question budget across the requested exercise types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))

        unprocessed_conversation, processed_conversation = generate_listening_3_conversation(topic)

        print("Generated conversation: " + str(processed_conversation))

        # Question ids for this section start at 21.
        start_id = 21
        exercises = generate_listening_conversation_exercises(
            unprocessed_conversation, req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": processed_conversation
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
|
||||
@@ -143,12 +185,24 @@ def save_listening_section_3_question():
|
||||
def get_listening_section_4_question():
    """Generate a listening section 4 monologue and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of
            ``academic_subjects`` is used.
        exercises: optional, repeatable; when absent two exercise types are
            drawn at random from ``LISTENING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and the monologue under
        ``"text"``. If any step raises, the exception message is returned as
        a plain string instead.
    """
    try:
        delete_files_older_than_one_day(AUDIO_FILES_PATH)

        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(academic_subjects))
        req_exercises = request.args.getlist('exercises')

        # No explicit exercise types requested: pick two at random.
        if not req_exercises:
            req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)

        # Split the section's question budget across the requested exercise types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises))

        monologue = generate_listening_4_monologue(topic)

        print("Generated monologue: " + str(monologue))

        # Question ids for this section start at 31.
        start_id = 31
        exercises = generate_listening_monologue_exercises(
            monologue, req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": monologue
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
|
||||
@@ -606,29 +660,25 @@ def save_speaking_task_3_question():
|
||||
@jwt_required()
def get_reading_passage_1_question():
    """Generate reading passage 1 and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of ``topics`` is used.
        exercises: optional, repeatable; when absent two exercise types are
            drawn at random from ``READING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and a ``"text"`` dict holding
        the passage ``"content"`` and ``"title"``. If any step raises, the
        exception message is returned as a plain string instead.
    """
    try:
        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(topics))
        req_exercises = request.args.getlist('exercises')

        # Default the exercise types *before* splitting the question budget,
        # so the split is never computed for zero parts.
        if not req_exercises:
            req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))

        passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
        print("Generated passage: " + str(passage))

        # Passage 1 is the first passage, so question ids start at 1.
        start_id = 1
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
@@ -656,6 +706,62 @@ def save_reading_passage_1_question():
|
||||
return str(e)
|
||||
|
||||
|
||||
@app.route('/reading_passage_2', methods=['GET'])
@jwt_required()
def get_reading_passage_2_question():
    """Generate reading passage 2 and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of ``topics`` is used.
        exercises: optional, repeatable; when absent two exercise types are
            drawn at random from ``READING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and a ``"text"`` dict holding
        the passage ``"content"`` and ``"title"``. If any step raises, the
        exception message is returned as a plain string instead.
    """
    try:
        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(topics))
        req_exercises = request.args.getlist('exercises')

        # No explicit exercise types requested: pick two at random.
        if not req_exercises:
            req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

        # Split the passage's question budget across the requested exercise types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))

        passage = generate_reading_passage(QuestionType.READING_PASSAGE_2, topic)
        print("Generated passage: " + str(passage))

        # Question ids for this passage start at 14.
        start_id = 14
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
|
||||
|
||||
@app.route('/reading_passage_3', methods=['GET'])
@jwt_required()
def get_reading_passage_3_question():
    """Generate reading passage 3 and its exercises.

    Query string parameters:
        topic: optional; when absent a random entry of ``topics`` is used.
        exercises: optional, repeatable; when absent two exercise types are
            drawn at random from ``READING_EXERCISE_TYPES``.

    Returns:
        A dict with the generated ``"exercises"`` and a ``"text"`` dict holding
        the passage ``"content"`` and ``"title"``. If any step raises, the
        exception message is returned as a plain string instead.
    """
    try:
        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(topics))
        req_exercises = request.args.getlist('exercises')

        # No explicit exercise types requested: pick two at random.
        if not req_exercises:
            req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

        # Split the passage's question budget across the requested exercise types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))

        passage = generate_reading_passage(QuestionType.READING_PASSAGE_3, topic)
        print("Generated passage: " + str(passage))

        # Question ids for this passage start at 27.
        start_id = 27
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }
    except Exception as e:
        # NOTE(review): the error text is returned as the response body with no
        # explicit status code, matching the other endpoints visible in this file.
        return str(e)
|
||||
|
||||
|
||||
@app.route('/fetch_tips', methods=['POST'])
|
||||
@jwt_required()
|
||||
def fetch_answer_tips():
|
||||
|
||||
@@ -1,20 +1,101 @@
|
||||
# --- OpenAI call settings ---------------------------------------------------
GRADING_TEMPERATURE = 0.1
TIPS_TEMPERATURE = 0.2
GEN_QUESTION_TEMPERATURE = 0.7
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"

# --- Field-name lists and supported exercise types --------------------------
GRADING_FIELDS = ['comment', 'overall', 'task_response']
GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['multipleChoice', 'fillBlanks', 'writeBlanks', 'trueFalse']
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']

# --- Storage locations (each constant is assigned exactly once) -------------
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
AUDIO_FILES_PATH = 'download-audio/'
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'
VIDEO_FILES_PATH = 'download-video/'
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'

# --- Number of questions generated per reading passage / listening section --
TOTAL_READING_PASSAGE_1_EXERCISES = 13
TOTAL_READING_PASSAGE_2_EXERCISES = 13
TOTAL_READING_PASSAGE_3_EXERCISES = 14

TOTAL_LISTENING_SECTION_1_EXERCISES = 10
TOTAL_LISTENING_SECTION_2_EXERCISES = 10
TOTAL_LISTENING_SECTION_3_EXERCISES = 10
TOTAL_LISTENING_SECTION_4_EXERCISES = 10

# --- Voice descriptors supporting both the neural and standard engines ------
# NOTE(review): the entries look like Amazon Polly voice descriptions - confirm.
EN_US_VOICES = [
    {'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Male', 'Id': 'Matthew', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Matthew',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Female', 'Id': 'Kimberly', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Kimberly',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Female', 'Id': 'Kendra', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Kendra',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Male', 'Id': 'Justin', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Justin',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Male', 'Id': 'Joey', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Joey',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Female', 'Id': 'Joanna', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Joanna',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Female', 'Id': 'Ivy', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Ivy',
     'SupportedEngines': ['neural', 'standard']},
]
EN_GB_VOICES = [
    {'Gender': 'Female', 'Id': 'Emma', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Emma',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Male', 'Id': 'Brian', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Brian',
     'SupportedEngines': ['neural', 'standard']},
    {'Gender': 'Female', 'Id': 'Amy', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Amy',
     'SupportedEngines': ['neural', 'standard']},
]
EN_GB_WLS_VOICES = [
    {'Gender': 'Male', 'Id': 'Geraint', 'LanguageCode': 'en-GB-WLS', 'LanguageName': 'Welsh English',
     'Name': 'Geraint', 'SupportedEngines': ['standard']},
]
EN_AU_VOICES = [
    {'Gender': 'Male', 'Id': 'Russell', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English',
     'Name': 'Russell', 'SupportedEngines': ['standard']},
    {'Gender': 'Female', 'Id': 'Nicole', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English',
     'Name': 'Nicole', 'SupportedEngines': ['standard']},
]
ALL_VOICES = EN_US_VOICES + EN_GB_VOICES + EN_GB_WLS_VOICES + EN_AU_VOICES

# --- Voice descriptors that list only the neural engine ---------------------
NEURAL_EN_US_VOICES = [
    {'Gender': 'Female', 'Id': 'Danielle', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Danielle',
     'SupportedEngines': ['neural']},
    {'Gender': 'Male', 'Id': 'Gregory', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Gregory',
     'SupportedEngines': ['neural']},
    {'Gender': 'Male', 'Id': 'Kevin', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Kevin',
     'SupportedEngines': ['neural']},
    {'Gender': 'Female', 'Id': 'Ruth', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Ruth',
     'SupportedEngines': ['neural']},
    {'Gender': 'Male', 'Id': 'Stephen', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Stephen',
     'SupportedEngines': ['neural']},
]
NEURAL_EN_GB_VOICES = [
    {'Gender': 'Male', 'Id': 'Arthur', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Arthur',
     'SupportedEngines': ['neural']},
]
NEURAL_EN_AU_VOICES = [
    {'Gender': 'Female', 'Id': 'Olivia', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English',
     'Name': 'Olivia', 'SupportedEngines': ['neural']},
]
NEURAL_EN_ZA_VOICES = [
    {'Gender': 'Female', 'Id': 'Ayanda', 'LanguageCode': 'en-ZA', 'LanguageName': 'South African English',
     'Name': 'Ayanda', 'SupportedEngines': ['neural']},
]
NEURAL_EN_NZ_VOICES = [
    {'Gender': 'Female', 'Id': 'Aria', 'LanguageCode': 'en-NZ', 'LanguageName': 'New Zealand English', 'Name': 'Aria',
     'SupportedEngines': ['neural']},
]
NEURAL_EN_IN_VOICES = [
    {'Gender': 'Female', 'Id': 'Kajal', 'LanguageCode': 'en-IN', 'LanguageName': 'Indian English', 'Name': 'Kajal',
     'SupportedEngines': ['neural']},
]
NEURAL_EN_IE_VOICES = [
    {'Gender': 'Female', 'Id': 'Niamh', 'LanguageCode': 'en-IE', 'LanguageName': 'Irish English', 'Name': 'Niamh',
     'SupportedEngines': ['neural']},
]
# NOTE(review): NEURAL_EN_IN_VOICES is defined above but is not part of this
# aggregate. The omission is preserved as-is; confirm whether it is deliberate.
ALL_NEURAL_VOICES = (
    NEURAL_EN_US_VOICES
    + NEURAL_EN_GB_VOICES
    + NEURAL_EN_AU_VOICES
    + NEURAL_EN_ZA_VOICES
    + NEURAL_EN_NZ_VOICES
    + NEURAL_EN_IE_VOICES
)

# --- Gender-filtered views of the aggregates --------------------------------
MALE_VOICES = [item for item in ALL_VOICES if item.get('Gender') == 'Male']
FEMALE_VOICES = [item for item in ALL_VOICES if item.get('Gender') == 'Female']

MALE_NEURAL_VOICES = [item for item in ALL_NEURAL_VOICES if item.get('Gender') == 'Male']
FEMALE_NEURAL_VOICES = [item for item in ALL_NEURAL_VOICES if item.get('Gender') == 'Female']
|
||||
|
||||
topics = [
|
||||
"Art and Creativity",
|
||||
@@ -118,3 +199,394 @@ topics = [
|
||||
"Global Health",
|
||||
"Cultural Appropriation"
|
||||
]
|
||||
|
||||
# Everyday two-speaker situations; the listening section 1 endpoint picks one
# at random as the conversation topic when the request does not supply one.
two_people_scenarios = [
    "Booking a table at a restaurant",
    "Making a doctor's appointment",
    "Asking for directions to a tourist attraction",
    "Inquiring about public transportation options",
    "Discussing weekend plans with a friend",
    "Ordering food at a café",
    "Renting a bicycle for a day",
    "Arranging a meeting with a colleague",
    "Talking to a real estate agent about renting an apartment",
    "Discussing travel plans for an upcoming vacation",
    "Checking the availability of a hotel room",
    "Talking to a car rental service",
    "Asking for recommendations at a library",
    "Inquiring about opening hours at a museum",
    "Discussing the weather forecast",
    "Shopping for groceries",
    "Renting a movie from a video store",
    "Booking a flight ticket",
    "Discussing a school assignment with a classmate",
    "Making a reservation for a spa appointment",
    "Talking to a customer service representative about a product issue",
    "Discussing household chores with a family member",
    "Planning a surprise party for a friend",
    "Talking to a coworker about a project deadline",
    "Inquiring about a gym membership",
    "Discussing the menu options at a fast-food restaurant",
    "Talking to a neighbor about a community event",
    "Asking for help with computer problems",
    "Discussing a recent sports game with a sports enthusiast",
    "Talking to a pet store employee about buying a pet",
    "Asking for information about a local farmer's market",
    "Discussing the details of a home renovation project",
    "Talking to a coworker about office supplies",
    "Making plans for a family picnic",
    "Inquiring about admission requirements at a university",
    "Discussing the features of a new smartphone with a salesperson",
    "Talking to a mechanic about car repairs",
    "Making arrangements for a child's birthday party",
    "Discussing a new diet plan with a nutritionist",
    "Asking for information about a music concert",
    "Talking to a hairdresser about getting a haircut",
    "Inquiring about a language course at a language school",
    "Discussing plans for a weekend camping trip",
    "Talking to a bank teller about opening a new account",
    "Ordering a drink at a coffee shop",
    "Discussing a new book with a book club member",
    "Talking to a librarian about library services",
    "Asking for advice on finding a job",
    "Discussing plans for a garden makeover with a landscaper",
    "Talking to a travel agent about a cruise vacation",
    "Inquiring about a fitness class at a gym",
    "Ordering flowers for a special occasion",
    "Discussing a new exercise routine with a personal trainer",
    "Talking to a teacher about a child's progress in school",
    "Asking for information about a local art exhibition",
    "Discussing a home improvement project with a contractor",
    "Talking to a babysitter about childcare arrangements",
    "Making arrangements for a car service appointment",
    "Inquiring about a photography workshop at a studio",
    "Discussing plans for a family reunion with a relative",
    "Talking to a tech support representative about computer issues",
    "Asking for recommendations on pet grooming services",
    "Discussing weekend plans with a significant other",
    "Talking to a counselor about personal issues",
    "Inquiring about a music lesson with a music teacher",
    "Ordering a pizza for delivery",
    "Making a reservation for a taxi",
    "Discussing a new recipe with a chef",
    "Talking to a fitness trainer about weight loss goals",
    "Inquiring about a dance class at a dance studio",
    "Ordering a meal at a food truck",
    "Discussing plans for a weekend getaway with a partner",
    "Talking to a florist about wedding flower arrangements",
    "Asking for advice on home decorating",
    "Discussing plans for a charity fundraiser event",
    "Talking to a pet sitter about taking care of pets",
    "Making arrangements for a spa day with a friend",
    "Asking for recommendations on home improvement stores",
    "Discussing weekend plans with a travel enthusiast",
    "Talking to a car mechanic about car maintenance",
    "Inquiring about a cooking class at a culinary school",
    "Ordering a sandwich at a deli",
    "Discussing plans for a family holiday party",
    "Talking to a personal assistant about organizing tasks",
    "Asking for information about a local theater production",
    "Discussing a new DIY project with a home improvement expert",
    "Talking to a wine expert about wine pairing",
    "Making arrangements for a pet adoption",
    "Asking for advice on planning a wedding",
]
|
||||
|
||||
# Single-speaker social contexts; the listening section 2 endpoint picks one
# at random as the monologue topic when the request does not supply one.
social_monologue_contexts = [
    "A guided tour of a historical museum",
    "An introduction to a new city for tourists",
    "An orientation session for new university students",
    "A safety briefing for airline passengers",
    "An explanation of the process of recycling",
    "A lecture on the benefits of a healthy diet",
    "A talk on the importance of time management",
    "A monologue about wildlife conservation",
    "An overview of local public transportation options",
    "A presentation on the history of cinema",
    "An introduction to the art of photography",
    "A discussion about the effects of climate change",
    "An overview of different types of cuisine",
    "A lecture on the principles of financial planning",
    "A monologue about sustainable energy sources",
    "An explanation of the process of online shopping",
    "A guided tour of a botanical garden",
    "An introduction to a local wildlife sanctuary",
    "A safety briefing for hikers in a national park",
    "A talk on the benefits of physical exercise",
    "A lecture on the principles of effective communication",
    "A monologue about the impact of social media",
    "An overview of the history of a famous landmark",
    "An introduction to the world of fashion design",
    "A discussion about the challenges of global poverty",
    "An explanation of the process of organic farming",
    "A presentation on the history of space exploration",
    "An overview of traditional music from different cultures",
    "A lecture on the principles of effective leadership",
    "A monologue about the influence of technology",
    "A guided tour of a famous archaeological site",
    "An introduction to a local wildlife rehabilitation center",
    "A safety briefing for visitors to a science museum",
    "A talk on the benefits of learning a new language",
    "A lecture on the principles of architectural design",
    "A monologue about the impact of renewable energy",
    "An explanation of the process of online banking",
    "A presentation on the history of a famous art movement",
    "An overview of traditional clothing from various regions",
    "A lecture on the principles of sustainable agriculture",
    "A discussion about the challenges of urban development",
    "A monologue about the influence of social norms",
    "A guided tour of a historical battlefield",
    "An introduction to a local animal shelter",
    "A safety briefing for participants in a charity run",
    "A talk on the benefits of community involvement",
    "A lecture on the principles of sustainable tourism",
    "A monologue about the impact of alternative medicine",
    "An explanation of the process of wildlife tracking",
    "A presentation on the history of a famous inventor",
    "An overview of traditional dance forms from different cultures",
    "A lecture on the principles of ethical business practices",
    "A discussion about the challenges of healthcare access",
    "A monologue about the influence of cultural traditions",
    "A guided tour of a famous lighthouse",
    "An introduction to a local astronomy observatory",
    "A safety briefing for participants in a team-building event",
    "A talk on the benefits of volunteering",
    "A lecture on the principles of wildlife protection",
    "A monologue about the impact of space exploration",
    "An explanation of the process of wildlife photography",
    "A presentation on the history of a famous musician",
    "An overview of traditional art forms from different cultures",
    "A lecture on the principles of effective education",
    "A discussion about the challenges of sustainable development",
    "A monologue about the influence of cultural diversity",
    "A guided tour of a famous national park",
    "An introduction to a local marine conservation project",
    "A safety briefing for participants in a hot air balloon ride",
    "A talk on the benefits of cultural exchange programs",
    "A lecture on the principles of wildlife conservation",
    "A monologue about the impact of technological advancements",
    "An explanation of the process of wildlife rehabilitation",
    "A presentation on the history of a famous explorer",
    "An overview of traditional storytelling from different cultures",
    "A lecture on the principles of effective marketing",
    "A discussion about the challenges of environmental sustainability",
    "A monologue about the influence of social entrepreneurship",
    "A guided tour of a famous historical estate",
    "An introduction to a local marine life research center",
    "A safety briefing for participants in a zip-lining adventure",
    "A talk on the benefits of cultural preservation",
    "A lecture on the principles of wildlife ecology",
    "A monologue about the impact of space technology",
    "An explanation of the process of wildlife conservation",
    "A presentation on the history of a famous scientist",
    "An overview of traditional crafts and artisans from different cultures",
    "A lecture on the principles of effective intercultural communication",
]
|
||||
|
||||
# Educational / training settings; the listening section 3 endpoint picks one
# at random as the conversation topic when the request does not supply one.
four_people_scenarios = [
    "A university lecture on history",
    "A physics class discussing Newton's laws",
    "A medical school seminar on anatomy",
    "A training session on computer programming",
    "A business school lecture on marketing strategies",
    "A chemistry lab experiment and discussion",
    "A language class practicing conversational skills",
    "A workshop on creative writing techniques",
    "A high school math lesson on calculus",
    "A training program for customer service representatives",
    "A lecture on environmental science and sustainability",
    "A psychology class exploring human behavior",
    "A music theory class analyzing compositions",
    "A nursing school simulation for patient care",
    "A computer science class on algorithms",
    "A workshop on graphic design principles",
    "A law school lecture on constitutional law",
    "A geology class studying rock formations",
    "A vocational training program for electricians",
    "A history seminar focusing on ancient civilizations",
    "A biology class dissecting specimens",
    "A financial literacy course for adults",
    "A literature class discussing classic novels",
    "A training session for emergency response teams",
    "A sociology lecture on social inequality",
    "An art class exploring different painting techniques",
    "A medical school seminar on diagnosis",
    "A programming bootcamp teaching web development",
    "An economics class analyzing market trends",
    "A chemistry lab experiment on chemical reactions",
    "A language class practicing pronunciation",
    "A workshop on public speaking skills",
    "A high school physics lesson on electromagnetism",
    "A training program for IT professionals",
    "A lecture on climate change and its effects",
    "A psychology class studying cognitive psychology",
    "A music class composing original songs",
    "A nursing school simulation for patient assessment",
    "A computer science class on data structures",
    "A workshop on 3D modeling and animation",
    "A law school lecture on contract law",
    "A geography class examining world maps",
    "A vocational training program for plumbers",
    "A history seminar discussing revolutions",
    "A biology class exploring genetics",
    "A financial literacy course for teens",
    "A literature class analyzing poetry",
    "A training session for public speaking coaches",
    "A sociology lecture on cultural diversity",
    "An art class creating sculptures",
    "A medical school seminar on surgical techniques",
    "A programming bootcamp teaching app development",
    "An economics class on global trade policies",
    "A chemistry lab experiment on chemical bonding",
    "A language class discussing idiomatic expressions",
    "A workshop on conflict resolution",
    "A high school biology lesson on evolution",
    "A training program for project managers",
    "A lecture on renewable energy sources",
    "A psychology class on abnormal psychology",
    "A music class rehearsing for a performance",
    "A nursing school simulation for emergency response",
    "A computer science class on cybersecurity",
    "A workshop on digital marketing strategies",
    "A law school lecture on intellectual property",
    "A geology class analyzing seismic activity",
    "A vocational training program for carpenters",
    "A history seminar on the Renaissance",
    "A chemistry class synthesizing compounds",
    "A financial literacy course for seniors",
    "A literature class interpreting Shakespearean plays",
    "A training session for negotiation skills",
    "A sociology lecture on urbanization",
    "An art class creating digital art",
    "A medical school seminar on patient communication",
    "A programming bootcamp teaching mobile app development",
    "An economics class on fiscal policy",
    "A physics lab experiment on electromagnetism",
    "A language class on cultural immersion",
    "A workshop on time management",
    "A high school chemistry lesson on stoichiometry",
    "A training program for HR professionals",
    "A lecture on space exploration and astronomy",
    "A psychology class on human development",
    "A music class practicing for a recital",
    "A nursing school simulation for triage",
    "A computer science class on web development frameworks",
    "A workshop on team-building exercises",
    "A law school lecture on criminal law",
    "A geography class studying world cultures",
    "A vocational training program for HVAC technicians",
    "A history seminar on ancient civilizations",
    "A biology class examining ecosystems",
    "A financial literacy course for entrepreneurs",
    "A literature class analyzing modern literature",
    "A training session for leadership skills",
    "A sociology lecture on gender studies",
    "An art class exploring multimedia art",
    "A medical school seminar on patient diagnosis",
    "A programming bootcamp teaching software architecture",
]
|
||||
|
||||
# Academic subjects; the listening section 4 endpoint picks one at random as
# the monologue topic when the request does not supply one. Every entry is
# listed once so that random selection is not skewed towards any subject.
academic_subjects = [
    "Astrophysics",
    "Microbiology",
    "Political Science",
    "Environmental Science",
    "Literature",
    "Biochemistry",
    "Sociology",
    "Art History",
    "Geology",
    "Economics",
    "Psychology",
    "History of Architecture",
    "Linguistics",
    "Neurobiology",
    "Anthropology",
    "Quantum Mechanics",
    "Urban Planning",
    "Philosophy",
    "Marine Biology",
    "International Relations",
    "Medieval History",
    "Geophysics",
    "Finance",
    "Educational Psychology",
    "Graphic Design",
    "Paleontology",
    "Macroeconomics",
    "Cognitive Psychology",
    "Renaissance Art",
    "Archaeology",
    "Microeconomics",
    "Social Psychology",
    "Contemporary Art",
    "Meteorology",
    "Political Philosophy",
    "Space Exploration",
    "Cognitive Science",
    "Classical Music",
    "Oceanography",
    "Public Health",
    "Gender Studies",
    "Baroque Art",
    "Volcanology",
    "Business Ethics",
    "Music Composition",
    "Environmental Policy",
    "Media Studies",
    "Ancient History",
    "Seismology",
    "Marketing",
    "Human Development",
    "Modern Art",
    "Astronomy",
    "International Law",
    "Developmental Psychology",
    "Film Studies",
    "American History",
    "Soil Science",
    "Entrepreneurship",
    "Clinical Psychology",
    "Contemporary Dance",
    "Space Physics",
    "Political Economy",
    "Cognitive Neuroscience",
    "20th Century Literature",
    "Public Administration",
    "European History",
    "Atmospheric Science",
    "Supply Chain Management",
    "Social Work",
    "Japanese Literature",
    "Planetary Science",
    "Labor Economics",
    "Industrial-Organizational Psychology",
    "French Philosophy",
    "Biogeochemistry",
    "Strategic Management",
    "Educational Sociology",
    "Postmodern Literature",
    "Public Relations",
    "Middle Eastern History",
    "International Development",
    "Human Resources Management",
    "Educational Leadership",
    "Russian Literature",
    "Quantum Chemistry",
    "Environmental Economics",
    "Environmental Psychology",
    "Ancient Philosophy",
    "Immunology",
    "Comparative Politics",
    "Child Development",
    "Fashion Design",
    "Geological Engineering",
    "Macroeconomic Policy",
    "Media Psychology",
    "Byzantine Art",
    "Ecology",
    "International Business",
]
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,17 @@
|
||||
import queue
|
||||
import nltk
|
||||
import random
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from helper.api_messages import QuestionType
|
||||
from helper.openai_interface import make_openai_instruct_call
|
||||
from helper.token_counter import count_tokens
|
||||
from helper.constants import *
|
||||
from wonderwords import RandomWord
|
||||
|
||||
nltk.download('words')
|
||||
|
||||
|
||||
def divide_number_into_parts(number, parts):
|
||||
if number < parts:
|
||||
@@ -22,51 +30,598 @@ def divide_number_into_parts(number, parts):
|
||||
|
||||
return q
|
||||
|
||||
def fix_exercise_ids(exercise, start_id=1):
    """Renumber the questions of a single exercise sequentially.

    The "id" of every entry in ``exercise["questions"]`` is overwritten with
    consecutive integers (stored as strings) beginning at ``start_id``.

    :param exercise: dict holding a "questions" list of dicts; mutated in place.
    :param start_id: id given to the first question (defaults to 1).
    :return: the same ``exercise`` object, for call chaining.
    """
    for current_id, question in enumerate(exercise["questions"], start=start_id):
        # Ids are kept as strings, matching the other exercise builders.
        question["id"] = str(current_id)

    return exercise
|
||||
|
||||
|
||||
def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
    """Blank out the first whole-word occurrence of each word in ``text``.

    The n-th word is replaced by the placeholder ``{{start_id + n}}``.
    Matching is case-insensitive and restricted to whole words, so a short
    word such as "art" no longer punches a hole in "party".

    :param text: text in which the blanks are created.
    :param words_to_replace: words to blank out, in placeholder order.
    :param start_id: number used for the first placeholder.
    :return: the text with placeholders inserted.
    """
    for i, word in enumerate(words_to_replace, start=start_id):
        # Lookarounds instead of \b so words that start or end with a
        # non-word character (e.g. "U.S.") can still be matched.
        pattern = re.compile(r'(?<!\w)' + re.escape(word) + r'(?!\w)', re.IGNORECASE)
        placeholder = '{{' + str(i) + '}}'
        text = pattern.sub(placeholder, text, 1)
    return text
|
||||
|
||||
def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
    """Blank out one word per note, pairing notes and words by position.

    In note ``i`` the first whole-word, case-insensitive occurrence of
    ``words_to_replace[i]`` becomes ``{{start_id + i}}``. Notes that have no
    matching word (the word list is shorter than the note list) are kept
    unchanged instead of raising ``IndexError``.

    :param notes: note strings.
    :param words_to_replace: one word per note, same order.
    :param start_id: number used for the first placeholder.
    :return: new list of notes with placeholders inserted.
    """
    replaced_notes = []
    for i, note in enumerate(notes):
        if i < len(words_to_replace):
            pattern = re.compile(r'\b' + re.escape(words_to_replace[i]) + r'\b', re.IGNORECASE)
            placeholder = '{{' + str(start_id + i) + '}}'
            note = pattern.sub(placeholder, note, 1)
        replaced_notes.append(note)
    return replaced_notes
|
||||
|
||||
def add_random_words_and_shuffle(word_array, num_random_words):
    """Return the given words mixed with random extra words, in shuffled order.

    :param word_array: list of words; it is not modified.
    :param num_random_words: how many extra words to request from
        ``RandomWord().random_words``.
    :return: a new shuffled list holding the given and the extra words.
    """
    extra_words = RandomWord().random_words(num_random_words)
    mixed = word_array + extra_words
    random.shuffle(mixed)
    return mixed
|
||||
|
||||
|
||||
def fillblanks_build_solutions_array(words, start_id):
    """Build the solutions list for a fill-blanks exercise.

    :param words: solution words, in blank order.
    :param start_id: id of the first blank.
    :return: list of ``{"id": str, "solution": word}`` dicts with consecutive ids.
    """
    return [
        {"id": str(blank_id), "solution": answer}
        for blank_id, answer in enumerate(words, start=start_id)
    ]
|
||||
|
||||
|
||||
def remove_excess_questions(questions: list, quantity):
    """Drop ``quantity`` surplus questions, preferring 'true' statements.

    Scanning from the end of the list, up to ``quantity`` items whose
    "solution" is "true" (case-insensitive) are removed. If there are fewer
    such items than requested, the shortfall is taken from the end of the
    remaining list so that exactly ``quantity`` items are removed whenever
    the list is long enough.

    :param questions: list of dicts with a "solution" key; not modified.
    :param quantity: number of items to remove; values <= 0 remove nothing.
    :return: a new list with the surviving items in their original order.
    """
    still_to_remove = quantity
    kept_reversed = []

    for item in reversed(questions):
        is_true = str(item.get('solution')).strip().lower() == 'true'
        if is_true and still_to_remove > 0:
            still_to_remove -= 1
        else:
            kept_reversed.append(item)

    kept = kept_reversed[::-1]
    if still_to_remove > 0:
        # Not enough 'true' statements to drop: trim the tail instead.
        kept = kept[:max(len(kept) - still_to_remove, 0)]
    return kept
|
||||
|
||||
|
||||
def build_write_blanks_text(questions: [], start_id):
    """Render the questions of a write-blanks exercise as one text.

    Each question is followed by its ``{{id}}`` placeholder and a literal
    backslash-n sequence (two characters, not a newline).

    :param questions: dicts with a "question" string.
    :param start_id: id of the first placeholder.
    :return: the concatenated text ("" for no questions).
    """
    pieces = [
        item["question"] + '{{' + str(blank_id) + '}}' + "\\n"
        for blank_id, item in enumerate(questions, start=start_id)
    ]
    return "".join(pieces)
|
||||
|
||||
def build_write_blanks_text_form(form: [], start_id):
    """Turn "key: value" form entries into a write-blanks text.

    For every entry one randomly chosen word of the value (the text after the
    first ':') is replaced by a ``{{id}}`` placeholder. Ids start at
    ``start_id`` so they line up with the solutions built from the returned
    word list. Words longer than one character are preferred; one-character
    values (e.g. "Age: 5") are still usable. An entry without ':' is treated
    as all value, and an entry without any word is emitted unchanged and
    consumes no id.

    :param form: list of form entry strings.
    :param start_id: id of the first placeholder.
    :return: ``(text, replaced_words)``; each rendered entry is followed by a
        literal backslash-n sequence, and ``replaced_words[n]`` is the text
        removed for placeholder ``start_id + n``.
    """
    rendered = []
    replaced_words = []
    next_id = start_id

    for entry in form:
        # Locate the value part; fall back to the whole entry without ':'.
        match = re.search(r'(?<=:)\s*(.*)', entry)
        value_start, value_end = match.span(1) if match else (0, len(entry))
        value = entry[value_start:value_end]

        words = re.findall(r'\b\w+\b', value)
        candidates = [word for word in words if len(word) > 1] or words
        if not candidates:
            rendered.append(entry)
            continue

        selected_word = random.choice(candidates)
        pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
        hit = pattern.search(value)
        placeholder = '{{' + str(next_id) + '}}'
        blanked_value = value[:hit.start()] + placeholder + value[hit.end():]

        # Splice by position so an identical substring in the key is untouched.
        rendered.append(entry[:value_start] + blanked_value + entry[value_end:])
        # Record the text actually removed (its case may differ from the pick).
        replaced_words.append(hit.group(0))
        next_id += 1

    return "".join(line + "\\n" for line in rendered), replaced_words
|
||||
|
||||
|
||||
def build_write_blanks_solutions(questions: [], start_id):
    """Build the solutions list for a write-blanks question exercise.

    :param questions: dicts with a "possible_answers" value.
    :param start_id: id of the first blank.
    :return: list of ``{"id": str, "solution": possible_answers}`` dicts.
    """
    return [
        {"id": str(blank_id), "solution": item["possible_answers"]}
        for blank_id, item in enumerate(questions, start=start_id)
    ]
|
||||
|
||||
def build_write_blanks_solutions_listening(words: [], start_id):
    """Build the solutions list for a listening write-blanks exercise.

    :param words: the removed words, in blank order.
    :param start_id: id of the first blank.
    :return: list of ``{"id": str, "solution": word}`` dicts with consecutive ids.
    """
    return [
        {"id": str(blank_id), "solution": answer}
        for blank_id, answer in enumerate(words, start=start_id)
    ]
|
||||
|
||||
|
||||
def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the instruct model for an IELTS reading passage on ``topic``.

    :param type: question type whose ``.value`` names the passage in the prompt.
    :param topic: subject of the passage.
    :return: whatever ``make_openai_instruct_call`` returns for the
        ``GEN_TEXT_FIELDS`` check (presumably a dict with "title" and "text";
        confirm against the helper).
    """
    # NOTE: the sentence break before "Provide your response" was missing a
    # space, which fused two prompt sentences together.
    gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
                            "of " + topic + ". The passage should offer a substantial amount of " \
                            "information, analysis, or narrative " \
                            "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
                            "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
                            "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
    token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
                                     GEN_QUESTION_TEMPERATURE)
|
||||
|
||||
def gen_multiple_choice_exercise(text: str, quantity: int):
|
||||
gen_multiple_choice_for_text = "Generate" + str(quantity) + "multiple choice questions for this text: " \
|
||||
"'" + text + "'\n" \
|
||||
"Use this format: 'questions': [{'id': '9', 'options': [{'id': 'A', 'text': " \
|
||||
"'Economic benefits'}, {'id': 'B', 'text': 'Government regulations'}, {'id': 'C', 'text': " \
|
||||
"'Concerns about climate change'}, {'id': 'D', 'text': 'Technological advancement'}], " \
|
||||
"'prompt': 'What is the main reason for the shift towards renewable energy sources?', " \
|
||||
"'solution': 'C', 'variant': 'text'}]"
|
||||
|
||||
def generate_listening_1_conversation(topic: str):
    """Generate a two-person conversation and a structured, voiced copy of it.

    Two model calls are made: one to write the dialogue, one to parse it into
    ``{"conversation": [{"name", "gender", "text"}]}``. Every parsed segment
    lacking a "voice" then receives one; a speaker keeps the same voice for
    the whole conversation.

    :param topic: everyday social context of the conversation.
    :return: ``(raw_response, processed)``: the first call's output and the
        parsed structure with "voice" added to its segments.
    """
    dialogue_prompt = "Compose an authentic conversation between two individuals in the everyday " \
                      "social context of '" + topic + "'. Please include random names and genders " \
                      "for the characters in your dialogue."
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        dialogue_prompt,
        count_tokens(dialogue_prompt)["n_tokens"],
        None,
        GEN_QUESTION_TEMPERATURE
    )

    target_schema = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_prompt = "Parse this conversation: '" + response + "' to the following json format: " + target_schema
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parse_prompt,
        count_tokens(parse_prompt)["n_tokens"],
        ['conversation'],
        GEN_QUESTION_TEMPERATURE
    )

    voices_by_speaker = {}
    for segment in processed['conversation']:
        if 'voice' in segment:
            continue
        speaker = segment['name']
        if speaker not in voices_by_speaker:
            # Any gender other than 'male' falls back to the female pool.
            if segment['gender'].lower() == 'male':
                voice_pool = MALE_NEURAL_VOICES
            else:
                voice_pool = FEMALE_NEURAL_VOICES
            voices_by_speaker[speaker] = random.choice(voice_pool)['Id']
        segment['voice'] = voices_by_speaker[speaker]
    return response, processed
|
||||
|
||||
def generate_listening_2_monologue(topic: str):
    """Ask the instruct model for a monologue set in a social context.

    :param topic: social context of the monologue.
    :return: the result of ``make_openai_instruct_call`` (no field check is
        requested, so presumably the raw generated text; confirm).
    """
    monologue_prompt = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
    prompt_tokens = count_tokens(monologue_prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        monologue_prompt,
        prompt_tokens,
        None,
        GEN_QUESTION_TEMPERATURE
    )
|
||||
|
||||
def generate_listening_3_conversation(topic: str):
    """Generate a conversation of up to four people and a voiced, parsed copy.

    Two model calls are made: one to write the dialogue, one to parse it into
    ``{"conversation": [{"name", "gender", "text"}]}``. Every parsed segment
    lacking a "voice" then receives one; a speaker keeps the same voice for
    the whole conversation.

    :param topic: everyday social context of the conversation.
    :return: ``(raw_response, processed)``: the first call's output and the
        parsed structure with "voice" added to its segments.
    """
    dialogue_prompt = "Compose an authentic and elaborate conversation between up to four individuals " \
                      "in the everyday social context of '" + topic + \
                      "'. Please include random names and genders for the characters in your dialogue."
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        dialogue_prompt,
        count_tokens(dialogue_prompt)["n_tokens"],
        None,
        GEN_QUESTION_TEMPERATURE
    )

    target_schema = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_prompt = "Parse this conversation: '" + response + "' to the following json format: " + target_schema
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parse_prompt,
        count_tokens(parse_prompt)["n_tokens"],
        ['conversation'],
        GEN_QUESTION_TEMPERATURE
    )

    voices_by_speaker = {}
    for segment in processed['conversation']:
        if 'voice' in segment:
            continue
        speaker = segment['name']
        if speaker not in voices_by_speaker:
            # Any gender other than 'male' falls back to the female pool.
            if segment['gender'].lower() == 'male':
                voice_pool = MALE_NEURAL_VOICES
            else:
                voice_pool = FEMALE_NEURAL_VOICES
            voices_by_speaker[speaker] = random.choice(voice_pool)['Id']
        segment['voice'] = voices_by_speaker[speaker]
    return response, processed
|
||||
|
||||
def generate_listening_4_monologue(topic: str):
    """Ask the instruct model for a monologue on an academic subject.

    :param topic: academic subject of the monologue.
    :return: the result of ``make_openai_instruct_call`` (no field check is
        requested, so presumably the raw generated text; confirm).
    """
    # The prompt previously read "monologue an academic subject" (missing "on").
    gen_listening_4_monologue_academic = "Generate a comprehensive monologue on an academic subject of: '" + topic + "'"
    token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        gen_listening_4_monologue_academic,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE
    )
    return response
|
||||
|
||||
def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id):
    """Generate the requested reading exercises for a passage.

    For each requested type one question count is taken from
    ``number_of_exercises_q`` and the matching generator is called. Question
    ids continue from one exercise to the next. An unsupported type is
    reported and skipped without advancing the id counter, so the produced
    ids stay contiguous.

    :param passage: reading passage text.
    :param req_exercises: exercise type names ("multipleChoice", "fillBlanks",
        "trueFalse", "writeBlanks").
    :param number_of_exercises_q: object whose ``get()`` yields the question
        count for the next exercise (presumably a ``queue.Queue``; ``get()``
        on an empty queue would block; confirm with the caller).
    :param start_id: id of the first question.
    :return: list of generated exercises.
    """
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
            question = gen_multiple_choice_exercise(passage, number_of_exercises, start_id)
            label = "multiple choice"
        elif req_exercise == "fillBlanks":
            question = gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id)
            label = "fill blanks"
        elif req_exercise == "trueFalse":
            question = gen_true_false_not_given_exercise(passage, number_of_exercises, start_id)
            label = "trueFalse"
        elif req_exercise == "writeBlanks":
            question = gen_write_blanks_exercise(passage, number_of_exercises, start_id)
            label = "write blanks"
        else:
            # Nothing was generated, so no ids were used.
            print("Skipped unsupported exercise type: " + str(req_exercise))
            continue

        exercises.append(question)
        print("Added " + label + ": " + str(question))
        start_id = start_id + number_of_exercises

    return exercises
|
||||
|
||||
def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id):
    """Generate the requested listening exercises for a conversation.

    For each requested type one question count is taken from
    ``number_of_exercises_q`` and the matching generator is called. Question
    ids continue from one exercise to the next. An unsupported type is
    reported and skipped without advancing the id counter, so the produced
    ids stay contiguous.

    :param conversation: conversation text.
    :param req_exercises: exercise type names ("multipleChoice",
        "writeBlanksQuestions", "writeBlanksFill", "writeBlanksForm").
    :param number_of_exercises_q: object whose ``get()`` yields the question
        count for the next exercise (presumably a ``queue.Queue``; ``get()``
        on an empty queue would block; confirm with the caller).
    :param start_id: id of the first question.
    :return: list of generated exercises.
    """
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
            question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            label = "multiple choice"
        elif req_exercise == "writeBlanksQuestions":
            question = gen_write_blanks_questions_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            label = "write blanks questions"
        elif req_exercise == "writeBlanksFill":
            question = gen_write_blanks_notes_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            label = "write blanks notes"
        elif req_exercise == "writeBlanksForm":
            question = gen_write_blanks_form_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            label = "write blanks form"
        else:
            # Nothing was generated, so no ids were used.
            print("Skipped unsupported exercise type: " + str(req_exercise))
            continue

        exercises.append(question)
        print("Added " + label + ": " + str(question))
        start_id = start_id + number_of_exercises

    return exercises
|
||||
|
||||
def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id):
    """Generate the requested listening exercises for a monologue.

    For each requested type one question count is taken from
    ``number_of_exercises_q`` and the matching generator is called. Question
    ids continue from one exercise to the next. An unsupported type is
    reported and skipped without advancing the id counter, so the produced
    ids stay contiguous.

    :param monologue: monologue text.
    :param req_exercises: exercise type names ("multipleChoice",
        "writeBlanksQuestions", "writeBlanksFill", "writeBlanksForm").
    :param number_of_exercises_q: object whose ``get()`` yields the question
        count for the next exercise (presumably a ``queue.Queue``; ``get()``
        on an empty queue would block; confirm with the caller).
    :param start_id: id of the first question.
    :return: list of generated exercises.
    """
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
            question = gen_multiple_choice_exercise_listening_monologue(monologue, number_of_exercises, start_id)
            label = "multiple choice"
        elif req_exercise == "writeBlanksQuestions":
            question = gen_write_blanks_questions_exercise_listening_monologue(monologue, number_of_exercises, start_id)
            label = "write blanks questions"
        elif req_exercise == "writeBlanksFill":
            question = gen_write_blanks_notes_exercise_listening_monologue(monologue, number_of_exercises, start_id)
            label = "write blanks notes"
        elif req_exercise == "writeBlanksForm":
            question = gen_write_blanks_form_exercise_listening_monologue(monologue, number_of_exercises, start_id)
            label = "write blanks form"
        else:
            # Nothing was generated, so no ids were used.
            print("Skipped unsupported exercise type: " + str(req_exercise))
            continue

        exercises.append(question)
        print("Added " + label + ": " + str(question))
        start_id = start_id + number_of_exercises

    return exercises
|
||||
|
||||
def gen_multiple_choice_exercise(text: str, quantity: int, start_id):
    """Generate multiple choice questions about a reading text.

    A first model call writes the questions; a second call parses them into
    ``{"questions": [...]}``. The list is capped at ``quantity`` (the model
    may return more, and the caller advances ids by ``quantity``) and the
    questions are renumbered from ``start_id``.

    :param text: reading passage.
    :param quantity: number of questions wanted.
    :param start_id: id of the first question.
    :return: the parsed structure with renumbered question ids.
    """
    mc_json_format = (
        '"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, '
        '{"id": "B", "text": "Government regulations"}, '
        '{"id": "C", "text": "Concerns about climate change"}, '
        '{"id": "D", "text": "Technological advancement"}], '
        '"prompt": "What is the main reason for the shift towards renewable energy sources?", '
        '"solution": "C", "variant": "text"}]'
    )

    gen_multiple_choice_for_text = "Generate " + str(quantity) + " multiple choice questions for this text: " \
                                   "'" + text + "'\n" \
                                   "Use this format: " + mc_json_format
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)

    parse_mc_questions = "Parse the questions into this json format: {" + mc_json_format + \
                         "}. \nThe questions: '" + mc_questions + "'"
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                         ["questions"],
                                         GEN_QUESTION_TEMPERATURE)

    # Keep at most `quantity` questions so ids cannot run into the next exercise.
    question["questions"] = question["questions"][:quantity]
    return fix_exercise_ids(question, start_id)
|
||||
|
||||
def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id):
    """Build a fill-blanks exercise from a model-written summary of ``text``.

    The model summarises the text and then picks words from the summary. At
    most ``quantity`` of those words are used (the model may return more, and
    the caller advances ids by ``quantity``). Their first occurrences in the
    summary become placeholders, and five random distractor words are mixed
    into the word bank.

    :param text: reading passage.
    :param quantity: number of blanks wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "fillBlanks").
    """
    gen_summary_for_text = "Summarize this text: " + text
    token_count = count_tokens(gen_summary_for_text)["n_tokens"]
    text_summary = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)

    gen_words_to_replace = "Select " + str(
        quantity) + " words, it must be words and not expressions, from the summary and respond in this " \
                    "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words_to_replace = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                                 ["words"],
                                                 GEN_QUESTION_TEMPERATURE)["words"][:quantity]

    replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id)
    options_words = add_random_words_and_shuffle(words_to_replace, 5)
    solutions = fillblanks_build_solutions_array(words_to_replace, start_id)

    return {
        "allowRepetition": True,
        "id": str(uuid.uuid4()),
        "prompt": "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
                  "more words than spaces so you will not use them all. You may use any of the words more than once.",
        "solutions": solutions,
        "text": replaced_summary,
        "type": "fillBlanks",
        "words": options_words

    }
|
||||
|
||||
def gen_true_false_not_given_exercise(text: str, quantity: int, start_id):
    """Build a True / False / Not Given exercise about ``text``.

    The model is asked for ``quantity`` statements. Surplus statements are
    trimmed with ``remove_excess_questions``, then the list is hard-capped
    at ``quantity`` so ids can never run into the next exercise. Statements
    are numbered from ``start_id``.

    :param text: reading passage.
    :param quantity: number of statements wanted.
    :param start_id: id of the first statement.
    :return: the exercise dict (type "trueFalse").
    """
    gen_true_false_not_given = "Generate " + str(
        quantity) + " statements in JSON format (True, False, or Not Given) " \
                    "based on the provided text. Ensure that your statements " \
                    "accurately represent information or inferences from the " \
                    "text, and provide a variety of responses, including, at least one of each True, " \
                    "False, and Not Given, as appropriate, in the JSON structure " \
                    "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
                    "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
                    "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text

    token_count = count_tokens(gen_true_false_not_given)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count,
                                          ["prompts"],
                                          GEN_QUESTION_TEMPERATURE)["prompts"]
    if len(questions) > quantity:
        questions = remove_excess_questions(questions, len(questions) - quantity)
        # The helper may not have found enough removable statements.
        questions = questions[:quantity]

    for i, question in enumerate(questions, start=start_id):
        question["id"] = str(i)

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Do the following statements agree with the information given in the Reading Passage?",
        "questions": questions,
        "type": "trueFalse"
    }
|
||||
|
||||
def gen_write_blanks_exercise(text: str, quantity: int, start_id):
    """Build a short-answer (write-blanks) exercise about a reading text.

    The model is asked for short-answer questions with their possible
    answers; at most ``quantity`` of them are kept.

    :param text: reading passage.
    :param quantity: number of questions wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks").
    """
    short_answer_prompt = "Generate " + str(quantity) + " short answer questions, and the possible answers " \
                          "(max 3 words per answer), about this text: '" + text + "'. " \
                          "Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \
                          "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"

    prompt_tokens = count_tokens(short_answer_prompt)["n_tokens"]
    model_reply = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, short_answer_prompt, prompt_tokens,
                                            ["questions"],
                                            GEN_QUESTION_TEMPERATURE)
    kept_questions = model_reply["questions"][:quantity]

    exercise = {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
        "solutions": build_write_blanks_solutions(kept_questions, start_id),
        "text": build_write_blanks_text(kept_questions, start_id),
        "type": "writeBlanks"
    }
    return exercise
|
||||
|
||||
def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Generate multiple choice questions about a conversation.

    A first model call writes the questions; a second call parses them into
    ``{"questions": [...]}``. The list is capped at ``quantity`` (the model
    may return more, and the caller advances ids by ``quantity``) and the
    questions are renumbered from ``start_id``.

    :param text: conversation text.
    :param quantity: number of questions wanted.
    :param start_id: id of the first question.
    :return: the parsed structure with renumbered question ids.
    """
    gen_multiple_choice_for_text = "Generate " + str(quantity) + " multiple choice questions of 4 options for this conversation: " \
                                   "'" + text + "'"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)

    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
                         "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
                         "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
                         "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
                         "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                         ["questions"],
                                         GEN_QUESTION_TEMPERATURE)

    # Keep at most `quantity` questions so ids cannot run into the next exercise.
    question["questions"] = question["questions"][:quantity]
    return fix_exercise_ids(question, start_id)
|
||||
|
||||
def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Generate multiple choice questions about a monologue.

    A first model call writes the questions; a second call parses them into
    ``{"questions": [...]}``. The list is capped at ``quantity`` (the model
    may return more, and the caller advances ids by ``quantity``) and the
    questions are renumbered from ``start_id``.

    :param text: monologue text.
    :param quantity: number of questions wanted.
    :param start_id: id of the first question.
    :return: the parsed structure with renumbered question ids.
    """
    gen_multiple_choice_for_text = "Generate " + str(quantity) + " multiple choice questions for this monologue: " \
                                   "'" + text + "'"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)

    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
                         "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
                         "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
                         "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
                         "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                         ["questions"],
                                         GEN_QUESTION_TEMPERATURE)

    # Keep at most `quantity` questions so ids cannot run into the next exercise.
    question["questions"] = question["questions"][:quantity]
    return fix_exercise_ids(question, start_id)
|
||||
|
||||
def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a short-answer (write-blanks) exercise about a conversation.

    The model is asked for short-answer questions with their possible
    answers; at most ``quantity`` of them are kept.

    :param text: conversation text.
    :param quantity: number of questions wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks").
    """
    # Prompt fixes: the pieces used to join as "andrespond" / "}.The", and the
    # source was called a monologue although this is the conversation variant.
    gen_write_blanks_questions = "Generate " + str(quantity) + " short answer questions, and the possible answers " \
                                 "(max 3 words per answer), about a conversation and " \
                                 "respond in this JSON format: {\"questions\": [{\"question\": question, " \
                                 "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}. " \
                                 "The conversation is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
|
||||
|
||||
def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a short-answer (write-blanks) exercise about a monologue.

    The model is asked for short-answer questions with their possible
    answers; at most ``quantity`` of them are kept.

    :param text: monologue text.
    :param quantity: number of questions wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks").
    """
    # Prompt fix: the pieces used to join as "andrespond" and "}.The".
    gen_write_blanks_questions = "Generate " + str(quantity) + " short answer questions, and the possible answers " \
                                 "(max 3 words per answer), about a monologue and " \
                                 "respond in this JSON format: {\"questions\": [{\"question\": question, " \
                                 "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}. " \
                                 "The monologue is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
|
||||
|
||||
def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a one-word-per-note write-blanks exercise about a conversation.

    The model writes up to ``quantity`` notes, then picks one word from each
    note; that word is replaced by a placeholder in its note.

    :param text: conversation text.
    :param quantity: number of notes / blanks wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks", maxWords 1).
    """
    # Prompt fixes: "and and" typo; the source is a conversation, not a monologue.
    gen_write_blanks_notes = "Generate " + str(quantity) + " notes taken from the conversation and respond in this " \
                             "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The conversation is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
                                          ["notes"],
                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
                           "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    # Count tokens of the prompt actually sent (the first prompt's count used to be reused).
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
|
||||
|
||||
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a one-word-per-note write-blanks exercise about a monologue.

    The model writes up to ``quantity`` notes, then picks one word from each
    note; that word is replaced by a placeholder in its note.

    :param text: monologue text.
    :param quantity: number of notes / blanks wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks", maxWords 1).
    """
    # Prompt fix: "and and" typo.
    gen_write_blanks_notes = "Generate " + str(quantity) + " notes taken from the monologue and respond in this " \
                             "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
                                          ["notes"],
                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
                           "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    # Count tokens of the prompt actually sent (the first prompt's count used to be reused).
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
|
||||
|
||||
def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a fill-the-form write-blanks exercise about a conversation.

    One model call writes a key-value form, a second parses it into
    ``{"form": [...]}``; at most ``quantity`` entries are kept and one word
    per entry is blanked by ``build_write_blanks_text_form``.

    :param text: conversation text.
    :param quantity: number of form entries wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks", maxWords 1).
    """
    form_prompt = "Generate a form with " + str(quantity) + " key-value pairs about the conversation. " \
                  "The conversation is this: '" + text + "'"
    raw_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, form_prompt,
                                         count_tokens(form_prompt)["n_tokens"],
                                         None,
                                         GEN_QUESTION_TEMPERATURE)

    parse_prompt = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + raw_form + "'"
    parse_reply = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_prompt,
                                            count_tokens(parse_prompt)["n_tokens"],
                                            ["form"],
                                            GEN_QUESTION_TEMPERATURE)
    form_entries = parse_reply["form"][:quantity]

    form_text, removed_words = build_write_blanks_text_form(form_entries, start_id)
    exercise = {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "You will hear a conversation. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(removed_words, start_id),
        "text": form_text,
        "type": "writeBlanks"
    }
    return exercise
|
||||
|
||||
def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a fill-the-form write-blanks exercise about a monologue.

    One model call writes a key-value form, a second parses it into
    ``{"form": [...]}``; at most ``quantity`` entries are kept and one word
    per entry is blanked by ``build_write_blanks_text_form``.

    :param text: monologue text.
    :param quantity: number of form entries wanted.
    :param start_id: id of the first blank.
    :return: the exercise dict (type "writeBlanks", maxWords 1).
    """
    form_prompt = "Generate a form with " + str(quantity) + " key-value pairs about the monologue. " \
                  "The monologue is this: '" + text + "'"
    raw_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, form_prompt,
                                         count_tokens(form_prompt)["n_tokens"],
                                         None,
                                         GEN_QUESTION_TEMPERATURE)

    parse_prompt = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + raw_form + "'"
    parse_reply = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_prompt,
                                            count_tokens(parse_prompt)["n_tokens"],
                                            ["form"],
                                            GEN_QUESTION_TEMPERATURE)
    form_entries = parse_reply["form"][:quantity]

    form_text, removed_words = build_write_blanks_text_form(form_entries, start_id)
    exercise = {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "You will hear a monologue. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(removed_words, start_id),
        "text": form_text,
        "type": "writeBlanks"
    }
    return exercise
|
||||
|
||||
@@ -23,15 +23,24 @@ def process_response(input_string, quotation_check_field):
|
||||
index = input_string.index('{')
|
||||
# Extract everything after the first '{' (inclusive)
|
||||
result = input_string[index:]
|
||||
if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE):
|
||||
if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE) or \
|
||||
re.search(r"'" + quotation_check_field + "':\s*\[([^\]]+)]", result, re.DOTALL | re.MULTILINE):
|
||||
json_obj = json.loads(parse_string(result))
|
||||
return json_obj
|
||||
else:
|
||||
parsed_string = result.replace("\n\n", " ")
|
||||
json_obj = json.loads(parsed_string)
|
||||
return json_obj
|
||||
parsed_string = parsed_string.replace("\n", " ")
|
||||
parsed_string = re.sub(r',\s*]', ']', parsed_string)
|
||||
parsed_string = re.sub(r',\s*}', '}', parsed_string)
|
||||
if (parsed_string.find('[') == -1) and (parsed_string.find(']') == -1):
|
||||
parsed_string = parse_string_2(parsed_string)
|
||||
return json.loads(parsed_string)
|
||||
|
||||
return json.loads(parsed_string)
|
||||
except Exception as e:
|
||||
print(f"Invalid JSON string! Exception: {e}")
|
||||
print(f"String: {input_string}")
|
||||
print(f"Exception: {e}")
|
||||
else:
|
||||
return input_string
|
||||
|
||||
@@ -41,9 +50,36 @@ def parse_string(to_parse: str):
|
||||
parsed_string = re.sub(pattern, '"', parsed_string)
|
||||
parsed_string = parsed_string.replace("\\\"", "'")
|
||||
parsed_string = parsed_string.replace("\n\n", " ")
|
||||
parsed_string = re.sub(r',\s*]', ']', parsed_string)
|
||||
parsed_string = re.sub(r',\s*}', '}', parsed_string)
|
||||
return parsed_string
|
||||
|
||||
|
||||
def parse_string_2(to_parse: str):
    """Strip stray double quotes from the values of a flat JSON-like object.

    The text is split on every ``,`` or ``:`` that directly follows a double
    quote; the pieces at odd positions are treated as values. Each value has
    all of its double quotes removed, is trimmed, and is wrapped in a single
    pair of quotes, so ``"a": "he said "hi" there"`` becomes
    ``"a": "he said hi there"``. Every value is emitted as a quoted string,
    including ones that were not quoted in the input.

    Each value is rewritten only at its own position, right after the colon
    that introduces it, so colons inside values (``"10:30"``) and identical
    text elsewhere in the input are left untouched.

    :param to_parse: JSON-like object text; arrays are not handled here.
    :return: the repaired text, with one space after each key/value colon.
    """
    # Braces are dropped only to locate the pieces; the replacements are
    # applied to the original text so its braces survive.
    flattened = to_parse.replace("{", "").replace("}", "")
    pieces = re.split(r'(?<="),|(?<="):', flattened)

    rebuilt = []
    cursor = 0
    for raw_value in pieces[1::2]:
        # An empty piece cannot be located in the text; leave the input as is.
        if not raw_value:
            continue
        # Anchor on the preceding colon so an identical key is never matched.
        anchor = to_parse.find(":" + raw_value, cursor)
        if anchor == -1:
            continue
        cleaned = '"' + raw_value.replace('"', "").strip() + '"'
        rebuilt.append(to_parse[cursor:anchor + 1])
        rebuilt.append(" " + cleaned)
        cursor = anchor + 1 + len(raw_value)

    rebuilt.append(to_parse[cursor:])
    return "".join(rebuilt)
|
||||
|
||||
def remove_special_chars_and_escapes(input_string):
|
||||
parsed_string = input_string.replace("\\\"", "'")
|
||||
parsed_string = parsed_string.replace("\n\n", " ")
|
||||
@@ -96,16 +132,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
|
||||
)["choices"][0]["text"]
|
||||
|
||||
if fields_to_check is None:
|
||||
return remove_special_chars_and_escapes(response)
|
||||
return response
|
||||
|
||||
processed_response = process_response(response, fields_to_check[0])
|
||||
|
||||
if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT:
|
||||
try_count = try_count + 1
|
||||
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
|
||||
elif try_count >= TRY_LIMIT:
|
||||
try_count = 0
|
||||
return remove_special_chars_and_escapes(response)
|
||||
else:
|
||||
try_count = 0
|
||||
return processed_response
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import whisper
|
||||
import os
|
||||
import gtts
|
||||
import nltk
|
||||
import boto3
|
||||
import random
|
||||
nltk.download('words')
|
||||
from nltk.corpus import words
|
||||
from helper.constants import *
|
||||
|
||||
def speech_to_text(file_path):
|
||||
if os.path.exists(file_path):
|
||||
@@ -15,8 +17,72 @@ def speech_to_text(file_path):
|
||||
raise Exception("File " + file_path + " not found.")
|
||||
|
||||
def text_to_speech(text: str, file_name: str):
    """Synthesize ``text`` with Amazon Polly and save it as ``<file_name>.mp3``.

    The neural engine is used with a voice picked at random from
    ``ALL_NEURAL_VOICES``. AWS credentials are read from the
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment variables.

    :param text: the text to be spoken.
    :param file_name: output path without extension; ".mp3" is appended.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from contextlib import closing

    # Initialize the Amazon Polly client
    client = boto3.client(
        'polly',
        region_name='eu-west-1',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
    )

    tts_response = client.synthesize_speech(
        Engine="neural",
        Text=text,
        OutputFormat="mp3",
        VoiceId=random.choice(ALL_NEURAL_VOICES)['Id']
    )

    # Polly hands the audio back as a stream; close it once it has been read.
    with closing(tts_response['AudioStream']) as audio_stream:
        audio = audio_stream.read()

    file_name = file_name + ".mp3"
    with open(file_name, "wb") as f:
        f.write(audio)

    print("Speech segments saved to " + file_name)
|
||||
|
||||
def conversation_text_to_speech(conversation: list, file_name: str):
    """Render a multi-speaker conversation to ``<file_name>.mp3`` with Amazon Polly.

    Every segment without a 'voice' key gets one, written back into the
    segment dict. All segments sharing a 'name' use the same voice, chosen at
    random from ``MALE_NEURAL_VOICES`` when the segment's 'gender' is "male"
    (case-insensitive) and from ``FEMALE_NEURAL_VOICES`` otherwise. A voice
    already used by another speaker is avoided while the pool still has an
    unused one, so different speakers stay distinguishable.

    Each segment's 'text' is then synthesized with its voice and the MP3 bytes
    are concatenated in order into one file.

    :param conversation: list of dicts with 'text' plus either 'voice' or
        both 'name' and 'gender'. The dicts are modified in place.
    :param file_name: output path without extension; ".mp3" is appended.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from contextlib import closing

    # Map each speaker name to its voice; voices supplied by the caller count as taken.
    name_to_voice = {}
    taken_voices = {segment['voice'] for segment in conversation if 'voice' in segment}

    for segment in conversation:
        if 'voice' in segment:
            continue
        name = segment['name']
        if name not in name_to_voice:
            if segment['gender'].lower() == 'male':
                pool = MALE_NEURAL_VOICES
            else:
                pool = FEMALE_NEURAL_VOICES
            # Prefer an unused voice; reuse one only when the pool is exhausted.
            unused = [candidate for candidate in pool if candidate['Id'] not in taken_voices]
            voice = random.choice(unused or pool)['Id']
            name_to_voice[name] = voice
            taken_voices.add(voice)
        segment['voice'] = name_to_voice[name]

    # Initialize the Amazon Polly client
    client = boto3.client(
        'polly',
        region_name='eu-west-1',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
    )

    # Convert each segment to audio, keeping the conversation order
    audio_segments = []
    for segment in conversation:
        response = client.synthesize_speech(
            Engine="neural",
            Text=segment["text"],
            OutputFormat="mp3",
            VoiceId=segment["voice"]
        )
        # Close each stream once it has been read.
        with closing(response['AudioStream']) as audio_stream:
            audio_segments.append(audio_stream.read())

    # Combine the audio segments into a single audio file
    combined_audio = b"".join(audio_segments)
    file_name = file_name + ".mp3"
    with open(file_name, "wb") as f:
        f.write(combined_audio)

    print("Speech segments saved to " + file_name)
|
||||
|
||||
def has_words(text: str):
|
||||
english_words = set(words.words())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user