Generate questions endpoints working for all.

This commit is contained in:
Cristiano Ferreira
2023-11-12 23:40:24 +00:00
parent 274252bf92
commit 695d9b589a
7 changed files with 2143 additions and 312 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,3 @@
__pycache__ __pycache__
.idea .idea
.env

178
app.py
View File

@@ -30,16 +30,32 @@ jwt = JWTManager(app)
cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS")) cred = credentials.Certificate(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
firebase_admin.initialize_app(cred) firebase_admin.initialize_app(cred)
@app.route('/listening_section_1', methods=['GET']) @app.route('/listening_section_1', methods=['GET'])
@jwt_required() @jwt_required()
def get_listening_section_1_question(): def get_listening_section_1_question():
try: try:
messages = get_question_gen_messages(QuestionType.LISTENING_SECTION_1) delete_files_older_than_one_day(AUDIO_FILES_PATH)
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], # Extract parameters from the URL query string
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) topic = request.args.get('topic', default=random.choice(two_people_scenarios))
response = make_openai_call(GPT_3_5_TURBO_16K, messages, token_count, LISTENING_GEN_FIELDS, req_exercises = request.args.getlist('exercises')
GEN_QUESTION_TEMPERATURE)
return response if (len(req_exercises) == 0):
req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
unprocessed_conversation, processed_conversation = generate_listening_1_conversation(topic)
print("Generated conversation: " + str(processed_conversation))
start_id = 1
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises, number_of_exercises_q,
start_id)
return {
"exercises": exercises,
"text": processed_conversation
}
except Exception as e: except Exception as e:
return str(e) return str(e)
@@ -71,12 +87,24 @@ def save_listening_section_1_question():
def get_listening_section_2_question(): def get_listening_section_2_question():
try: try:
delete_files_older_than_one_day(AUDIO_FILES_PATH) delete_files_older_than_one_day(AUDIO_FILES_PATH)
messages = get_question_gen_messages(QuestionType.LISTENING_SECTION_2) # Extract parameters from the URL query string
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], topic = request.args.get('topic', default=random.choice(social_monologue_contexts))
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) req_exercises = request.args.getlist('exercises')
response = make_openai_call(GPT_3_5_TURBO_16K, messages, token_count, LISTENING_GEN_FIELDS,
GEN_QUESTION_TEMPERATURE) if (len(req_exercises) == 0):
return response req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises))
monologue = generate_listening_2_monologue(topic)
print("Generated monologue: " + str(monologue))
start_id = 11
exercises = generate_listening_monologue_exercises(monologue, req_exercises, number_of_exercises_q, start_id)
return {
"exercises": exercises,
"text": monologue
}
except Exception as e: except Exception as e:
return str(e) return str(e)
@@ -107,12 +135,26 @@ def save_listening_section_2_question():
def get_listening_section_3_question(): def get_listening_section_3_question():
try: try:
delete_files_older_than_one_day(AUDIO_FILES_PATH) delete_files_older_than_one_day(AUDIO_FILES_PATH)
messages = get_question_gen_messages(QuestionType.LISTENING_SECTION_3) # Extract parameters from the URL query string
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], topic = request.args.get('topic', default=random.choice(four_people_scenarios))
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) req_exercises = request.args.getlist('exercises')
response = make_openai_call(GPT_3_5_TURBO_16K, messages, token_count, LISTENING_GEN_FIELDS,
GEN_QUESTION_TEMPERATURE) if (len(req_exercises) == 0):
return response req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 1)
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
unprocessed_conversation, processed_conversation = generate_listening_3_conversation(topic)
print("Generated conversation: " + str(processed_conversation))
start_id = 21
exercises = generate_listening_conversation_exercises(unprocessed_conversation, req_exercises, number_of_exercises_q,
start_id)
return {
"exercises": exercises,
"text": processed_conversation
}
except Exception as e: except Exception as e:
return str(e) return str(e)
@@ -143,12 +185,24 @@ def save_listening_section_3_question():
def get_listening_section_4_question(): def get_listening_section_4_question():
try: try:
delete_files_older_than_one_day(AUDIO_FILES_PATH) delete_files_older_than_one_day(AUDIO_FILES_PATH)
messages = get_question_gen_messages(QuestionType.LISTENING_SECTION_4) # Extract parameters from the URL query string
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], topic = request.args.get('topic', default=random.choice(academic_subjects))
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) req_exercises = request.args.getlist('exercises')
response = make_openai_call(GPT_3_5_TURBO_16K, messages, token_count, LISTENING_GEN_FIELDS,
GEN_QUESTION_TEMPERATURE) if (len(req_exercises) == 0):
return response req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises))
monologue = generate_listening_4_monologue(topic)
print("Generated monologue: " + str(monologue))
start_id = 31
exercises = generate_listening_monologue_exercises(monologue, req_exercises, number_of_exercises_q, start_id)
return {
"exercises": exercises,
"text": monologue
}
except Exception as e: except Exception as e:
return str(e) return str(e)
@@ -606,29 +660,25 @@ def save_speaking_task_3_question():
@jwt_required() @jwt_required()
def get_reading_passage_1_question(): def get_reading_passage_1_question():
try: try:
TOTAL_EXERCISES = 13
# Extract parameters from the URL query string # Extract parameters from the URL query string
topic = request.args.get('topic', default=random.choice(topics)) topic = request.args.get('topic', default=random.choice(topics))
req_exercises = request.args.getlist('exercises') req_exercises = request.args.getlist('exercises')
number_of_exercises_q = divide_number_into_parts(TOTAL_EXERCISES, len(req_exercises)) if (len(req_exercises) == 0):
req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))
passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic) passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
exercises = [] print("Generated passage: " + str(passage))
start_id = 1
for req_exercise in req_exercises: exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id)
if (req_exercise == "multiple_choice"):
mc_question = gen_multiple_choice_exercise(passage["text"], number_of_exercises_q.get())
exercises.append(mc_question)
exercises = fix_exercise_ids(exercises)
return { return {
"exercises": exercises, "exercises": exercises,
"text": { "text": {
"content": passage["text"], "content": passage["text"],
"title": passage["title"] "title": passage["title"]
}, }
} }
except Exception as e: except Exception as e:
return str(e) return str(e)
@@ -656,6 +706,62 @@ def save_reading_passage_1_question():
return str(e) return str(e)
@app.route('/reading_passage_2', methods=['GET'])
@jwt_required()
def get_reading_passage_2_question():
    """Generate an IELTS Reading Passage 2 question set.

    Query-string parameters:
        topic: optional passage topic; a random entry from ``topics`` is
            used when absent.
        exercises: optional repeated parameter naming the exercise types;
            two random types from ``READING_EXERCISE_TYPES`` are used when
            absent.

    Returns a dict with the generated exercises plus the passage text and
    title.  On failure the stringified exception is returned with a 200
    status — kept as-is for backward compatibility with existing clients.
    """
    try:
        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(topics))
        req_exercises = request.args.getlist('exercises')
        if not req_exercises:
            req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
        # Split the fixed question budget evenly across the requested types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))
        passage = generate_reading_passage(QuestionType.READING_PASSAGE_2, topic)
        print("Generated passage: " + str(passage))
        # Passage 2 ids continue after passage 1's 13 questions (1-13).
        start_id = 14
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }
    except Exception as e:
        return str(e)
@app.route('/reading_passage_3', methods=['GET'])
@jwt_required()
def get_reading_passage_3_question():
    """Generate an IELTS Reading Passage 3 question set.

    Query-string parameters:
        topic: optional passage topic; a random entry from ``topics`` is
            used when absent.
        exercises: optional repeated parameter naming the exercise types;
            two random types from ``READING_EXERCISE_TYPES`` are used when
            absent.

    Returns a dict with the generated exercises plus the passage text and
    title.  On failure the stringified exception is returned with a 200
    status — kept as-is for backward compatibility with existing clients.
    """
    try:
        # Extract parameters from the URL query string
        topic = request.args.get('topic', default=random.choice(topics))
        req_exercises = request.args.getlist('exercises')
        if not req_exercises:
            req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
        # Split the fixed question budget evenly across the requested types.
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))
        passage = generate_reading_passage(QuestionType.READING_PASSAGE_3, topic)
        print("Generated passage: " + str(passage))
        # Passage 3 ids continue after passages 1 and 2 (questions 1-26).
        start_id = 27
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id)
        return {
            "exercises": exercises,
            "text": {
                "content": passage["text"],
                "title": passage["title"]
            }
        }
    except Exception as e:
        return str(e)
@app.route('/fetch_tips', methods=['POST']) @app.route('/fetch_tips', methods=['POST'])
@jwt_required() @jwt_required()
def fetch_answer_tips(): def fetch_answer_tips():

View File

@@ -1,20 +1,101 @@
FIREBASE_BUCKET = 'mti-ielts.appspot.com'
AUDIO_FILES_PATH = 'download-audio/'
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/'
VIDEO_FILES_PATH = 'download-video/'
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/'
GRADING_TEMPERATURE = 0.1 GRADING_TEMPERATURE = 0.1
TIPS_TEMPERATURE = 0.2 TIPS_TEMPERATURE = 0.2
GEN_QUESTION_TEMPERATURE = 0.7 GEN_QUESTION_TEMPERATURE = 0.7
GPT_3_5_TURBO = "gpt-3.5-turbo" GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k" GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct" GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"
GRADING_FIELDS = ['comment', 'overall', 'task_response'] GRADING_FIELDS = ['comment', 'overall', 'task_response']
GEN_FIELDS = ['topic'] GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title'] GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise'] LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['multipleChoice', 'fillBlanks', 'writeBlanks', 'trueFalse']
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
FIREBASE_BUCKET = 'mti-ielts.appspot.com' TOTAL_READING_PASSAGE_1_EXERCISES = 13
AUDIO_FILES_PATH = 'download-audio/' TOTAL_READING_PASSAGE_2_EXERCISES = 13
FIREBASE_LISTENING_AUDIO_FILES_PATH = 'listening_recordings/' TOTAL_READING_PASSAGE_3_EXERCISES = 14
VIDEO_FILES_PATH = 'download-video/' TOTAL_LISTENING_SECTION_1_EXERCISES = 10
FIREBASE_SPEAKING_VIDEO_FILES_PATH = 'speaking_videos/' TOTAL_LISTENING_SECTION_2_EXERCISES = 10
TOTAL_LISTENING_SECTION_3_EXERCISES = 10
TOTAL_LISTENING_SECTION_4_EXERCISES = 10
EN_US_VOICES = [
{'Gender': 'Female', 'Id': 'Salli', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Salli',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Male', 'Id': 'Matthew', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Matthew',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Female', 'Id': 'Kimberly', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Kimberly',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Female', 'Id': 'Kendra', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Kendra',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Male', 'Id': 'Justin', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Justin',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Male', 'Id': 'Joey', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Joey',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Female', 'Id': 'Joanna', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Joanna',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Female', 'Id': 'Ivy', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Ivy',
'SupportedEngines': ['neural', 'standard']}]
EN_GB_VOICES = [
{'Gender': 'Female', 'Id': 'Emma', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Emma',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Male', 'Id': 'Brian', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Brian',
'SupportedEngines': ['neural', 'standard']},
{'Gender': 'Female', 'Id': 'Amy', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Amy',
'SupportedEngines': ['neural', 'standard']}]
EN_GB_WLS_VOICES = [
{'Gender': 'Male', 'Id': 'Geraint', 'LanguageCode': 'en-GB-WLS', 'LanguageName': 'Welsh English', 'Name': 'Geraint',
'SupportedEngines': ['standard']}]
EN_AU_VOICES = [{'Gender': 'Male', 'Id': 'Russell', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English',
'Name': 'Russell', 'SupportedEngines': ['standard']},
{'Gender': 'Female', 'Id': 'Nicole', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English',
'Name': 'Nicole', 'SupportedEngines': ['standard']}]
ALL_VOICES = EN_US_VOICES + EN_GB_VOICES + EN_GB_WLS_VOICES + EN_AU_VOICES
NEURAL_EN_US_VOICES = [
{'Gender': 'Female', 'Id': 'Danielle', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Danielle',
'SupportedEngines': ['neural']},
{'Gender': 'Male', 'Id': 'Gregory', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Gregory',
'SupportedEngines': ['neural']},
{'Gender': 'Male', 'Id': 'Kevin', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Kevin',
'SupportedEngines': ['neural']},
{'Gender': 'Female', 'Id': 'Ruth', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Ruth',
'SupportedEngines': ['neural']},
{'Gender': 'Male', 'Id': 'Stephen', 'LanguageCode': 'en-US', 'LanguageName': 'US English', 'Name': 'Stephen',
'SupportedEngines': ['neural']}]
NEURAL_EN_GB_VOICES = [
{'Gender': 'Male', 'Id': 'Arthur', 'LanguageCode': 'en-GB', 'LanguageName': 'British English', 'Name': 'Arthur',
'SupportedEngines': ['neural']}]
NEURAL_EN_AU_VOICES = [
{'Gender': 'Female', 'Id': 'Olivia', 'LanguageCode': 'en-AU', 'LanguageName': 'Australian English',
'Name': 'Olivia', 'SupportedEngines': ['neural']}]
NEURAL_EN_ZA_VOICES = [
{'Gender': 'Female', 'Id': 'Ayanda', 'LanguageCode': 'en-ZA', 'LanguageName': 'South African English',
'Name': 'Ayanda', 'SupportedEngines': ['neural']}]
NEURAL_EN_NZ_VOICES = [
{'Gender': 'Female', 'Id': 'Aria', 'LanguageCode': 'en-NZ', 'LanguageName': 'New Zealand English', 'Name': 'Aria',
'SupportedEngines': ['neural']}]
NEURAL_EN_IN_VOICES = [
{'Gender': 'Female', 'Id': 'Kajal', 'LanguageCode': 'en-IN', 'LanguageName': 'Indian English', 'Name': 'Kajal',
'SupportedEngines': ['neural']}]
NEURAL_EN_IE_VOICES = [
{'Gender': 'Female', 'Id': 'Niamh', 'LanguageCode': 'en-IE', 'LanguageName': 'Irish English', 'Name': 'Niamh',
'SupportedEngines': ['neural']}]
ALL_NEURAL_VOICES = NEURAL_EN_US_VOICES + NEURAL_EN_GB_VOICES + NEURAL_EN_AU_VOICES + NEURAL_EN_ZA_VOICES + NEURAL_EN_NZ_VOICES + NEURAL_EN_IE_VOICES
MALE_VOICES = [item for item in ALL_VOICES if item.get('Gender') == 'Male']
FEMALE_VOICES = [item for item in ALL_VOICES if item.get('Gender') == 'Female']
MALE_NEURAL_VOICES = [item for item in ALL_NEURAL_VOICES if item.get('Gender') == 'Male']
FEMALE_NEURAL_VOICES = [item for item in ALL_NEURAL_VOICES if item.get('Gender') == 'Female']
topics = [ topics = [
"Art and Creativity", "Art and Creativity",
@@ -117,4 +198,395 @@ topics = [
"Human-Computer Interaction", "Human-Computer Interaction",
"Global Health", "Global Health",
"Cultural Appropriation" "Cultural Appropriation"
] ]
# Topic pool for Listening Section 1: everyday two-person conversations.
# An entry is picked with random.choice() when the request supplies no
# 'topic' query parameter.
two_people_scenarios = [
    "Booking a table at a restaurant",
    "Making a doctor's appointment",
    "Asking for directions to a tourist attraction",
    "Inquiring about public transportation options",
    "Discussing weekend plans with a friend",
    "Ordering food at a café",
    "Renting a bicycle for a day",
    "Arranging a meeting with a colleague",
    "Talking to a real estate agent about renting an apartment",
    "Discussing travel plans for an upcoming vacation",
    "Checking the availability of a hotel room",
    "Talking to a car rental service",
    "Asking for recommendations at a library",
    "Inquiring about opening hours at a museum",
    "Discussing the weather forecast",
    "Shopping for groceries",
    "Renting a movie from a video store",
    "Booking a flight ticket",
    "Discussing a school assignment with a classmate",
    "Making a reservation for a spa appointment",
    "Talking to a customer service representative about a product issue",
    "Discussing household chores with a family member",
    "Planning a surprise party for a friend",
    "Talking to a coworker about a project deadline",
    "Inquiring about a gym membership",
    "Discussing the menu options at a fast-food restaurant",
    "Talking to a neighbor about a community event",
    "Asking for help with computer problems",
    "Discussing a recent sports game with a sports enthusiast",
    "Talking to a pet store employee about buying a pet",
    "Asking for information about a local farmer's market",
    "Discussing the details of a home renovation project",
    "Talking to a coworker about office supplies",
    "Making plans for a family picnic",
    "Inquiring about admission requirements at a university",
    "Discussing the features of a new smartphone with a salesperson",
    "Talking to a mechanic about car repairs",
    "Making arrangements for a child's birthday party",
    "Discussing a new diet plan with a nutritionist",
    "Asking for information about a music concert",
    "Talking to a hairdresser about getting a haircut",
    "Inquiring about a language course at a language school",
    "Discussing plans for a weekend camping trip",
    "Talking to a bank teller about opening a new account",
    "Ordering a drink at a coffee shop",
    "Discussing a new book with a book club member",
    "Talking to a librarian about library services",
    "Asking for advice on finding a job",
    "Discussing plans for a garden makeover with a landscaper",
    "Talking to a travel agent about a cruise vacation",
    "Inquiring about a fitness class at a gym",
    "Ordering flowers for a special occasion",
    "Discussing a new exercise routine with a personal trainer",
    "Talking to a teacher about a child's progress in school",
    "Asking for information about a local art exhibition",
    "Discussing a home improvement project with a contractor",
    "Talking to a babysitter about childcare arrangements",
    "Making arrangements for a car service appointment",
    "Inquiring about a photography workshop at a studio",
    "Discussing plans for a family reunion with a relative",
    "Talking to a tech support representative about computer issues",
    "Asking for recommendations on pet grooming services",
    "Discussing weekend plans with a significant other",
    "Talking to a counselor about personal issues",
    "Inquiring about a music lesson with a music teacher",
    "Ordering a pizza for delivery",
    "Making a reservation for a taxi",
    "Discussing a new recipe with a chef",
    "Talking to a fitness trainer about weight loss goals",
    "Inquiring about a dance class at a dance studio",
    "Ordering a meal at a food truck",
    "Discussing plans for a weekend getaway with a partner",
    "Talking to a florist about wedding flower arrangements",
    "Asking for advice on home decorating",
    "Discussing plans for a charity fundraiser event",
    "Talking to a pet sitter about taking care of pets",
    "Making arrangements for a spa day with a friend",
    "Asking for recommendations on home improvement stores",
    "Discussing weekend plans with a travel enthusiast",
    "Talking to a car mechanic about car maintenance",
    "Inquiring about a cooking class at a culinary school",
    "Ordering a sandwich at a deli",
    "Discussing plans for a family holiday party",
    "Talking to a personal assistant about organizing tasks",
    "Asking for information about a local theater production",
    "Discussing a new DIY project with a home improvement expert",
    "Talking to a wine expert about wine pairing",
    "Making arrangements for a pet adoption",
    "Asking for advice on planning a wedding"
]
# Topic pool for Listening Section 2: single-speaker social monologues
# (tours, briefings, public talks).  An entry is picked with
# random.choice() when the request supplies no 'topic' query parameter.
social_monologue_contexts = [
    "A guided tour of a historical museum",
    "An introduction to a new city for tourists",
    "An orientation session for new university students",
    "A safety briefing for airline passengers",
    "An explanation of the process of recycling",
    "A lecture on the benefits of a healthy diet",
    "A talk on the importance of time management",
    "A monologue about wildlife conservation",
    "An overview of local public transportation options",
    "A presentation on the history of cinema",
    "An introduction to the art of photography",
    "A discussion about the effects of climate change",
    "An overview of different types of cuisine",
    "A lecture on the principles of financial planning",
    "A monologue about sustainable energy sources",
    "An explanation of the process of online shopping",
    "A guided tour of a botanical garden",
    "An introduction to a local wildlife sanctuary",
    "A safety briefing for hikers in a national park",
    "A talk on the benefits of physical exercise",
    "A lecture on the principles of effective communication",
    "A monologue about the impact of social media",
    "An overview of the history of a famous landmark",
    "An introduction to the world of fashion design",
    "A discussion about the challenges of global poverty",
    "An explanation of the process of organic farming",
    "A presentation on the history of space exploration",
    "An overview of traditional music from different cultures",
    "A lecture on the principles of effective leadership",
    "A monologue about the influence of technology",
    "A guided tour of a famous archaeological site",
    "An introduction to a local wildlife rehabilitation center",
    "A safety briefing for visitors to a science museum",
    "A talk on the benefits of learning a new language",
    "A lecture on the principles of architectural design",
    "A monologue about the impact of renewable energy",
    "An explanation of the process of online banking",
    "A presentation on the history of a famous art movement",
    "An overview of traditional clothing from various regions",
    "A lecture on the principles of sustainable agriculture",
    "A discussion about the challenges of urban development",
    "A monologue about the influence of social norms",
    "A guided tour of a historical battlefield",
    "An introduction to a local animal shelter",
    "A safety briefing for participants in a charity run",
    "A talk on the benefits of community involvement",
    "A lecture on the principles of sustainable tourism",
    "A monologue about the impact of alternative medicine",
    "An explanation of the process of wildlife tracking",
    "A presentation on the history of a famous inventor",
    "An overview of traditional dance forms from different cultures",
    "A lecture on the principles of ethical business practices",
    "A discussion about the challenges of healthcare access",
    "A monologue about the influence of cultural traditions",
    "A guided tour of a famous lighthouse",
    "An introduction to a local astronomy observatory",
    "A safety briefing for participants in a team-building event",
    "A talk on the benefits of volunteering",
    "A lecture on the principles of wildlife protection",
    "A monologue about the impact of space exploration",
    "An explanation of the process of wildlife photography",
    "A presentation on the history of a famous musician",
    "An overview of traditional art forms from different cultures",
    "A lecture on the principles of effective education",
    "A discussion about the challenges of sustainable development",
    "A monologue about the influence of cultural diversity",
    "A guided tour of a famous national park",
    "An introduction to a local marine conservation project",
    "A safety briefing for participants in a hot air balloon ride",
    "A talk on the benefits of cultural exchange programs",
    "A lecture on the principles of wildlife conservation",
    "A monologue about the impact of technological advancements",
    "An explanation of the process of wildlife rehabilitation",
    "A presentation on the history of a famous explorer",
    "An overview of traditional storytelling from different cultures",
    "A lecture on the principles of effective marketing",
    "A discussion about the challenges of environmental sustainability",
    "A monologue about the influence of social entrepreneurship",
    "A guided tour of a famous historical estate",
    "An introduction to a local marine life research center",
    "A safety briefing for participants in a zip-lining adventure",
    "A talk on the benefits of cultural preservation",
    "A lecture on the principles of wildlife ecology",
    "A monologue about the impact of space technology",
    "An explanation of the process of wildlife conservation",
    "A presentation on the history of a famous scientist",
    "An overview of traditional crafts and artisans from different cultures",
    "A lecture on the principles of effective intercultural communication"
]
# Topic pool for Listening Section 3: multi-speaker academic/training
# discussions.  An entry is picked with random.choice() when the request
# supplies no 'topic' query parameter.
four_people_scenarios = [
    "A university lecture on history",
    "A physics class discussing Newton's laws",
    "A medical school seminar on anatomy",
    "A training session on computer programming",
    "A business school lecture on marketing strategies",
    "A chemistry lab experiment and discussion",
    "A language class practicing conversational skills",
    "A workshop on creative writing techniques",
    "A high school math lesson on calculus",
    "A training program for customer service representatives",
    "A lecture on environmental science and sustainability",
    "A psychology class exploring human behavior",
    "A music theory class analyzing compositions",
    "A nursing school simulation for patient care",
    "A computer science class on algorithms",
    "A workshop on graphic design principles",
    "A law school lecture on constitutional law",
    "A geology class studying rock formations",
    "A vocational training program for electricians",
    "A history seminar focusing on ancient civilizations",
    "A biology class dissecting specimens",
    "A financial literacy course for adults",
    "A literature class discussing classic novels",
    "A training session for emergency response teams",
    "A sociology lecture on social inequality",
    "An art class exploring different painting techniques",
    "A medical school seminar on diagnosis",
    "A programming bootcamp teaching web development",
    "An economics class analyzing market trends",
    "A chemistry lab experiment on chemical reactions",
    "A language class practicing pronunciation",
    "A workshop on public speaking skills",
    "A high school physics lesson on electromagnetism",
    "A training program for IT professionals",
    "A lecture on climate change and its effects",
    "A psychology class studying cognitive psychology",
    "A music class composing original songs",
    "A nursing school simulation for patient assessment",
    "A computer science class on data structures",
    "A workshop on 3D modeling and animation",
    "A law school lecture on contract law",
    "A geography class examining world maps",
    "A vocational training program for plumbers",
    "A history seminar discussing revolutions",
    "A biology class exploring genetics",
    "A financial literacy course for teens",
    "A literature class analyzing poetry",
    "A training session for public speaking coaches",
    "A sociology lecture on cultural diversity",
    "An art class creating sculptures",
    "A medical school seminar on surgical techniques",
    "A programming bootcamp teaching app development",
    "An economics class on global trade policies",
    "A chemistry lab experiment on chemical bonding",
    "A language class discussing idiomatic expressions",
    "A workshop on conflict resolution",
    "A high school biology lesson on evolution",
    "A training program for project managers",
    "A lecture on renewable energy sources",
    "A psychology class on abnormal psychology",
    "A music class rehearsing for a performance",
    "A nursing school simulation for emergency response",
    "A computer science class on cybersecurity",
    "A workshop on digital marketing strategies",
    "A law school lecture on intellectual property",
    "A geology class analyzing seismic activity",
    "A vocational training program for carpenters",
    "A history seminar on the Renaissance",
    "A chemistry class synthesizing compounds",
    "A financial literacy course for seniors",
    "A literature class interpreting Shakespearean plays",
    "A training session for negotiation skills",
    "A sociology lecture on urbanization",
    "An art class creating digital art",
    "A medical school seminar on patient communication",
    "A programming bootcamp teaching mobile app development",
    "An economics class on fiscal policy",
    "A physics lab experiment on electromagnetism",
    "A language class on cultural immersion",
    "A workshop on time management",
    "A high school chemistry lesson on stoichiometry",
    "A training program for HR professionals",
    "A lecture on space exploration and astronomy",
    "A psychology class on human development",
    "A music class practicing for a recital",
    "A nursing school simulation for triage",
    "A computer science class on web development frameworks",
    "A workshop on team-building exercises",
    "A law school lecture on criminal law",
    "A geography class studying world cultures",
    "A vocational training program for HVAC technicians",
    "A history seminar on ancient civilizations",
    "A biology class examining ecosystems",
    "A financial literacy course for entrepreneurs",
    "A literature class analyzing modern literature",
    "A training session for leadership skills",
    "A sociology lecture on gender studies",
    "An art class exploring multimedia art",
    "A medical school seminar on patient diagnosis",
    "A programming bootcamp teaching software architecture"
]
# Topic pool for Listening Section 4: academic lecture subjects.  An entry
# is picked with random.choice() when the request supplies no 'topic' query
# parameter.  Entries are kept unique so each subject is equally likely
# (a duplicate "Oceanography" entry was removed).
academic_subjects = [
    "Astrophysics",
    "Microbiology",
    "Political Science",
    "Environmental Science",
    "Literature",
    "Biochemistry",
    "Sociology",
    "Art History",
    "Geology",
    "Economics",
    "Psychology",
    "History of Architecture",
    "Linguistics",
    "Neurobiology",
    "Anthropology",
    "Quantum Mechanics",
    "Urban Planning",
    "Philosophy",
    "Marine Biology",
    "International Relations",
    "Medieval History",
    "Geophysics",
    "Finance",
    "Educational Psychology",
    "Graphic Design",
    "Paleontology",
    "Macroeconomics",
    "Cognitive Psychology",
    "Renaissance Art",
    "Archaeology",
    "Microeconomics",
    "Social Psychology",
    "Contemporary Art",
    "Meteorology",
    "Political Philosophy",
    "Space Exploration",
    "Cognitive Science",
    "Classical Music",
    "Oceanography",
    "Public Health",
    "Gender Studies",
    "Baroque Art",
    "Volcanology",
    "Business Ethics",
    "Music Composition",
    "Environmental Policy",
    "Media Studies",
    "Ancient History",
    "Seismology",
    "Marketing",
    "Human Development",
    "Modern Art",
    "Astronomy",
    "International Law",
    "Developmental Psychology",
    "Film Studies",
    "American History",
    "Soil Science",
    "Entrepreneurship",
    "Clinical Psychology",
    "Contemporary Dance",
    "Space Physics",
    "Political Economy",
    "Cognitive Neuroscience",
    "20th Century Literature",
    "Public Administration",
    "European History",
    "Atmospheric Science",
    "Supply Chain Management",
    "Social Work",
    "Japanese Literature",
    "Planetary Science",
    "Labor Economics",
    "Industrial-Organizational Psychology",
    "French Philosophy",
    "Biogeochemistry",
    "Strategic Management",
    "Educational Sociology",
    "Postmodern Literature",
    "Public Relations",
    "Middle Eastern History",
    "International Development",
    "Human Resources Management",
    "Educational Leadership",
    "Russian Literature",
    "Quantum Chemistry",
    "Environmental Economics",
    "Environmental Psychology",
    "Ancient Philosophy",
    "Immunology",
    "Comparative Politics",
    "Child Development",
    "Fashion Design",
    "Geological Engineering",
    "Macroeconomic Policy",
    "Media Psychology",
    "Byzantine Art",
    "Ecology",
    "International Business"
]

View File

@@ -1,9 +1,17 @@
import queue import queue
import nltk
import random
import re
import uuid
from helper.api_messages import QuestionType from helper.api_messages import QuestionType
from helper.openai_interface import make_openai_instruct_call from helper.openai_interface import make_openai_instruct_call
from helper.token_counter import count_tokens from helper.token_counter import count_tokens
from helper.constants import * from helper.constants import *
from wonderwords import RandomWord
nltk.download('words')
def divide_number_into_parts(number, parts): def divide_number_into_parts(number, parts):
if number < parts: if number < parts:
@@ -22,51 +30,598 @@ def divide_number_into_parts(number, parts):
return q return q
def fix_exercise_ids(exercises):
def fix_exercise_ids(exercise, start_id):
# Initialize the starting ID for the first exercise # Initialize the starting ID for the first exercise
current_id = 1 current_id = start_id
# Iterate through exercises questions = exercise["questions"]
for exercise in exercises:
questions = exercise["questions"]
# Iterate through questions and update the "id" value # Iterate through questions and update the "id" value
for question in questions: for question in questions:
question["id"] = str(current_id) question["id"] = str(current_id)
current_id += 1 current_id += 1
return exercises return exercise
def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
    """Replace the first occurrence of each word in *text* with a numbered placeholder.

    Placeholders have the form ``{{N}}`` where N starts at *start_id* and
    increments once per word. Matching is case-insensitive; only the first
    occurrence of each word is substituted.
    """
    placeholder_id = start_id
    for word in words_to_replace:
        marker = '{{' + str(placeholder_id) + '}}'
        # Case-insensitive literal match; count=1 keeps later occurrences intact.
        text = re.sub(re.escape(word), marker, text, count=1, flags=re.IGNORECASE)
        placeholder_id += 1
    return text
def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
    """Blank out one word per note, pairing ``notes[i]`` with ``words_to_replace[i]``.

    The i-th note has the first whole-word, case-insensitive occurrence of
    ``words_to_replace[i]`` replaced by the placeholder ``{{start_id + i}}``.
    Returns a new list; the inputs are not modified.
    """
    result = []
    for offset, note in enumerate(notes):
        word = words_to_replace[offset]
        marker = '{{' + str(start_id + offset) + '}}'
        word_pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
        result.append(word_pattern.sub(marker, note, 1))
    return result
def add_random_words_and_shuffle(word_array, num_random_words):
    """Pad a word list with random distractor words and shuffle the result.

    Used to build the option pool for fill-the-blanks exercises so the real
    answers are mixed with decoys. Returns a new shuffled list; the input
    list is not modified.
    """
    # RandomWord comes from the third-party `wonderwords` package (module-level
    # import). random_words(n) presumably returns n random English words —
    # see wonderwords docs to confirm behavior on small dictionaries.
    r = RandomWord()
    random_words_selected = r.random_words(num_random_words)
    combined_array = word_array + random_words_selected
    random.shuffle(combined_array)
    return combined_array
def fillblanks_build_solutions_array(words, start_id):
    """Build the solutions list for a fill-blanks exercise.

    Each word becomes ``{"id": "<n>", "solution": word}`` with ids numbered
    consecutively from *start_id*.
    """
    return [
        {"id": str(start_id + offset), "solution": word}
        for offset, word in enumerate(words)
    ]
def remove_excess_questions(questions: list, quantity):
    """Drop up to *quantity* questions whose solution is 'true', scanning from the end.

    Used to trim over-generated true/false/not-given question sets while
    preserving the relative order of the survivors. Only items with
    ``solution == 'true'`` are removed, so the result may still be longer
    than desired when fewer than *quantity* such items exist.

    Note: the parameter annotation was previously the invalid ``[]``
    (an empty-list *instance*); fixed to ``list``.
    """
    removed = 0
    kept = []
    # Scan from the tail so the earlier (usually better) statements survive.
    for item in reversed(questions):
        if item.get('solution') == 'true' and removed < quantity:
            removed += 1
        else:
            kept.append(item)
    kept.reverse()
    return kept
def build_write_blanks_text(questions: [], start_id):
    """Concatenate question prompts, each followed by its numbered blank.

    Produces ``<question>{{N}}\\n`` per entry (a literal backslash-n marker,
    rendered as a line break by the client), with N counting up from
    *start_id*.
    """
    parts = []
    for offset, entry in enumerate(questions):
        blank = '{{' + str(start_id + offset) + '}}'
        parts.append(entry["question"] + blank + "\\n")
    return "".join(parts)
def build_write_blanks_text_form(form: [], start_id):
    """Turn form lines like ``"Name: John Smith"`` into a gap-fill text.

    For each entry, one randomly chosen word from the value part (the text
    after ':') is replaced with a numbered placeholder ``{{N}}``. N now
    counts up from *start_id* (previously it always started at 1, so the
    placeholders did not match the solution ids callers build from
    *start_id*). Entries without a ':' separator, or with no usable word,
    are kept verbatim and get no placeholder.

    Returns ``(text, replaced_words)`` where *text* is the entries joined
    with literal ``\\n`` markers and *replaced_words* lists the hidden words
    in placeholder order.
    """
    lines = []
    replaced_words = []
    placeholder_id = start_id
    for entry in form:
        # The value is everything after the first ':' (leading spaces skipped).
        match = re.search(r'(?<=:)\s*(.*)', entry)
        if match is None:
            # Malformed entry (no key/value separator): keep it unchanged.
            lines.append(entry + "\\n")
            continue
        original_string = match.group(1)
        words = re.findall(r'\b\w+\b', original_string)
        # Prefer words of 2+ characters; fall back to any word at all.
        candidates = [word for word in words if len(word) > 1] or words
        if not candidates:
            lines.append(entry + "\\n")
            continue
        selected_word = random.choice(candidates)
        placeholder = '{{' + str(placeholder_id) + '}}'
        pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
        # Blank the first occurrence of the chosen word inside the value only.
        replaced_value = pattern.sub(placeholder, original_string, 1)
        lines.append(entry.replace(original_string, replaced_value) + "\\n")
        replaced_words.append(selected_word)
        placeholder_id += 1
    return "".join(lines), replaced_words
def build_write_blanks_solutions(questions: [], start_id):
    """Pair each question's accepted answers with a consecutive blank id.

    Returns ``[{"id": "<n>", "solution": <possible_answers>}, ...]`` with ids
    counting up from *start_id*.
    """
    return [
        {"id": str(start_id + offset), "solution": entry["possible_answers"]}
        for offset, entry in enumerate(questions)
    ]
def build_write_blanks_solutions_listening(words: [], start_id):
    """Map each hidden word to its blank id for a listening gap-fill.

    Same output shape as fillblanks_build_solutions_array: one
    ``{"id": "<n>", "solution": word}`` dict per word, ids from *start_id*.
    """
    solutions = []
    for offset, hidden_word in enumerate(words):
        solutions.append({"id": str(start_id + offset), "solution": hidden_word})
    return solutions
def generate_reading_passage(type: QuestionType, topic: str): def generate_reading_passage(type: QuestionType, topic: str):
gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.READING_PASSAGE_1.value + ", of at least 1500 words, on the topic " \ gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
"of " + topic + ". The passage should offer a substantial amount of " \ "of " + topic + ". The passage should offer a substantial amount of " \
"information, analysis, or narrative " \ "information, analysis, or narrative " \
"relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \ "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
"section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \ "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic." \
"Provide your response in this json format: {'title': 'title of the text', 'text': 'generated text'}" "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
token_count = count_tokens(gen_reading_passage_1)["n_tokens"] token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS, return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
def gen_multiple_choice_exercise(text: str, quantity: int):
gen_multiple_choice_for_text = "Generate" + str(quantity) + "multiple choice questions for this text: " \ def generate_listening_1_conversation(topic: str):
"'" + text + "'\n" \ gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
"Use this format: 'questions': [{'id': '9', 'options': [{'id': 'A', 'text': " \ "social context of '" + topic + "'. Please include random names and genders " \
"'Economic benefits'}, {'id': 'B', 'text': 'Government regulations'}, {'id': 'C', 'text': " \ "for the characters in your dialogue."
"'Concerns about climate change'}, {'id': 'D', 'text': 'Technological advancement'}], " \ token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
"'prompt': 'What is the main reason for the shift towards renewable energy sources?', " \ response = make_openai_instruct_call(
"'solution': 'C', 'variant': 'text'}]" GPT_3_5_TURBO_INSTRUCT,
gen_listening_1_conversation_2_people,
token_count,
None,
GEN_QUESTION_TEMPERATURE
)
conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
token_count = count_tokens(parse_conversation)["n_tokens"]
processed = make_openai_instruct_call(
GPT_3_5_TURBO_INSTRUCT,
parse_conversation,
token_count,
['conversation'],
GEN_QUESTION_TEMPERATURE
)
name_to_voice = {}
for segment in processed['conversation']:
if 'voice' not in segment:
name = segment['name']
if name in name_to_voice:
voice = name_to_voice[name]
else:
if segment['gender'].lower() == 'male':
voice = random.choice(MALE_NEURAL_VOICES)['Id']
else:
voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
name_to_voice[name] = voice
segment['voice'] = voice
return response, processed
def generate_listening_2_monologue(topic: str):
    """Generate the raw monologue text for IELTS Listening section 2.

    Asks the instruct model for a monologue set in the given everyday social
    context and returns the model's reply verbatim (no JSON parsing:
    fields_to_check is None).
    """
    prompt = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
    n_tokens = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        prompt,
        n_tokens,
        None,
        GEN_QUESTION_TEMPERATURE
    )
def generate_listening_3_conversation(topic: str):
    """Generate and voice-annotate a multi-speaker conversation for Listening section 3.

    Two model calls: the first writes a free-form conversation (up to four
    speakers) on *topic*; the second parses that text into
    ``{"conversation": [{"name", "gender", "text"}]}``. Each segment is then
    assigned a voice id from the *_NEURAL_VOICES constants (presumably Amazon
    Polly voices — confirm against text_to_speech), kept stable per speaker
    name. Returns ``(raw_text, parsed_dict)``.
    """
    gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
                                            "in the everyday social context of '" + topic + \
                                            "'. Please include random names and genders for the characters in your dialogue."
    token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
    # First call: free-form generation, no field validation (fields_to_check=None).
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        gen_listening_3_conversation_4_people,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE
    )
    conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
    token_count = count_tokens(parse_conversation)["n_tokens"]
    # Second call: structure the raw text; the reply must contain 'conversation'.
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parse_conversation,
        token_count,
        ['conversation'],
        GEN_QUESTION_TEMPERATURE
    )
    # Assign one voice per speaker name and reuse it for all of that
    # speaker's segments; segments are mutated in place.
    name_to_voice = {}
    for segment in processed['conversation']:
        if 'voice' not in segment:
            name = segment['name']
            if name in name_to_voice:
                voice = name_to_voice[name]
            else:
                # Any gender other than 'male' falls through to a female voice.
                if segment['gender'].lower() == 'male':
                    voice = random.choice(MALE_NEURAL_VOICES)['Id']
                else:
                    voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
                name_to_voice[name] = voice
            segment['voice'] = voice
    return response, processed
def generate_listening_4_monologue(topic: str):
    """Generate the raw monologue text for IELTS Listening section 4.

    Asks the instruct model for an academic-style monologue on *topic* and
    returns the reply verbatim (no JSON parsing: fields_to_check is None).
    """
    # Prompt wording fixed: previously read "monologue an academic subject"
    # (missing "on").
    gen_listening_4_monologue_academic = "Generate a comprehensive monologue on an academic subject of: '" + topic + "'"
    token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        gen_listening_4_monologue_academic,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE
    )
    return response
def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id):
    """Build the requested exercise blocks for a reading passage.

    *req_exercises* names the exercise types; *number_of_exercises_q* is a
    queue yielding how many questions each block gets. Question ids are
    numbered consecutively starting at *start_id*. Unknown type names are
    skipped but still consume a queue entry and advance the id counter.
    """
    builders = {
        "multipleChoice": ("multiple choice", gen_multiple_choice_exercise),
        "fillBlanks": ("fill blanks", gen_summary_fill_blanks_exercise),
        "trueFalse": ("trueFalse", gen_true_false_not_given_exercise),
        "writeBlanks": ("write blanks", gen_write_blanks_exercise),
    }
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()
        entry = builders.get(req_exercise)
        if entry is not None:
            label, builder = entry
            question = builder(passage, number_of_exercises, start_id)
            exercises.append(question)
            print("Added " + label + ": " + str(question))
        start_id = start_id + number_of_exercises
    return exercises
def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id):
    """Build the requested exercise blocks for a listening conversation.

    :param conversation: raw conversation transcript fed to each generator.
    :param req_exercises: exercise type names; supported values are
        "multipleChoice", "writeBlanksQuestions", "writeBlanksFill" and
        "writeBlanksForm".
    :param number_of_exercises_q: queue yielding the question count for each
        requested block.
    :param start_id: first question id; ids run consecutively across blocks.
    :return: list of exercise dicts, one per recognized type.
    """
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()
        if req_exercise == "multipleChoice":
            question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            exercises.append(question)
            print("Added multiple choice: " + str(question))
        elif req_exercise == "writeBlanksQuestions":
            question = gen_write_blanks_questions_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            exercises.append(question)
            print("Added write blanks questions: " + str(question))
        elif req_exercise == "writeBlanksFill":
            question = gen_write_blanks_notes_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            exercises.append(question)
            print("Added write blanks notes: " + str(question))
        elif req_exercise == "writeBlanksForm":
            question = gen_write_blanks_form_exercise_listening_conversation(conversation, number_of_exercises, start_id)
            exercises.append(question)
            print("Added write blanks form: " + str(question))
        # Unrecognized types fall through: they still consume a queue entry
        # and advance start_id below.
        start_id = start_id + number_of_exercises
    return exercises
def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id):
    """Build the requested exercise blocks for a listening monologue.

    Mirrors generate_listening_conversation_exercises but drives the
    monologue-specific generators. Question ids run consecutively from
    *start_id*; unknown type names are skipped but still consume a queue
    entry and advance the id counter.
    """
    builders = {
        "multipleChoice": ("multiple choice", gen_multiple_choice_exercise_listening_monologue),
        "writeBlanksQuestions": ("write blanks questions", gen_write_blanks_questions_exercise_listening_monologue),
        "writeBlanksFill": ("write blanks notes", gen_write_blanks_notes_exercise_listening_monologue),
        "writeBlanksForm": ("write blanks form", gen_write_blanks_form_exercise_listening_monologue),
    }
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()
        entry = builders.get(req_exercise)
        if entry is not None:
            label, builder = entry
            question = builder(monologue, number_of_exercises, start_id)
            exercises.append(question)
            print("Added " + label + ": " + str(question))
        start_id = start_id + number_of_exercises
    return exercises
def gen_multiple_choice_exercise(text: str, quantity: int, start_id):
gen_multiple_choice_for_text = "Generate " + str(quantity) + " multiple choice questions for this text: " \
"'" + text + "'\n" \
"Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
"\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
"\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
"\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"\"solution\": \"C\", \"variant\": \"text\"}]"
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
None, None,
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
parse_mc_questions = "Parse this '" + mc_questions + "' into this json format: 'questions': [{'id': '9', 'options': [{'id': 'A', 'text': " \ parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
"'Economic benefits'}, {'id': 'B', 'text': 'Government regulations'}, {'id': 'C', 'text': " \ "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
"'Concerns about climate change'}, {'id': 'D', 'text': 'Technological advancement'}], " \ "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
"'prompt': 'What is the main reason for the shift towards renewable energy sources?', " \ "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
"'solution': 'C', 'variant': 'text'}]" "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
token_count = count_tokens(parse_mc_questions)["n_tokens"] token_count = count_tokens(parse_mc_questions)["n_tokens"]
return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
["questions"], ["questions"],
GEN_QUESTION_TEMPERATURE) GEN_QUESTION_TEMPERATURE)
return fix_exercise_ids(question, start_id)
def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id):
    """Create a drag-the-word ("fillBlanks") summary exercise from a passage.

    Pipeline: summarize *text*, ask the model to pick *quantity* single words
    from the summary, blank those words out with placeholders numbered from
    *start_id*, and mix the answers with 5 random distractor words.
    """
    gen_summary_for_text = "Summarize this text: " + text
    token_count = count_tokens(gen_summary_for_text)["n_tokens"]
    # First call: free-form summary (no field validation).
    text_summary = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)
    gen_words_to_replace = "Select " + str(
        quantity) + " words, it must be words and not expressions, from the summary and respond in this " \
                    "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    # Second call: the reply must contain a "words" list.
    words_to_replace = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                                 ["words"],
                                                 GEN_QUESTION_TEMPERATURE)["words"]
    replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id)
    # Option pool = real answers + 5 random distractors, shuffled.
    options_words = add_random_words_and_shuffle(words_to_replace, 5)
    solutions = fillblanks_build_solutions_array(words_to_replace, start_id)
    return {
        "allowRepetition": True,
        "id": str(uuid.uuid4()),
        "prompt": "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
                  "more words than spaces so you will not use them all. You may use any of the words more than once.",
        "solutions": solutions,
        "text": replaced_summary,
        "type": "fillBlanks",
        "words": options_words
    }
def gen_true_false_not_given_exercise(text: str, quantity: int, start_id):
    """Create a True/False/Not Given exercise for a reading passage.

    Asks the model for *quantity* statements with solutions, trims surplus
    'true' statements from the tail if the model over-generates, and numbers
    the survivors consecutively from *start_id*.
    """
    gen_true_false_not_given = "Generate " + str(
        quantity) + " statements in JSON format (True, False, or Not Given) " \
                    "based on the provided text. Ensure that your statements " \
                    "accurately represent information or inferences from the " \
                    "text, and provide a variety of responses, including, at least one of each True, " \
                    "False, and Not Given, as appropriate, in the JSON structure " \
                    "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
                    "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
                    "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
    token_count = count_tokens(gen_true_false_not_given)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count,
                                          ["prompts"],
                                          GEN_QUESTION_TEMPERATURE)["prompts"]
    # If the model returned too many statements, drop 'true' ones from the end
    # (may still exceed quantity when there are not enough 'true' items).
    if len(questions) > quantity:
        questions = remove_excess_questions(questions, len(questions) - quantity)
    # Sequential ids so this block lines up with the rest of the section.
    for i, question in enumerate(questions, start=start_id):
        question["id"] = str(i)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Do the following statements agree with the information given in the Reading Passage?",
        "questions": questions,
        "type": "trueFalse"
    }
def gen_write_blanks_exercise(text: str, quantity: int, start_id):
    """Create a short-answer ("writeBlanks") exercise for a reading passage.

    Asks the model for *quantity* short-answer questions with accepted
    answers (max 3 words each), then renders them as a gap-fill text plus a
    solutions list whose blank ids start at *start_id*.
    """
    answer_schema = "{\"questions\": [{\"question\": question, " \
                    "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
    request = ("Generate " + str(quantity) + " short answer questions, and the possible answers "
               "(max 3 words per answer), about this text: '" + text + "'. "
               "Provide your answer in this JSON format: " + answer_schema)
    n_tokens = count_tokens(request)["n_tokens"]
    reply = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, request, n_tokens,
                                      ["questions"],
                                      GEN_QUESTION_TEMPERATURE)
    # Over-generation guard: keep at most the requested number of questions.
    questions = reply["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Create a multiple-choice exercise (4 options each) for a listening conversation.

    Two model calls: the first generates the questions free-form, the second
    reshapes them into the client JSON schema; question ids are then
    renumbered consecutively from *start_id*.
    """
    gen_multiple_choice_for_text = "Generate " + str(quantity) + " multiple choice questions of 4 options for this conversation: " \
                                   "'" + text + "'"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    # First call: free-form questions (no field validation).
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)
    # Second call: coerce the free-form questions into the expected schema
    # (the example question/options are only a format illustration).
    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
                         "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
                         "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
                         "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
                         "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                         ["questions"],
                                         GEN_QUESTION_TEMPERATURE)
    return fix_exercise_ids(question, start_id)
def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Create a multiple-choice exercise for a listening monologue.

    The first model call generates the questions free-form; a second call
    reshapes them into the client JSON schema, after which question ids are
    renumbered consecutively from *start_id*.
    """
    generation_prompt = "Generate " + str(quantity) + " multiple choice questions for this monologue: " \
                        "'" + text + "'"
    n_tokens = count_tokens(generation_prompt)["n_tokens"]
    raw_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, generation_prompt, n_tokens,
                                              None,
                                              GEN_QUESTION_TEMPERATURE)
    # The example question/options below only illustrate the target schema.
    parsing_prompt = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
                     "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
                     "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
                     "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
                     "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + raw_questions + "'"
    n_tokens = count_tokens(parsing_prompt)["n_tokens"]
    structured = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parsing_prompt, n_tokens,
                                           ["questions"],
                                           GEN_QUESTION_TEMPERATURE)
    return fix_exercise_ids(structured, start_id)
def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Create a short-answer ("writeBlanks") exercise for a listening conversation.

    Asks the model for *quantity* short-answer questions (answers of at most
    3 words) about the conversation transcript, then renders them as a
    gap-fill text plus a solutions list with blank ids from *start_id*.
    """
    # Prompt fixed: this conversation variant previously said "monologue",
    # and the string continuations produced "andrespond" / "}]}.The" with
    # missing spaces.
    gen_write_blanks_questions = "Generate " + str(quantity) + " short answer questions, and the possible answers " \
                                 "(max 3 words per answer), about a conversation and " \
                                 "respond in this JSON format: {\"questions\": [{\"question\": question, " \
                                 "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}. " \
                                 "The conversation is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Create a short-answer ("writeBlanks") exercise for a listening monologue.

    Asks the model for *quantity* short-answer questions (answers of at most
    3 words) about the monologue, then renders them as a gap-fill text plus a
    solutions list with blank ids from *start_id*.
    """
    # Prompt fixed: the original string continuations produced "andrespond"
    # and "}]}.The monologue" with missing spaces.
    gen_write_blanks_questions = "Generate " + str(quantity) + " short answer questions, and the possible answers " \
                                 "(max 3 words per answer), about a monologue and " \
                                 "respond in this JSON format: {\"questions\": [{\"question\": question, " \
                                 "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}. " \
                                 "The monologue is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Create a note-completion ("writeBlanks") exercise for a listening conversation.

    Pipeline: ask the model for *quantity* notes about the conversation, ask
    it to pick one word per note, then blank those words out with numbered
    placeholders starting at *start_id*.
    """
    # Prompt fixed: duplicated "and and" removed and "monologue" corrected to
    # "conversation" for this variant.
    gen_write_blanks_notes = "Generate " + str(quantity) + " notes taken from the conversation and respond in this " \
                             "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The conversation is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
                                          ["notes"],
                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
                           "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    # Bug fix: recompute the token count for the second prompt instead of
    # reusing the count of the first one.
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Create a note-completion ("writeBlanks") exercise for a listening monologue.

    Pipeline: ask the model for *quantity* notes about the monologue, ask it
    to pick one word per note, then blank those words out with numbered
    placeholders starting at *start_id*.
    """
    # Prompt fixed: duplicated "and and" removed.
    gen_write_blanks_notes = "Generate " + str(quantity) + " notes taken from the monologue and respond in this " \
                             "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
                                          ["notes"],
                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
                           "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    # Bug fix: recompute the token count for the second prompt instead of
    # reusing the count of the first one.
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Create a form-completion ("writeBlanks") exercise for a listening conversation.

    Two model calls: one generates a key/value form about the conversation, a
    second parses it into a list of strings. One word per line is then hidden
    behind placeholders numbered from *start_id*.
    """
    gen_write_blanks_form = "Generate a form with " + str(quantity) + " key-value pairs about the conversation. " \
                            "The conversation is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
    # First call: free-form form text (no field validation).
    form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
                                     None,
                                     GEN_QUESTION_TEMPERATURE)
    parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'"
    token_count = count_tokens(parse_form)["n_tokens"]
    # Second call: structured parse; keep at most *quantity* lines.
    parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count,
                                            ["form"],
                                            GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "You will hear a conversation. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": replaced_form,
        "type": "writeBlanks"
    }
def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Create a form-completion ("writeBlanks") exercise for a listening monologue.

    Asks the model for a key/value form about the monologue, has a second
    call parse it into a list of strings, then hides one word per line behind
    placeholders numbered from *start_id*.
    """
    request = "Generate a form with " + str(quantity) + " key-value pairs about the monologue. " \
              "The monologue is this: '" + text + "'"
    n_tokens = count_tokens(request)["n_tokens"]
    raw_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, request, n_tokens,
                                         None,
                                         GEN_QUESTION_TEMPERATURE)
    parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + raw_form + "'"
    n_tokens = count_tokens(parse_form)["n_tokens"]
    parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, n_tokens,
                                            ["form"],
                                            GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    replaced_form, hidden_words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "You will hear a monologue. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(hidden_words, start_id),
        "text": replaced_form,
        "type": "writeBlanks"
    }

View File

@@ -23,15 +23,24 @@ def process_response(input_string, quotation_check_field):
index = input_string.index('{') index = input_string.index('{')
# Extract everything after the first '{' (inclusive) # Extract everything after the first '{' (inclusive)
result = input_string[index:] result = input_string[index:]
if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE): if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE) or \
re.search(r"'" + quotation_check_field + "':\s*\[([^\]]+)]", result, re.DOTALL | re.MULTILINE):
json_obj = json.loads(parse_string(result)) json_obj = json.loads(parse_string(result))
return json_obj return json_obj
else: else:
parsed_string = result.replace("\n\n", " ") parsed_string = result.replace("\n\n", " ")
json_obj = json.loads(parsed_string) parsed_string = parsed_string.replace("\n", " ")
return json_obj parsed_string = re.sub(r',\s*]', ']', parsed_string)
parsed_string = re.sub(r',\s*}', '}', parsed_string)
if (parsed_string.find('[') == -1) and (parsed_string.find(']') == -1):
parsed_string = parse_string_2(parsed_string)
return json.loads(parsed_string)
return json.loads(parsed_string)
except Exception as e: except Exception as e:
print(f"Invalid JSON string! Exception: {e}") print(f"Invalid JSON string! Exception: {e}")
print(f"String: {input_string}")
print(f"Exception: {e}")
else: else:
return input_string return input_string
@@ -41,9 +50,36 @@ def parse_string(to_parse: str):
parsed_string = re.sub(pattern, '"', parsed_string) parsed_string = re.sub(pattern, '"', parsed_string)
parsed_string = parsed_string.replace("\\\"", "'") parsed_string = parsed_string.replace("\\\"", "'")
parsed_string = parsed_string.replace("\n\n", " ") parsed_string = parsed_string.replace("\n\n", " ")
parsed_string = re.sub(r',\s*]', ']', parsed_string)
parsed_string = re.sub(r',\s*}', '}', parsed_string)
return parsed_string return parsed_string
def parse_string_2(to_parse: str):
    """Best-effort repair of a flat, single-level pseudo-JSON object.

    Strips the braces, splits the content into alternating keys and values
    (splitting only at ',' or ':' that directly follow a closing double
    quote), re-quotes each value, substitutes the quoted values back into
    the original string, and normalizes every ':' to ': '.
    """
    inner = to_parse.replace("{", "").replace("}", "")
    # Split only at separators immediately preceded by a double quote, so
    # unquoted value text is never split.
    tokens = re.split(r'(?<="),|(?<="):', inner)
    # Odd positions are values; even positions are the (quoted) keys.
    raw_values = [tok for pos, tok in enumerate(tokens) if pos % 2 == 1]
    for raw in raw_values:
        quoted = "\"" + raw.replace("\"", "").strip() + "\""
        to_parse = to_parse.replace(raw, quoted)
    to_parse = to_parse.replace(":", ": ")
    return to_parse
def remove_special_chars_and_escapes(input_string): def remove_special_chars_and_escapes(input_string):
parsed_string = input_string.replace("\\\"", "'") parsed_string = input_string.replace("\\\"", "'")
parsed_string = parsed_string.replace("\n\n", " ") parsed_string = parsed_string.replace("\n\n", " ")
@@ -96,16 +132,12 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
)["choices"][0]["text"] )["choices"][0]["text"]
if fields_to_check is None: if fields_to_check is None:
return remove_special_chars_and_escapes(response) return response
processed_response = process_response(response, fields_to_check[0]) processed_response = process_response(response, fields_to_check[0])
if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT: if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT:
try_count = try_count + 1 try_count = try_count + 1
return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature) return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
elif try_count >= TRY_LIMIT:
try_count = 0
return remove_special_chars_and_escapes(response)
else: else:
try_count = 0 try_count = 0
return processed_response return processed_response

View File

@@ -1,9 +1,11 @@
import whisper import whisper
import os import os
import gtts
import nltk import nltk
import boto3
import random
nltk.download('words') nltk.download('words')
from nltk.corpus import words from nltk.corpus import words
from helper.constants import *
def speech_to_text(file_path): def speech_to_text(file_path):
if os.path.exists(file_path): if os.path.exists(file_path):
@@ -15,8 +17,72 @@ def speech_to_text(file_path):
raise Exception("File " + file_path + " not found.") raise Exception("File " + file_path + " not found.")
def text_to_speech(text: str, file_name: str):
    """Synthesize *text* to an MP3 file using Amazon Polly.

    A neural voice is chosen at random from ALL_NEURAL_VOICES. The output is
    written to ``file_name + ".mp3"``. AWS credentials are read from the
    AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables.

    :param text: the text to synthesize
    :param file_name: output path WITHOUT the ".mp3" extension
    """
    # Initialize the Amazon Polly client
    client = boto3.client(
        'polly',
        region_name='eu-west-1',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
    )

    tts_response = client.synthesize_speech(
        Engine="neural",
        Text=text,
        OutputFormat="mp3",
        VoiceId=random.choice(ALL_NEURAL_VOICES)['Id']
    )

    # Single synthesis call, so write the audio stream straight to disk
    # (no need to accumulate segments and join them).
    file_name = file_name + ".mp3"
    with open(file_name, "wb") as f:
        f.write(tts_response['AudioStream'].read())

    print("Speech segments saved to " + file_name)
def conversation_text_to_speech(conversation: list, file_name: str):
    """Render a multi-speaker conversation to a single MP3 via Amazon Polly.

    Each segment dict is expected to carry 'text', 'name' and 'gender'; a
    Polly neural voice is assigned per speaker (consistent across all of that
    speaker's segments) unless the segment already has a 'voice'. The
    concatenated audio is written to ``file_name + ".mp3"``.

    :param conversation: list of segment dicts
    :param file_name: output path WITHOUT the ".mp3" extension
    """
    # Assign one consistent voice per speaker name, keyed by gender.
    assigned_voices = {}
    for segment in conversation:
        if 'voice' in segment:
            continue
        speaker = segment['name']
        if speaker not in assigned_voices:
            if segment['gender'].lower() == 'male':
                pool = MALE_NEURAL_VOICES
            else:
                pool = FEMALE_NEURAL_VOICES
            assigned_voices[speaker] = random.choice(pool)['Id']
        segment['voice'] = assigned_voices[speaker]

    # Initialize the Amazon Polly client
    polly = boto3.client(
        'polly',
        region_name='eu-west-1',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
    )

    # Synthesize every segment in order and collect the raw MP3 bytes.
    segment_audio = [
        polly.synthesize_speech(
            Engine="neural",
            Text=segment["text"],
            OutputFormat="mp3",
            VoiceId=segment["voice"]
        )['AudioStream'].read()
        for segment in conversation
    ]

    # Concatenate the segments and save them as one file.
    file_name = file_name + ".mp3"
    with open(file_name, "wb") as f:
        f.write(b"".join(segment_audio))

    print("Speech segments saved to " + file_name)
def has_words(text: str): def has_words(text: str):
english_words = set(words.words()) english_words = set(words.words())

File diff suppressed because it is too large Load Diff