import queue
import random
import re
import string
import uuid
import nltk
from wonderwords import RandomWord
from helper.constants import *
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_call, count_total_tokens
from helper.speech_to_text_helper import has_x_words
# Fetch the NLTK "words" resource as a side effect of importing this module.
# NOTE(review): nothing in the visible part of this file reads that corpus —
# confirm it is still required before keeping an import-time download.
nltk.download('words')
def gen_reading_passage_1(topic, req_exercises, difficulty):
    """Build Reading Passage 1: a generated text plus its exercises.

    When ``req_exercises`` is empty, two exercise types are drawn at random
    from ``READING_EXERCISE_TYPES``. Question ids for this passage start at 1.

    Returns a dict with the keys ``exercises``, ``text`` (``content`` and
    ``title``) and ``difficulty``.
    """
    if not len(req_exercises):
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
    counts_queue = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))

    passage = generate_reading_passage_1_text(topic)
    if passage == "":
        # An empty string is treated as a failed generation: start over.
        # NOTE(review): retries are unbounded recursion — consider a cap.
        return gen_reading_passage_1(topic, req_exercises, difficulty)

    first_question_id = 1
    exercises = generate_reading_exercises(passage["text"], req_exercises, counts_queue, first_question_id,
                                           difficulty)
    if contains_empty_dict(exercises):
        # An empty dict marks an exercise that was rejected; regenerate everything.
        return gen_reading_passage_1(topic, req_exercises, difficulty)

    text_section = {"content": passage["text"], "title": passage["title"]}
    return {"exercises": exercises, "text": text_section, "difficulty": difficulty}
def gen_reading_passage_2(topic, req_exercises, difficulty):
    """Build Reading Passage 2: a generated text plus its exercises.

    When ``req_exercises`` is empty, two exercise types are drawn at random
    from ``READING_EXERCISE_TYPES``. Question ids for this passage start at 14.

    Returns a dict with the keys ``exercises``, ``text`` (``content`` and
    ``title``) and ``difficulty``.
    """
    if not len(req_exercises):
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
    counts_queue = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))

    passage = generate_reading_passage_2_text(topic)
    if passage == "":
        # An empty string is treated as a failed generation: start over.
        # NOTE(review): retries are unbounded recursion — consider a cap.
        return gen_reading_passage_2(topic, req_exercises, difficulty)

    first_question_id = 14
    exercises = generate_reading_exercises(passage["text"], req_exercises, counts_queue, first_question_id,
                                           difficulty)
    if contains_empty_dict(exercises):
        # An empty dict marks an exercise that was rejected; regenerate everything.
        return gen_reading_passage_2(topic, req_exercises, difficulty)

    text_section = {"content": passage["text"], "title": passage["title"]}
    return {"exercises": exercises, "text": text_section, "difficulty": difficulty}
def gen_reading_passage_3(topic, req_exercises, difficulty):
    """Build Reading Passage 3: a generated text plus its exercises.

    When ``req_exercises`` is empty, two exercise types are drawn at random
    from ``READING_EXERCISE_TYPES``. Question ids for this passage start at 27.

    Returns a dict with the keys ``exercises``, ``text`` (``content`` and
    ``title``) and ``difficulty``.
    """
    if not len(req_exercises):
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
    counts_queue = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))

    passage = generate_reading_passage_3_text(topic)
    if passage == "":
        # An empty string is treated as a failed generation: start over.
        # NOTE(review): retries are unbounded recursion — consider a cap.
        return gen_reading_passage_3(topic, req_exercises, difficulty)

    first_question_id = 27
    exercises = generate_reading_exercises(passage["text"], req_exercises, counts_queue, first_question_id,
                                           difficulty)
    if contains_empty_dict(exercises):
        # An empty dict marks an exercise that was rejected; regenerate everything.
        return gen_reading_passage_3(topic, req_exercises, difficulty)

    text_section = {"content": passage["text"], "title": passage["title"]}
    return {"exercises": exercises, "text": text_section, "difficulty": difficulty}
def divide_number_into_parts(number, parts):
    """Split ``number`` into ``parts`` near-equal integer shares.

    Returns a ``queue.Queue`` holding the shares in order, with the larger
    shares (when the division is uneven) first. Returns ``None`` when
    ``number`` is smaller than ``parts``.
    """
    if parts > number:
        return None

    base_share, leftover = divmod(number, parts)
    shares = queue.Queue()
    for index in range(parts):
        # The first `leftover` shares absorb one extra unit each.
        shares.put(base_share + 1 if index < leftover else base_share)
    return shares
def fix_exercise_ids(exercise, start_id):
    """Renumber the questions of ``exercise`` consecutively from ``start_id``.

    Each entry of ``exercise["questions"]`` gets its ``"id"`` overwritten with
    the number as a string. The exercise is modified in place and returned.
    """
    for number, question in enumerate(exercise["questions"], start=start_id):
        question["id"] = str(number)
    return exercise
def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
    """Blank out the first whole-word match of each word in ``text``.

    The n-th word (0-based) is replaced by ``{{start_id + n}}``. Matching is
    case-insensitive and anchored on word boundaries; a word that does not
    occur leaves the text untouched.
    """
    for offset, word in enumerate(words_to_replace):
        placeholder = '{{' + str(start_id + offset) + '}}'
        whole_word = r'\b' + re.escape(word) + r'\b'
        text = re.sub(whole_word, placeholder, text, count=1, flags=re.IGNORECASE)
    return text
def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
    """Blank out one word per note, pairing notes and words by position.

    In note ``i`` the first whole-word, case-insensitive match of
    ``words_to_replace[i]`` is replaced by ``{{start_id + i}}``.

    Notes that have no corresponding word (``words_to_replace`` shorter than
    ``notes``) are returned unchanged instead of raising ``IndexError``.

    Returns a new list with one entry per note.
    """
    replaced_notes = []
    for i, note in enumerate(notes):
        if i >= len(words_to_replace):
            # No word was supplied for this note: keep it without a blank.
            replaced_notes.append(note)
            continue
        pattern = re.compile(r'\b' + re.escape(words_to_replace[i]) + r'\b', re.IGNORECASE)
        placeholder = '{{' + str(start_id + i) + '}}'
        replaced_notes.append(pattern.sub(placeholder, note, 1))
    return replaced_notes
def add_random_words_and_shuffle(word_array, num_random_words):
    """Mix ``word_array`` with random extra words and letter the result.

    ``num_random_words`` words are requested from ``RandomWord`` and appended
    to a copy of ``word_array``; the combined list is shuffled and each word
    is labelled ``A``, ``B``, ... in its shuffled position.

    Returns a list of ``{"letter": ..., "word": ...}`` dicts.
    """
    extra_words = RandomWord().random_words(num_random_words)
    pool = word_array + extra_words
    random.shuffle(pool)
    # chr(65) is 'A'; letters follow the shuffled order.
    return [{"letter": chr(65 + position), "word": word} for position, word in enumerate(pool)]
def fillblanks_build_solutions_array(words, start_id):
    """Return one ``{"id", "solution"}`` dict per word.

    Ids are consecutive numbers starting at ``start_id``, stored as strings.
    """
    return [
        {"id": str(number), "solution": word}
        for number, word in enumerate(words, start=start_id)
    ]
def remove_excess_questions(questions: [], quantity):
    """Drop up to ``quantity`` questions whose solution is ``'true'``.

    Candidates are removed starting from the end of the list. Questions with
    any other solution are never removed, so fewer than ``quantity`` items may
    be dropped. The input list is not modified; the relative order of the
    kept questions is preserved.
    """
    still_to_drop = quantity
    kept_reversed = []
    for item in questions[::-1]:
        if item.get('solution') == 'true' and still_to_drop > 0:
            still_to_drop -= 1
            continue
        kept_reversed.append(item)
    return kept_reversed[::-1]
def build_write_blanks_text(questions: [], start_id):
    """Concatenate the questions into one text, each followed by its blank.

    Every question contributes ``<question>{{id}}`` followed by a literal
    backslash-n marker (two characters, not a newline); ids count up from
    ``start_id``.
    """
    lines = []
    for number, entry in enumerate(questions, start=start_id):
        lines.append(entry["question"] + '{{' + str(number) + '}}' + "\\n")
    return "".join(lines)
def build_write_blanks_text_form(form: [], start_id):
    """Turn ``"key: value"`` form entries into a fill-in-the-blank text.

    For each entry, one randomly chosen word of the value (the text after the
    first ``:``) is replaced by ``{{id}}``, with ids counting up from
    ``start_id``. Words longer than one character are preferred; if the value
    has none (e.g. ``"5"``), any word of the value may be chosen.

    Only the value part is rewritten: the key is left intact even when it
    contains the same text as the value.

    Returns ``(text, replaced_words)`` where ``text`` joins the rewritten
    entries, each terminated by a literal backslash-n marker, and
    ``replaced_words`` lists the removed word of each entry in order.

    Raises:
        AttributeError: if an entry contains no ``:``.
        IndexError: if the value of an entry contains no word at all.
    """
    lines = []
    replaced_words = []
    for i, entry in enumerate(form, start=start_id):
        placeholder = '{{' + str(i) + '}}'
        # Capture everything after the first ':' (leading whitespace skipped).
        match = re.search(r'(?<=:)\s*(.*)', entry)
        value = match.group(1)
        words = re.findall(r'\b\w+\b', value)
        # Prefer multi-letter words, but fall back to single-character ones
        # so that values such as "5" can still be blanked.
        candidates = [word for word in words if len(word) > 1] or words
        selected_word = random.choice(candidates)
        pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
        blanked_value = pattern.sub(placeholder, value, 1)
        # Splice by position instead of str.replace so that an identical
        # substring in the key part is not rewritten as well.
        lines.append(entry[:match.start(1)] + blanked_value + entry[match.end(1):] + "\\n")
        replaced_words.append(selected_word)
    return "".join(lines), replaced_words
def build_write_blanks_solutions(questions: [], start_id):
    """Build the solutions list for a write-blanks exercise.

    Each question's ``"possible_answers"`` becomes the ``"solution"`` of an
    entry whose ``"id"`` is a string counting up from ``start_id``. A single
    string answer is wrapped in a one-element list.
    """
    def as_answer_list(answers):
        return [answers] if isinstance(answers, str) else answers

    return [
        {"id": str(number), "solution": as_answer_list(entry["possible_answers"])}
        for number, entry in enumerate(questions, start=start_id)
    ]
def build_write_blanks_solutions_listening(words: [], start_id):
    """Build the solutions list for a listening write-blanks exercise.

    Each item of ``words`` becomes the ``"solution"`` of an entry whose
    ``"id"`` is a string counting up from ``start_id``. A plain string is
    wrapped in a one-element list; anything else is stored as given.
    """
    entries = []
    for number, word in enumerate(words, start=start_id):
        accepted = word if not isinstance(word, str) else [word]
        entries.append({"id": str(number), "solution": accepted})
    return entries
def get_perfect_answer(question: str, size: int):
    """Ask the model for a model answer to an IELTS writing question.

    ``size`` is the minimum number of words requested in the prompt. Returns
    whatever ``make_openai_call`` returns for the request.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"perfect_answer": "perfect answer for the question"}')
    task = 'Write a perfect answer for this writing exercise of a IELTS exam. Question: ' + question
    length_rule = 'The answer must have at least ' + str(size) + ' words'

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
        {"role": "user", "content": length_rule},
    ]
    n_tokens = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, n_tokens, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_reading_passage_1_text(topic: str):
    """Request the text for Reading Passage 1 on ``topic``.

    The prompt asks for at least 800 words that are fairly easy to
    understand, in a ``{"title", "text"}`` JSON shape. Returns whatever
    ``make_openai_call`` returns for the request.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"title": "title of the text", "text": "generated text"}')
    task = (
        'Generate an extensive text for IELTS Reading Passage 1, of at least 800 words, on the topic '
        'of "' + topic + '". The passage should offer '
        'a substantial amount of information, '
        'analysis, or narrative relevant to the chosen '
        'subject matter. This text passage aims to '
        'serve as the primary reading section of an '
        'IELTS test, providing an in-depth and '
        'comprehensive exploration of the topic. '
        'Make sure that the generated text does not '
        'contain forbidden subjects in muslim countries.')
    level_rule = 'The generated text should be fairly easy to understand and have multiple paragraphs.'

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
        {"role": "system", "content": level_rule},
    ]
    n_tokens = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, n_tokens, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_reading_passage_2_text(topic: str):
    """Request the text for Reading Passage 2 on ``topic``.

    The prompt asks for at least 800 words that are fairly hard to
    understand, in a ``{"title", "text"}`` JSON shape. Returns whatever
    ``make_openai_call`` returns for the request.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"title": "title of the text", "text": "generated text"}')
    task = (
        'Generate an extensive text for IELTS Reading Passage 2, of at least 800 words, on the topic '
        'of "' + topic + '". The passage should offer '
        'a substantial amount of information, '
        'analysis, or narrative relevant to the chosen '
        'subject matter. This text passage aims to '
        'serve as the primary reading section of an '
        'IELTS test, providing an in-depth and '
        'comprehensive exploration of the topic. '
        'Make sure that the generated text does not '
        'contain forbidden subjects in muslim countries.')
    level_rule = 'The generated text should be fairly hard to understand and have multiple paragraphs.'

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
        {"role": "system", "content": level_rule},
    ]
    n_tokens = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, n_tokens, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_reading_passage_3_text(topic: str):
    """Request the text for Reading Passage 3 on ``topic``.

    The prompt asks for at least 800 words that are very hard to understand
    and that attribute differing opinions to named sources, in a
    ``{"title", "text"}`` JSON shape. Returns whatever ``make_openai_call``
    returns for the request.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"title": "title of the text", "text": "generated text"}')
    task = (
        'Generate an extensive text for IELTS Reading Passage 3, of at least 800 words, on the topic '
        'of "' + topic + '". The passage should offer '
        'a substantial amount of information, '
        'analysis, or narrative relevant to the chosen '
        'subject matter. This text passage aims to '
        'serve as the primary reading section of an '
        'IELTS test, providing an in-depth and '
        'comprehensive exploration of the topic. '
        'Make sure that the generated text does not '
        'contain forbidden subjects in muslim countries.')
    level_rule = (
        'The generated text should be very hard to understand and include different points, theories, '
        'subtle differences of opinions from people, correctly sourced to the person who said it, '
        'over the specified topic and have multiple paragraphs.')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
        {"role": "system", "content": level_rule},
    ]
    n_tokens = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, n_tokens, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_listening_1_conversation(topic: str):
    """Generate a two-person conversation on ``topic`` and assign voices.

    The model is asked for a ``{"conversation": [{"name", "gender", "text"}]}``
    structure. Every segment that does not already carry a ``"voice"`` gets
    one: a speaker keeps the same voice for all of their segments, and
    different speakers get different voices for as long as unused voices of
    the matching gender remain.

    Returns the response of ``make_openai_call`` with the ``"voice"`` keys
    added in place.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
        },
        {
            "role": "user",
            "content": (
                'Compose an authentic conversation between two individuals in the everyday social context '
                'of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
                'Make sure that the generated conversation does not contain forbidden subjects in '
                'muslim countries.')
        },
        {
            "role": "user",
            "content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).'
        },
        {
            "role": "user",
            "content": 'Try to have spelling of names (cities, people, etc)'
        }
    ]
    token_count = count_total_tokens(messages)
    response = make_openai_call(
        GPT_4_O,
        messages,
        token_count,
        ["conversation"],
        GEN_QUESTION_TEMPERATURE
    )

    chosen_voices = set()
    name_to_voice = {}
    for segment in response['conversation']:
        if 'voice' in segment:
            continue
        name = segment['name']
        if name not in name_to_voice:
            if segment['gender'].lower() == 'male':
                available_voices = MALE_NEURAL_VOICES
            else:
                available_voices = FEMALE_NEURAL_VOICES
            # Pick among voices nobody uses yet. If every voice of this gender
            # is already taken, reuse one rather than retrying forever.
            unused_voices = [v for v in available_voices if v['Id'] not in chosen_voices]
            voice = random.choice(unused_voices or available_voices)['Id']
            chosen_voices.add(voice)
            name_to_voice[name] = voice
        segment['voice'] = name_to_voice[name]
    return response
def generate_listening_2_monologue(topic: str):
    """Generate a monologue set in the social context of ``topic``.

    Returns the ``"monologue"`` entry of the ``make_openai_call`` response.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"monologue": "monologue"}')
    task = (
        'Generate a comprehensive monologue set in the social context '
        'of "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
        'muslim countries.')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["monologue"], GEN_QUESTION_TEMPERATURE)
    return reply["monologue"]
def generate_listening_3_conversation(topic: str):
    """Generate a conversation of up to four people on ``topic`` with voices.

    Every segment lacking a ``"voice"`` gets one picked at random from the
    voice list matching the speaker's gender; a speaker keeps the same voice
    across segments. Different speakers may end up sharing a voice.

    Returns the response of ``make_openai_call`` with the ``"voice"`` keys
    added in place.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
    task = (
        'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
        'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
        'Make sure that the generated conversation does not contain forbidden subjects in '
        'muslim countries.')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["conversation"], GEN_QUESTION_TEMPERATURE)

    voice_by_speaker = {}
    for segment in reply['conversation']:
        if 'voice' in segment:
            continue
        speaker = segment['name']
        if speaker not in voice_by_speaker:
            is_male = segment['gender'].lower() == 'male'
            pool = MALE_NEURAL_VOICES if is_male else FEMALE_NEURAL_VOICES
            voice_by_speaker[speaker] = random.choice(pool)['Id']
        segment['voice'] = voice_by_speaker[speaker]
    return reply
def generate_listening_4_monologue(topic: str):
    """Generate a complex monologue on the academic subject ``topic``.

    Returns the ``"monologue"`` entry of the ``make_openai_call`` response.
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"monologue": "monologue"}')
    task = (
        'Generate a comprehensive and complex monologue on the academic subject '
        'of: "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
        'muslim countries.')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["monologue"], GEN_QUESTION_TEMPERATURE)
    return reply["monologue"]
def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
    """Generate one exercise per requested type for a reading passage.

    For each entry of ``req_exercises`` the next question count is taken from
    ``number_of_exercises_q`` and the matching generator is called. A
    ``writeBlanks`` exercise whose answers exceed the word limit is recorded
    as an empty dict. Unknown types add nothing. In every case the running
    question id advances by the count taken from the queue.

    Returns the list of generated exercises.
    """
    collected = []

    def record(label, exercise):
        collected.append(exercise)
        print(label + str(exercise))

    next_id = start_id
    for kind in req_exercises:
        count = number_of_exercises_q.get()

        if kind == "fillBlanks":
            record("Added fill blanks: ", gen_summary_fill_blanks_exercise(passage, count, next_id, difficulty))
        elif kind == "trueFalse":
            record("Added trueFalse: ", gen_true_false_not_given_exercise(passage, count, next_id, difficulty))
        elif kind == "writeBlanks":
            candidate = gen_write_blanks_exercise(passage, count, next_id, difficulty)
            if not answer_word_limit_ok(candidate):
                # The empty dict is a marker for a rejected exercise.
                collected.append({})
                print("Did not add write blanks because it did not respect word limit")
            else:
                record("Added write blanks: ", candidate)
        elif kind == "paragraphMatch":
            record("Added paragraph match: ", gen_paragraph_match_exercise(passage, count, next_id))
        elif kind == "ideaMatch":
            record("Added idea match: ", gen_idea_match_exercise(passage, count, next_id))

        next_id = next_id + count
    return collected
def answer_word_limit_ok(question):
    """Return True when no accepted answer is longer than three words.

    Looks at every option of every entry in ``question["solutions"]``; words
    are counted by splitting on whitespace.
    """
    for solution in question["solutions"]:
        for option in solution["solution"]:
            if len(option.split()) > 3:
                return False
    return True
def contains_empty_dict(arr):
    """Return True if any element of ``arr`` compares equal to ``{}``."""
    for elem in arr:
        if elem == {}:
            return True
    return False
def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id,
                                              difficulty):
    """Generate one exercise per requested type for a listening conversation.

    For each entry of ``req_exercises`` the next question count is taken from
    ``number_of_exercises_q`` and the matching generator is called. Unknown
    types add nothing. In every case the running question id advances by the
    count taken from the queue.

    Returns the list of generated exercises.
    """
    collected = []

    def record(label, exercise):
        collected.append(exercise)
        print(label + str(exercise))

    next_id = start_id
    for kind in req_exercises:
        count = number_of_exercises_q.get()

        if kind == "multipleChoice":
            record("Added multiple choice: ",
                   gen_multiple_choice_exercise_listening_conversation(conversation, count, next_id, difficulty, 4))
        elif kind == "multipleChoice3Options":
            record("Added multiple choice: ",
                   gen_multiple_choice_exercise_listening_conversation(conversation, count, next_id, difficulty, 3))
        elif kind == "writeBlanksQuestions":
            record("Added write blanks questions: ",
                   gen_write_blanks_questions_exercise_listening_conversation(conversation, count, next_id,
                                                                              difficulty))
        elif kind == "writeBlanksFill":
            record("Added write blanks notes: ",
                   gen_write_blanks_notes_exercise_listening_conversation(conversation, count, next_id, difficulty))
        elif kind == "writeBlanksForm":
            record("Added write blanks form: ",
                   gen_write_blanks_form_exercise_listening_conversation(conversation, count, next_id, difficulty))

        next_id = next_id + count
    return collected
def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id,
                                           difficulty):
    """Generate one exercise per requested type for a listening monologue.

    For each entry of ``req_exercises`` the next question count is taken from
    ``number_of_exercises_q`` and the matching generator is called. Unknown
    types add nothing. In every case the running question id advances by the
    count taken from the queue.

    Returns the list of generated exercises.
    """
    collected = []

    def record(label, exercise):
        collected.append(exercise)
        print(label + str(exercise))

    next_id = start_id
    for kind in req_exercises:
        count = number_of_exercises_q.get()

        if kind == "multipleChoice":
            record("Added multiple choice: ",
                   gen_multiple_choice_exercise_listening_monologue(monologue, count, next_id, difficulty))
        elif kind == "writeBlanksQuestions":
            record("Added write blanks questions: ",
                   gen_write_blanks_questions_exercise_listening_monologue(monologue, count, next_id, difficulty))
        elif kind == "writeBlanksFill":
            record("Added write blanks notes: ",
                   gen_write_blanks_notes_exercise_listening_monologue(monologue, count, next_id, difficulty))
        elif kind == "writeBlanksForm":
            record("Added write blanks form: ",
                   gen_write_blanks_form_exercise_listening_monologue(monologue, count, next_id, difficulty))

        next_id = next_id + count
    return collected
def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
    """Generate a multiple-choice exercise about ``text``.

    The model is asked for ``quantity`` questions of the given difficulty;
    the returned questions are renumbered from ``start_id``.

    Returns a dict with ``id`` (fresh UUID), ``prompt``, ``questions`` and
    ``type`` (``"multipleChoice"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
        '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
        '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
        'energy sources?", "solution": "C", "variant": "text"}]}')
    task = (
        'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions '
        'for this text:\n"' + text + '"')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["questions"], GEN_QUESTION_TEMPERATURE)

    exercise_id = str(uuid.uuid4())
    numbered = fix_exercise_ids(reply, start_id)
    return {
        "id": exercise_id,
        "prompt": "Select the appropriate option.",
        "questions": numbered["questions"],
        "type": "multipleChoice",
    }
def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    """Generate a summary-completion (fill the blanks) exercise for ``text``.

    Two model calls are made: one to summarise ``text`` and one to pick
    ``quantity`` words of the requested difficulty from that summary. The
    first occurrence of each picked word in the summary is replaced by a
    ``{{id}}`` placeholder (ids counting up from ``start_id``), and the
    picked words plus one random extra word are offered as lettered options.

    Returns a dict with ``allowRepetition``, ``id`` (fresh UUID), ``prompt``,
    ``solutions``, ``text``, ``type`` (``"fillBlanks"``) and ``words``.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{ "summary": "summary" }')
        },
        {
            "role": "user",
            "content": ('Summarize this text: "' + text + '"')
        }
    ]
    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_4_O, messages, token_count,
                                ["summary"],
                                GEN_QUESTION_TEMPERATURE)

    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"words": ["word_1", "word_2"] }')
        },
        {
            "role": "user",
            "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
                        'expressions, from this:\n' + response[
                            "summary"])
        }
    ]
    token_count = count_total_tokens(messages)
    # This reply is shaped {"words": [...]} and "words" is the key read below,
    # so that is the field to request (the other calls in this file likewise
    # pass the key they go on to read).
    words_response = make_openai_call(GPT_4_O, messages, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)
    response["words"] = words_response["words"]

    replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
    options_words = add_random_words_and_shuffle(response["words"], 1)
    solutions = fillblanks_build_solutions_array(response["words"], start_id)

    return {
        "allowRepetition": True,
        "id": str(uuid.uuid4()),
        "prompt": "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are "
                  "more words than spaces so you will not use them all. You may use any of the words more than once.",
        "solutions": solutions,
        "text": replaced_summary,
        "type": "fillBlanks",
        "words": options_words
    }
def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
    """Generate a True / False / Not Given exercise about ``text``.

    The model is asked for ``quantity`` statements. If it returns more, the
    surplus is trimmed: statements whose solution is ``"true"`` are dropped
    first (via ``remove_excess_questions``), and anything still above
    ``quantity`` is cut from the end so that at most ``quantity`` ids are
    used. Ids are strings counting up from ``start_id``.

    Returns a dict with ``id`` (fresh UUID), ``prompt``, ``questions`` and
    ``type`` (``"trueFalse"``).
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
                '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
        },
        {
            "role": "user",
            "content": (
                'Generate ' + str(
                    quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
                                                   'Ensure that your statements accurately represent '
                                                   'information or inferences from the text, and '
                                                   'provide a variety of responses, including, at '
                                                   'least one of each True, False, and Not Given, '
                                                   'as appropriate.\n\nReference text:\n\n ' + text)
        }
    ]
    token_count = count_total_tokens(messages)
    questions = make_openai_call(GPT_4_O, messages, token_count, ["prompts"],
                                 GEN_QUESTION_TEMPERATURE)["prompts"]

    if len(questions) > quantity:
        questions = remove_excess_questions(questions, len(questions) - quantity)
        # Only "true" statements are removed above; if too few of them exist
        # the list is still too long and its ids would spill into the range
        # reserved for the next exercise, so cap it explicitly.
        questions = questions[:quantity]

    for i, question in enumerate(questions, start=start_id):
        question["id"] = str(i)

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Do the following statements agree with the information given in the Reading Passage?",
        "questions": questions,
        "type": "trueFalse"
    }
def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    """Generate a short-answer (write the blanks) exercise about ``text``.

    The model is asked for ``quantity`` questions with answers of at most
    three words; at most ``quantity`` of the returned questions are kept.

    Returns a dict with ``id`` (fresh UUID), ``maxWords``, ``prompt``,
    ``solutions``, ``text`` and ``type`` (``"writeBlanks"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
    task = (
        'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
        'possible answers, must have maximum 3 words '
        'per answer, about this text:\n"' + text + '"')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["questions"], GEN_QUESTION_TEMPERATURE)
    kept_questions = reply["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
        "solutions": build_write_blanks_solutions(kept_questions, start_id),
        "text": build_write_blanks_text(kept_questions, start_id),
        "type": "writeBlanks"
    }
def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    """Generate a heading-to-paragraph matching exercise for ``text``.

    The text is split into lettered paragraphs, the model is asked for one
    heading per paragraph, and the headings are presented in shuffled order
    with the paragraph letter as solution. Options list the paragraphs in
    their original order; at most ``quantity`` sentences are returned, with
    integer ids counting up from ``start_id``.

    Returns a dict with ``id`` (fresh UUID), ``allowRepetition``, ``options``,
    ``prompt``, ``sentences`` and ``type`` (``"matchSentences"``).
    """
    paragraphs = assign_letters_to_paragraphs(text)

    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
    task = (
        'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(
            paragraphs))
    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["headings"], GEN_QUESTION_TEMPERATURE)
    headings = reply["headings"]

    # Headings are paired with paragraphs by position.
    for index, paragraph in enumerate(paragraphs):
        paragraph["heading"] = headings[index]["heading"]
    options = [{"id": paragraph["letter"], "sentence": paragraph["paragraph"]} for paragraph in paragraphs]

    # Shuffle only after the options were captured so they keep letter order.
    random.shuffle(paragraphs)
    sentences = [
        {"id": number, "sentence": paragraph["heading"], "solution": paragraph["letter"]}
        for number, paragraph in enumerate(paragraphs, start=start_id)
    ]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": options,
        "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
        "sentences": sentences[:quantity],
        "type": "matchSentences"
    }
def gen_idea_match_exercise(text: str, quantity: int, start_id):
    """Generate an idea-to-author matching exercise for ``text``.

    The model is asked to extract ``quantity`` ideas together with who they
    come from; authors become the options and ideas become the sentences.

    Returns a dict with ``id`` (fresh UUID), ``allowRepetition``, ``options``,
    ``prompt``, ``sentences`` and ``type`` (``"matchSentences"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"ideas": [ '
        '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
        '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
        ']}')
    task = (
        'From the text extract ' + str(quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["ideas"], GEN_QUESTION_TEMPERATURE)
    extracted = reply["ideas"]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": build_options(extracted),
        "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
        "sentences": build_sentences(extracted, start_id),
        "type": "matchSentences"
    }
def build_options(ideas):
    """Return one lettered option per idea, showing the idea's author.

    Letters are assigned ``A``, ``B``, ... in the order of ``ideas``; the
    option text is the idea's ``"from"`` value.
    """
    letter_source = iter(string.ascii_uppercase)
    return [{"id": next(letter_source), "sentence": idea["from"]} for idea in ideas]
def build_sentences(ideas, start_id):
    """Return the ideas as shuffled sentences with solution letters and ids.

    Each idea is tagged with the letter matching its position in ``ideas``
    (``A``, ``B``, ...), the list is shuffled, and integer ids counting up
    from ``start_id`` are then assigned in the shuffled order.
    """
    letter_source = iter(string.ascii_uppercase)
    sentences = [{"solution": next(letter_source), "sentence": idea["idea"]} for idea in ideas]
    random.shuffle(sentences)
    for offset, sentence in enumerate(sentences):
        sentence["id"] = start_id + offset
    return sentences
def assign_letters_to_paragraphs(paragraphs):
    """Split a text on blank lines and letter the paragraphs that qualify.

    A chunk is kept only when ``has_x_words(chunk, 10)`` is truthy; kept
    chunks are stripped and lettered ``A``, ``B``, ... in order.

    Returns a list of ``{'paragraph': ..., 'letter': ...}`` dicts.
    """
    letter_source = iter(string.ascii_uppercase)
    lettered = []
    for chunk in paragraphs.split("\n\n"):
        if not has_x_words(chunk, 10):
            continue
        lettered.append({'paragraph': chunk.strip(), 'letter': next(letter_source)})
    return lettered
def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty, n_options=4):
    """Generate a multiple-choice exercise about a listening conversation.

    The model is asked for ``quantity`` questions of the given difficulty
    with ``n_options`` options each; the returned questions are renumbered
    from ``start_id``.

    Returns a dict with ``id`` (fresh UUID), ``prompt``, ``questions`` and
    ``type`` (``"multipleChoice"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
        '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
        '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
        'energy sources?", "solution": "C", "variant": "text"}]}')
    task = (
        'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
            n_options) + ' options '
                         'of for this conversation:\n"' + text + '"')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["questions"], GEN_QUESTION_TEMPERATURE)

    exercise_id = str(uuid.uuid4())
    numbered = fix_exercise_ids(reply, start_id)
    return {
        "id": exercise_id,
        "prompt": "Select the appropriate option.",
        "questions": numbered["questions"],
        "type": "multipleChoice",
    }
def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty, n_options=4):
    """Generate a multiple-choice exercise about a listening monologue.

    The model is asked for ``quantity`` questions of the given difficulty
    with ``n_options`` options each; the returned questions are renumbered
    from ``start_id``.

    Returns a dict with ``id`` (fresh UUID), ``prompt``, ``questions`` and
    ``type`` (``"multipleChoice"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
        '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
        '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
        'energy sources?", "solution": "C", "variant": "text"}]}')
    task = (
        'Generate ' + str(
            quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
            n_options) + ' options '
                         'of for this monologue:\n"' + text + '"')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["questions"], GEN_QUESTION_TEMPERATURE)

    exercise_id = str(uuid.uuid4())
    numbered = fix_exercise_ids(reply, start_id)
    return {
        "id": exercise_id,
        "prompt": "Select the appropriate option.",
        "questions": numbered["questions"],
        "type": "multipleChoice",
    }
def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Generate short-answer questions about a listening conversation.

    The model is asked for ``quantity`` questions with answers of at most
    three words; at most ``quantity`` of the returned questions are kept.

    Returns a dict with ``id`` (fresh UUID), ``maxWords``, ``prompt``,
    ``solutions``, ``text`` and ``type`` (``"writeBlanks"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
    task = (
        'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
        'possible answers (max 3 words per answer), '
        'about this conversation:\n"' + text + '"')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["questions"], GEN_QUESTION_TEMPERATURE)
    kept_questions = reply["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(kept_questions, start_id),
        "text": build_write_blanks_text(kept_questions, start_id),
        "type": "writeBlanks"
    }
def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Generate short-answer questions about a listening monologue.

    The model is asked for ``quantity`` questions with answers of at most
    three words; at most ``quantity`` of the returned questions are kept.

    Returns a dict with ``id`` (fresh UUID), ``maxWords``, ``prompt``,
    ``solutions``, ``text`` and ``type`` (``"writeBlanks"``).
    """
    format_instruction = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
    task = (
        'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
        'possible answers (max 3 words per answer), '
        'about this monologue:\n"' + text + '"')

    messages = [
        {"role": "system", "content": format_instruction},
        {"role": "user", "content": task},
    ]
    n_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_4_O, messages, n_tokens, ["questions"], GEN_QUESTION_TEMPERATURE)
    kept_questions = reply["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(kept_questions, start_id),
        "text": build_write_blanks_text(kept_questions, start_id),
        "type": "writeBlanks"
    }
def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Generate a note-completion exercise about a listening conversation.

    Two model calls are made: one for ``quantity`` notes taken from the
    conversation and one to select a single word from each note. The selected
    word of each note is replaced by a ``{{id}}`` placeholder, ids counting
    up from ``start_id``.

    Returns a dict with ``id`` (fresh UUID), ``maxWords``, ``prompt``,
    ``solutions``, ``text`` (notes joined by a literal backslash-n marker)
    and ``type`` (``"writeBlanks"``).
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"notes": ["note_1", "note_2"]}')
        },
        {
            "role": "user",
            "content": (
                'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
                'conversation:\n"' + text + '"')
        }
    ]
    token_count = count_total_tokens(messages)
    questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
                                 GEN_QUESTION_TEMPERATURE)["notes"][:quantity]

    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    word_messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
        },
        {
            "role": "user",
            "content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
        }
    ]
    # Count the tokens of the prompt actually being sent; the count from the
    # notes prompt above does not describe this second request.
    word_token_count = count_total_tokens(word_messages)
    words = make_openai_call(GPT_4_O, word_messages, word_token_count, ["words"],
                             GEN_QUESTION_TEMPERATURE)["words"][:quantity]

    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a "writeBlanks" notes exercise for a listening monologue.

    Two model calls are made: the first asks for ``quantity`` notes about the
    monologue, the second asks for one word from each note. Notes and words
    are then passed to ``replace_first_occurrences_with_placeholders_notes`` to
    produce the exercise text.

    Args:
        text: Monologue transcript embedded in the prompt.
        quantity: Number of notes requested; both responses are truncated to
            this length.
        start_id: Forwarded unchanged to the placeholder and solution helpers
            (presumably the id of the first blank -- confirm against them).
        difficulty: Difficulty label embedded in the prompt.

    Returns:
        dict: Exercise payload with the keys "id", "maxWords", "prompt",
        "solutions", "text" and "type".
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"notes": ["note_1", "note_2"]}')
        },
        {
            "role": "user",
            "content": (
                'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
                'monologue:\n"' + text + '"')
        }
    ]
    token_count = count_total_tokens(messages)
    notes = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
                             GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    # Numbered list ("1. ...", "2. ...") so the model can pick one word per phrase.
    formatted_phrases = "\n".join(f"{position}. {note}" for position, note in enumerate(notes, start=1))
    word_messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
        },
        {
            "role": "user",
            "content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
        }
    ]
    # Count the tokens of the messages actually sent in this call; the count
    # computed above describes the first (much longer) prompt, not this one.
    word_token_count = count_total_tokens(word_messages)
    words = make_openai_call(GPT_4_O, word_messages, word_token_count, ["words"],
                             GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(notes, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        # Joined with a literal backslash + "n" sequence, not a newline character.
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Build a "writeBlanks" form exercise for a listening conversation.

    The model is asked for a form of ``quantity`` entries about the
    conversation; the entries are handed to ``build_write_blanks_text_form``,
    which returns the form text and the removed words.

    Args:
        text: Conversation transcript embedded in the prompt.
        quantity: Number of form entries requested; the response is truncated
            to this length.
        start_id: Forwarded unchanged to the text and solution helpers.
        difficulty: Accepted for signature parity with the sibling generators;
            it is not used by this prompt.

    Returns:
        dict: Exercise payload with the keys "id", "maxWords", "prompt",
        "solutions", "text" and "type".
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                # A list of "key: value" strings: valid JSON, and the same shape the
                # monologue variant requests before calling the same form helper.
                '{"form": ["key: value", "key2: value"]}')
        },
        {
            "role": "user",
            "content": (
                'Generate a form with ' + str(
                    quantity) + ' entries with information about this conversation:\n"' + text + '"')
        },
        {
            "role": "user",
            "content": 'It must be a form and not questions. '
                       'Example: {"form": ["Color of car: blue", "Brand of car: toyota"]}'
        }
    ]
    token_count = count_total_tokens(messages)
    parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
                                   GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": replaced_form,
        "type": "writeBlanks"
    }
def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a "writeBlanks" form exercise for a listening monologue.

    Requests ``quantity`` key-value pairs about the monologue, truncates the
    answer to ``quantity`` entries and lets ``build_write_blanks_text_form``
    produce the form text plus the removed words.

    Returns:
        dict: Exercise payload with the keys "id", "maxWords", "prompt",
        "solutions", "text" and "type".
    """
    system_prompt = ('You are a helpful assistant designed to output JSON on this format: '
                     '{"form": ["key: value", "key2: value"]}')
    user_prompt = ('Generate a form with ' + str(quantity) + ' ' + difficulty
                   + ' difficulty key-value pairs about this monologue:\n"' + text + '"')
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_4_O, messages, token_count, ["form"], GEN_QUESTION_TEMPERATURE)
    form_entries = response["form"][:quantity]

    form_text, answers = build_write_blanks_text_form(form_entries, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(answers, start_id),
        "text": form_text,
        "type": "writeBlanks",
    }
def gen_multiple_choice_level(quantity: int, start_id=1):
    """Generate a multiple-choice exercise for the English level exam.

    The model is asked for ``quantity`` four-option questions. If it answers
    with a different number of questions the whole generation is retried.
    Each question is then passed through ``replace_exercise_if_exists`` together
    with the exams returned by ``get_all("level")``.

    Returns:
        dict: Exercise payload with the keys "id", "prompt", "questions" and
        "type".
    """
    user_prompt = (
        "Generate " + str(quantity) + " multiple choice questions of 4 options for an english level exam, "
        "some easy questions, some intermediate questions and some advanced questions. "
        "Ensure that the questions cover a range of topics such as verb tense, subject-verb agreement, "
        "pronoun usage, sentence structure, and punctuation. "
        "Make sure every question only has 1 correct answer."
    )
    system_prompt = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
        '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
        '"prompt": "Which of the following is a conjunction?", "solution": "A", "variant": "text"}]}'
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)

    generated = question["questions"]
    if len(generated) != quantity:
        # Wrong number of questions: start over.
        return gen_multiple_choice_level(quantity, start_id)

    all_exams = get_all("level")
    seen_keys = set()
    for index, exercise in enumerate(generated):
        generated[index], seen_keys = replace_exercise_if_exists(all_exams, exercise, question, seen_keys)

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }
def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_keys):
    """Return ``current_exercise`` unless it duplicates a known question.

    A question is replaced by a freshly generated one (and re-checked) when
    either its (prompt, sorted option texts) key is already in ``seen_keys``,
    or a stored exam contains a question with the same prompt whose first
    option text equals any option text of ``current_exercise``.

    Only the first part of each stored exam, and the first exercise of that
    part, are inspected. Exams without parts, exercises or questions are
    skipped instead of raising ``IndexError``/``KeyError``.

    Args:
        all_exams: Iterable of stored exams exposing ``to_dict()``.
        current_exercise: Question dict with "prompt" and "options" keys.
        current_exam: Not used here; only forwarded on recursive calls.
        seen_keys: Set of keys accepted so far; mutated in place.

    Returns:
        tuple: ``(exercise, seen_keys)`` where ``exercise`` is the accepted
        question.
    """
    # Extracting relevant fields for comparison
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
    # Check if the key is in the set
    if key in seen_keys:
        return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
    seen_keys.add(key)

    for exam in all_exams:
        parts = exam.to_dict().get("parts") or []
        if not parts:
            continue
        exercises = parts[0].get("exercises") or []
        if not exercises:
            continue
        stored_questions = exercises[0].get("questions") or []
        if any(
                stored["prompt"] == current_exercise["prompt"] and
                any(stored["options"][0]["text"] == current_option["text"] for current_option in
                    current_exercise["options"])
                for stored in stored_questions
        ):
            return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
    return current_exercise, seen_keys
def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
    """Return ``current_exercise`` unless it duplicates a known question.

    The question is swapped for a freshly generated one (and checked again)
    when its (prompt, sorted option texts) key was already seen, or when any
    exam in ``all_exams`` holds a question with the same prompt whose first
    option text matches one of the current option texts. Exams are read with
    ``exam.get("questions", [])``.

    Returns:
        tuple: ``(exercise, seen_keys)``; ``seen_keys`` is updated in place.
    """
    option_texts = tuple(sorted(option['text'] for option in current_exercise['options']))
    fingerprint = (current_exercise['prompt'], option_texts)

    if fingerprint in seen_keys:
        return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam,
                                               seen_keys)
    seen_keys.add(fingerprint)

    def clashes_with(stored):
        # Same prompt, and the stored question's first option appears among ours.
        return stored["prompt"] == current_exercise["prompt"] and any(
            stored["options"][0]["text"] == candidate["text"] for candidate in current_exercise["options"])

    for exam in all_exams:
        for stored in exam.get("questions", []):
            if clashes_with(stored):
                return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(),
                                                       current_exam, seen_keys)
    return current_exercise, seen_keys
def generate_single_mc_level_question():
    """Ask the model for one four-option multiple-choice level-exam question.

    Returns:
        Whatever ``make_openai_call`` returns for the request; the system
        prompt asks for a single question object with "id", "options",
        "prompt", "solution" and "variant".
    """
    system_prompt = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
        '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
        '"prompt": "Which of the following is a conjunction?", "solution": "A", "variant": "text"}'
    )
    user_prompt = ('Generate 1 multiple choice question of 4 options for an english level exam, '
                   'it can be easy, intermediate or advanced.')
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    token_count = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, token_count, ["options"], GEN_QUESTION_TEMPERATURE)
def parse_conversation(conversation_data):
    """Render a conversation payload as newline-separated "name: text" lines.

    Reads the "conversation" list of ``conversation_data`` (empty when the key
    is absent). A message without "name" is attributed to "Unknown"; a message
    without "text" contributes an empty text.
    """
    return "\n".join(
        f"{turn.get('name', 'Unknown')}: {turn.get('text', '')}"
        for turn in conversation_data.get('conversation', [])
    )
def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams):
    """Generate ``quantity`` blank-space multiple-choice questions.

    If the model answers with a different number of questions, this same
    generator is retried, so the retry keeps the blank-space prompt and the
    caller-supplied ``all_exams``. Each question is then passed through
    ``replace_exercise_if_exists_utas``.

    Args:
        quantity: Number of questions to request.
        start_id: Forwarded to ``fix_exercise_ids``.
        all_exams: Existing exams forwarded to the duplicate check.

    Returns:
        The value of ``fix_exercise_ids(question, start_id)`` for the model
        response (the full response object, not just its "questions" list).
    """
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " multiple choice blank space questions of 4 options for an english level exam, some easy questions, some intermediate " \
                    "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
                    "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
                    "every question only has 1 correct answer."
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
                '[{"id": "A", "text": '
                '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                '"Happy"}, {"id": "D", "text": "Jump"}], '
                '"prompt": "Which of the following is a conjunction?", '
                '"solution": "A", "variant": "text"}]}')
        },
        {
            "role": "user",
            "content": gen_multiple_choice_for_text
        }
    ]
    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["questions"],
                                GEN_QUESTION_TEMPERATURE)
    questions = question["questions"]
    if len(questions) != quantity:
        # Retry this generator rather than the generic level generator.
        return gen_multiple_choice_blank_space_utas(quantity, start_id, all_exams)

    seen_keys = set()
    for index, exercise in enumerate(questions):
        questions[index], seen_keys = replace_exercise_if_exists_utas(all_exams, exercise, question, seen_keys)
    return fix_exercise_ids(question, start_id)
def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
    """Generate ``quantity`` "find the wrong underlined word" questions.

    If the model answers with a different number of questions, this same
    generator is retried, so every path returns a list of questions of the
    underlined-word kind.

    Args:
        quantity: Number of questions to request.
        start_id: Forwarded to ``fix_exercise_ids``.

    Returns:
        The "questions" entry of ``fix_exercise_ids(question, start_id)``.
    """
    # Embedded in the system prompt via str(), i.e. as a Python dict repr.
    json_format = {
        "questions": [
            {
                "id": "9",
                "options": [
                    {
                        "id": "A",
                        "text": "a"
                    },
                    {
                        "id": "B",
                        "text": "b"
                    },
                    {
                        "id": "C",
                        "text": "c"
                    },
                    {
                        "id": "D",
                        "text": "d"
                    }
                ],
                "prompt": "prompt",
                "solution": "A",
                "variant": "text"
            }
        ]
    }
    gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (
        ' multiple choice questions of 4 options for an english '
        'level exam, some easy questions, some intermediate '
        'questions and some advanced questions.Ensure that '
        'the questions cover a range of topics such as verb '
        'tense, subject-verb agreement, pronoun usage, '
        'sentence structure, and punctuation. Make sure '
        'every question only has 1 correct answer.')
    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": gen_multiple_choice_for_text
        },
        {
            "role": "user",
            "content": (
                'The type of multiple choice is the prompt has wrong words or group of words and the options are to '
                'find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                'Prompt: "I complain about my boss all the time, but my colleagues thinks the boss is nice."\n'
                'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"')
        }
    ]
    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["questions"],
                                GEN_QUESTION_TEMPERATURE)
    if len(question["questions"]) != quantity:
        # Retry this generator: gen_multiple_choice_level returns a dict, not
        # the list of questions this function returns on success.
        return gen_multiple_choice_underlined_utas(quantity, start_id)
    return fix_exercise_ids(question, start_id)["questions"]
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=None):
    """Generate a text about ``topic`` with ``quantity`` words blanked out.

    Args:
        quantity: Number of words the model is asked to replace.
        start_id: First placeholder id the model is asked to use.
        size: Minimum number of words requested for the text.
        topic: Topic of the text. When omitted (``None``), a topic is drawn
            from ``mti_topics`` on every call; a default written in the
            signature would be evaluated only once, at definition time.

    Returns:
        The "question" entry of the model response (requested shape:
        ``{"words": [{"id", "text"}, ...], "text": ...}``).
    """
    if topic is None:
        topic = random.choice(mti_topics)
    # Embedded in the system prompt via str(), i.e. as a Python dict repr.
    json_format = {
        "question": {
            "words": [
                {
                    "id": "1",
                    "text": "a"
                },
                {
                    "id": "2",
                    "text": "b"
                },
                {
                    "id": "3",
                    "text": "c"
                },
                {
                    "id": "4",
                    "text": "d"
                }
            ],
            "text": "text"
        }
    }
    gen_text = 'Generate a text of at least ' + str(size) + ' words about the topic ' + topic + '.'
    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": gen_text
        },
        {
            "role": "user",
            "content": (
                'From the generated text choose ' + str(
                    quantity) + ' words (cannot be sequential words) to replace '
                                'once with {{id}} where id starts on ' + str(start_id) + ' and is '
                                'incremented for each word. The ids must be ordered throughout the text and the words must be '
                                'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
        }
    ]
    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["question"],
                                GEN_QUESTION_TEMPERATURE)
    return question["question"]
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=None):
    """Generate a reading passage with short-answer and multiple-choice exercises.

    Args:
        start_id: Id of the first short-answer question; the multiple-choice
            questions start at ``start_id + sa_quantity``.
        sa_quantity: Number of short-answer questions to request.
        mc_quantity: Number of multiple-choice questions to request.
        topic: Topic of the passage. When omitted (``None``), a topic is drawn
            from ``mti_topics`` on every call; a default written in the
            signature would be evaluated only once, at definition time.

    Returns:
        dict: ``{"exercises": {"shortAnswer", "multipleChoice"},
        "text": {"content", "title"}}``.
    """
    if topic is None:
        topic = random.choice(mti_topics)
    passage = generate_reading_passage_1_text(topic)
    if passage == "":
        # Same retry the other passage generators in this file use when the
        # text generator comes back empty; indexing "" below would fail.
        return gen_reading_passage_utas(start_id, sa_quantity, mc_quantity, topic)
    short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
    mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
    return {
        "exercises": {
            "shortAnswer": short_answer,
            "multipleChoice": mc_exercises,
        },
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        }
    }
def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
    """Ask the model for ``sa_quantity`` short-answer questions about ``text``.

    The prompt requests answers of at most 3 words and tells the model which
    id to start numbering from.

    Returns:
        The "questions" entry of the model response.
    """
    # Embedded in the system prompt via str(), i.e. as a Python dict repr.
    json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}

    system_message = {
        "role": "system",
        "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format),
    }
    request_message = {
        "role": "user",
        "content": ('Generate ' + str(sa_quantity)
                    + ' short answer questions, and the possible answers, must have maximum 3 words per answer, '
                      'about this text:\n"' + text + '"'),
    }
    numbering_message = {
        "role": "user",
        "content": 'The id starts at ' + str(start_id) + '.',
    }
    messages = [system_message, request_message, numbering_message]

    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)
    return response["questions"]
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
    """Generate ``mc_quantity`` four-option questions about ``text``.

    If the model answers with a different number of questions, this same
    generator is retried, so every path returns a list of questions about the
    given text.

    Args:
        text: Passage the questions must be about.
        start_id: Forwarded to ``fix_exercise_ids``.
        mc_quantity: Number of questions to request.

    Returns:
        The "questions" entry of ``fix_exercise_ids(question, start_id)``.
    """
    # Embedded in the system prompt via str(), i.e. as a Python dict repr.
    json_format = {
        "questions": [
            {
                "id": "9",
                "options": [
                    {
                        "id": "A",
                        "text": "a"
                    },
                    {
                        "id": "B",
                        "text": "b"
                    },
                    {
                        "id": "C",
                        "text": "c"
                    },
                    {
                        "id": "D",
                        "text": "d"
                    }
                ],
                "prompt": "prompt",
                "solution": "A",
                "variant": "text"
            }
        ]
    }
    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": 'Generate ' + str(
                mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
        },
        {
            "role": "user",
            "content": 'Make sure every question only has 1 correct answer.'
        }
    ]
    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count,
                                ["questions"],
                                GEN_QUESTION_TEMPERATURE)
    if len(question["questions"]) != mc_quantity:
        # Retry this generator: gen_multiple_choice_level ignores the text and
        # returns a dict, not the list this function returns on success.
        return gen_text_multiple_choice_utas(text, start_id, mc_quantity)
    return fix_exercise_ids(question, start_id)["questions"]