Files
encoach_backend/helper/exercises.py
Cristiano Ferreira 6e2355ee4c Clean up the code.
2024-04-10 22:21:30 +01:00

939 lines
46 KiB
Python

import queue
import random
import re
import string
import uuid
import nltk
from wonderwords import RandomWord
from helper.api_messages import QuestionType
from helper.constants import *
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_instruct_call, make_openai_call
from helper.token_counter import count_tokens
nltk.download('words')
def gen_reading_passage_1(topic, req_exercises, difficulty):
    """Build the first IELTS reading section: generated passage plus exercises.

    Args:
        topic: Subject the passage should cover.
        req_exercises: Requested exercise type names; when empty, two types
            are sampled at random from READING_EXERCISE_TYPES.
        difficulty: Difficulty label forwarded to the exercise generators.

    Returns:
        Dict with "exercises", "text" ({"content", "title"}) and "difficulty".
    """
    if not req_exercises:
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
    start_id = 1  # question ids for passage 1 start at 1
    # Retry in a loop instead of recursing: the original self-recursion on
    # every failed generation risked RecursionError under repeated upstream
    # failures. The queue of per-type counts must be rebuilt each attempt
    # because generate_reading_exercises consumes it.
    while True:
        passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
        if passage == "":
            continue  # "" signals a failed generation; try again
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
        if not contains_empty_dict(exercises):
            break  # every exercise block generated successfully
    return {
        "exercises": exercises,
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        },
        "difficulty": difficulty
    }
def gen_reading_passage_2(topic, req_exercises, difficulty):
    """Build the second IELTS reading section: generated passage plus exercises.

    Args:
        topic: Subject the passage should cover.
        req_exercises: Requested exercise type names; when empty, two types
            are sampled at random from READING_EXERCISE_TYPES.
        difficulty: Difficulty label forwarded to the exercise generators.

    Returns:
        Dict with "exercises", "text" ({"content", "title"}) and "difficulty".
    """
    if not req_exercises:
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
    start_id = 14  # passage 2 continues numbering after passage 1's 13 questions
    # Retry in a loop instead of recursing: the original self-recursion on
    # every failed generation risked RecursionError under repeated upstream
    # failures. The queue of per-type counts must be rebuilt each attempt
    # because generate_reading_exercises consumes it.
    while True:
        passage = generate_reading_passage(QuestionType.READING_PASSAGE_2, topic)
        if passage == "":
            continue  # "" signals a failed generation; try again
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
        if not contains_empty_dict(exercises):
            break  # every exercise block generated successfully
    return {
        "exercises": exercises,
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        },
        "difficulty": difficulty
    }
def gen_reading_passage_3(topic, req_exercises, difficulty):
    """Build the third IELTS reading section: generated passage plus exercises.

    Args:
        topic: Subject the passage should cover.
        req_exercises: Requested exercise type names; when empty, two types
            are sampled at random from READING_EXERCISE_TYPES.
        difficulty: Difficulty label forwarded to the exercise generators.

    Returns:
        Dict with "exercises", "text" ({"content", "title"}) and "difficulty".
    """
    if not req_exercises:
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
    start_id = 27  # passage 3 continues numbering after passages 1 and 2
    # Retry in a loop instead of recursing: the original self-recursion on
    # every failed generation risked RecursionError under repeated upstream
    # failures. The queue of per-type counts must be rebuilt each attempt
    # because generate_reading_exercises consumes it.
    while True:
        passage = generate_reading_passage(QuestionType.READING_PASSAGE_3, topic)
        if passage == "":
            continue  # "" signals a failed generation; try again
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))
        exercises = generate_reading_exercises(
            passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
        if not contains_empty_dict(exercises):
            break  # every exercise block generated successfully
    return {
        "exercises": exercises,
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        },
        "difficulty": difficulty
    }
def divide_number_into_parts(number, parts):
    """Split *number* into *parts* near-equal integers, queued in order.

    The first ``number % parts`` entries receive one extra unit so the
    entries sum to *number*. Returns ``None`` when *number* is too small
    to give every part at least 1.
    """
    if number < parts:
        return None
    base, extra = divmod(number, parts)
    sizes = queue.Queue()
    for index in range(parts):
        sizes.put(base + 1 if index < extra else base)
    return sizes
def fix_exercise_ids(exercise, start_id):
    """Renumber the exercise's questions sequentially from *start_id*.

    Mutates ``exercise["questions"]`` in place, assigning string ids
    start_id, start_id + 1, ... in order, and returns the same exercise.
    """
    for offset, question in enumerate(exercise["questions"]):
        question["id"] = str(start_id + offset)
    return exercise
def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
    """Replace the first occurrence of each word with a numbered placeholder.

    The i-th word (whole-word, case-insensitive match) becomes ``{{id}}``
    with ids counting up from *start_id*; later occurrences stay intact.
    """
    for placeholder_id, word in enumerate(words_to_replace, start=start_id):
        word_pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
        text = word_pattern.sub('{{' + str(placeholder_id) + '}}', text, 1)
    return text
def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
    """Blank one word out of each note with a numbered placeholder.

    notes[i] has the first whole-word, case-insensitive occurrence of
    words_to_replace[i] replaced by ``{{start_id + i}}``. Returns a new
    list; assumes words_to_replace has an entry for every note.
    """
    result = []
    for offset, note in enumerate(notes):
        target = words_to_replace[offset]
        pattern = re.compile(r'\b' + re.escape(target) + r'\b', re.IGNORECASE)
        result.append(pattern.sub('{{' + str(start_id + offset) + '}}', note, 1))
    return result
def add_random_words_and_shuffle(word_array, num_random_words):
    """Pad the answer words with random distractors and shuffle the result.

    Draws *num_random_words* extra words via wonderwords' RandomWord so the
    learner sees more options than blanks, then returns the shuffled mix.
    """
    distractors = RandomWord().random_words(num_random_words)
    mixed = word_array + distractors
    random.shuffle(mixed)
    return mixed
def fillblanks_build_solutions_array(words, start_id):
    """Pair each answer word with its placeholder id, counting from *start_id*."""
    return [
        {"id": str(placeholder_id), "solution": word}
        for placeholder_id, word in enumerate(words, start=start_id)
    ]
def remove_excess_questions(questions: list, quantity: int):
    """Drop the last *quantity* questions whose solution is 'true'.

    Walks the list from the end so the most recently generated 'true'
    statements are removed first, preserving the original order of the
    remaining questions.

    Args:
        questions: Question dicts, each expected to carry a 'solution' key.
        quantity: Maximum number of 'true' questions to remove.

    Returns:
        A new list with the excess 'true' questions removed.

    Note: the original annotated ``questions`` with the list literal ``[]``,
    which is not a type; corrected to ``list``.
    """
    removed = 0
    kept = []
    for item in reversed(questions):
        if item.get('solution') == 'true' and removed < quantity:
            removed += 1  # skip — this is an excess 'true' statement
        else:
            kept.append(item)
    kept.reverse()  # restore original ordering
    return kept
def build_write_blanks_text(questions: list, start_id):
    """Concatenate question texts, each followed by its numbered blank.

    Produces ``question{{id}}\\n`` per question — the trailing separator is
    the literal two-character sequence backslash + n, as the client expects
    — with placeholder ids counting up from *start_id*.

    Note: the original annotated ``questions`` with the list literal ``[]``
    (not a type; corrected to ``list``) and built the string with repeated
    concatenation (quadratic); switched to str.join.
    """
    parts = []
    for placeholder_id, q in enumerate(questions, start=start_id):
        parts.append(q["question"] + '{{' + str(placeholder_id) + '}}' + "\\n")
    return "".join(parts)
def build_write_blanks_text_form(form: list, start_id):
    """Turn form lines ("Label: value") into fill-the-blank text.

    For each entry, one randomly chosen multi-letter word from the part
    after ':' is replaced with a numbered placeholder ({{id}}, ids counting
    from *start_id*). Lines are joined with the literal two-character
    sequence backslash + n.

    Returns:
        (blanked_text, replaced_words) where replaced_words[i] is the word
        removed from form[i] (i.e. the expected answer for that blank).
    """
    result = ""
    replaced_words = []
    for i, entry in enumerate(form, start=start_id):
        placeholder = '{{' + str(i) + '}}'
        # Use regular expression to find the string after ':'
        # NOTE(review): an entry without ':' makes match None and the next
        # line raises AttributeError — presumably upstream always produces
        # "key: value" lines; confirm.
        match = re.search(r'(?<=:)\s*(.*)', entry)
        # Extract the matched string (the value part of the pair)
        original_string = match.group(1)
        # Split the string into words
        words = re.findall(r'\b\w+\b', original_string)
        # Remove words with only one letter — they make poor blanks
        # NOTE(review): if every word is one letter, random.choice below
        # raises IndexError on the empty list — confirm inputs.
        filtered_words = [word for word in words if len(word) > 1]
        # Choose a random word from the list of words to blank out
        selected_word = random.choice(filtered_words)
        pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
        # Replace the chosen word with the placeholder (first occurrence only)
        replaced_string = pattern.sub(placeholder, original_string, 1)
        # Splice the blanked value back into the full "key: value" entry
        replaced_string = entry.replace(original_string, replaced_string)
        result = result + replaced_string + "\\n"
        # Record the removed word — it becomes the solution for this blank
        replaced_words.append(selected_word)
    return result, replaced_words
def build_write_blanks_solutions(questions: list, start_id):
    """Build the solutions array for a write-blanks exercise.

    Each question's "possible_answers" is normalised to a list (a bare
    string becomes a one-element list) and paired with its placeholder id,
    counting from *start_id*.

    Note: the original annotated ``questions`` with the list literal ``[]``,
    which is not a type; corrected to ``list``.
    """
    solutions = []
    for placeholder_id, q in enumerate(questions, start=start_id):
        answers = q["possible_answers"]
        if isinstance(answers, str):
            answers = [answers]  # normalise a single bare answer to list form
        solutions.append({"id": str(placeholder_id), "solution": answers})
    return solutions
def build_write_blanks_solutions_listening(words: list, start_id):
    """Build the solutions array for a listening write-blanks exercise.

    Like build_write_blanks_solutions but takes bare answer words: each
    word is wrapped in a list (unless it already is one) and paired with
    its placeholder id, counting from *start_id*.

    Note: the original annotated ``words`` with the list literal ``[]``,
    which is not a type; corrected to ``list``.
    """
    return [
        {"id": str(i), "solution": [word] if isinstance(word, str) else word}
        for i, word in enumerate(words, start=start_id)
    ]
def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the model for a full-length IELTS reading passage on *topic*.

    Returns the parsed response (expected keys: "title" and "text");
    callers treat an empty-string result as a failed generation and retry.
    """
    # Local variable renamed: it previously shadowed the module-level
    # gen_reading_passage_1 function.
    prompt = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
             "of '" + topic + "'. The passage should offer a substantial amount of " \
             "information, analysis, or narrative " \
             "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
             "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
             "Make sure that the generated text does not contain forbidden subjects in muslim countries." \
             "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
    n_tokens = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, prompt, n_tokens, GEN_TEXT_FIELDS,
                                     GEN_QUESTION_TEMPERATURE)
def generate_listening_1_conversation(topic: str):
    """Generate a two-person conversation for Listening part 1 and assign voices.

    Returns (raw_conversation_text, parsed) where parsed is the dict
    {"conversation": [{"name", "gender", "text", "voice"}, ...]}; every
    speaker gets a neural voice id, reused across that speaker's segments
    and never shared between speakers.
    """
    # First model call: free-form conversation text.
    gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
        "social context of '" + topic + "'. Please include random names and genders " \
        "for the characters in your dialogue. " \
        "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        gen_listening_1_conversation_2_people,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE
    )
    # Second model call: parse the raw text into structured JSON segments.
    conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
    token_count = count_tokens(parse_conversation)["n_tokens"]
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parse_conversation,
        token_count,
        ['conversation'],
        GEN_QUESTION_TEMPERATURE
    )
    # Assign a text-to-speech voice to each segment, keyed by speaker name
    # so a speaker sounds the same throughout; chosen_voices prevents two
    # speakers from sharing one voice.
    chosen_voices = []
    name_to_voice = {}
    for segment in processed['conversation']:
        if 'voice' not in segment:
            name = segment['name']
            if name in name_to_voice:
                voice = name_to_voice[name]
            else:
                voice = None
                # NOTE(review): if every voice of the needed gender is
                # already taken, this loop never terminates — presumably
                # the voice pools exceed the speaker count; confirm.
                while voice is None:
                    if segment['gender'].lower() == 'male':
                        available_voices = MALE_NEURAL_VOICES
                    else:
                        available_voices = FEMALE_NEURAL_VOICES
                    chosen_voice = random.choice(available_voices)['Id']
                    if chosen_voice not in chosen_voices:
                        voice = chosen_voice
                        chosen_voices.append(voice)
                name_to_voice[name] = voice
            segment['voice'] = voice
    return response, processed
def generate_listening_2_monologue(topic: str):
    """Generate a Listening part 2 monologue set in the given social context."""
    prompt = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
    n_tokens = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        prompt,
        n_tokens,
        None,
        GEN_QUESTION_TEMPERATURE
    )
def generate_listening_3_conversation(topic: str):
    """Generate an up-to-four-person conversation for Listening part 3 and assign voices.

    Returns (raw_conversation_text, parsed) where parsed is the dict
    {"conversation": [{"name", "gender", "text", "voice"}, ...]}; each
    speaker keeps one randomly chosen neural voice across segments (unlike
    part 1, different speakers may end up with the same voice).
    """
    # First model call: free-form conversation text.
    gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
        "in the everyday social context of '" + topic + \
        "'. Please include random names and genders for the characters in your dialogue. " \
        "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        gen_listening_3_conversation_4_people,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE
    )
    # Second model call: parse the raw text into structured JSON segments.
    conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
    token_count = count_tokens(parse_conversation)["n_tokens"]
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parse_conversation,
        token_count,
        ['conversation'],
        GEN_QUESTION_TEMPERATURE
    )
    # Give every segment a text-to-speech voice, keyed by speaker name so a
    # speaker sounds the same throughout the conversation.
    name_to_voice = {}
    for segment in processed['conversation']:
        if 'voice' not in segment:
            name = segment['name']
            if name in name_to_voice:
                voice = name_to_voice[name]
            else:
                # Pick from the pool matching the speaker's stated gender.
                if segment['gender'].lower() == 'male':
                    voice = random.choice(MALE_NEURAL_VOICES)['Id']
                else:
                    voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
                name_to_voice[name] = voice
            segment['voice'] = voice
    return response, processed
def generate_listening_4_monologue(topic: str):
    """Generate a Listening part 4 monologue on an academic subject.

    Fix: the prompt read "a comprehensive monologue an academic subject of"
    — the missing preposition garbled the instruction; now "monologue on an
    academic subject of".
    """
    gen_listening_4_monologue_academic = "Generate a comprehensive monologue on an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
    token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        gen_listening_4_monologue_academic,
        token_count,
        None,
        GEN_QUESTION_TEMPERATURE
    )
    return response
def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
    """Generate one exercise block per requested type for a reading passage.

    Pops each type's question count from *number_of_exercises_q* (a Queue)
    and advances *start_id* by that count after every type. A write-blanks
    exercise whose answers break the three-word limit is replaced by {} so
    the caller can detect the failure and regenerate.
    """
    exercises = []
    for exercise_type in req_exercises:
        count = number_of_exercises_q.get()
        if exercise_type == "fillBlanks":
            exercise = gen_summary_fill_blanks_exercise(passage, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added fill blanks: " + str(exercise))
        elif exercise_type == "trueFalse":
            exercise = gen_true_false_not_given_exercise(passage, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added trueFalse: " + str(exercise))
        elif exercise_type == "writeBlanks":
            exercise = gen_write_blanks_exercise(passage, count, start_id, difficulty)
            if answer_word_limit_ok(exercise):
                exercises.append(exercise)
                print("Added write blanks: " + str(exercise))
            else:
                exercises.append({})
                print("Did not add write blanks because it did not respect word limit")
        elif exercise_type == "matchSentences":
            exercise = gen_paragraph_match_exercise(passage, count, start_id)
            exercises.append(exercise)
            print("Added paragraph match: " + str(exercise))
        start_id += count
    return exercises
def answer_word_limit_ok(question):
    """Return True when every accepted answer is at most three words long."""
    for solution in question["solutions"]:
        for option in solution["solution"]:
            if len(option.split()) > 3:
                return False
    return True
def contains_empty_dict(arr):
    """Return True if any element equals {} (the marker for a failed exercise)."""
    return {} in arr
def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id,
                                              difficulty):
    """Generate one listening exercise block per requested type for a conversation.

    Pops each type's question count from *number_of_exercises_q* (a Queue)
    and advances *start_id* by that count after every type.
    """
    exercises = []
    for exercise_type in req_exercises:
        count = number_of_exercises_q.get()
        if exercise_type == "multipleChoice":
            exercise = gen_multiple_choice_exercise_listening_conversation(
                conversation, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added multiple choice: " + str(exercise))
        elif exercise_type == "writeBlanksQuestions":
            exercise = gen_write_blanks_questions_exercise_listening_conversation(
                conversation, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added write blanks questions: " + str(exercise))
        elif exercise_type == "writeBlanksFill":
            exercise = gen_write_blanks_notes_exercise_listening_conversation(
                conversation, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added write blanks notes: " + str(exercise))
        elif exercise_type == "writeBlanksForm":
            exercise = gen_write_blanks_form_exercise_listening_conversation(
                conversation, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added write blanks form: " + str(exercise))
        start_id += count
    return exercises
def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id,
                                           difficulty):
    """Generate one listening exercise block per requested type for a monologue.

    Pops each type's question count from *number_of_exercises_q* (a Queue)
    and advances *start_id* by that count after every type.
    """
    exercises = []
    for exercise_type in req_exercises:
        count = number_of_exercises_q.get()
        if exercise_type == "multipleChoice":
            exercise = gen_multiple_choice_exercise_listening_monologue(
                monologue, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added multiple choice: " + str(exercise))
        elif exercise_type == "writeBlanksQuestions":
            exercise = gen_write_blanks_questions_exercise_listening_monologue(
                monologue, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added write blanks questions: " + str(exercise))
        elif exercise_type == "writeBlanksFill":
            exercise = gen_write_blanks_notes_exercise_listening_monologue(
                monologue, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added write blanks notes: " + str(exercise))
        elif exercise_type == "writeBlanksForm":
            exercise = gen_write_blanks_form_exercise_listening_monologue(
                monologue, count, start_id, difficulty)
            exercises.append(exercise)
            print("Added write blanks form: " + str(exercise))
        start_id += count
    return exercises
def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
    """Create a multiple-choice exercise for a reading text.

    Two model calls: first generate free-form questions, then have the
    model normalise its own output into the structured questions JSON.
    Question ids are renumbered sequentially from *start_id*.
    """
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " " + difficulty + " difficulty multiple choice questions for this text: " \
        "'" + text + "'\n" \
        "Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
        "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
        "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
        "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
        "\"solution\": \"C\", \"variant\": \"text\"}]"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    # First pass: no JSON fields requested — the raw completion is kept.
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)
    # Second pass: parse into strict JSON, extracting the "questions" field.
    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
        "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
        "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
        "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
        "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                         ["questions"],
                                         GEN_QUESTION_TEMPERATURE)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }
def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    """Create a fill-the-blanks exercise from a summary of the passage.

    Summarises *text*, asks the model for *quantity* single words to blank
    out, replaces them with numbered placeholders (ids from *start_id*),
    and mixes five random distractor words into the option list.
    """
    gen_summary_for_text = "Summarize this text: " + text
    n_tokens = count_tokens(gen_summary_for_text)["n_tokens"]
    summary = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, n_tokens,
                                        None,
                                        GEN_QUESTION_TEMPERATURE)
    gen_words_to_replace = "Select " + str(
        quantity) + " " + difficulty + " difficulty words, it must be words and not expressions, from the summary and respond in this " \
        "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + summary
    n_tokens = count_tokens(gen_words_to_replace)["n_tokens"]
    blank_words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, n_tokens,
                                            ["words"],
                                            GEN_QUESTION_TEMPERATURE)["words"]
    blanked_summary = replace_first_occurrences_with_placeholders(summary, blank_words, start_id)
    return {
        "allowRepetition": True,
        "id": str(uuid.uuid4()),
        "prompt": "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
                  "more words than spaces so you will not use them all. You may use any of the words more than once.",
        "solutions": fillblanks_build_solutions_array(blank_words, start_id),
        "text": blanked_summary,
        "type": "fillBlanks",
        "words": add_random_words_and_shuffle(blank_words, 5)
    }
def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
    """Create a True/False/Not Given exercise for the passage.

    The model sometimes returns more statements than requested; surplus
    'true' statements are trimmed from the end, then ids are assigned
    sequentially from *start_id*.
    """
    gen_true_false_not_given = "Generate " + str(
        quantity) + " " + difficulty + " difficulty statements in JSON format (True, False, or Not Given) " \
        "based on the provided text. Ensure that your statements " \
        "accurately represent information or inferences from the " \
        "text, and provide a variety of responses, including, at least one of each True, " \
        "False, and Not Given, as appropriate, in the JSON structure " \
        "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
        "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
        "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
    n_tokens = count_tokens(gen_true_false_not_given)["n_tokens"]
    statements = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, n_tokens,
                                           ["prompts"],
                                           GEN_QUESTION_TEMPERATURE)["prompts"]
    surplus = len(statements) - quantity
    if surplus > 0:
        statements = remove_excess_questions(statements, surplus)
    for question_id, statement in enumerate(statements, start=start_id):
        statement["id"] = str(question_id)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Do the following statements agree with the information given in the Reading Passage?",
        "questions": statements,
        "type": "trueFalse"
    }
def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    """Create a short-answer write-blanks exercise (max three words per answer)."""
    gen_short_answer_questions = "Generate " + str(
        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers, " \
        "must have maximum 3 words per answer, about this text: '" + text + "'. " \
        "Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \
        "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
    n_tokens = count_tokens(gen_short_answer_questions)["n_tokens"]
    # Truncate in case the model over-delivers.
    short_answers = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_short_answer_questions, n_tokens,
                                              ["questions"],
                                              GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
        "solutions": build_write_blanks_solutions(short_answers, start_id),
        "text": build_write_blanks_text(short_answers, start_id),
        "type": "writeBlanks"
    }
def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    """Create a match-headings exercise: pair each paragraph with a generated heading.

    Paragraphs are lettered A, B, ...; the model supplies one heading per
    paragraph; the headings are shuffled and capped at *quantity*, with the
    paragraph letter recorded as each heading's solution.
    """
    paragraphs = assign_letters_to_paragraphs(text)
    heading_prompt = (
        'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
        '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
        'The paragraphs are these: ' + str(paragraphs))
    token_count = count_tokens(heading_prompt)["n_tokens"]
    headings = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count,
                                         ["headings"],
                                         GEN_QUESTION_TEMPERATURE)["headings"]
    options = []
    # Attach each heading dict ({"heading": ...}) to its paragraph and
    # expose the lettered paragraphs as the answer options.
    # NOTE(review): headings[i] raises IndexError if the model returns
    # fewer headings than paragraphs — confirm callers tolerate/retry.
    for i, paragraph in enumerate(paragraphs, start=0):
        paragraph["heading"] = headings[i]
        options.append({
            "id": paragraph["letter"],
            "sentence": paragraph["paragraph"]
        })
    # Shuffle so headings are not presented in paragraph order.
    random.shuffle(paragraphs)
    sentences = []
    for i, paragraph in enumerate(paragraphs, start=start_id):
        sentences.append({
            "id": i,
            "sentence": paragraph["heading"]["heading"],
            "solution": paragraph["letter"]
        })
    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": options,
        "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
        "sentences": sentences[:quantity],
        "type": "matchSentences"
    }
def assign_letters_to_paragraphs(paragraphs):
    """Label each newline-separated paragraph with a capital letter (A, B, ...).

    NOTE: blank lines between paragraphs also consume a letter, and more
    than 26 paragraphs exhausts the alphabet (next() raises StopIteration).
    """
    letters = iter(string.ascii_uppercase)
    return [
        {'paragraph': chunk.strip(), 'letter': next(letters)}
        for chunk in paragraphs.split("\n")
    ]
def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Create a multiple-choice exercise (4 options each) from a conversation.

    Two model calls: generate free-form questions, then parse them into the
    structured questions JSON; ids are renumbered from *start_id*.

    Fix: the generation prompt read "questions of 4 options of for this
    conversation" — the stray "of" is removed.
    """
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " " + difficulty + " difficulty multiple choice questions of 4 options for this conversation: " \
        "'" + text + "'"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)
    # Second pass: normalise the raw completion into strict JSON.
    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
        "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
        "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
        "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
        "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                         ["questions"],
                                         GEN_QUESTION_TEMPERATURE)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }
def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Create a multiple-choice exercise from a monologue.

    Two model calls: generate free-form questions, then parse them into the
    structured questions JSON; ids are renumbered from *start_id*.
    """
    generation_prompt = "Generate " + str(
        quantity) + " " + difficulty + " difficulty multiple choice questions for this monologue: " \
        "'" + text + "'"
    n_tokens = count_tokens(generation_prompt)["n_tokens"]
    raw_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, generation_prompt, n_tokens,
                                              None,
                                              GEN_QUESTION_TEMPERATURE)
    # Second pass: normalise the raw completion into strict JSON.
    parsing_prompt = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
        "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
        "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
        "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
        "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + raw_questions + "'"
    n_tokens = count_tokens(parsing_prompt)["n_tokens"]
    parsed = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parsing_prompt, n_tokens,
                                       ["questions"],
                                       GEN_QUESTION_TEMPERATURE)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(parsed, start_id)["questions"],
        "type": "multipleChoice",
    }
def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Create a write-blanks exercise of short-answer questions for a listening conversation.

    Fixes in the prompt: the source text is a conversation (the original
    said "monologue", a copy-paste from the monologue variant), and two
    missing spaces that fused words ("andrespond", "]}]}.The").
    """
    gen_write_blanks_questions = "Generate " + str(
        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
        "(max 3 words per answer), about a conversation and " \
        "respond in this JSON format: {\"questions\": [{\"question\": question, " \
        "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}. " \
        "The conversation is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Create a write-blanks exercise of short-answer questions for a listening monologue.

    Fix: two missing spaces in the prompt fused words together
    ("andrespond", "]}]}.The"); both are restored.
    """
    gen_write_blanks_questions = "Generate " + str(
        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
        "(max 3 words per answer), about a monologue and " \
        "respond in this JSON format: {\"questions\": [{\"question\": question, " \
        "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}. " \
        "The monologue is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }
def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Create a fill-the-blank notes exercise from a listening conversation.

    Generates note phrases, asks the model to pick one word per note, and
    blanks those words out with numbered placeholders.

    Fixes: duplicated "and and" and wrong "monologue" wording in the notes
    prompt, and the token budget for the word-selection call, which
    previously reused the count computed for the notes prompt.
    """
    gen_write_blanks_notes = "Generate " + str(
        quantity) + " " + difficulty + " difficulty notes taken from the conversation and respond in this " \
        "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The conversation is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
                                          ["notes"],
                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
        "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    # Recompute the token count for the second prompt (the original reused
    # the notes prompt's count here).
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Create a fill-the-blank notes exercise from a listening monologue.

    Generates note phrases, asks the model to pick one word per note, and
    blanks those words out with numbered placeholders.

    Fix: the token budget for the word-selection call is now computed from
    its own prompt (the original reused the notes prompt's count).
    """
    gen_write_blanks_notes = "Generate " + str(
        quantity) + " " + difficulty + " difficulty notes taken from the monologue and respond in this " \
        "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
                                          ["notes"],
                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
        "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    # Recompute the token count for the second prompt (the original reused
    # the notes prompt's count here).
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                      ["words"],
                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Create a fill-the-form exercise from a listening conversation.

    Generates a key-value form about the conversation, parses it into a
    list of strings, then blanks one word per line with numbered
    placeholders (ids from *start_id*).
    """
    form_prompt = "Generate a form with " + str(
        quantity) + " " + difficulty + " difficulty key-value pairs about the conversation. " \
        "The conversation is this: '" + text + "'"
    n_tokens = count_tokens(form_prompt)["n_tokens"]
    raw_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, form_prompt, n_tokens,
                                         None,
                                         GEN_QUESTION_TEMPERATURE)
    parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + raw_form + "'"
    n_tokens = count_tokens(parse_form)["n_tokens"]
    parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, n_tokens,
                                            ["form"],
                                            GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    blanked_form, answer_words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(answer_words, start_id),
        "text": blanked_form,
        "type": "writeBlanks"
    }
def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Create a fill-the-form exercise from a listening monologue.

    Generates a key-value form about the monologue, parses it into a list
    of strings, then blanks one word per line with numbered placeholders
    (ids from *start_id*).
    """
    form_prompt = "Generate a form with " + str(
        quantity) + " " + difficulty + " difficulty key-value pairs about the monologue. " \
        "The monologue is this: '" + text + "'"
    n_tokens = count_tokens(form_prompt)["n_tokens"]
    raw_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, form_prompt, n_tokens,
                                         None,
                                         GEN_QUESTION_TEMPERATURE)
    parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + raw_form + "'"
    n_tokens = count_tokens(parse_form)["n_tokens"]
    parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, n_tokens,
                                            ["form"],
                                            GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    blanked_form, answer_words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(answer_words, start_id),
        "text": blanked_form,
        "type": "writeBlanks"
    }
def _parse_mc_level_batch(raw_questions: str):
    """Parse one batch of raw multiple-choice text into the exam JSON schema.

    Sends the instruct model a prompt containing an example of the expected
    {"questions": [...]} structure and returns the parsed dict.
    """
    parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
                          '[{"id": "A", "text": '
                          '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                          '"Happy"}, {"id": "D", "text": "Jump"}], '
                          '"prompt": "Which of the following is a conjunction?", '
                          '"solution": "A", "variant": "text"}]}\'\n '
                          '\nThe questions: "' + raw_questions + '"')
    token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                     ["questions"],
                                     GEN_QUESTION_TEMPERATURE)


def gen_multiple_choice_level(quantity: int, start_id=1):
    """Generate the multiple-choice exercise used by the English level exam.

    Asks GPT-4 for `quantity` numbered questions across difficulty levels,
    parses the batch in two halves with the instruct model, de-duplicates
    against previously stored "level" exams, and renumbers the questions
    starting at `start_id`.

    Returns an exercise dict: {"id", "prompt", "questions", "type"}.
    NOTE(review): retries recurse without a depth limit — a persistently
    malformed model response would recurse indefinitely.
    """
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
                    "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
                    "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
                    "every question only has 1 correct answer."
    messages = [{
        "role": "user",
        "content": gen_multiple_choice_for_text
    }]
    # NOTE(review): the -300 offset mirrors the original budget tweak;
    # presumably it reserves completion room — confirm against the caller.
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
    mc_questions = make_openai_call(GPT_4_PREVIEW, messages, token_count,
                                    None,
                                    GEN_QUESTION_TEMPERATURE)
    # The model is expected to number the questions; '25' missing presumably
    # means the batch came back short, so regenerate.
    if '25' not in mc_questions:
        return gen_multiple_choice_level(quantity, start_id)
    # Split the batch at question 13 so each half fits one parse call.
    # maxsplit=1 keeps everything after the first '13' in one piece even if
    # the substring occurs again later (the unbounded split dropped that tail).
    split_mc_questions = mc_questions.split('13', 1)
    if len(split_mc_questions) < 2:
        # '25' was present but '13' was not (e.g. it only appeared inside an
        # option); retry instead of raising IndexError on the missing half.
        return gen_multiple_choice_level(quantity, start_id)
    question = _parse_mc_level_batch(split_mc_questions[0])
    print(question)
    question_2 = _parse_mc_level_batch('13' + split_mc_questions[1])
    print(question_2)
    question["questions"].extend(question_2["questions"])
    all_exams = get_all("level")
    seen_keys = set()
    # Swap out any question already seen in this batch or in a stored exam.
    for i in range(len(question["questions"])):
        question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
                                                                         question,
                                                                         seen_keys)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }
def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_keys):
    """Return an exercise that is unique within this run and across stored exams.

    `seen_keys` tracks (prompt, sorted option texts) tuples already accepted in
    this batch; it is mutated in place and also returned. If `current_exercise`
    collides with a seen key or with a question in any stored exam, a fresh
    question is generated and re-checked recursively.

    Returns a (exercise_dict, seen_keys) tuple.
    """
    # Key on prompt + option texts (order-insensitive) so reordered options
    # still count as the same question.
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
    if key in seen_keys:
        # Duplicate within this batch: generate a replacement and re-check it.
        return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
    seen_keys.add(key)
    for exam in all_exams:
        # Guard the lookup chain: a stored exam may have no exercises (the
        # bare [0] index used to raise IndexError on the .get([]) default).
        exercises = exam.to_dict().get("exercises", [])
        if not exercises:
            continue
        stored_questions = exercises[0].get("questions", [])
        if any(
                exercise["prompt"] == current_exercise["prompt"] and
                any(exercise["options"][0]["text"] == current_option["text"] for current_option in
                    current_exercise["options"])
                for exercise in stored_questions
        ):
            # Duplicate of a stored exam's question: replace and re-check.
            return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
    return current_exercise, seen_keys
def generate_single_mc_level_question():
    """Generate one replacement multiple-choice question for the level exam.

    Used when a freshly generated question turns out to be a duplicate: asks
    the instruct model for a single question of any difficulty, then has it
    re-emitted in the exam's JSON shape.
    """
    request_prompt = ("Generate 1 multiple choice question of 4 options for an english level exam, it can "
                      "be easy, intermediate or advanced.")
    raw_question = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        request_prompt,
        count_tokens(request_prompt)["n_tokens"] - 300,
        None,
        GEN_QUESTION_TEMPERATURE)
    # Second pass: coerce the free-form output into the exam's question schema.
    parse_prompt = ('Parse the question into this json format: {"id": "9", "options": '
                    '[{"id": "A", "text": '
                    '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                    '"Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}. '
                    '\nThe questions: "' + raw_question + '"')
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parse_prompt,
        count_tokens(parse_prompt, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"],
        ["options"],
        GEN_QUESTION_TEMPERATURE)