import queue import random import re import string import uuid import nltk from wonderwords import RandomWord from helper.constants import * from helper.firebase_helper import get_all from helper.openai_interface import make_openai_call, count_total_tokens from helper.speech_to_text_helper import has_x_words nltk.download('words') def gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=1): if (len(req_exercises) == 0): req_exercises = random.sample(READING_EXERCISE_TYPES, 2) if (number_of_exercises_q.empty()): number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises)) passage = generate_reading_passage_1_text(topic) if passage == "": return gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q, start_id) exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty) if contains_empty_dict(exercises): return gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q, start_id) return { "exercises": exercises, "text": { "content": passage["text"], "title": passage["title"] }, "difficulty": difficulty } def gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=14): if (len(req_exercises) == 0): req_exercises = random.sample(READING_EXERCISE_TYPES, 2) if (number_of_exercises_q.empty()): number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises)) passage = generate_reading_passage_2_text(topic) if passage == "": return gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q, start_id) exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty) if contains_empty_dict(exercises): return gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q, start_id) return { "exercises": exercises, "text": { 
"content": passage["text"], "title": passage["title"] }, "difficulty": difficulty } def gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=27): if (len(req_exercises) == 0): req_exercises = random.sample(READING_EXERCISE_TYPES, 2) if (number_of_exercises_q.empty()): number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises)) passage = generate_reading_passage_3_text(topic) if passage == "": return gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q, start_id) exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty) if contains_empty_dict(exercises): return gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q, start_id) return { "exercises": exercises, "text": { "content": passage["text"], "title": passage["title"] }, "difficulty": difficulty } def divide_number_into_parts(number, parts): if number < parts: return None part_size = number // parts remaining = number % parts q = queue.Queue() for i in range(parts): if i < remaining: q.put(part_size + 1) else: q.put(part_size) return q def fix_exercise_ids(exercise, start_id): # Initialize the starting ID for the first exercise current_id = start_id questions = exercise["questions"] # Iterate through questions and update the "id" value for question in questions: question["id"] = str(current_id) current_id += 1 return exercise def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id): for i, word in enumerate(words_to_replace, start=start_id): # Create a case-insensitive regular expression pattern pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE) placeholder = '{{' + str(i) + '}}' text = pattern.sub(placeholder, text, 1) return text def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id): replaced_notes = [] for i, note 
in enumerate(notes, start=0): word = words_to_replace[i] pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE) placeholder = '{{' + str(start_id + i) + '}}' note = pattern.sub(placeholder, note, 1) replaced_notes.append(note) return replaced_notes def add_random_words_and_shuffle(word_array, num_random_words): r = RandomWord() random_words_selected = r.random_words(num_random_words) combined_array = word_array + random_words_selected random.shuffle(combined_array) result = [] for i, word in enumerate(combined_array): letter = chr(65 + i) # chr(65) is 'A' result.append({"letter": letter, "word": word}) return result def fillblanks_build_solutions_array(words, start_id): solutions = [] for i, word in enumerate(words, start=start_id): solutions.append( { "id": str(i), "solution": word } ) return solutions def remove_excess_questions(questions: [], quantity): count_true = 0 result = [] for item in reversed(questions): if item.get('solution') == 'true' and count_true < quantity: count_true += 1 else: result.append(item) result.reverse() return result def build_write_blanks_text(questions: [], start_id): result = "" for i, q in enumerate(questions, start=start_id): placeholder = '{{' + str(i) + '}}' result = result + q["question"] + placeholder + "\\n" return result def build_write_blanks_text_form(form: [], start_id): result = "" replaced_words = [] for i, entry in enumerate(form, start=start_id): placeholder = '{{' + str(i) + '}}' # Use regular expression to find the string after ':' match = re.search(r'(?<=:)\s*(.*)', entry) # Extract the matched string original_string = match.group(1) # Split the string into words words = re.findall(r'\b\w+\b', original_string) # Remove words with only one letter filtered_words = [word for word in words if len(word) > 1] # Choose a random word from the list of words selected_word = random.choice(filtered_words) pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE) # Replace the chosen word with 
the placeholder replaced_string = pattern.sub(placeholder, original_string, 1) # Construct the final replaced string replaced_string = entry.replace(original_string, replaced_string) result = result + replaced_string + "\\n" # Save the replaced word or use it as needed # For example, you can save it to a file or a list replaced_words.append(selected_word) return result, replaced_words def build_write_blanks_solutions(questions: [], start_id): solutions = [] for i, q in enumerate(questions, start=start_id): solution = [q["possible_answers"]] if isinstance(q["possible_answers"], str) else q["possible_answers"] solutions.append( { "id": str(i), "solution": solution } ) return solutions def build_write_blanks_solutions_listening(words: [], start_id): solutions = [] for i, word in enumerate(words, start=start_id): solution = [word] if isinstance(word, str) else word solutions.append( { "id": str(i), "solution": solution } ) return solutions def get_perfect_answer(question: str, size: int): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"perfect_answer": "perfect answer for the question"}') }, { "role": "user", "content": ('Write a perfect answer for this writing exercise of a IELTS exam. Question: ' + question) }, { "role": "user", "content": ('The answer must have at least ' + str(size) + ' words') } ] token_count = count_total_tokens(messages) return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) def generate_reading_passage_1_text(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"title": "title of the text", "text": "generated text"}') }, { "role": "user", "content": ( 'Generate an extensive text for IELTS Reading Passage 1, of at least 800 words, on the topic ' 'of "' + topic + '". 
The passage should offer ' 'a substantial amount of information, ' 'analysis, or narrative relevant to the chosen ' 'subject matter. This text passage aims to ' 'serve as the primary reading section of an ' 'IELTS test, providing an in-depth and ' 'comprehensive exploration of the topic. ' 'Make sure that the generated text does not ' 'contain forbidden subjects in muslim countries.') }, { "role": "system", "content": ('The generated text should be fairly easy to understand and have multiple paragraphs.') }, ] token_count = count_total_tokens(messages) return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) def generate_reading_passage_2_text(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"title": "title of the text", "text": "generated text"}') }, { "role": "user", "content": ( 'Generate an extensive text for IELTS Reading Passage 2, of at least 800 words, on the topic ' 'of "' + topic + '". The passage should offer ' 'a substantial amount of information, ' 'analysis, or narrative relevant to the chosen ' 'subject matter. This text passage aims to ' 'serve as the primary reading section of an ' 'IELTS test, providing an in-depth and ' 'comprehensive exploration of the topic. 
' 'Make sure that the generated text does not ' 'contain forbidden subjects in muslim countries.') }, { "role": "system", "content": ('The generated text should be fairly hard to understand and have multiple paragraphs.') }, ] token_count = count_total_tokens(messages) return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) def generate_reading_passage_3_text(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"title": "title of the text", "text": "generated text"}') }, { "role": "user", "content": ( 'Generate an extensive text for IELTS Reading Passage 3, of at least 800 words, on the topic ' 'of "' + topic + '". The passage should offer ' 'a substantial amount of information, ' 'analysis, or narrative relevant to the chosen ' 'subject matter. This text passage aims to ' 'serve as the primary reading section of an ' 'IELTS test, providing an in-depth and ' 'comprehensive exploration of the topic. ' 'Make sure that the generated text does not ' 'contain forbidden subjects in muslim countries.') }, { "role": "system", "content": ('The generated text should be very hard to understand and include different points, theories, ' 'subtle differences of opinions from people, correctly sourced to the person who said it, ' 'over the specified topic and have multiple paragraphs.') }, { "role": "user", "content": "Use real text excerpts on you generated passage and cite the sources." 
} ] token_count = count_total_tokens(messages) return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) def generate_listening_1_conversation(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}') }, { "role": "user", "content": ( 'Compose an authentic conversation between two individuals in the everyday social context ' 'of "' + topic + '". Please include random names and genders for the characters in your dialogue. ' 'Make sure that the generated conversation does not contain forbidden subjects in ' 'muslim countries.') }, { "role": "user", "content": 'Try to have misleading discourse (refer multiple dates, multiple colors and etc).' }, { "role": "user", "content": 'Try to have spelling of names (cities, people, etc)' } ] token_count = count_total_tokens(messages) response = make_openai_call( GPT_4_O, messages, token_count, ["conversation"], GEN_QUESTION_TEMPERATURE ) chosen_voices = [] name_to_voice = {} for segment in response['conversation']: if 'voice' not in segment: name = segment['name'] if name in name_to_voice: voice = name_to_voice[name] else: voice = None while voice is None: if segment['gender'].lower() == 'male': available_voices = MALE_NEURAL_VOICES else: available_voices = FEMALE_NEURAL_VOICES chosen_voice = random.choice(available_voices)['Id'] if chosen_voice not in chosen_voices: voice = chosen_voice chosen_voices.append(voice) name_to_voice[name] = voice segment['voice'] = voice return response def generate_listening_2_monologue(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"monologue": "monologue"}') }, { "role": "user", "content": ( 'Generate a comprehensive monologue set in the social context ' 'of "' + topic + '". 
Make sure that the generated monologue does not contain forbidden subjects in ' 'muslim countries.') } ] token_count = count_total_tokens(messages) response = make_openai_call( GPT_4_O, messages, token_count, ["monologue"], GEN_QUESTION_TEMPERATURE ) return response["monologue"] def generate_listening_3_conversation(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}') }, { "role": "user", "content": ( 'Compose an authentic and elaborate conversation between up to four individuals in the everyday ' 'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. ' 'Make sure that the generated conversation does not contain forbidden subjects in ' 'muslim countries.') } ] token_count = count_total_tokens(messages) response = make_openai_call( GPT_4_O, messages, token_count, ["conversation"], GEN_QUESTION_TEMPERATURE ) name_to_voice = {} for segment in response['conversation']: if 'voice' not in segment: name = segment['name'] if name in name_to_voice: voice = name_to_voice[name] else: if segment['gender'].lower() == 'male': voice = random.choice(MALE_NEURAL_VOICES)['Id'] else: voice = random.choice(FEMALE_NEURAL_VOICES)['Id'] name_to_voice[name] = voice segment['voice'] = voice return response def generate_listening_4_monologue(topic: str): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"monologue": "monologue"}') }, { "role": "user", "content": ( 'Generate a comprehensive and complex monologue on the academic subject ' 'of: "' + topic + '". 
Make sure that the generated monologue does not contain forbidden subjects in ' 'muslim countries.') } ] token_count = count_total_tokens(messages) response = make_openai_call( GPT_4_O, messages, token_count, ["monologue"], GEN_QUESTION_TEMPERATURE ) return response["monologue"] def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty): exercises = [] for req_exercise in req_exercises: number_of_exercises = number_of_exercises_q.get() if req_exercise == "fillBlanks": question = gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added fill blanks: " + str(question)) elif req_exercise == "trueFalse": question = gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added trueFalse: " + str(question)) elif req_exercise == "writeBlanks": question = gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty) if answer_word_limit_ok(question): exercises.append(question) print("Added write blanks: " + str(question)) else: exercises.append({}) print("Did not add write blanks because it did not respect word limit") elif req_exercise == "paragraphMatch": question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id) exercises.append(question) print("Added paragraph match: " + str(question)) elif req_exercise == "ideaMatch": question = gen_idea_match_exercise(passage, number_of_exercises, start_id) exercises.append(question) print("Added idea match: " + str(question)) start_id = start_id + number_of_exercises return exercises def answer_word_limit_ok(question): # Check if any option in any solution has more than three words return not any(len(option.split()) > 3 for solution in question["solutions"] for option in solution["solution"]) def contains_empty_dict(arr): return any(elem == {} for elem in arr) def 
generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id, difficulty): exercises = [] for req_exercise in req_exercises: number_of_exercises = number_of_exercises_q.get() if req_exercise == "multipleChoice": question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id, difficulty, 4) exercises.append(question) print("Added multiple choice: " + str(question)) elif req_exercise == "multipleChoice3Options": question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id, difficulty, 3) exercises.append(question) print("Added multiple choice: " + str(question)) elif req_exercise == "writeBlanksQuestions": question = gen_write_blanks_questions_exercise_listening_conversation(conversation, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added write blanks questions: " + str(question)) elif req_exercise == "writeBlanksFill": question = gen_write_blanks_notes_exercise_listening_conversation(conversation, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added write blanks notes: " + str(question)) elif req_exercise == "writeBlanksForm": question = gen_write_blanks_form_exercise_listening_conversation(conversation, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added write blanks form: " + str(question)) start_id = start_id + number_of_exercises return exercises def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id, difficulty): exercises = [] for req_exercise in req_exercises: number_of_exercises = number_of_exercises_q.get() if req_exercise == "multipleChoice": question = gen_multiple_choice_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added multiple choice: " + str(question)) elif req_exercise == 
"writeBlanksQuestions": question = gen_write_blanks_questions_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added write blanks questions: " + str(question)) elif req_exercise == "writeBlanksFill": question = gen_write_blanks_notes_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added write blanks notes: " + str(question)) elif req_exercise == "writeBlanksForm": question = gen_write_blanks_form_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty) exercises.append(question) print("Added write blanks form: " + str(question)) start_id = start_id + number_of_exercises return exercises def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": ' '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": ' '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable ' 'energy sources?", "solution": "C", "variant": "text"}]}') }, { "role": "user", "content": ( 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions ' 'for this text:\n"' + text + '"') } ] token_count = count_total_tokens(messages) question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE) return { "id": str(uuid.uuid4()), "prompt": "Select the appropriate option.", "questions": fix_exercise_ids(question, start_id)["questions"], "type": "multipleChoice", } def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this 
format: ' '{ "summary": "summary" }') }, { "role": "user", "content": ('Summarize this text: "' + text + '"') } ] token_count = count_total_tokens(messages) response = make_openai_call(GPT_4_O, messages, token_count, ["summary"], GEN_QUESTION_TEMPERATURE) messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"words": ["word_1", "word_2"] }') }, { "role": "user", "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not ' 'expressions, from this:\n' + response[ "summary"]) } ] token_count = count_total_tokens(messages) words_response = make_openai_call(GPT_4_O, messages, token_count, ["summary"], GEN_QUESTION_TEMPERATURE) response["words"] = words_response["words"] replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id) options_words = add_random_words_and_shuffle(response["words"], 1) solutions = fillblanks_build_solutions_array(response["words"], start_id) return { "allowRepetition": True, "id": str(uuid.uuid4()), "prompt": "Complete the summary below. Write the letter of the corresponding word(s) for it.\\nThere are " "more words than spaces so you will not use them all. You may use any of the words more than once.", "solutions": solutions, "text": replaced_summary, "type": "fillBlanks", "words": options_words } def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, ' '{"prompt": "statement_2", "solution": "true/false/not_given"}]}') }, { "role": "user", "content": ( 'Generate ' + str( quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. 
' 'Ensure that your statements accurately represent ' 'information or inferences from the text, and ' 'provide a variety of responses, including, at ' 'least one of each True, False, and Not Given, ' 'as appropriate.\n\nReference text:\n\n ' + text) } ] token_count = count_total_tokens(messages) questions = make_openai_call(GPT_4_O, messages, token_count, ["prompts"], GEN_QUESTION_TEMPERATURE)["prompts"] if len(questions) > quantity: questions = remove_excess_questions(questions, len(questions) - quantity) for i, question in enumerate(questions, start=start_id): question["id"] = str(i) return { "id": str(uuid.uuid4()), "prompt": "Do the following statements agree with the information given in the Reading Passage?", "questions": questions, "type": "trueFalse" } def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}') }, { "role": "user", "content": ( 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the ' 'possible answers, must have maximum 3 words ' 'per answer, about this text:\n"' + text + '"') } ] token_count = count_total_tokens(messages) questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)["questions"][:quantity] return { "id": str(uuid.uuid4()), "maxWords": 3, "prompt": "Choose no more than three words and/or a number from the passage for each answer.", "solutions": build_write_blanks_solutions(questions, start_id), "text": build_write_blanks_text(questions, start_id), "type": "writeBlanks" } def gen_paragraph_match_exercise(text: str, quantity: int, start_id): paragraphs = assign_letters_to_paragraphs(text) messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"headings": [ 
{"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}') }, { "role": "user", "content": ( 'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str( paragraphs)) } ] token_count = count_total_tokens(messages) headings = make_openai_call(GPT_4_O, messages, token_count, ["headings"], GEN_QUESTION_TEMPERATURE)["headings"] options = [] for i, paragraph in enumerate(paragraphs, start=0): paragraph["heading"] = headings[i]["heading"] options.append({ "id": paragraph["letter"], "sentence": paragraph["paragraph"] }) random.shuffle(paragraphs) sentences = [] for i, paragraph in enumerate(paragraphs, start=start_id): sentences.append({ "id": i, "sentence": paragraph["heading"], "solution": paragraph["letter"] }) return { "id": str(uuid.uuid4()), "allowRepetition": False, "options": options, "prompt": "Choose the correct heading for paragraphs from the list of headings below.", "sentences": sentences[:quantity], "type": "matchSentences" } def gen_idea_match_exercise(text: str, quantity: int, start_id): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"ideas": [ ' '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, ' '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}' ']}') }, { "role": "user", "content": ( 'From the text extract ' + str( quantity) + ' ideas, theories, opinions and who they are from. 
The text: ' + str(text)) } ] token_count = count_total_tokens(messages) ideas = make_openai_call(GPT_4_O, messages, token_count, ["ideas"], GEN_QUESTION_TEMPERATURE)["ideas"] return { "id": str(uuid.uuid4()), "allowRepetition": False, "options": build_options(ideas), "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.", "sentences": build_sentences(ideas, start_id), "type": "matchSentences" } def build_options(ideas): options = [] letters = iter(string.ascii_uppercase) for idea in ideas: options.append({ "id": next(letters), "sentence": idea["from"] }) return options def build_sentences(ideas, start_id): sentences = [] letters = iter(string.ascii_uppercase) for idea in ideas: sentences.append({ "solution": next(letters), "sentence": idea["idea"] }) random.shuffle(sentences) for i, sentence in enumerate(sentences, start=start_id): sentence["id"] = i return sentences def assign_letters_to_paragraphs(paragraphs): result = [] letters = iter(string.ascii_uppercase) for paragraph in paragraphs.split("\n\n"): if has_x_words(paragraph, 10): result.append({'paragraph': paragraph.strip(), 'letter': next(letters)}) return result def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty, n_options=4): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": ' '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": ' '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable ' 'energy sources?", "solution": "C", "variant": "text"}]}') }, { "role": "user", "content": ( 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str( n_options) + ' options ' 'of for this conversation:\n"' + text + '"') } ] token_count 
= count_total_tokens(messages) question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE) return { "id": str(uuid.uuid4()), "prompt": "Select the appropriate option.", "questions": fix_exercise_ids(question, start_id)["questions"], "type": "multipleChoice", } def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty, n_options=4): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": ' '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": ' '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable ' 'energy sources?", "solution": "C", "variant": "text"}]}') }, { "role": "user", "content": ( 'Generate ' + str( quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str( n_options) + ' options ' 'of for this monologue:\n"' + text + '"') } ] token_count = count_total_tokens(messages) question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE) return { "id": str(uuid.uuid4()), "prompt": "Select the appropriate option.", "questions": fix_exercise_ids(question, start_id)["questions"], "type": "multipleChoice", } def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}') }, { "role": "user", "content": ( 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the ' 'possible answers (max 3 words per answer), ' 'about this conversation:\n"' + text + '"') } ] token_count = 
count_total_tokens(messages) questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)["questions"][:quantity] return { "id": str(uuid.uuid4()), "maxWords": 3, "prompt": "You will hear a conversation. Answer the questions below using no more than three words or a number accordingly.", "solutions": build_write_blanks_solutions(questions, start_id), "text": build_write_blanks_text(questions, start_id), "type": "writeBlanks" } def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty): messages = [ { "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: ' '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}') }, { "role": "user", "content": ( 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the ' 'possible answers (max 3 words per answer), ' 'about this monologue:\n"' + text + '"') } ] token_count = count_total_tokens(messages) questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)["questions"][:quantity] return { "id": str(uuid.uuid4()), "maxWords": 3, "prompt": "You will hear a monologue. 
def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a "writeBlanks" notes exercise from a listening monologue.

    Two model calls are made: the first asks for ``quantity`` notes about
    ``text``; the second asks for one word out of each note. The selected
    words are replaced by placeholders inside the notes and become the
    solutions of the exercise.

    Args:
        text: Monologue transcript the notes are generated from.
        quantity: Number of notes (and words) kept from the model output.
        start_id: Id of the first blank; forwarded to the placeholder and
            solution builders.
        difficulty: Difficulty label interpolated into the generation prompt.

    Returns:
        dict with the keys ``id``, ``maxWords``, ``prompt``, ``solutions``,
        ``text`` and ``type``.
    """
    notes_messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"notes": ["note_1", "note_2"]}')
        },
        {
            "role": "user",
            "content": (
                'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
                'monologue:\n"' + text + '"')
        }
    ]
    notes_token_count = count_total_tokens(notes_messages)
    notes = make_openai_call(GPT_4_O, notes_messages, notes_token_count, ["notes"],
                             GEN_QUESTION_TEMPERATURE)["notes"][:quantity]

    formatted_phrases = "\n".join(f"{i + 1}. {phrase}" for i, phrase in enumerate(notes))

    word_messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
        },
        {
            "role": "user",
            "content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
        }
    ]
    # The token count must describe the messages actually being sent. The
    # previous code reused the count of the notes prompt for this second call.
    word_token_count = count_total_tokens(word_messages)
    words = make_openai_call(GPT_4_O, word_messages, word_token_count, ["words"],
                             GEN_QUESTION_TEMPERATURE)["words"][:quantity]

    replaced_notes = replace_first_occurrences_with_placeholders_notes(notes, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        # Separator is a literal backslash followed by "n", exactly as before.
        # NOTE(review): presumably un-escaped by the consumer - confirm.
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }
def gen_multiple_choice_level(quantity: int, start_id=1):
    """Generate a multiple choice exercise for the english level exam.

    Asks the model for ``quantity`` four-option questions. When the model
    returns a different number of questions the whole generation is retried.
    Each question is then passed through ``replace_exercise_if_exists`` against
    the stored "level" exams, and the question ids are renumbered starting at
    ``start_id``.

    Returns:
        dict with the keys ``id``, ``prompt``, ``questions`` and ``type``.
    """
    system_prompt = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"questions": [{"id": "9", "options": '
        '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
        '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
        '"prompt": "Which of the following is a conjunction?", '
        '"solution": "A", "variant": "text"}]}'
    )
    user_prompt = (
        f"Generate {quantity} multiple choice questions of 4 options for an english level exam, "
        "some easy questions, some intermediate "
        "questions and some advanced questions. Ensure that the questions cover a range of topics such as "
        "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure "
        "every question only has 1 correct answer."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)

    # Guard clause: wrong number of questions -> start over.
    if len(question["questions"]) != quantity:
        return gen_multiple_choice_level(quantity, start_id)

    all_exams = get_all("level")
    seen_keys = set()
    for index, generated in enumerate(question["questions"]):
        replacement, seen_keys = replace_exercise_if_exists(all_exams, generated, question, seen_keys)
        question["questions"][index] = replacement

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }
def replace_blank_space_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
    """Return a blank-space question that is neither repeated nor already stored.

    A question counts as repeated when its (prompt, sorted option texts) key is
    already in ``seen_keys``. It counts as stored when some question in
    ``all_exams`` has the same prompt and its first option text equals any
    option text of the candidate. In either case a fresh question is requested
    from ``generate_single_mc_blank_space_level_question`` and checked again.

    The previous implementation handed the retry over to
    ``replace_exercise_if_exists_utas``, so any further collision was replaced
    by a generic question instead of a blank-space one. The retry now stays in
    this function and is a loop rather than recursion.

    Args:
        all_exams: Iterable of mappings; each is read via ``.get("questions", [])``.
        current_exercise: Candidate question with ``prompt`` and ``options``.
        current_exam: Unused; kept for signature compatibility.
        seen_keys: Set of keys accepted so far; mutated in place.

    Returns:
        Tuple ``(exercise, seen_keys)`` with the accepted question and the same
        set object that was passed in.
    """
    while True:
        key = (
            current_exercise['prompt'],
            tuple(sorted(option['text'] for option in current_exercise['options'])),
        )
        if key not in seen_keys:
            seen_keys.add(key)
            already_stored = any(
                stored["prompt"] == current_exercise["prompt"]
                and any(stored["options"][0]["text"] == option["text"]
                        for option in current_exercise["options"])
                for exam in all_exams
                for stored in exam.get("questions", [])
            )
            if not already_stored:
                return current_exercise, seen_keys
        # Repeated within this batch or already stored: draw a new
        # blank-space question and run the same checks on it.
        current_exercise = generate_single_mc_blank_space_level_question()
def generate_single_mc_level_question():
    """Ask the model for one four-option level-exam question and return it.

    The value returned is whatever ``make_openai_call`` yields for the
    request, with ``["options"]`` passed as its fields argument.
    """
    system_prompt = (
        'You are a helpful assistant designed to output JSON on this format: '
        '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
        '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
        '"prompt": "Which of the following is a conjunction?", '
        '"solution": "A", "variant": "text"}'
    )
    user_prompt = (
        'Generate 1 multiple choice question of 4 options for an english level exam, '
        'it can be easy, intermediate or advanced.'
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return make_openai_call(
        GPT_4_O,
        messages,
        count_total_tokens(messages),
        ["options"],
        GEN_QUESTION_TEMPERATURE,
    )
def parse_conversation(conversation_data):
    """Render a conversation mapping as one "name: text" line per message.

    Messages are read from the ``conversation`` key (empty when absent). A
    missing ``name`` becomes ``Unknown`` and a missing ``text`` becomes an
    empty string. Lines are joined with a newline.
    """
    return "\n".join(
        f"{turn.get('name', 'Unknown')}: {turn.get('text', '')}"
        for turn in conversation_data.get('conversation', [])
    )
def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=None):
    """Generate "find the wrong underlined word" multiple choice questions.

    Asks the model for ``quantity`` questions and retries the whole generation
    when a different number comes back. When ``all_exams`` is given, every
    question goes through ``replace_underlined_exercise_if_exists_utas``.
    Finally the ids are renumbered from ``start_id`` and the options of each
    question are shuffled.

    Args:
        quantity: Number of questions required.
        start_id: Id assigned to the first question.
        all_exams: Optional collection of stored exams used for the duplicate
            check; ``None`` skips the check.

    Returns:
        The model response dict with its ``questions`` list post-processed.
    """
    json_format = {
        "questions": [
            {
                "id": "9",
                "options": [
                    {"id": "A", "text": "a"},
                    {"id": "B", "text": "b"},
                    {"id": "C", "text": "c"},
                    {"id": "D", "text": "d"}
                ],
                "prompt": "prompt",
                "solution": "A",
                "variant": "text"
            }
        ]
    }

    # A space was missing after "advanced questions."; the blank-space sibling
    # prompt in this file already has it.
    gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (
        ' multiple choice questions of 4 options for an english '
        'level exam, some easy questions, some intermediate '
        'questions and some advanced questions. Ensure that '
        'the questions cover a range of topics such as verb '
        'tense, subject-verb agreement, pronoun usage, '
        'sentence structure, and punctuation. Make sure '
        'every question only has 1 correct answer.')

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": gen_multiple_choice_for_text
        },
        {
            "role": "user",
            "content": (
                'The type of multiple choice is the prompt has wrong words or group of words and the options are to '
                'find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
                'Prompt: "I complain about my boss all the time, but my colleagues thinks the boss is nice."\n'
                'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"')
        }
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)

    if len(question["questions"]) != quantity:
        # Forward all_exams on retry; it used to be dropped here, which
        # silently disabled the duplicate check for the retried batch.
        return gen_multiple_choice_underlined_utas(quantity, start_id, all_exams)

    if all_exams is not None:
        seen_keys = set()
        for index, generated in enumerate(question["questions"]):
            question["questions"][index], seen_keys = replace_underlined_exercise_if_exists_utas(
                all_exams, generated, question, seen_keys)

    response = fix_exercise_ids(question, start_id)
    response["questions"] = randomize_mc_options_order(response["questions"])
    return response
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=None):
    """Generate a text with ``quantity`` words replaced by ``{{id}}`` blanks.

    Args:
        quantity: Number of words the model is asked to blank out.
        start_id: Id of the first blank.
        size: Minimum number of words requested for the text.
        topic: Topic of the text. When omitted, a topic is drawn from
            ``mti_topics`` on every call. The former default,
            ``random.choice(mti_topics)``, was evaluated only once when the
            function was defined, so all defaulted calls shared one topic.

    Returns:
        The ``question`` entry of the model response.
    """
    if topic is None:
        topic = random.choice(mti_topics)

    json_format = {
        "question": {
            "words": [
                {"id": "1", "text": "a"},
                {"id": "2", "text": "b"},
                {"id": "3", "text": "c"},
                {"id": "4", "text": "d"}
            ],
            "text": "text"
        }
    }
    gen_text = 'Generate a text of at least ' + str(size) + ' words about the topic ' + topic + '.'

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": gen_text
        },
        {
            "role": "user",
            # Plain (non-f) strings: "{{id}}" is sent to the model verbatim.
            "content": (
                'From the generated text choose ' + str(quantity) + ' words (cannot be sequential words) to replace '
                'once with {{id}} where id starts on ' + str(start_id) + ' and is '
                'incremented for each word. The ids must be ordered throughout the text and the words must be '
                'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
        }
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count, ["question"], GEN_QUESTION_TEMPERATURE)
    return question["question"]
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
    """Generate ``mc_quantity`` four-option questions about ``text``.

    When the model returns a different number of questions, the generation is
    retried with the same arguments. The previous fallback called
    ``gen_multiple_choice_level``, which produces generic level-exam questions
    unrelated to ``text`` and returns a differently shaped dict.

    Args:
        text: Passage the questions must be about.
        start_id: Id assigned to the first question.
        mc_quantity: Number of questions required.

    Returns:
        The model response dict, with question ids renumbered from
        ``start_id`` and the options of every question shuffled.
    """
    json_format = {
        "questions": [
            {
                "id": "9",
                "options": [
                    {"id": "A", "text": "a"},
                    {"id": "B", "text": "b"},
                    {"id": "C", "text": "c"},
                    {"id": "D", "text": "d"}
                ],
                "prompt": "prompt",
                "solution": "A",
                "variant": "text"
            }
        ]
    }

    messages = [
        {
            "role": "system",
            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
        },
        {
            "role": "user",
            "content": 'Generate ' + str(
                mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
        },
        {
            "role": "user",
            "content": 'Make sure every question only has 1 correct answer.'
        }
    ]

    token_count = count_total_tokens(messages)
    question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)

    if len(question["questions"]) != mc_quantity:
        # Retry for the same passage, in line with the retry style of the
        # other generators in this file.
        return gen_text_multiple_choice_utas(text, start_id, mc_quantity)

    response = fix_exercise_ids(question, start_id)
    response["questions"] = randomize_mc_options_order(response["questions"])
    return response
def randomize_mc_options_order(questions):
    """Shuffle the options of every question and relabel them A, B, C, ...

    The questions and their option dicts are modified in place. After the
    shuffle the options receive consecutive uppercase letters as ids, and
    ``solution`` is updated to the new id of the option that was the solution
    before the shuffle.

    Changes from the earlier version:
      * the solution is tracked by object identity instead of by text, so
        options that share a text can no longer redirect the solution;
      * labels come from ``string.ascii_uppercase`` instead of a fixed
        four-letter list, so a question with more than four options no longer
        raises ``IndexError`` (labels are assigned to the first 26 options).

    Args:
        questions: Iterable of dicts with ``options`` (list of dicts having
            ``id`` and ``text``) and ``solution`` (id of the correct option).

    Returns:
        The same ``questions`` object.

    Raises:
        StopIteration: if no option id equals the question's ``solution``.
    """
    for question in questions:
        options = question['options']
        # Remember the correct option itself before ids are rewritten.
        correct_option = next(option for option in options if option['id'] == question['solution'])

        random.shuffle(options)

        for label, option in zip(string.ascii_uppercase, options):
            option['id'] = label
        question['solution'] = correct_option['id']
    return questions
def add_newline_before_hyphen(s):
    """Return ``s`` with every " -" (space + hyphen) turned into a newline + hyphen."""
    # Splitting on the separator and re-joining with the replacement is
    # equivalent to a global, non-overlapping replace.
    pieces = s.split(" -")
    return "\n-".join(pieces)
def gen_speaking_part_1(first_topic: str, second_topic: str, difficulty):
    """Generate five IELTS Speaking Part 1 questions covering two topics.

    Args:
        first_topic: Topic of the first three questions.
        second_topic: Topic of the last two questions.
        difficulty: Stored on the response under ``difficulty``. The prompt
            itself always asks for "easy difficulty".

    Returns:
        The model response dict, extended with ``type`` = 1 and
        ``difficulty``.
    """
    json_format = {
        "first_topic": "topic 1",
        "second_topic": "topic 2",
        "questions": [
            "Introductory question about the first topic, starting the topic with 'Let's talk about x' and then the "
            "question.",
            "Follow up question about the first topic",
            "Follow up question about the first topic",
            "Question about second topic",
            "Follow up question about the second topic",
        ]
    }

    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
        },
        {
            "role": "user",
            # Adjacent literals used to be glued without separators, which
            # produced "questiondoes" and "inmuslim" in the prompt; the
            # missing spaces are restored here.
            "content": (
                'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
                'that encourages candidates to delve deeply into '
                'personal experiences, preferences, or insights on the topic '
                'of "' + first_topic + '" and the topic of "' + second_topic + '". '
                'Make sure that the generated '
                'question '
                'does not contain forbidden '
                'subjects in '
                'muslim countries.')
        },
        {
            "role": "user",
            "content": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, '
                       'past and future).'
        },
        {
            "role": "user",
            "content": 'They must be 1 single question each and not be double-barreled questions.'
        }
    ]

    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_4_O, messages, token_count, ["first_topic"],
                                GEN_QUESTION_TEMPERATURE)
    response["type"] = 1
    response["difficulty"] = difficulty
    return response
def gen_speaking_part_3(topic: str, difficulty):
    """Generate five IELTS Speaking Part 3 discussion questions about ``topic``.

    Args:
        topic: Discussion topic; interpolated into the prompt and stored on
            the response.
        difficulty: Stored on the response under ``difficulty``. The prompt
            itself always asks for "hard difficulty".

    Returns:
        The model response dict with any leading "N." numbering stripped from
        each question, extended with ``type`` = 3, ``difficulty`` and
        ``topic``.
    """
    json_format = {
        "topic": "topic",
        "questions": [
            "Introductory question about the topic.",
            "Follow up question about the topic",
            "Follow up question about the topic",
            "Follow up question about the topic",
            "Follow up question about the topic"
        ]
    }

    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
        },
        {
            "role": "user",
            # A space was missing between "...related to the topic." and
            # "Make sure", so the two sentences ran together in the prompt.
            "content": (
                'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3 that encourage candidates to engage in a '
                'meaningful discussion on the topic of "' + topic + '". Provide inquiries, ensuring '
                'they explore various aspects, perspectives, and implications related to the topic. '
                'Make sure that the generated question does not contain forbidden subjects in muslim countries.')
        },
        {
            "role": "user",
            "content": 'They must be 1 single question each and not be double-barreled questions.'
        }
    ]

    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)

    # Strip a leading "<digits>." marker. re.sub leaves strings without the
    # marker untouched, so the former re.match pre-check was redundant.
    response["questions"] = [re.sub(r"^\d+\.\s*", "", question) for question in response["questions"]]

    response["type"] = 3
    response["difficulty"] = difficulty
    response["topic"] = topic
    return response
def gen_listening_section_4(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=31):
    """Generate listening section 4: a monologue plus its exercises.

    When ``req_exercises`` is empty, two exercise types are sampled from
    ``LISTENING_EXERCISE_TYPES``. When ``number_of_exercises_q`` is empty, it
    is replaced by the split of ``TOTAL_LISTENING_SECTION_4_EXERCISES`` across
    the requested exercise types.

    Returns:
        dict with the keys ``exercises``, ``text`` (the monologue) and
        ``difficulty``.
    """
    if not len(req_exercises):
        req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)

    if number_of_exercises_q.empty():
        number_of_exercises_q = divide_number_into_parts(
            TOTAL_LISTENING_SECTION_4_EXERCISES,
            len(req_exercises),
        )

    monologue = generate_listening_4_monologue(topic)

    section = {
        "exercises": generate_listening_monologue_exercises(
            str(monologue),
            req_exercises,
            number_of_exercises_q,
            start_id,
            difficulty,
        ),
    }
    section["text"] = monologue
    section["difficulty"] = difficulty
    return section