"""Generation of IELTS-style reading, listening and level-test exercises.

Every generator in this module builds a natural-language prompt, sends it to
an OpenAI model through the project helpers and reshapes the answer into the
exercise dictionaries returned to the caller.
"""
import itertools
import queue
import string
import nltk
import random
import re
import uuid

from helper.api_messages import QuestionType
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_instruct_call, make_openai_call
from helper.token_counter import count_tokens
from helper.constants import *
from wonderwords import RandomWord

nltk.download('words')

# Example payloads embedded in the prompts. They are kept byte-for-byte equal
# to the text the prompts have always contained.
_MC_QUESTIONS_EXAMPLE = (
    '"questions": [{"id": "9", "options": [{"id": "A", "text": '
    '"Economic benefits"}, {"id": "B", "text": "Government regulations"}, {"id": "C", "text": '
    '"Concerns about climate change"}, {"id": "D", "text": "Technological advancement"}], '
    '"prompt": "What is the main reason for the shift towards renewable energy sources?", '
    '"solution": "C", "variant": "text"}]'
)
_MC_PARSE_FORMAT = '{' + _MC_QUESTIONS_EXAMPLE + '}'

_LEVEL_QUESTION_EXAMPLE = (
    '{"id": "9", "options": '
    '[{"id": "A", "text": '
    '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
    '"Happy"}, {"id": "D", "text": "Jump"}], '
    '"prompt": "Which of the following is a conjunction?", '
    '"solution": "A", "variant": "text"}'
)

_WRITE_BLANKS_QUESTIONS_FORMAT = (
    "{\"questions\": [{\"question\": question, "
    "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
)
_CONVERSATION_JSON = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'


def _instruct(prompt, fields=None):
    """Send ``prompt`` to the instruct model and return the helper's answer.

    ``fields`` is forwarded to ``make_openai_instruct_call``. Callers in this
    module use the answer as plain text when ``fields`` is ``None`` and index
    it by the listed keys otherwise.
    """
    token_count = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, prompt, token_count, fields,
                                     GEN_QUESTION_TEMPERATURE)


def _gen_reading_passage(passage_type, total_exercises, start_id, topic, req_exercises, difficulty):
    """Generate one reading passage and its exercises.

    The whole generation is retried while any exercise came back as an empty
    dict. The retry keeps the same passage type and start id.
    """
    if not req_exercises:
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

    number_of_exercises_q = divide_number_into_parts(total_exercises, len(req_exercises))
    passage = generate_reading_passage(passage_type, topic)
    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q,
                                           start_id, difficulty)
    if contains_empty_dict(exercises):
        return _gen_reading_passage(passage_type, total_exercises, start_id, topic, req_exercises,
                                    difficulty)
    return {
        "exercises": exercises,
        "text": {
            "content": passage["text"],
            "title": passage["title"]
        },
        "difficulty": difficulty
    }


def gen_reading_passage_1(topic, req_exercises, difficulty):
    """Build reading passage 1 (question ids start at 1).

    When ``req_exercises`` is empty two exercise types are sampled from
    ``READING_EXERCISE_TYPES``.
    """
    return _gen_reading_passage(QuestionType.READING_PASSAGE_1, TOTAL_READING_PASSAGE_1_EXERCISES, 1,
                                topic, req_exercises, difficulty)


def gen_reading_passage_2(topic, req_exercises, difficulty):
    """Build reading passage 2 (question ids start at 14)."""
    # A failed attempt is retried as passage 2; it used to fall back to the
    # passage 1 generator, which produced the wrong passage type and ids.
    return _gen_reading_passage(QuestionType.READING_PASSAGE_2, TOTAL_READING_PASSAGE_2_EXERCISES, 14,
                                topic, req_exercises, difficulty)


def gen_reading_passage_3(topic, req_exercises, difficulty):
    """Build reading passage 3 (question ids start at 27)."""
    return _gen_reading_passage(QuestionType.READING_PASSAGE_3, TOTAL_READING_PASSAGE_3_EXERCISES, 27,
                                topic, req_exercises, difficulty)


def divide_number_into_parts(number, parts):
    """Split ``number`` into ``parts`` near-equal integers, larger ones first.

    Returns a ``queue.Queue`` holding the part sizes, or ``None`` when
    ``number`` is smaller than ``parts``.
    """
    if number < parts:
        return None

    part_size, remaining = divmod(number, parts)
    q = queue.Queue()
    for i in range(parts):
        # The first ``remaining`` parts absorb the remainder, one unit each.
        q.put(part_size + 1 if i < remaining else part_size)
    return q


def fix_exercise_ids(exercise, start_id):
    """Renumber ``exercise["questions"]`` in place with consecutive string ids.

    Returns the same ``exercise`` object.
    """
    for current_id, question in enumerate(exercise["questions"], start=start_id):
        question["id"] = str(current_id)
    return exercise


def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
    """Replace the first whole-word, case-insensitive hit of each word with ``{{id}}``.

    Ids are assigned in list order starting at ``start_id``.
    """
    for i, word in enumerate(words_to_replace, start=start_id):
        pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
        placeholder = '{{' + str(i) + '}}'
        text = pattern.sub(placeholder, text, 1)
    return text


def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
    """Blank out the i-th word in the i-th note and return the new notes.

    A note without a matching word (the word list is shorter than the note
    list) is returned unchanged instead of raising ``IndexError``.
    """
    replaced_notes = []
    for i, note in enumerate(notes):
        if i < len(words_to_replace):
            pattern = re.compile(r'\b' + re.escape(words_to_replace[i]) + r'\b', re.IGNORECASE)
            placeholder = '{{' + str(start_id + i) + '}}'
            note = pattern.sub(placeholder, note, 1)
        replaced_notes.append(note)
    return replaced_notes


def add_random_words_and_shuffle(word_array, num_random_words):
    """Return ``word_array`` plus ``num_random_words`` random words, shuffled."""
    distractors = RandomWord().random_words(num_random_words)
    combined_array = word_array + distractors
    random.shuffle(combined_array)
    return combined_array


def fillblanks_build_solutions_array(words, start_id):
    """Map each word to ``{"id": str, "solution": word}`` with ids from ``start_id``."""
    return [{"id": str(i), "solution": word} for i, word in enumerate(words, start=start_id)]


def remove_excess_questions(questions: list, quantity):
    """Drop up to ``quantity`` items whose solution is ``'true'``, scanning from the end.

    The relative order of the remaining items is preserved.
    """
    count_true = 0
    result = []
    for item in reversed(questions):
        if item.get('solution') == 'true' and count_true < quantity:
            count_true += 1
        else:
            result.append(item)
    result.reverse()
    return result


def build_write_blanks_text(questions: list, start_id):
    """Concatenate each question followed by its ``{{id}}`` blank and a literal ``\\n``."""
    return "".join(
        q["question"] + '{{' + str(i) + '}}' + "\\n"
        for i, q in enumerate(questions, start=start_id)
    )


def build_write_blanks_text_form(form: list, start_id):
    """Blank out one random word in the value part of every form entry.

    The value is whatever follows the first ``:`` of an entry; an entry with
    no colon is treated as being all value. Words longer than one letter are
    preferred. An entry with nothing to blank out is kept verbatim and does
    not consume an id, so placeholders stay aligned with the returned words.

    Returns ``(text, replaced_words)``.
    """
    result = ""
    replaced_words = []
    for entry in form:
        match = re.search(r'(?<=:)\s*(.*)', entry)
        if match:
            value_start, value_end = match.span(1)
        else:
            value_start, value_end = 0, len(entry)
        original_string = entry[value_start:value_end]

        words = re.findall(r'\b\w+\b', original_string)
        # Fall back to one-letter tokens (e.g. a single digit) rather than fail.
        candidates = [word for word in words if len(word) > 1] or words
        if not candidates:
            result = result + entry + "\\n"
            continue

        placeholder = '{{' + str(start_id + len(replaced_words)) + '}}'
        selected_word = random.choice(candidates)
        pattern = re.compile(r'\b' + re.escape(selected_word) + r'\b', re.IGNORECASE)
        replaced_string = pattern.sub(placeholder, original_string, 1)
        # Splice by position so an identical substring in the key is untouched.
        result = result + entry[:value_start] + replaced_string + entry[value_end:] + "\\n"
        replaced_words.append(selected_word)
    return result, replaced_words


def build_write_blanks_solutions(questions: list, start_id):
    """Build solution entries from ``possible_answers``; a bare string becomes a one-item list."""
    solutions = []
    for i, q in enumerate(questions, start=start_id):
        answers = q["possible_answers"]
        solutions.append({
            "id": str(i),
            "solution": [answers] if isinstance(answers, str) else answers
        })
    return solutions


def build_write_blanks_solutions_listening(words: list, start_id):
    """Build solution entries from words; a bare string becomes a one-item list."""
    return [
        {"id": str(i), "solution": [word] if isinstance(word, str) else word}
        for i, word in enumerate(words, start=start_id)
    ]


def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the model for a reading passage on ``topic``.

    The result is requested with ``GEN_TEXT_FIELDS``; callers in this module
    read its ``"title"`` and ``"text"`` keys.
    """
    prompt = (
        "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic "
        "of '" + topic + "'. The passage should offer a substantial amount of "
        "information, analysis, or narrative "
        "relevant to the chosen subject matter. This text passage aims to serve as the primary reading "
        "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. "
        "Make sure that the generated text does not contain forbidden subjects in muslim countries."
        "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
    )
    return _instruct(prompt, GEN_TEXT_FIELDS)


def _generate_conversation(prompt):
    """Generate a conversation, parse it to JSON and give each speaker a voice.

    Every segment lacking a ``voice`` gets one. A speaker keeps the same voice
    for the whole conversation; a male voice is chosen when the segment's
    gender is ``male`` (case-insensitive), a female one otherwise.

    Returns ``(raw_response, parsed_conversation)``.
    """
    response = _instruct(prompt)
    parse_conversation = ("Parse this conversation: '" + response
                          + "' to the following json format: " + _CONVERSATION_JSON)
    processed = _instruct(parse_conversation, ['conversation'])

    name_to_voice = {}
    for segment in processed['conversation']:
        if 'voice' in segment:
            continue
        # Tolerate segments where the model omitted a key.
        name = segment.get('name')
        if name not in name_to_voice:
            is_male = str(segment.get('gender', '')).lower() == 'male'
            voices = MALE_NEURAL_VOICES if is_male else FEMALE_NEURAL_VOICES
            name_to_voice[name] = random.choice(voices)['Id']
        segment['voice'] = name_to_voice[name]
    return response, processed


def generate_listening_1_conversation(topic: str):
    """Generate the two-person conversation for listening section 1.

    Returns ``(raw_response, parsed_conversation)``.
    """
    prompt = (
        "Compose an authentic conversation between two individuals in the everyday "
        "social context of '" + topic + "'. Please include random names and genders "
        "for the characters in your dialogue. "
        "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    )
    return _generate_conversation(prompt)


def generate_listening_2_monologue(topic: str):
    """Generate the social-context monologue for listening section 2."""
    prompt = ("Generate a comprehensive monologue set in the social context of: '" + topic
              + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries.")
    return _instruct(prompt)


def generate_listening_3_conversation(topic: str):
    """Generate the conversation (up to four people) for listening section 3.

    Returns ``(raw_response, parsed_conversation)``.
    """
    prompt = (
        "Compose an authentic and elaborate conversation between up to four individuals "
        "in the everyday social context of '" + topic +
        "'. Please include random names and genders for the characters in your dialogue. "
        "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
    )
    return _generate_conversation(prompt)


def generate_listening_4_monologue(topic: str):
    """Generate the academic monologue for listening section 4."""
    prompt = ("Generate a comprehensive monologue an academic subject of: '" + topic
              + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries.")
    return _instruct(prompt)


def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
    """Generate the requested reading exercises for ``passage``.

    One size is taken from ``number_of_exercises_q`` per requested type and
    ``start_id`` advances by that size after each type. A write-blanks
    exercise whose answers exceed three words is recorded as ``{}``. An
    unknown type adds nothing but still consumes its size.
    """
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()

        if req_exercise == "fillBlanks":
            question = gen_summary_fill_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
            exercises.append(question)
            print("Added fill blanks: " + str(question))
        elif req_exercise == "trueFalse":
            question = gen_true_false_not_given_exercise(passage, number_of_exercises, start_id, difficulty)
            exercises.append(question)
            print("Added trueFalse: " + str(question))
        elif req_exercise == "writeBlanks":
            question = gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
            if answer_word_limit_ok(question):
                exercises.append(question)
                print("Added write blanks: " + str(question))
            else:
                exercises.append({})
                print("Did not add write blanks because it did not respect word limit")
        elif req_exercise == "paragraphMatch":
            question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
            exercises.append(question)
            print("Added paragraph match: " + str(question))

        start_id = start_id + number_of_exercises
    return exercises


def answer_word_limit_ok(question):
    """Return ``True`` when no accepted answer in ``question`` has more than three words."""
    return all(
        len(option.split()) <= 3
        for solution in question["solutions"]
        for option in solution["solution"]
    )


def contains_empty_dict(arr):
    """Return ``True`` when any element of ``arr`` equals ``{}``."""
    return any(elem == {} for elem in arr)


def _generate_listening_exercises(source, req_exercises, number_of_exercises_q, start_id, difficulty,
                                  generators):
    """Shared loop for the listening exercise builders.

    ``generators`` maps an exercise type to ``(generator, log_label)``. An
    unknown type adds nothing but still consumes its size from the queue.
    """
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()
        entry = generators.get(req_exercise)
        if entry is not None:
            generator, label = entry
            question = generator(source, number_of_exercises, start_id, difficulty)
            exercises.append(question)
            print("Added " + label + ": " + str(question))
        start_id = start_id + number_of_exercises
    return exercises


def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q,
                                              start_id, difficulty):
    """Generate the requested listening exercises for a conversation."""
    return _generate_listening_exercises(conversation, req_exercises, number_of_exercises_q, start_id,
                                         difficulty, {
        "multipleChoice": (gen_multiple_choice_exercise_listening_conversation, "multiple choice"),
        "writeBlanksQuestions": (gen_write_blanks_questions_exercise_listening_conversation,
                                 "write blanks questions"),
        "writeBlanksFill": (gen_write_blanks_notes_exercise_listening_conversation, "write blanks notes"),
        "writeBlanksForm": (gen_write_blanks_form_exercise_listening_conversation, "write blanks form"),
    })


def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q,
                                           start_id, difficulty):
    """Generate the requested listening exercises for a monologue."""
    return _generate_listening_exercises(monologue, req_exercises, number_of_exercises_q, start_id,
                                         difficulty, {
        "multipleChoice": (gen_multiple_choice_exercise_listening_monologue, "multiple choice"),
        "writeBlanksQuestions": (gen_write_blanks_questions_exercise_listening_monologue,
                                 "write blanks questions"),
        "writeBlanksFill": (gen_write_blanks_notes_exercise_listening_monologue, "write blanks notes"),
        "writeBlanksForm": (gen_write_blanks_form_exercise_listening_monologue, "write blanks form"),
    })


def _gen_multiple_choice_from_prompt(generation_prompt, start_id):
    """Generate multiple-choice questions, parse them to JSON and renumber them."""
    mc_questions = _instruct(generation_prompt)
    parse_mc_questions = ("Parse the questions into this json format: " + _MC_PARSE_FORMAT
                          + ". \nThe questions: '" + mc_questions + "'")
    question = _instruct(parse_mc_questions, ["questions"])
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }


def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
    """Build a multiple-choice exercise about a reading text."""
    prompt = ("Generate " + str(quantity) + " " + difficulty
              + " difficulty multiple choice questions for this text: "
              "'" + text + "'\n"
              "Use this format: " + _MC_QUESTIONS_EXAMPLE)
    return _gen_multiple_choice_from_prompt(prompt, start_id)


def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    """Build a fill-the-blanks exercise over a model-written summary of ``text``.

    The word bank holds the removed words plus five random distractors.
    """
    text_summary = _instruct("Summarize this text: " + text)

    gen_words_to_replace = (
        "Select " + str(quantity) + " " + difficulty
        + " difficulty words, it must be words and not expressions, from the summary and respond in this "
        "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
    )
    words_to_replace = _instruct(gen_words_to_replace, ["words"])["words"]

    replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id)
    options_words = add_random_words_and_shuffle(words_to_replace, 5)
    solutions = fillblanks_build_solutions_array(words_to_replace, start_id)

    return {
        "allowRepetition": True,
        "id": str(uuid.uuid4()),
        "prompt": "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
                  "more words than spaces so you will not use them all. You may use any of the words more than once.",
        "solutions": solutions,
        "text": replaced_summary,
        "type": "fillBlanks",
        "words": options_words
    }


def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
    """Build a True / False / Not Given exercise with at most ``quantity`` statements."""
    gen_true_false_not_given = (
        "Generate " + str(quantity) + " " + difficulty
        + " difficulty statements in JSON format (True, False, or Not Given) "
        "based on the provided text. Ensure that your statements "
        "accurately represent information or inferences from the "
        "text, and provide a variety of responses, including, at least one of each True, "
        "False, and Not Given, as appropriate, in the JSON structure "
        "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": "
        "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", "
        "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
    )
    questions = _instruct(gen_true_false_not_given, ["prompts"])["prompts"]

    if len(questions) > quantity:
        questions = remove_excess_questions(questions, len(questions) - quantity)
        # Too few 'true' statements to drop: truncate so ids stay in range.
        questions = questions[:quantity]

    for i, question in enumerate(questions, start=start_id):
        question["id"] = str(i)

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Do the following statements agree with the information given in the Reading Passage?",
        "questions": questions,
        "type": "trueFalse"
    }


def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    """Build a short-answer (max three words) exercise about a reading text."""
    gen_short_answer_questions = (
        "Generate " + str(quantity) + " " + difficulty
        + " difficulty short answer questions, and the possible answers, "
        "must have maximum 3 words per answer, about this text: '" + text + "'. "
        "Provide your answer in this JSON format: " + _WRITE_BLANKS_QUESTIONS_FORMAT
    )
    questions = _instruct(gen_short_answer_questions, ["questions"])["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }


def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    """Build a heading-to-paragraph matching exercise.

    Every paragraph is offered as an option. Only paragraphs that received a
    heading become sentences; they are shuffled and cut to ``quantity``.
    """
    paragraphs = assign_letters_to_paragraphs(text)

    heading_prompt = (
        'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
        '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
        'The paragraphs are these: ' + str(paragraphs))
    headings = _instruct(heading_prompt, ["headings"])["headings"]

    options = [{"id": paragraph["letter"], "sentence": paragraph["paragraph"]} for paragraph in paragraphs]

    # zip() stops at the shorter list, so a short answer cannot raise IndexError.
    matched = []
    for paragraph, heading in zip(paragraphs, headings):
        # The requested format wraps each heading as {"heading": "..."}; unwrap it
        # so the sentence is the heading text rather than a dict.
        paragraph["heading"] = heading.get("heading", "") if isinstance(heading, dict) else heading
        matched.append(paragraph)

    random.shuffle(matched)
    # NOTE(review): ids are ints here while other exercises use strings; kept
    # as-is because consumers may depend on it - confirm.
    sentences = [
        {"id": i, "sentence": paragraph["heading"], "solution": paragraph["letter"]}
        for i, paragraph in enumerate(matched, start=start_id)
    ]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": options,
        "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
        "sentences": sentences[:quantity],
        "type": "matchSentences"
    }


def _paragraph_labels():
    """Yield ``A`` .. ``Z``, then ``AA``, ``AB`` ... without end."""
    for width in itertools.count(1):
        for letters in itertools.product(string.ascii_uppercase, repeat=width):
            yield "".join(letters)


def assign_letters_to_paragraphs(paragraphs):
    """Split ``paragraphs`` on newlines and label each non-blank paragraph.

    Blank lines are skipped so they do not consume a letter. Labels continue
    with ``AA``, ``AB`` ... after ``Z``.
    """
    labels = _paragraph_labels()
    result = []
    for paragraph in paragraphs.split("\n"):
        stripped = paragraph.strip()
        if not stripped:
            continue
        result.append({'paragraph': stripped, 'letter': next(labels)})
    return result


def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Build a multiple-choice exercise about a listening conversation."""
    prompt = ("Generate " + str(quantity) + " " + difficulty
              + " difficulty multiple choice questions of 4 options of for this conversation: "
              "'" + text + "'")
    return _gen_multiple_choice_from_prompt(prompt, start_id)


def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a multiple-choice exercise about a listening monologue."""
    prompt = ("Generate " + str(quantity) + " " + difficulty
              + " difficulty multiple choice questions for this monologue: "
              "'" + text + "'")
    return _gen_multiple_choice_from_prompt(prompt, start_id)


def _gen_write_blanks_questions_listening(text, quantity, start_id, difficulty, source):
    """Build a short-answer listening exercise; ``source`` names the audio kind for the learner."""
    # The generation prompt says "monologue" for both audio kinds, as before.
    gen_write_blanks_questions = (
        "Generate " + str(quantity) + " " + difficulty
        + " difficulty short answer questions, and the possible answers "
        "(max 3 words per answer), about a monologue and"
        "respond in this JSON format: " + _WRITE_BLANKS_QUESTIONS_FORMAT + "."
        "The monologue is this: '" + text + "'"
    )
    questions = _instruct(gen_write_blanks_questions, ["questions"])["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a " + source
                  + ". Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(questions, start_id),
        "text": build_write_blanks_text(questions, start_id),
        "type": "writeBlanks"
    }


def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Build a short-answer exercise about a listening conversation."""
    return _gen_write_blanks_questions_listening(text, quantity, start_id, difficulty, "conversation")


def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a short-answer exercise about a listening monologue."""
    return _gen_write_blanks_questions_listening(text, quantity, start_id, difficulty, "monologue")


def _gen_write_blanks_notes_listening(gen_write_blanks_notes, quantity, start_id):
    """Generate notes, pick one word per note and blank it out."""
    questions = _instruct(gen_write_blanks_notes, ["notes"])["notes"][:quantity]

    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = ("Select 1 word from each phrase in the list and respond in this "
                            "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: "
                            + formatted_phrases)
    # _instruct counts the tokens of this prompt; the count from the notes
    # prompt used to be reused here.
    words = _instruct(gen_words_to_replace, ["words"])["words"][:quantity]

    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }


def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Build a notes-completion exercise about a listening conversation."""
    gen_write_blanks_notes = (
        "Generate " + str(quantity) + " " + difficulty
        + " difficulty notes taken from the conversation and and respond in this "
        "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
    )
    return _gen_write_blanks_notes_listening(gen_write_blanks_notes, quantity, start_id)


def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a notes-completion exercise about a listening monologue."""
    gen_write_blanks_notes = (
        "Generate " + str(quantity) + " " + difficulty
        + " difficulty notes taken from the monologue and respond in this "
        "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
    )
    return _gen_write_blanks_notes_listening(gen_write_blanks_notes, quantity, start_id)


def _gen_write_blanks_form_listening(text, quantity, start_id, difficulty, source):
    """Generate a key-value form about the audio and blank out one word per entry."""
    gen_write_blanks_form = (
        "Generate a form with " + str(quantity) + " " + difficulty
        + " difficulty key-value pairs about the " + source + ". "
        "The " + source + " is this: '" + text + "'"
    )
    form = _instruct(gen_write_blanks_form)

    parse_form = ("Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. "
                  "The form is this: '" + form + "'")
    parsed_form = _instruct(parse_form, ["form"])["form"][:quantity]

    replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a " + source + ". Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": replaced_form,
        "type": "writeBlanks"
    }


def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    """Build a form-completion exercise about a listening conversation."""
    return _gen_write_blanks_form_listening(text, quantity, start_id, difficulty, "conversation")


def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
    """Build a form-completion exercise about a listening monologue."""
    return _gen_write_blanks_form_listening(text, quantity, start_id, difficulty, "monologue")


def _parse_level_questions(raw_questions):
    """Parse one chunk of level-test questions into ``{"questions": [...]}``."""
    parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": ['
                          + _LEVEL_QUESTION_EXAMPLE + ']}\'\n '
                          '\nThe questions: "' + raw_questions + '"')
    token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                     ["questions"], GEN_QUESTION_TEMPERATURE)


def gen_multiple_choice_level(quantity: int, start_id=1):
    """Build the multiple-choice level test.

    The generated text is parsed in two chunks, split at the first ``13``.
    Questions already present in stored ``level`` exams, or duplicated within
    this exam, are replaced by freshly generated ones.

    NOTE(review): the ``'25'`` completeness check and the ``'13'`` split
    point are hard-coded, which presumably assumes ``quantity == 25`` -
    confirm with callers.
    """
    gen_multiple_choice_for_text = (
        "Generate " + str(quantity)
        + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate "
        "questions and some advanced questions. Ensure that the questions cover a range of topics such as "
        "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure "
        "every question only has 1 correct answer."
    )
    messages = [{"role": "user", "content": gen_multiple_choice_for_text}]
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
    mc_questions = make_openai_call(GPT_4_PREVIEW, messages, token_count, None, GEN_QUESTION_TEMPERATURE)

    if '25' not in mc_questions:
        # The answer looks truncated: start over.
        return gen_multiple_choice_level(quantity, start_id)

    # partition() keeps everything after the first "13", where split('13')[1]
    # dropped whatever followed a second "13" and failed when none existed.
    first_half, separator, second_half = mc_questions.partition('13')

    question = _parse_level_questions(first_half)
    print(question)
    if separator:
        question_2 = _parse_level_questions('13' + second_half)
        print(question_2)
        question["questions"].extend(question_2["questions"])

    # Materialise once: the stored exams are scanned again for every question.
    all_exams = list(get_all("level"))
    seen_keys = set()
    for i in range(len(question["questions"])):
        question["questions"][i], seen_keys = replace_exercise_if_exists(
            all_exams, question["questions"][i], question, seen_keys)

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }


def _exists_in_exam(exam_dict, current_exercise):
    """Return ``True`` when the exam's first exercise has a question matching ``current_exercise``.

    A stored question matches when its prompt is equal and its first option
    text equals any option text of ``current_exercise``.
    """
    stored_exercises = exam_dict.get("exercises") or []
    if not stored_exercises:
        return False
    current_texts = [option["text"] for option in current_exercise["options"]]
    for stored in stored_exercises[0].get("questions", []):
        stored_options = stored.get("options") or []
        if not stored_options or stored.get("prompt") != current_exercise["prompt"]:
            continue
        if stored_options[0]["text"] in current_texts:
            return True
    return False


def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_keys):
    """Return a question that is new both to this exam and to the stored exams.

    A question is identified by its prompt plus the sorted option texts. A
    duplicate is replaced by a freshly generated question, which is checked
    again. ``current_exam`` is accepted for interface compatibility and is
    not used.

    Returns ``(exercise, seen_keys)``.
    """
    key = (current_exercise['prompt'],
           tuple(sorted(option['text'] for option in current_exercise['options'])))
    if key in seen_keys:
        return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam,
                                          seen_keys)
    seen_keys.add(key)

    for exam in all_exams:
        if _exists_in_exam(exam.to_dict(), current_exercise):
            return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(),
                                              current_exam, seen_keys)
    return current_exercise, seen_keys


def generate_single_mc_level_question():
    """Generate and parse one replacement level-test question."""
    gen_multiple_choice_for_text = ("Generate 1 multiple choice question of 4 options for an english level exam, it can "
                                    "be easy, intermediate or advanced.")
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
    mc_question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text,
                                            token_count, None, GEN_QUESTION_TEMPERATURE)

    parse_mc_question = ('Parse the question into this json format: ' + _LEVEL_QUESTION_EXAMPLE + '. '
                         '\nThe questions: "' + mc_question + '"')
    token_count = count_tokens(parse_mc_question, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_question, token_count,
                                     ["options"], GEN_QUESTION_TEMPERATURE)