encoach_backend/helper/exercises.py

import queue
import nltk
import random
import re
import uuid

from helper.api_messages import QuestionType
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_instruct_call
from helper.token_counter import count_tokens
from helper.constants import *
from wonderwords import RandomWord

# Download the NLTK 'words' resource as a side effect of importing this module.
# NOTE(review): no use of that corpus is visible in this part of the file —
# confirm it is still needed before keeping an import-time download.
nltk.download('words')


def divide_number_into_parts(number, parts):
    """Split ``number`` into ``parts`` near-equal integer shares.

    The shares are pushed onto a FIFO ``queue.Queue`` in order; the first
    ``number % parts`` shares are one larger than the others, so the shares
    add up to ``number``.

    Returns:
        The filled queue, or ``None`` when ``number`` is smaller than
        ``parts``.
    """
    if number < parts:
        return None

    base, extra = divmod(number, parts)

    shares = queue.Queue()
    for index in range(parts):
        # The leading `extra` shares absorb the remainder, one unit each.
        shares.put(base + 1 if index < extra else base)
    return shares


def fix_exercise_ids(exercise, start_id):
    """Renumber the questions of ``exercise`` consecutively from ``start_id``.

    Every entry of ``exercise["questions"]`` has its ``"id"`` overwritten in
    place with the string form of its new number.  The same ``exercise``
    object is returned.
    """
    for offset, question in enumerate(exercise["questions"]):
        question["id"] = str(start_id + offset)
    return exercise


def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
    """Blank out one occurrence of each word with a numbered placeholder.

    The n-th word of ``words_to_replace`` is replaced (case-insensitively) by
    ``{{start_id + n}}``.  A whole-word occurrence is preferred so that a word
    such as "the" is not blanked inside "Another"; only when the word never
    appears on its own does the function fall back to the first substring
    occurrence.  Words that do not occur at all leave the text unchanged.

    Args:
        text: text to put the blanks into.
        words_to_replace: words to blank, in placeholder order.
        start_id: number used for the first placeholder.

    Returns:
        The text with the placeholders inserted.
    """
    for i, word in enumerate(words_to_replace, start=start_id):
        placeholder = '{{' + str(i) + '}}'
        escaped = re.escape(word)

        # Lookarounds instead of \b so words that start or end with a
        # non-word character (e.g. "C++") can still match as a whole.
        whole_word = re.compile(r'(?<!\w)' + escaped + r'(?!\w)', re.IGNORECASE)
        text, replaced = whole_word.subn(placeholder, text, count=1)

        if not replaced:
            # No standalone occurrence: fall back to a plain substring match.
            text = re.sub(escaped, placeholder, text, count=1, flags=re.IGNORECASE)
    return text


def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
    """Blank out one word per note with a numbered placeholder.

    Note number ``n`` (0-based) is paired with ``words_to_replace[n]``; the
    first whole-word, case-insensitive occurrence of that word in the note is
    replaced by ``{{start_id + n}}``.  A note that does not contain its word
    is kept unchanged.  Raises ``IndexError`` when there are fewer words than
    notes.

    Returns:
        A new list with the processed notes, in the original order.
    """
    blanked = []
    for offset, note in enumerate(notes):
        target = words_to_replace[offset]
        placeholder = '{{' + str(start_id + offset) + '}}'
        blanked.append(
            re.sub(r'\b' + re.escape(target) + r'\b', placeholder, note, count=1, flags=re.IGNORECASE)
        )
    return blanked


def add_random_words_and_shuffle(word_array, num_random_words):
    """Mix ``word_array`` with random filler words and shuffle the result.

    ``num_random_words`` words are requested from ``RandomWord.random_words``
    and concatenated after ``word_array``; the combined list is shuffled in
    place with ``random.shuffle`` and returned.
    """
    fillers = RandomWord().random_words(num_random_words)
    pool = word_array + fillers
    random.shuffle(pool)
    return pool


def fillblanks_build_solutions_array(words, start_id):
    """Pair each word with a consecutive string id starting at ``start_id``.

    Returns:
        A list of ``{"id": <str>, "solution": <word>}`` dicts, one per word,
        in the order of ``words``.
    """
    return [
        {"id": str(number), "solution": word}
        for number, word in enumerate(words, start=start_id)
    ]


def remove_excess_questions(questions: [], quantity):
    """Drop up to ``quantity`` questions whose solution is ``'true'``.

    The list is scanned from the end, so the last matching questions are the
    ones removed.  Questions with any other solution are always kept, and the
    relative order of the kept questions is preserved.

    Returns:
        A new list with the remaining questions.
    """
    kept_reversed = []
    dropped = 0

    for item in reversed(questions):
        if item.get('solution') == 'true' and dropped < quantity:
            dropped += 1
            continue
        kept_reversed.append(item)

    # Items were collected back-to-front; restore the original order.
    return kept_reversed[::-1]


def build_write_blanks_text(questions: [], start_id):
    """Render the questions as one string with a numbered blank after each.

    Every question contributes ``q["question"]``, then ``{{n}}`` (with ``n``
    counting up from ``start_id``), then a literal backslash followed by
    ``n`` (two characters, not a newline character).
    """
    return "".join(
        q["question"] + '{{' + str(number) + '}}' + "\\n"
        for number, q in enumerate(questions, start=start_id)
    )


def build_write_blanks_text_form(form: [], start_id):
    """Blank out one random word in the value part of each form entry.

    Each entry is expected to look like ``"key: value"``.  One word of the
    value (everything after the first ``:`` on the line) is chosen at random
    and replaced by ``{{n}}``; words longer than one character are preferred.
    Placeholders are numbered consecutively from ``start_id`` so they line up
    with solutions built from the returned words using the same ``start_id``.

    Entries without a ``:`` are treated as being all value.  Entries whose
    value contains no word at all are emitted unchanged and do not consume a
    placeholder number.

    Args:
        form: list of form-entry strings.
        start_id: number used for the first placeholder.

    Returns:
        ``(text, replaced_words)`` where ``text`` is every entry followed by
        a literal backslash-n (two characters), and ``replaced_words`` holds
        the blanked words in placeholder order.
    """
    lines = []
    replaced_words = []
    next_id = start_id

    for entry in form:
        # Locate the value: the rest of the line after the first ':'.
        match = re.search(r'(?<=:)\s*(.*)', entry)
        if match:
            value_start, value_end = match.span(1)
        else:
            value_start, value_end = 0, len(entry)
        value = entry[value_start:value_end]

        word_matches = list(re.finditer(r'\b\w+\b', value))
        # Prefer words of two or more characters; fall back to any word.
        candidates = [m for m in word_matches if len(m.group()) > 1] or word_matches
        if not candidates:
            lines.append(entry)
            continue

        chosen = random.choice(candidates)
        placeholder = '{{' + str(next_id) + '}}'
        # Splice by position so only the value is touched, even when the same
        # text also appears in the key.
        blanked_value = value[:chosen.start()] + placeholder + value[chosen.end():]
        lines.append(entry[:value_start] + blanked_value + entry[value_end:])

        replaced_words.append(chosen.group())
        next_id += 1

    return "".join(line + "\\n" for line in lines), replaced_words


def build_write_blanks_solutions(questions: [], start_id):
    """Build the solution list for short-answer questions.

    Returns:
        One ``{"id": <str>, "solution": q["possible_answers"]}`` dict per
        question, with ids counting up from ``start_id``.
    """
    return [
        {"id": str(number), "solution": q["possible_answers"]}
        for number, q in enumerate(questions, start=start_id)
    ]


def build_write_blanks_solutions_listening(words: [], start_id):
    """Build the solution list for single-word listening blanks.

    Returns:
        One ``{"id": <str>, "solution": <word>}`` dict per word, with ids
        counting up from ``start_id``.
    """
    return [
        {"id": str(number), "solution": word}
        for number, word in enumerate(words, start=start_id)
    ]


def generate_reading_passage(type: QuestionType, topic: str):
    """Ask the model for a long IELTS reading passage on ``topic``.

    The prompt embeds ``type.value`` and ``topic`` and requests a JSON answer
    with ``title`` and ``text``.  The result of ``make_openai_instruct_call``
    (called with ``GEN_TEXT_FIELDS``) is returned as-is.
    """
    # NOTE(review): there is no space between "...of the topic." and
    # "Provide your response..." in the prompt; kept as-is on purpose here.
    prompt = (
        "Generate an extensive text for IELTS " + type.value
        + ", of at least 1500 words, on the topic of " + topic
        + ". The passage should offer a substantial amount of information, analysis, or narrative "
          "relevant to the chosen subject matter. This text passage aims to serve as the primary reading "
          "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic."
          "Provide your response in this json format: "
          "{\"title\": \"title of the text\", \"text\": \"generated text\"}"
    )
    prompt_tokens = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        prompt,
        prompt_tokens,
        GEN_TEXT_FIELDS,
        GEN_QUESTION_TEMPERATURE,
    )


def generate_listening_1_conversation(topic: str):
    """Generate a two-person conversation on ``topic`` and assign voices.

    A first model call writes the conversation as free text; a second one
    parses it into ``{"conversation": [{"name", "gender", "text"}, ...]}``.
    Every parsed segment without a ``voice`` key then gets one: each speaker
    name keeps the same voice id, picked at random from
    ``MALE_NEURAL_VOICES`` when the segment's gender is "male"
    (case-insensitive) and from ``FEMALE_NEURAL_VOICES`` otherwise.

    Returns:
        ``(response, processed)`` — the raw conversation and the parsed
        structure with voices filled in.
    """
    conversation_prompt = (
        "Compose an authentic conversation between two individuals in the everyday "
        "social context of '" + topic + "'. Please include random names and genders "
        "for the characters in your dialogue."
    )
    prompt_tokens = count_tokens(conversation_prompt)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, conversation_prompt, prompt_tokens, None, GEN_QUESTION_TEMPERATURE
    )

    target_format = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_prompt = "Parse this conversation: '" + response + "' to the following json format: " + target_format

    parse_tokens = count_tokens(parse_prompt)["n_tokens"]
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_prompt, parse_tokens, ['conversation'], GEN_QUESTION_TEMPERATURE
    )

    voice_by_speaker = {}
    for segment in processed['conversation']:
        if 'voice' in segment:
            continue
        speaker = segment['name']
        if speaker not in voice_by_speaker:
            # First time this speaker appears: pick a voice matching the gender.
            if segment['gender'].lower() == 'male':
                catalogue = MALE_NEURAL_VOICES
            else:
                catalogue = FEMALE_NEURAL_VOICES
            voice_by_speaker[speaker] = random.choice(catalogue)['Id']
        segment['voice'] = voice_by_speaker[speaker]
    return response, processed


def generate_listening_2_monologue(topic: str):
    """Ask the model for a monologue set in the social context ``topic``.

    Returns whatever ``make_openai_instruct_call`` yields for the prompt.
    """
    prompt = "Generate a comprehensive monologue set in the social context of: '" + topic + "'"
    prompt_tokens = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, prompt, prompt_tokens, None, GEN_QUESTION_TEMPERATURE
    )


def generate_listening_3_conversation(topic: str):
    """Generate a conversation of up to four people on ``topic`` with voices.

    A first model call writes the conversation as free text; a second one
    parses it into ``{"conversation": [{"name", "gender", "text"}, ...]}``.
    Every parsed segment without a ``voice`` key then gets one: each speaker
    name keeps the same voice id, picked at random from
    ``MALE_NEURAL_VOICES`` when the segment's gender is "male"
    (case-insensitive) and from ``FEMALE_NEURAL_VOICES`` otherwise.

    Returns:
        ``(response, processed)`` — the raw conversation and the parsed
        structure with voices filled in.
    """
    conversation_prompt = (
        "Compose an authentic and elaborate conversation between up to four individuals "
        "in the everyday social context of '" + topic
        + "'. Please include random names and genders for the characters in your dialogue."
    )
    prompt_tokens = count_tokens(conversation_prompt)["n_tokens"]
    response = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, conversation_prompt, prompt_tokens, None, GEN_QUESTION_TEMPERATURE
    )

    target_format = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
    parse_prompt = "Parse this conversation: '" + response + "' to the following json format: " + target_format

    parse_tokens = count_tokens(parse_prompt)["n_tokens"]
    processed = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_prompt, parse_tokens, ['conversation'], GEN_QUESTION_TEMPERATURE
    )

    voice_by_speaker = {}
    for segment in processed['conversation']:
        if 'voice' in segment:
            continue
        speaker = segment['name']
        if speaker not in voice_by_speaker:
            # First time this speaker appears: pick a voice matching the gender.
            if segment['gender'].lower() == 'male':
                catalogue = MALE_NEURAL_VOICES
            else:
                catalogue = FEMALE_NEURAL_VOICES
            voice_by_speaker[speaker] = random.choice(catalogue)['Id']
        segment['voice'] = voice_by_speaker[speaker]
    return response, processed


def generate_listening_4_monologue(topic: str):
    """Ask the model for a monologue on the academic subject ``topic``.

    Returns whatever ``make_openai_instruct_call`` yields for the prompt.
    """
    prompt = "Generate a comprehensive monologue an academic subject of: '" + topic + "'"
    prompt_tokens = count_tokens(prompt)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, prompt, prompt_tokens, None, GEN_QUESTION_TEMPERATURE
    )


def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id):
    """Generate the requested reading exercises for ``passage``.

    For every requested type one count is taken from
    ``number_of_exercises_q`` (via ``.get()``), the matching generator is
    called with that count and the current ``start_id``, and ``start_id`` is
    then advanced by the count.  Unrecognised types produce no exercise but
    still consume a count and advance ``start_id``.

    Returns:
        The list of generated exercises, in request order.
    """
    exercises = []
    for req_exercise in req_exercises:
        amount = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
            builder, log_prefix = gen_multiple_choice_exercise, "Added multiple choice: "
        elif req_exercise == "fillBlanks":
            builder, log_prefix = gen_summary_fill_blanks_exercise, "Added fill blanks: "
        elif req_exercise == "trueFalse":
            builder, log_prefix = gen_true_false_not_given_exercise, "Added trueFalse: "
        elif req_exercise == "writeBlanks":
            builder, log_prefix = gen_write_blanks_exercise, "Added write blanks: "
        else:
            builder, log_prefix = None, None

        if builder is not None:
            built = builder(passage, amount, start_id)
            exercises.append(built)
            print(log_prefix + str(built))

        start_id = start_id + amount

    return exercises


def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id):
    """Generate the requested listening exercises for a conversation.

    For every requested type one count is taken from
    ``number_of_exercises_q`` (via ``.get()``), the matching generator is
    called with that count and the current ``start_id``, and ``start_id`` is
    then advanced by the count.  Unrecognised types produce no exercise but
    still consume a count and advance ``start_id``.

    Returns:
        The list of generated exercises, in request order.
    """
    exercises = []
    for req_exercise in req_exercises:
        amount = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
            builder = gen_multiple_choice_exercise_listening_conversation
            log_prefix = "Added multiple choice: "
        elif req_exercise == "writeBlanksQuestions":
            builder = gen_write_blanks_questions_exercise_listening_conversation
            log_prefix = "Added write blanks questions: "
        elif req_exercise == "writeBlanksFill":
            builder = gen_write_blanks_notes_exercise_listening_conversation
            log_prefix = "Added write blanks notes: "
        elif req_exercise == "writeBlanksForm":
            builder = gen_write_blanks_form_exercise_listening_conversation
            log_prefix = "Added write blanks form: "
        else:
            builder = log_prefix = None

        if builder is not None:
            built = builder(conversation, amount, start_id)
            exercises.append(built)
            print(log_prefix + str(built))

        start_id = start_id + amount

    return exercises


def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id):
    """Generate the requested listening exercises for a monologue.

    For every requested type one count is taken from
    ``number_of_exercises_q`` (via ``.get()``), the matching generator is
    called with that count and the current ``start_id``, and ``start_id`` is
    then advanced by the count.  Unrecognised types produce no exercise but
    still consume a count and advance ``start_id``.

    Returns:
        The list of generated exercises, in request order.
    """
    exercises = []
    for req_exercise in req_exercises:
        amount = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
            builder = gen_multiple_choice_exercise_listening_monologue
            log_prefix = "Added multiple choice: "
        elif req_exercise == "writeBlanksQuestions":
            builder = gen_write_blanks_questions_exercise_listening_monologue
            log_prefix = "Added write blanks questions: "
        elif req_exercise == "writeBlanksFill":
            builder = gen_write_blanks_notes_exercise_listening_monologue
            log_prefix = "Added write blanks notes: "
        elif req_exercise == "writeBlanksForm":
            builder = gen_write_blanks_form_exercise_listening_monologue
            log_prefix = "Added write blanks form: "
        else:
            builder = log_prefix = None

        if builder is not None:
            built = builder(monologue, amount, start_id)
            exercises.append(built)
            print(log_prefix + str(built))

        start_id = start_id + amount

    return exercises


def gen_multiple_choice_exercise(text: str, quantity: int, start_id):
    """Build a ``multipleChoice`` exercise about a reading text.

    Two model calls are made: the first asks for ``quantity`` questions and
    shows the wanted format, the second asks the model to parse that answer
    into JSON with a ``questions`` list.  The parsed questions are renumbered
    from ``start_id`` and capped at ``quantity``.

    Args:
        text: passage the questions are about.
        quantity: number of questions requested (and the maximum returned).
        start_id: number given to the first question.

    Returns:
        dict with keys ``id`` (random UUID string), ``prompt``, ``questions``
        and ``type``.
    """
    # Example shown to the model; shared by the generation and parse prompts.
    example_questions = (
        '"questions": [{"id": "9", "options": [{"id": "A", "text": '
        '"Economic benefits"}, {"id": "B", "text": "Government regulations"}, {"id": "C", "text": '
        '"Concerns about climate change"}, {"id": "D", "text": "Technological advancement"}], '
        '"prompt": "What is the main reason for the shift towards renewable energy sources?", '
        '"solution": "C", "variant": "text"}]'
    )

    gen_multiple_choice_for_text = (
        "Generate " + str(quantity) + " multiple choice questions for this text: "
        "'" + text + "'\n"
        "Use this format: " + example_questions
    )
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, None, GEN_QUESTION_TEMPERATURE
    )

    parse_mc_questions = (
        "Parse the questions into this json format: {" + example_questions
        + "}. \nThe questions: '" + mc_questions + "'"
    )
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, ["questions"], GEN_QUESTION_TEMPERATURE
    )

    # Cap at `quantity`: callers reserve exactly `quantity` ids for this
    # exercise, so surplus questions would collide with the next exercise.
    questions = fix_exercise_ids(question, start_id)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": questions,
        "type": "multipleChoice",
    }


def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id):
    """Build a ``fillBlanks`` exercise from a model-written summary of ``text``.

    The model first summarises the text, then selects ``quantity`` words from
    the summary.  At most ``quantity`` of the returned words are used; each
    is replaced in the summary by a numbered placeholder starting at
    ``start_id``.  The word bank offered to the student is those words plus
    5 random words, shuffled.

    Args:
        text: passage to summarise.
        quantity: number of blanks requested (and the maximum produced).
        start_id: number of the first blank.

    Returns:
        dict with keys ``allowRepetition``, ``id`` (random UUID string),
        ``prompt``, ``solutions``, ``text``, ``type`` and ``words``.
    """
    gen_summary_for_text = "Summarize this text: " + text
    token_count = count_tokens(gen_summary_for_text)["n_tokens"]
    text_summary = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, token_count, None, GEN_QUESTION_TEMPERATURE
    )

    gen_words_to_replace = (
        "Select " + str(quantity)
        + " words, it must be words and not expressions, from the summary and respond in this "
          "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
    )
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    # Cap at `quantity`: callers reserve exactly `quantity` ids for this
    # exercise, so surplus words would collide with the next exercise.
    words_to_replace = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count, ["words"], GEN_QUESTION_TEMPERATURE
    )["words"][:quantity]

    replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id)
    options_words = add_random_words_and_shuffle(words_to_replace, 5)
    solutions = fillblanks_build_solutions_array(words_to_replace, start_id)

    return {
        "allowRepetition": True,
        "id": str(uuid.uuid4()),
        "prompt": "Complete the summary below. Click a blank to select the corresponding word(s) for it.\\nThere are "
                  "more words than spaces so you will not use them all. You may use any of the words more than once.",
        "solutions": solutions,
        "text": replaced_summary,
        "type": "fillBlanks",
        "words": options_words
    }


def gen_true_false_not_given_exercise(text: str, quantity: int, start_id):
    """Build a ``trueFalse`` (True / False / Not Given) exercise for ``text``.

    The model is asked for ``quantity`` statements as JSON.  If it returns
    more, surplus 'true' statements are removed first (from the end of the
    list), and the list is then cut to ``quantity``.  The remaining
    statements are numbered from ``start_id``.

    Args:
        text: reference passage.
        quantity: number of statements requested (and the maximum returned).
        start_id: number given to the first statement.

    Returns:
        dict with keys ``id`` (random UUID string), ``prompt``, ``questions``
        and ``type``.
    """
    gen_true_false_not_given = (
        "Generate " + str(quantity) + " statements in JSON format (True, False, or Not Given) "
        "based on the provided text. Ensure that your statements "
        "accurately represent information or inferences from the "
        "text, and provide a variety of responses, including, at least one of each True, "
        "False, and Not Given, as appropriate, in the JSON structure "
        "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": "
        "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", "
        "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
    )

    token_count = count_tokens(gen_true_false_not_given)["n_tokens"]
    questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count, ["prompts"], GEN_QUESTION_TEMPERATURE
    )["prompts"]

    if len(questions) > quantity:
        questions = remove_excess_questions(questions, len(questions) - quantity)
        # remove_excess_questions only drops 'true' statements, so the list
        # can still be too long; cut it so the ids stay inside the range the
        # caller reserved for this exercise.
        questions = questions[:quantity]

    for i, question in enumerate(questions, start=start_id):
        question["id"] = str(i)

    return {
        "id": str(uuid.uuid4()),
        "prompt": "Do the following statements agree with the information given in the Reading Passage?",
        "questions": questions,
        "type": "trueFalse"
    }


def gen_write_blanks_exercise(text: str, quantity: int, start_id):
    """Build a ``writeBlanks`` short-answer exercise about a reading text.

    The model is asked for ``quantity`` questions with their possible
    answers; at most ``quantity`` of the returned questions are used.  The
    exercise text and the solutions are both numbered from ``start_id``.

    Returns:
        dict with keys ``id`` (random UUID string), ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type``.
    """
    request = (
        "Generate " + str(quantity)
        + " short answer questions, and the possible answers (max 3 words per answer), about this text: '"
        + text
        + "'. Provide your answer in this JSON format: {\"questions\": [{\"question\": question, "
          "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
    )

    request_tokens = count_tokens(request)["n_tokens"]
    reply = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, request, request_tokens, ["questions"], GEN_QUESTION_TEMPERATURE
    )
    short_answers = reply["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "Choose no more than three words and/or a number from the passage for each answer.",
        "solutions": build_write_blanks_solutions(short_answers, start_id),
        "text": build_write_blanks_text(short_answers, start_id),
        "type": "writeBlanks"
    }


def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a ``multipleChoice`` exercise about a listening conversation.

    Two model calls are made: the first asks for ``quantity`` four-option
    questions in free form, the second asks the model to parse that answer
    into JSON with a ``questions`` list.  The parsed questions are renumbered
    from ``start_id`` and capped at ``quantity``.

    Args:
        text: the conversation the questions are about.
        quantity: number of questions requested (and the maximum returned).
        start_id: number given to the first question.

    Returns:
        dict with keys ``id`` (random UUID string), ``prompt``, ``questions``
        and ``type``.
    """
    gen_multiple_choice_for_text = (
        "Generate " + str(quantity) + " multiple choice questions of 4 options for this conversation: "
        "'" + text + "'"
    )
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, None, GEN_QUESTION_TEMPERATURE
    )

    parse_mc_questions = (
        'Parse the questions into this json format: {"questions": [{"id": "9", "options": [{"id": "A", "text": '
        '"Economic benefits"}, {"id": "B", "text": "Government regulations"}, {"id": "C", "text": '
        '"Concerns about climate change"}, {"id": "D", "text": "Technological advancement"}], '
        '"prompt": "What is the main reason for the shift towards renewable energy sources?", '
        '"solution": "C", "variant": "text"}]}. \nThe questions: \'' + mc_questions + "'"
    )
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, ["questions"], GEN_QUESTION_TEMPERATURE
    )

    # Cap at `quantity`: callers reserve exactly `quantity` ids for this
    # exercise, so surplus questions would collide with the next exercise.
    questions = fix_exercise_ids(question, start_id)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": questions,
        "type": "multipleChoice",
    }


def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a ``multipleChoice`` exercise about a listening monologue.

    Two model calls are made: the first asks for ``quantity`` questions in
    free form, the second asks the model to parse that answer into JSON with
    a ``questions`` list.  The parsed questions are renumbered from
    ``start_id`` and capped at ``quantity``.

    Args:
        text: the monologue the questions are about.
        quantity: number of questions requested (and the maximum returned).
        start_id: number given to the first question.

    Returns:
        dict with keys ``id`` (random UUID string), ``prompt``, ``questions``
        and ``type``.
    """
    gen_multiple_choice_for_text = (
        "Generate " + str(quantity) + " multiple choice questions for this monologue: "
        "'" + text + "'"
    )
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, None, GEN_QUESTION_TEMPERATURE
    )

    parse_mc_questions = (
        'Parse the questions into this json format: {"questions": [{"id": "9", "options": [{"id": "A", "text": '
        '"Economic benefits"}, {"id": "B", "text": "Government regulations"}, {"id": "C", "text": '
        '"Concerns about climate change"}, {"id": "D", "text": "Technological advancement"}], '
        '"prompt": "What is the main reason for the shift towards renewable energy sources?", '
        '"solution": "C", "variant": "text"}]}. \nThe questions: \'' + mc_questions + "'"
    )
    token_count = count_tokens(parse_mc_questions)["n_tokens"]
    question = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, ["questions"], GEN_QUESTION_TEMPERATURE
    )

    # Cap at `quantity`: callers reserve exactly `quantity` ids for this
    # exercise, so surplus questions would collide with the next exercise.
    questions = fix_exercise_ids(question, start_id)["questions"][:quantity]
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": questions,
        "type": "multipleChoice",
    }


def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a ``writeBlanks`` short-answer exercise for a conversation.

    The model is asked for ``quantity`` questions with their possible
    answers; at most ``quantity`` of the returned questions are used.  The
    exercise text and the solutions are both numbered from ``start_id``.

    Returns:
        dict with keys ``id`` (random UUID string), ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type``.
    """
    # NOTE(review): the prompt below calls the input a "monologue" although
    # this is the conversation variant, and the adjacent literals join without
    # spaces ("andrespond", "}]}.The"). Kept as-is on purpose here.
    request = (
        "Generate " + str(quantity) + " short answer questions, and the possible answers "
        "(max 3 words per answer), about a monologue and"
        "respond in this JSON format: {\"questions\": [{\"question\": question, "
        "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}."
        "The monologue is this: '" + text + "'"
    )

    request_tokens = count_tokens(request)["n_tokens"]
    reply = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, request, request_tokens, ["questions"], GEN_QUESTION_TEMPERATURE
    )
    short_answers = reply["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a conversation. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(short_answers, start_id),
        "text": build_write_blanks_text(short_answers, start_id),
        "type": "writeBlanks"
    }


def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a ``writeBlanks`` short-answer exercise for a monologue.

    The model is asked for ``quantity`` questions with their possible
    answers; at most ``quantity`` of the returned questions are used.  The
    exercise text and the solutions are both numbered from ``start_id``.

    Returns:
        dict with keys ``id`` (random UUID string), ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type``.
    """
    # NOTE(review): the adjacent literals below join without spaces
    # ("andrespond", "}]}.The"). Kept as-is on purpose here.
    request = (
        "Generate " + str(quantity) + " short answer questions, and the possible answers "
        "(max 3 words per answer), about a monologue and"
        "respond in this JSON format: {\"questions\": [{\"question\": question, "
        "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}."
        "The monologue is this: '" + text + "'"
    )

    request_tokens = count_tokens(request)["n_tokens"]
    reply = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, request, request_tokens, ["questions"], GEN_QUESTION_TEMPERATURE
    )
    short_answers = reply["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "maxWords": 3,
        "prompt": "You will hear a monologue. Answer the questions below using no more than three words or a number accordingly.",
        "solutions": build_write_blanks_solutions(short_answers, start_id),
        "text": build_write_blanks_text(short_answers, start_id),
        "type": "writeBlanks"
    }


def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a one-word ``writeBlanks`` notes exercise for a conversation.

    The model first produces ``quantity`` notes about the conversation, then
    picks one word from each note.  At most ``quantity`` notes and words are
    used.  In note ``n`` the n-th word is replaced by a numbered placeholder
    starting at ``start_id``; the notes are joined with a literal
    backslash-n (two characters).

    Args:
        text: the conversation.
        quantity: number of notes requested (and the maximum used).
        start_id: number of the first blank.

    Returns:
        dict with keys ``id`` (random UUID string), ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type``.
    """
    gen_write_blanks_notes = (
        "Generate " + str(quantity) + " notes taken from the conversation and and respond in this "
        "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
    )

    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count, ["notes"], GEN_QUESTION_TEMPERATURE
    )["notes"][:quantity]

    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = (
        "Select 1 word from each phrase in the list and respond in this "
        "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    )
    # Count the tokens of THIS prompt; the count of the notes prompt above
    # does not describe it.
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count, ["words"], GEN_QUESTION_TEMPERATURE
    )["words"][:quantity]

    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }


def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a one-word ``writeBlanks`` notes exercise for a monologue.

    The model first produces ``quantity`` notes about the monologue, then
    picks one word from each note.  At most ``quantity`` notes and words are
    used.  In note ``n`` the n-th word is replaced by a numbered placeholder
    starting at ``start_id``; the notes are joined with a literal
    backslash-n (two characters).

    Args:
        text: the monologue.
        quantity: number of notes requested (and the maximum used).
        start_id: number of the first blank.

    Returns:
        dict with keys ``id`` (random UUID string), ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type``.
    """
    gen_write_blanks_notes = (
        "Generate " + str(quantity) + " notes taken from the monologue and and respond in this "
        "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
    )

    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count, ["notes"], GEN_QUESTION_TEMPERATURE
    )["notes"][:quantity]

    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    gen_words_to_replace = (
        "Select 1 word from each phrase in the list and respond in this "
        "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
    )
    # Count the tokens of THIS prompt; the count of the notes prompt above
    # does not describe it.
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count, ["words"], GEN_QUESTION_TEMPERATURE
    )["words"][:quantity]

    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "Fill the blank space with the word missing from the audio.",
        "solutions": build_write_blanks_solutions_listening(words, start_id),
        "text": "\\n".join(replaced_notes),
        "type": "writeBlanks"
    }


def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id):
    """Build a "writeBlanks" form exercise from a listening conversation.

    The instruct model is first asked for a free-form form of ``quantity``
    key-value pairs about the conversation, then asked to parse that form into
    a JSON list of strings. The parsed entries (at most ``quantity``) are
    handed to ``build_write_blanks_text_form`` to obtain the blanked text and
    the words that were removed.

    :param text: the conversation the form is generated from.
    :param quantity: number of key-value pairs requested.
    :param start_id: id passed on to the blank/solution builders.
    :return: dict with the keys ``id``, ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type`` describing the exercise.
    """
    # Step 1: free-text form generation (no required JSON keys are passed).
    form_request = "".join([
        "Generate a form with ",
        str(quantity),
        " key-value pairs about the conversation. ",
        "The conversation is this: '",
        text,
        "'",
    ])
    request_tokens = count_tokens(form_request)["n_tokens"]
    raw_form = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, form_request, request_tokens, None, GEN_QUESTION_TEMPERATURE
    )

    # Step 2: have the model convert its own answer into a JSON list.
    parse_request = "".join([
        'Parse the form to this JSON format: { "form": ["string", "string"] }. The form is this: ',
        "'",
        raw_form,
        "'",
    ])
    parse_tokens = count_tokens(parse_request)["n_tokens"]
    parse_answer = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_request, parse_tokens, ["form"], GEN_QUESTION_TEMPERATURE
    )
    form_entries = parse_answer["form"][:quantity]

    # Step 3: blank out the form and assemble the exercise payload.
    blanked_form, removed_words = build_write_blanks_text_form(form_entries, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "You will hear a conversation. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(removed_words, start_id),
        "text": blanked_form,
        "type": "writeBlanks"
    }


def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id):
    """Build a "writeBlanks" form exercise from a listening monologue.

    The instruct model is first asked for a free-form form of ``quantity``
    key-value pairs about the monologue, then asked to parse that form into a
    JSON list of strings. The parsed entries (at most ``quantity``) are
    handed to ``build_write_blanks_text_form`` to obtain the blanked text and
    the words that were removed.

    :param text: the monologue the form is generated from.
    :param quantity: number of key-value pairs requested.
    :param start_id: id passed on to the blank/solution builders.
    :return: dict with the keys ``id``, ``maxWords``, ``prompt``,
        ``solutions``, ``text`` and ``type`` describing the exercise.
    """
    # Step 1: free-text form generation (no required JSON keys are passed).
    form_request = "".join([
        "Generate a form with ",
        str(quantity),
        " key-value pairs about the monologue. ",
        "The monologue is this: '",
        text,
        "'",
    ])
    request_tokens = count_tokens(form_request)["n_tokens"]
    raw_form = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, form_request, request_tokens, None, GEN_QUESTION_TEMPERATURE
    )

    # Step 2: have the model convert its own answer into a JSON list.
    parse_request = "".join([
        'Parse the form to this JSON format: { "form": ["string", "string"] }. The form is this: ',
        "'",
        raw_form,
        "'",
    ])
    parse_tokens = count_tokens(parse_request)["n_tokens"]
    parse_answer = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_request, parse_tokens, ["form"], GEN_QUESTION_TEMPERATURE
    )
    form_entries = parse_answer["form"][:quantity]

    # Step 3: blank out the form and assemble the exercise payload.
    blanked_form, removed_words = build_write_blanks_text_form(form_entries, start_id)
    return {
        "id": str(uuid.uuid4()),
        "maxWords": 1,
        "prompt": "You will hear a monologue. Fill the form with words/numbers missing.",
        "solutions": build_write_blanks_solutions_listening(removed_words, start_id),
        "text": blanked_form,
        "type": "writeBlanks"
    }


def _parse_mc_level_question_batch(raw_questions: str):
    """Ask the instruct model to convert free-text questions into exercise JSON.

    :param raw_questions: model-generated multiple choice questions as text.
    :return: the result of the parse request; callers read its
        ``"questions"`` list.
    """
    parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": '
                          '[{"id": "A", "text": '
                          '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                          '"Happy"}, {"id": "D", "text": "Jump"}], '
                          '"prompt": "Which of the following is a conjunction?", '
                          '"solution": "A", "variant": "text"}]}. '
                          '\nThe questions: "' + raw_questions + '"')
    token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                     ["questions"],
                                     GEN_QUESTION_TEMPERATURE)


def gen_multiple_choice_level(quantity: int, start_id=1):
    """Generate a multiple choice exercise for the English level exam.

    The questions are generated as free text in one request and then parsed
    into JSON in up to two requests. Every parsed question is passed through
    ``replace_exercise_if_exists`` against the stored "level" exams, and the
    final list is renumbered from ``start_id``.

    :param quantity: number of questions requested from the model.
    :param start_id: id given to the first question (defaults to 1).
    :return: dict with the keys ``id``, ``prompt``, ``questions`` and ``type``.
    """
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
                    "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
                    "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
                    "every question only has 1 correct answer."
    # NOTE(review): the reason for subtracting 300 is not visible in this file -
    # confirm against make_openai_instruct_call before changing it.
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)

    # The generated text is parsed in two requests, cut at the first "13"
    # (presumably the number of the 13th question - confirm). partition()
    # keeps everything after that marker, whereas split('13')[1] silently
    # dropped whatever followed a second "13", and it reports a missing
    # marker instead of raising IndexError.
    first_batch, marker, second_batch = mc_questions.partition('13')

    question = _parse_mc_level_question_batch(first_batch)
    if marker:
        question_2 = _parse_mc_level_question_batch(marker + second_batch)
        question["questions"].extend(question_2["questions"])

    all_exams = get_all("level")
    seen_keys = set()
    for index, candidate in enumerate(question["questions"]):
        question["questions"][index], seen_keys = replace_exercise_if_exists(all_exams, candidate, question,
                                                                             seen_keys)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }


def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_keys):
    """Return a question that is neither repeated in this exam nor already stored.

    A question counts as repeated when its (prompt, sorted option texts) key is
    already in ``seen_keys``. It counts as stored when a question in the first
    exercise of any exam in ``all_exams`` has the same prompt and its first
    option's text equals the text of any option of the candidate. In either
    case a replacement is requested from ``generate_single_mc_level_question``
    and checked again.

    :param all_exams: iterable of objects exposing ``to_dict()``; each dict may
        carry an ``"exercises"`` list whose first entry holds ``"questions"``.
    :param current_exercise: candidate question dict with ``"prompt"`` and
        ``"options"`` (each option having a ``"text"``).
    :param current_exam: unused; kept so existing callers keep working.
    :param seen_keys: set of keys already used; it is updated in place.
    :return: tuple ``(accepted_question, seen_keys)``.
    :raises RuntimeError: when no acceptable question was produced within the
        retry budget. The previous recursive version ended in RecursionError
        (a RuntimeError subclass) in that situation, after many more
        model requests.
    """
    # Upper bound on replacement requests, so a model that keeps returning
    # duplicates cannot trigger an unbounded series of requests.
    max_replacements = 25

    # Collect the stored questions once instead of converting every exam again
    # on each retry. Exams without (or with empty) "exercises" are skipped;
    # indexing [0] on them used to raise IndexError.
    stored_questions = []
    for exam in all_exams:
        exercises = exam.to_dict().get("exercises") or []
        if exercises:
            stored_questions.extend(exercises[0].get("questions") or [])

    for attempt in range(max_replacements + 1):
        if attempt:
            current_exercise = generate_single_mc_level_question()

        # Extracting relevant fields for comparison
        key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
        if key in seen_keys:
            continue
        seen_keys.add(key)

        already_stored = any(
            stored["prompt"] == current_exercise["prompt"] and
            any(stored["options"][0]["text"] == current_option["text"] for current_option in
                current_exercise["options"])
            for stored in stored_questions
        )
        if not already_stored:
            return current_exercise, seen_keys

    raise RuntimeError(
        "Could not generate a unique multiple choice question after "
        + str(max_replacements) + " replacement attempts"
    )


def generate_single_mc_level_question():
    """Generate one multiple choice level-exam question via the instruct model.

    A first request produces the question as free text; a second request asks
    the model to parse that text into the question JSON layout (id, options,
    prompt, solution, variant).

    :return: the result of the parse request, for which the ``"options"`` key
        is passed as required.
    """
    generation_request = ("Generate 1 multiple choice question of 4 options for an english level exam, it can "
                          "be easy, intermediate or advanced.")
    # NOTE(review): the reason for subtracting 300 is not visible in this file.
    generation_tokens = count_tokens(generation_request)["n_tokens"] - 300
    raw_question = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, generation_request, generation_tokens, None, GEN_QUESTION_TEMPERATURE
    )

    # Example object shown to the model so it knows the target layout.
    json_template = (
        '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
        '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
        '"prompt": "Which of the following is a conjunction?", '
        '"solution": "A", "variant": "text"}'
    )
    parse_request = ('Parse the question into this json format: ' + json_template
                     + '. \nThe questions: "' + raw_question + '"')

    parse_tokens = count_tokens(parse_request, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT, parse_request, parse_tokens, ["options"], GEN_QUESTION_TEMPERATURE
    )