All tested except for the grading of speaking.

2024-05-22 21:07:48 +01:00
parent fe753fe72c
commit b7c18517de
4 changed files with 494 additions and 321 deletions
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -10,8 +10,8 @@ from wonderwords import RandomWord
 from helper.api_messages import QuestionType
 from helper.constants import *
 from helper.firebase_helper import get_all
-from helper.openai_interface import make_openai_instruct_call, make_openai_call
-from helper.token_counter import count_tokens
+from helper.openai_interface import make_openai_call, count_total_tokens
+from helper.speech_to_text_helper import has_x_words

 nltk.download('words')

@@ -240,48 +240,63 @@ def build_write_blanks_solutions_listening(words: [], start_id):


 def generate_reading_passage(type: QuestionType, topic: str):
-    gen_reading_passage_1 = "Generate an extensive text for IELTS " + type.value + ", of at least 1500 words, on the topic " \
-                                                                                   "of '" + topic + "'. The passage should offer a substantial amount of " \
-                                                                                                    "information, analysis, or narrative " \
-                                                                                                    "relevant to the chosen subject matter. This text passage aims to serve as the primary reading " \
-                                                                                                    "section of an IELTS test, providing an in-depth and comprehensive exploration of the topic. " \
-                                                                                                    "Make sure that the generated text does not contain forbidden subjects in muslim countries." \
-                                                                                                    "Provide your response in this json format: {\"title\": \"title of the text\", \"text\": \"generated text\"}"
-    token_count = count_tokens(gen_reading_passage_1)["n_tokens"]
-    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_reading_passage_1, token_count, GEN_TEXT_FIELDS,
-                                     GEN_QUESTION_TEMPERATURE)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"title": "title of the text", "text": "generated text"}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate an extensive text for IELTS ' + type.value + ', of at least 1500 words, on the topic '
+                                                                           'of "' + topic + '". The passage should offer '
+                                                                                            'a substantial amount of information, '
+                                                                                            'analysis, or narrative relevant to the chosen '
+                                                                                            'subject matter. This text passage aims to '
+                                                                                            'serve as the primary reading section of an '
+                                                                                            'IELTS test, providing an in-depth and '
+                                                                                            'comprehensive exploration of the topic. '
+                                                                                            'Make sure that the generated text does not '
+                                                                                            'contain forbidden subjects in muslim countries.')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)


 def generate_listening_1_conversation(topic: str):
-    gen_listening_1_conversation_2_people = "Compose an authentic conversation between two individuals in the everyday " \
-                                            "social context of '" + topic + "'. Please include random names and genders " \
-                                                                            "for the characters in your dialogue. " \
-                                                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
-    token_count = count_tokens(gen_listening_1_conversation_2_people)["n_tokens"]
-    response = make_openai_instruct_call(
-        GPT_3_5_TURBO_INSTRUCT,
-        gen_listening_1_conversation_2_people,
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Compose an authentic conversation between two individuals in the everyday social context '
+                    'of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
+                                     'Make sure that the generated conversation does not contain forbidden subjects in '
+                                     'muslim countries.')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(
+        GPT_4_O,
+        messages,
        token_count,
-        None,
-        GEN_QUESTION_TEMPERATURE
-    )
-
-    conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
-
-    parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json
-
-    token_count = count_tokens(parse_conversation)["n_tokens"]
-    processed = make_openai_instruct_call(
-        GPT_3_5_TURBO_INSTRUCT,
-        parse_conversation,
-        token_count,
-        ['conversation'],
+        ["conversation"],
        GEN_QUESTION_TEMPERATURE
    )

    chosen_voices = []
    name_to_voice = {}
-    for segment in processed['conversation']:
+    for segment in response['conversation']:
        if 'voice' not in segment:
            name = segment['name']
            if name in name_to_voice:
@@ -300,50 +315,66 @@ def generate_listening_1_conversation(topic: str):
                        chosen_voices.append(voice)
                name_to_voice[name] = voice
            segment['voice'] = voice
-    return response, processed
-
-
-def generate_listening_2_monologue(topic: str):
-    gen_listening_2_monologue_social = "Generate a comprehensive monologue set in the social context of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
-    token_count = count_tokens(gen_listening_2_monologue_social)["n_tokens"]
-    response = make_openai_instruct_call(
-        GPT_3_5_TURBO_INSTRUCT,
-        gen_listening_2_monologue_social,
-        token_count,
-        None,
-        GEN_QUESTION_TEMPERATURE
-    )
    return response


-def generate_listening_3_conversation(topic: str):
-    gen_listening_3_conversation_4_people = "Compose an authentic and elaborate conversation between up to four individuals " \
-                                            "in the everyday social context of '" + topic + \
-                                            "'. Please include random names and genders for the characters in your dialogue. " \
-                                            "Make sure that the generated conversation does not contain forbidden subjects in muslim countries."
-    token_count = count_tokens(gen_listening_3_conversation_4_people)["n_tokens"]
-    response = make_openai_instruct_call(
-        GPT_3_5_TURBO_INSTRUCT,
-        gen_listening_3_conversation_4_people,
+def generate_listening_2_monologue(topic: str):
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"monologue": "monologue"}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate a comprehensive monologue set in the social context '
+                    'of "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
+                                     'muslim countries.')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(
+        GPT_4_O,
+        messages,
        token_count,
-        None,
+        ["monologue"],
        GEN_QUESTION_TEMPERATURE
    )
-    conversation_json = '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}'
+    return response["monologue"]

-    parse_conversation = "Parse this conversation: '" + response + "' to the following json format: " + conversation_json

-    token_count = count_tokens(parse_conversation)["n_tokens"]
-    processed = make_openai_instruct_call(
-        GPT_3_5_TURBO_INSTRUCT,
-        parse_conversation,
+def generate_listening_3_conversation(topic: str):
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"conversation": [{"name": "name", "gender": "gender", "text": "text"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
+                    'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
+                                     'Make sure that the generated conversation does not contain forbidden subjects in '
+                                     'muslim countries.')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(
+        GPT_4_O,
+        messages,
        token_count,
-        ['conversation'],
+        ["conversation"],
        GEN_QUESTION_TEMPERATURE
    )

    name_to_voice = {}
-    for segment in processed['conversation']:
+    for segment in response['conversation']:
        if 'voice' not in segment:
            name = segment['name']
            if name in name_to_voice:
@@ -355,20 +386,35 @@ def generate_listening_3_conversation(topic: str):
                    voice = random.choice(FEMALE_NEURAL_VOICES)['Id']
                name_to_voice[name] = voice
            segment['voice'] = voice
-    return response, processed
+    return response


 def generate_listening_4_monologue(topic: str):
-    gen_listening_4_monologue_academic = "Generate a comprehensive monologue an academic subject of: '" + topic + "'. Make sure that the generated monologue does not contain forbidden subjects in muslim countries."
-    token_count = count_tokens(gen_listening_4_monologue_academic)["n_tokens"]
-    response = make_openai_instruct_call(
-        GPT_3_5_TURBO_INSTRUCT,
-        gen_listening_4_monologue_academic,
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"monologue": "monologue"}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate a comprehensive monologue on the academic subject '
+                    'of: "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
+                                     'muslim countries.')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(
+        GPT_4_O,
+        messages,
        token_count,
-        None,
+        ["monologue"],
        GEN_QUESTION_TEMPERATURE
    )
-    return response
+    return response["monologue"]


 def generate_reading_exercises(passage: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
@@ -392,7 +438,7 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
            else:
                exercises.append({})
                print("Did not add write blanks because it did not respect word limit")
-        elif req_exercise == "matchSentences":
+        elif req_exercise == "paragraphMatch":
            question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
            exercises.append(question)
            print("Added paragraph match: " + str(question))
@@ -478,27 +524,27 @@ def generate_listening_monologue_exercises(monologue: str, req_exercises: list,


 def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
-    gen_multiple_choice_for_text = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty multiple choice questions for this text: " \
-                                       "'" + text + "'\n" \
-                                                    "Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
-                                                    "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
-                                                    "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
-                                                    "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
-                                                    "\"solution\": \"C\", \"variant\": \"text\"}]"
-    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
-    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
-                                             None,
-                                             GEN_QUESTION_TEMPERATURE)
-    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
-                         "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
-                         "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
-                         "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
-                         "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
-    token_count = count_tokens(parse_mc_questions)["n_tokens"]
-    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
-                                         ["questions"],
-                                         GEN_QUESTION_TEMPERATURE)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
+                '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
+                '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
+                'energy sources?", "solution": "C", "variant": "text"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions '
+                                                                     'for this text:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    question = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
+                                GEN_QUESTION_TEMPERATURE)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
@@ -508,23 +554,34 @@ def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty)


 def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficulty):
-    gen_summary_for_text = "Summarize this text: " + text
-    token_count = count_tokens(gen_summary_for_text)["n_tokens"]
-    text_summary = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_summary_for_text, token_count,
-                                             None,
-                                             GEN_QUESTION_TEMPERATURE)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{ "summary": "summary", "words": ["word_1", "word_2"] }')
+        },
+        {
+            "role": "user",
+            "content": ('Summarize this text: "'+ text + '"')

-    gen_words_to_replace = "Select " + str(
-        quantity) + " " + difficulty + " difficulty words, it must be words and not expressions, from the summary and respond in this " \
-                                       "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
-    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
-    words_to_replace = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
-                                                 ["words"],
-                                                 GEN_QUESTION_TEMPERATURE)["words"]
+        },
+        {
+            "role": "user",
+            "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
+                                                                       'expressions, from the summary.')

-    replaced_summary = replace_first_occurrences_with_placeholders(text_summary, words_to_replace, start_id)
-    options_words = add_random_words_and_shuffle(words_to_replace, 5)
-    solutions = fillblanks_build_solutions_array(words_to_replace, start_id)
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    response = make_openai_call(GPT_4_O, messages, token_count,
+                                                 ["summary"],
+                                                 GEN_QUESTION_TEMPERATURE)
+
+    replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
+    options_words = add_random_words_and_shuffle(response["words"], 5)
+    solutions = fillblanks_build_solutions_array(response["words"], start_id)

    return {
        "allowRepetition": True,
@@ -540,20 +597,30 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu


 def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
-    gen_true_false_not_given = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty statements in JSON format (True, False, or Not Given) " \
-                                       "based on the provided text. Ensure that your statements " \
-                                       "accurately represent information or inferences from the " \
-                                       "text, and provide a variety of responses, including, at least one of each True, " \
-                                       "False, and Not Given, as appropriate, in the JSON structure " \
-                                       "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
-                                       "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
-                                       "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"prompts":[{"prompt": "statement_1", "solution": "true/false/not_given"}, '
+                '{"prompt": "statement_2", "solution": "true/false/not_given"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
+                                                                     'Ensure that your statements accurately represent '
+                                                                     'information or inferences from the text, and '
+                                                                     'provide a variety of responses, including, at '
+                                                                     'least one of each True, False, and Not Given, '
+                                                                     'as appropriate.\n\nReference text:\n\n ' + text)

-    token_count = count_tokens(gen_true_false_not_given)["n_tokens"]
-    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count,
-                                          ["prompts"],
-                                          GEN_QUESTION_TEMPERATURE)["prompts"]
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    questions = make_openai_call(GPT_4_O, messages, token_count,["prompts"],
+                                 GEN_QUESTION_TEMPERATURE)["prompts"]
    if len(questions) > quantity:
        questions = remove_excess_questions(questions, len(questions) - quantity)

@@ -569,16 +636,25 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic


 def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
-    gen_short_answer_questions = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers, " \
-                                       "must have maximum 3 words per answer, about this text: '" + text + "'. " \
-                                                                                                           "Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \
-                                                                                                           "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
+                                                                     'possible answers, must have maximum 3 words '
+                                                                     'per answer, about this text:\n"' + text + '"')

-    token_count = count_tokens(gen_short_answer_questions)["n_tokens"]
-    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_short_answer_questions, token_count,
-                                          ["questions"],
-                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
+                                 GEN_QUESTION_TEMPERATURE)["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
@@ -592,15 +668,24 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):

 def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    paragraphs = assign_letters_to_paragraphs(text)
-    heading_prompt = (
-            'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
-            '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
-            'The paragraphs are these: ' + str(paragraphs))
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs))

-    token_count = count_tokens(heading_prompt)["n_tokens"]
-    headings = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count,
-                                         ["headings"],
-                                         GEN_QUESTION_TEMPERATURE)["headings"]
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    headings = make_openai_call(GPT_4_O, messages, token_count,["headings"],
+                                GEN_QUESTION_TEMPERATURE)["headings"]

    options = []
    for i, paragraph in enumerate(paragraphs, start=0):
@@ -615,7 +700,7 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    for i, paragraph in enumerate(paragraphs, start=start_id):
        sentences.append({
            "id": i,
-            "sentence": paragraph["heading"]["heading"],
+            "sentence": paragraph["heading"],
            "solution": paragraph["letter"]
        })

@@ -632,28 +717,34 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
 def assign_letters_to_paragraphs(paragraphs):
    result = []
    letters = iter(string.ascii_uppercase)
-    for paragraph in paragraphs.split("\n"):
-        result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
+    for paragraph in paragraphs.split("\n\n"):
+        if has_x_words(paragraph, 10):
+            result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
    return result


 def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_multiple_choice_for_text = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty multiple choice questions of 4 options of for this conversation: " \
-                                       "'" + text + "'"
-    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
-    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
-                                             None,
-                                             GEN_QUESTION_TEMPERATURE)
-    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
-                         "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
-                         "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
-                         "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
-                         "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
-    token_count = count_tokens(parse_mc_questions)["n_tokens"]
-    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
-                                         ["questions"],
-                                         GEN_QUESTION_TEMPERATURE)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
+                '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
+                '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
+                'energy sources?", "solution": "C", "variant": "text"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
+                                                                     'of for this conversation:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
@@ -663,22 +754,28 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int


 def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_multiple_choice_for_text = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty multiple choice questions for this monologue: " \
-                                       "'" + text + "'"
-    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
-    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
-                                             None,
-                                             GEN_QUESTION_TEMPERATURE)
-    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
-                         "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
-                         "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
-                         "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
-                         "\"solution\": \"C\", \"variant\": \"text\"}]}. \nThe questions: '" + mc_questions + "'"
-    token_count = count_tokens(parse_mc_questions)["n_tokens"]
-    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
-                                         ["questions"],
-                                         GEN_QUESTION_TEMPERATURE)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"id": "9", "options": [{"id": "A", "text": "Economic benefits"}, {"id": "B", "text": '
+                '"Government regulations"}, {"id": "C", "text": "Concerns about climate change"}, {"id": "D", "text": '
+                '"Technological advancement"}], "prompt": "What is the main reason for the shift towards renewable '
+                'energy sources?", "solution": "C", "variant": "text"}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(
+                quantity) + ' ' + difficulty + ' difficulty multiple choice questions of 4 options '
+                                               'of for this monologue:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
@@ -688,17 +785,26 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s


 def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_questions = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
-                                       "(max 3 words per answer), about a monologue and" \
-                                       "respond in this JSON format: {\"questions\": [{\"question\": question, " \
-                                       "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
-                                       "The monologue is this: '" + text + "'"
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
+                                                                     'possible answers (max 3 words per answer), '
+                                                                     'about this conversation:\n"' + text + '"')

-    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
-    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
-                                          ["questions"],
-                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
+                                 GEN_QUESTION_TEMPERATURE)["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
@@ -711,17 +817,26 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti


 def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_questions = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
-                                       "(max 3 words per answer), about a monologue and" \
-                                       "respond in this JSON format: {\"questions\": [{\"question\": question, " \
-                                       "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
-                                       "The monologue is this: '" + text + "'"
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"questions": [{"question": question, "possible_answers": ["answer_1", "answer_2"]}]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty short answer questions, and the '
+                                                                     'possible answers (max 3 words per answer), '
+                                                                     'about this monologue:\n"' + text + '"')

-    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
-    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
-                                          ["questions"],
-                                          GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
+                                 GEN_QUESTION_TEMPERATURE)["questions"][:quantity]

    return {
        "id": str(uuid.uuid4()),
@@ -734,20 +849,43 @@ def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity:


 def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_notes = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty notes taken from the conversation and and respond in this " \
-                                       "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"notes": ["note_1", "note_2"]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
+                                                                     'conversation:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
+                                 GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
+

-    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
-    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
-                                          ["notes"],
-                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
-    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
-                           "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
-    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
-                                      ["words"],
-                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
+
+    word_messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
+        },
+        {
+            "role": "user",
+            "content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
+
+        }
+    ]
+    words = make_openai_call(GPT_4_O, word_messages, token_count,["words"],
+                             GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
@@ -760,20 +898,42 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:


 def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_notes = "Generate " + str(
-        quantity) + " " + difficulty + " difficulty notes taken from the monologue and respond in this " \
-                                       "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"notes": ["note_1", "note_2"]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty notes taken from this '
+                                                                     'monologue:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
+                                 GEN_QUESTION_TEMPERATURE)["notes"][:quantity]

-    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
-    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
-                                          ["notes"],
-                                          GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
-    gen_words_to_replace = "Select 1 word from each phrase in the list and respond in this " \
-                           "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The phrases are: " + formatted_phrases
-    words = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
-                                      ["words"],
-                                      GEN_QUESTION_TEMPERATURE)["words"][:quantity]
+
+    word_messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: {"words": ["word_1", "word_2"] }')
+        },
+        {
+            "role": "user",
+            "content": ('Select 1 word from each phrase in this list:\n"' + formatted_phrases + '"')
+
+        }
+    ]
+    words = make_openai_call(GPT_4_O, word_messages, token_count, ["words"],
+                             GEN_QUESTION_TEMPERATURE)["words"][:quantity]
    replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
    return {
        "id": str(uuid.uuid4()),
@@ -786,18 +946,25 @@ def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int


 def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_form = "Generate a form with " + str(
-        quantity) + " " + difficulty + " difficulty key-value pairs about the conversation. " \
-                                       "The conversation is this: '" + text + "'"
-    token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
-    form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
-                                     None,
-                                     GEN_QUESTION_TEMPERATURE)
-    parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'"
-    token_count = count_tokens(parse_form)["n_tokens"]
-    parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count,
-                                            ["form"],
-                                            GEN_QUESTION_TEMPERATURE)["form"][:quantity]
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"form": ["key: value", "key2: value"]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate a form with ' + str(
+                quantity) + ' ' + difficulty + ' difficulty key-value pairs about this conversation:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
+                                   GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
@@ -810,18 +977,25 @@ def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: i


 def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_form = "Generate a form with " + str(
-        quantity) + " " + difficulty + " difficulty key-value pairs about the monologue. " \
-                                       "The monologue is this: '" + text + "'"
-    token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
-    form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
-                                     None,
-                                     GEN_QUESTION_TEMPERATURE)
-    parse_form = "Parse the form to this JSON format: { \"form\": [\"string\", \"string\"] }. The form is this: '" + form + "'"
-    token_count = count_tokens(parse_form)["n_tokens"]
-    parsed_form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_form, token_count,
-                                            ["form"],
-                                            GEN_QUESTION_TEMPERATURE)["form"][:quantity]
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"form": ["key: value", "key2: value"]}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Generate a form with ' + str(
+                quantity) + ' ' + difficulty + ' difficulty key-value pairs about this monologue:\n"' + text + '"')
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    parsed_form = make_openai_call(GPT_4_O, messages, token_count, ["form"],
+                                   GEN_QUESTION_TEMPERATURE)["form"][:quantity]
    replaced_form, words = build_write_blanks_text_form(parsed_form, start_id)
    return {
        "id": str(uuid.uuid4()),
@@ -840,46 +1014,31 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
                    "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
                    "every question only has 1 correct answer."

-    messages = [{
-        "role": "user",
-        "content": gen_multiple_choice_for_text
-    }]
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
+                              '[{"id": "A", "text": '
+                              '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+                              '"Happy"}, {"id": "D", "text": "Jump"}], '
+                              '"prompt": "Which of the following is a conjunction?", '
+                              '"solution": "A", "variant": "text"}]}')
+        },
+        {
+            "role": "user",
+            "content": gen_multiple_choice_for_text
+        }
+    ]

-    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
-    mc_questions = make_openai_call(GPT_4_PREVIEW, messages, token_count,
-                                    None,
+    token_count = count_total_tokens(messages)
+    question = make_openai_call(GPT_4_O, messages, token_count,
+                                    ["questions"],
                                    GEN_QUESTION_TEMPERATURE)
-    if not '25' in mc_questions:
+
+    if len(question["questions"]) != 25:
        return gen_multiple_choice_level(quantity, start_id)
    else:
-        split_mc_questions = mc_questions.split('13')
-
-        parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
-                              '[{"id": "A", "text": '
-                              '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
-                              '"Happy"}, {"id": "D", "text": "Jump"}], '
-                              '"prompt": "Which of the following is a conjunction?", '
-                              '"solution": "A", "variant": "text"}]}\'\n '
-                              '\nThe questions: "' + split_mc_questions[0] + '"')
-        token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
-        question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
-                                             ["questions"],
-                                             GEN_QUESTION_TEMPERATURE)
-        print(question)
-        parse_mc_questions = ('Parse the questions into this json format: \n\'{"questions": [{"id": "9", "options": '
-                              '[{"id": "A", "text": '
-                              '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
-                              '"Happy"}, {"id": "D", "text": "Jump"}], '
-                              '"prompt": "Which of the following is a conjunction?", '
-                              '"solution": "A", "variant": "text"}]}\'\n '
-                              '\nThe questions: "' + '13' + split_mc_questions[1] + '"')
-        token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
-        question_2 = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
-                                               ["questions"],
-                                               GEN_QUESTION_TEMPERATURE)
-        print(question_2)
-        question["questions"].extend(question_2["questions"])
-
        all_exams = get_all("level")
        seen_keys = set()
        for i in range(len(question["questions"])):
@@ -916,23 +1075,37 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k


 def generate_single_mc_level_question():
-    gen_multiple_choice_for_text = "Generate 1 multiple choice question of 4 options for an english level exam, it can " \
-                                   "be easy, intermediate or advanced."
-    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
-    mc_question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
-                                            None,
-                                            GEN_QUESTION_TEMPERATURE)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+                '"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
+                '"solution": "A", "variant": "text"}')
+        },
+        {
+            "role": "user",
+            "content": ('Generate 1 multiple choice question of 4 options for an english level exam, it can be easy, '
+                        'intermediate or advanced.')

-    parse_mc_question = ('Parse the question into this json format: {"id": "9", "options": '
-                         '[{"id": "A", "text": '
-                         '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
-                         '"Happy"}, {"id": "D", "text": "Jump"}], '
-                         '"prompt": "Which of the following is a conjunction?", '
-                         '"solution": "A", "variant": "text"}. '
-                         '\nThe questions: "' + mc_question + '"')
+        }
+    ]
+    token_count = count_total_tokens(messages)
+
+    question = make_openai_call(GPT_4_O, messages, token_count,["options"],
+                                   GEN_QUESTION_TEMPERATURE)

-    token_count = count_tokens(parse_mc_question, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
-    question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_question, token_count,
-                                         ["options"],
-                                         GEN_QUESTION_TEMPERATURE)
    return question
+
+
+def parse_conversation(conversation_data):
+    conversation_list = conversation_data.get('conversation', [])
+    readable_text = []
+
+    for message in conversation_list:
+        name = message.get('name', 'Unknown')
+        text = message.get('text', '')
+        readable_text.append(f"{name}: {text}")
+
+    return "\n".join(readable_text)