Add regular IELTS modules to custom level.

2024-08-15 13:58:07 +01:00
parent beccf8b501
commit d68617f33b
2 changed files with 573 additions and 299 deletions
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -15,19 +15,19 @@ from helper.speech_to_text_helper import has_x_words
 nltk.download('words')


-def gen_reading_passage_1(topic, req_exercises, difficulty):
+def gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=1):
    if (len(req_exercises) == 0):
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

-    number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))

    passage = generate_reading_passage_1_text(topic)
    if passage == "":
-        return gen_reading_passage_1(topic, req_exercises, difficulty)
-    start_id = 1
+        return gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
    if contains_empty_dict(exercises):
-        return gen_reading_passage_1(topic, req_exercises, difficulty)
+        return gen_reading_passage_1(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
    return {
        "exercises": exercises,
        "text": {
@@ -38,19 +38,19 @@ def gen_reading_passage_1(topic, req_exercises, difficulty):
    }


-def gen_reading_passage_2(topic, req_exercises, difficulty):
+def gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=14):
    if (len(req_exercises) == 0):
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

-    number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))

    passage = generate_reading_passage_2_text(topic)
    if passage == "":
-        return gen_reading_passage_2(topic, req_exercises, difficulty)
-    start_id = 14
+        return gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
    if contains_empty_dict(exercises):
-        return gen_reading_passage_2(topic, req_exercises, difficulty)
+        return gen_reading_passage_2(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
    return {
        "exercises": exercises,
        "text": {
@@ -61,19 +61,19 @@ def gen_reading_passage_2(topic, req_exercises, difficulty):
    }


-def gen_reading_passage_3(topic, req_exercises, difficulty):
+def gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=27):
    if (len(req_exercises) == 0):
        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)

-    number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))

    passage = generate_reading_passage_3_text(topic)
    if passage == "":
-        return gen_reading_passage_3(topic, req_exercises, difficulty)
-    start_id = 27
+        return gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
    if contains_empty_dict(exercises):
-        return gen_reading_passage_3(topic, req_exercises, difficulty)
+        return gen_reading_passage_3(topic, difficulty, req_exercises, number_of_exercises_q, start_id)
    return {
        "exercises": exercises,
        "text": {
@@ -865,7 +865,8 @@ def gen_idea_match_exercise(text: str, quantity: int, start_id):
        {
            "role": "user",
            "content": (
-                    'From the text extract ' + str(quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))
+                    'From the text extract ' + str(
+                quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))

        }
    ]
@@ -882,6 +883,7 @@ def gen_idea_match_exercise(text: str, quantity: int, start_id):
        "type": "matchSentences"
    }

+
 def build_options(ideas):
    options = []
    letters = iter(string.ascii_uppercase)
@@ -892,6 +894,7 @@ def build_options(ideas):
        })
    return options

+
 def build_sentences(ideas, start_id):
    sentences = []
    letters = iter(string.ascii_uppercase)
@@ -906,6 +909,7 @@ def build_sentences(ideas, start_id):
        sentence["id"] = i
    return sentences

+
 def assign_letters_to_paragraphs(paragraphs):
    result = []
    letters = iter(string.ascii_uppercase)
@@ -1272,7 +1276,8 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
                            current_exercise["options"])
                        for exercise in exercise_dict.get("exercises", [])[0]["questions"]
                ):
-                    return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
+                    return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam,
+                                                      seen_keys)
    return current_exercise, seen_keys


@@ -1302,7 +1307,8 @@ def replace_blank_space_exercise_if_exists_utas(all_exams, current_exercise, cur
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
    # Check if the key is in the set
    if key in seen_keys:
-        return replace_exercise_if_exists_utas(all_exams, generate_single_mc_blank_space_level_question(), current_exam, seen_keys)
+        return replace_exercise_if_exists_utas(all_exams, generate_single_mc_blank_space_level_question(), current_exam,
+                                               seen_keys)
    else:
        seen_keys.add(key)

@@ -1313,7 +1319,8 @@ def replace_blank_space_exercise_if_exists_utas(all_exams, current_exercise, cur
                    current_exercise["options"])
                for exercise in exam.get("questions", [])
        ):
-            return replace_exercise_if_exists_utas(all_exams, generate_single_mc_blank_space_level_question(), current_exam,
+            return replace_exercise_if_exists_utas(all_exams, generate_single_mc_blank_space_level_question(),
+                                                   current_exam,
                                                   seen_keys)
    return current_exercise, seen_keys

@@ -1323,7 +1330,8 @@ def replace_underlined_exercise_if_exists_utas(all_exams, current_exercise, curr
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
    # Check if the key is in the set
    if key in seen_keys:
-        return replace_exercise_if_exists_utas(all_exams, generate_single_mc_underlined_level_question(), current_exam, seen_keys)
+        return replace_exercise_if_exists_utas(all_exams, generate_single_mc_underlined_level_question(), current_exam,
+                                               seen_keys)
    else:
        seen_keys.add(key)

@@ -1334,7 +1342,8 @@ def replace_underlined_exercise_if_exists_utas(all_exams, current_exercise, curr
                    current_exercise["options"])
                for exercise in exam.get("questions", [])
        ):
-            return replace_exercise_if_exists_utas(all_exams, generate_single_mc_underlined_level_question(), current_exam,
+            return replace_exercise_if_exists_utas(all_exams, generate_single_mc_underlined_level_question(),
+                                                   current_exam,
                                                   seen_keys)
    return current_exercise, seen_keys

@@ -1376,8 +1385,8 @@ def generate_single_mc_blank_space_level_question():
        },
        {
            "role": "user",
-            "content": ('Generate 1 multiple choice blank space question of 4 options for an english level exam, it can be easy, '
-                        'intermediate or advanced.')
+            "content": ('Generate 1 multiple choice blank space question of 4 options for an english level exam, '
+                        'it can be easy, intermediate or advanced.')

        }
    ]
@@ -1401,8 +1410,8 @@ def generate_single_mc_underlined_level_question():
        },
        {
            "role": "user",
-            "content": ('Generate 1 multiple choice blank space question of 4 options for an english level exam, it can be easy, '
-                        'intermediate or advanced.')
+            "content": ('Generate 1 multiple choice blank space question of 4 options for an english level exam, '
+                        'it can be easy, intermediate or advanced.')

        },
        {
@@ -1469,9 +1478,9 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
        if all_exams is not None:
            seen_keys = set()
            for i in range(len(question["questions"])):
-                question["questions"][i], seen_keys = replace_blank_space_exercise_if_exists_utas(all_exams, question["questions"][i],
-                                                                                      question,
-                                                                                      seen_keys)
+                question["questions"][i], seen_keys = (
+                    replace_blank_space_exercise_if_exists_utas(all_exams, question["questions"][i], question,
+                                                                seen_keys))
        response = fix_exercise_ids(question, start_id)
        response["questions"] = randomize_mc_options_order(response["questions"])
        return response
@@ -1546,11 +1555,9 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=
        if all_exams is not None:
            seen_keys = set()
            for i in range(len(question["questions"])):
-                question["questions"][i], seen_keys = replace_underlined_exercise_if_exists_utas(all_exams,
-                                                                                                  question["questions"][
-                                                                                                      i],
-                                                                                                  question,
-                                                                                                  seen_keys)
+                question["questions"][i], seen_keys = (
+                    replace_underlined_exercise_if_exists_utas(all_exams, question["questions"][i], question,
+                                                               seen_keys))
        response = fix_exercise_ids(question, start_id)
        response["questions"] = randomize_mc_options_order(response["questions"])
        return response
@@ -1765,7 +1772,8 @@ def generate_level_mc(start_id: int, quantity: int, all_questions=None):
    if all_questions is not None:
        seen_keys = set()
        for i in range(len(question["questions"])):
-            question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_questions, question["questions"][i],
+            question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_questions,
+                                                                                  question["questions"][i],
                                                                                  question,
                                                                                  seen_keys)
    response = fix_exercise_ids(question, start_id)
@@ -1791,3 +1799,293 @@ def randomize_mc_options_order(questions):
                question['solution'] = option['id']

    return questions
+
+
+def gen_writing_task_1(topic, difficulty):
+    messages = [
+        {
+            "role": "system",
+            "content": ('You are a helpful assistant designed to output JSON on this format: '
+                        '{"prompt": "prompt content"}')
+        },
+        {
+            "role": "user",
+            "content": ('Craft a prompt for an IELTS Writing Task 1 General Training exercise that instructs the '
+                        'student to compose a letter. The prompt should present a specific scenario or situation, '
+                        'based on the topic of "' + topic + '", requiring the student to provide information, '
+                                                            'advice, or instructions within the letter. '
+                                                            'Make sure that the generated prompt is '
+                                                            'of ' + difficulty + ' difficulty and does not contain '
+                                                                                 'forbidden subjects in muslim '
+                                                                                 'countries.')
+        },
+        {
+            "role": "user",
+            "content": 'The prompt should end with "In the letter you should" followed by 3 bullet points of what '
+                       'the answer should include.'
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, "prompt",
+                                GEN_QUESTION_TEMPERATURE)
+    return {
+        "question": add_newline_before_hyphen(response["prompt"].strip()),
+        "difficulty": difficulty,
+        "topic": topic
+    }
+
+
+def add_newline_before_hyphen(s):
+    return s.replace(" -", "\n-")
+
+
+def gen_writing_task_2(topic, difficulty):
+    messages = [
+        {
+            "role": "system",
+            "content": ('You are a helpful assistant designed to output JSON on this format: '
+                        '{"prompt": "prompt content"}')
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Craft a comprehensive question of ' + difficulty + ' difficulty like the ones for IELTS Writing '
+                                                                        'Task 2 General Training that directs the '
+                                                                        'candidate '
+                                                                        'to delve into an in-depth analysis of '
+                                                                        'contrasting perspectives on the topic '
+                                                                        'of "' + topic + '". The candidate should be '
+                                                                                         'asked to discuss the '
+                                                                                         'strengths and weaknesses of '
+                                                                                         'both viewpoints.')
+        },
+        {
+            "role": "user",
+            "content": 'The question should lead to an answer with either "theories", "complicated information" or '
+                       'be "very descriptive" on the topic.'
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_4_O, messages, token_count, "prompt", GEN_QUESTION_TEMPERATURE)
+    return {
+        "question": response["prompt"].strip(),
+        "difficulty": difficulty,
+        "topic": topic
+    }
+
+
+def gen_speaking_part_1(first_topic: str, second_topic: str, difficulty):
+    json_format = {
+        "first_topic": "topic 1",
+        "second_topic": "topic 2",
+        "questions": [
+            "Introductory question about the first topic, starting the topic with 'Let's talk about x' and then the "
+            "question.",
+            "Follow up question about the first topic",
+            "Follow up question about the first topic",
+            "Question about second topic",
+            "Follow up question about the second topic",
+        ]
+    }
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                    'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Craft 5 simple and single questions of easy difficulty for IELTS Speaking Part 1 '
+                    'that encourages candidates to delve deeply into '
+                    'personal experiences, preferences, or insights on the topic '
+                    'of "' + first_topic + '" and the topic of "' + second_topic + '". '
+                                                                                   'Make sure that the generated '
+                                                                                   'question '
+                                                                                   'does not contain forbidden '
+                                                                                   'subjects in '
+                                                                                   'muslim countries.')
+        },
+        {
+            "role": "user",
+            "content": 'The questions should lead to the usage of 4 verb tenses (present perfect, present, '
+                       'past and future).'
+        },
+        {
+            "role": "user",
+            "content": 'They must be 1 single question each and not be double-barreled questions.'
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_4_O, messages, token_count, ["first_topic"],
+                                GEN_QUESTION_TEMPERATURE)
+    response["type"] = 1
+    response["difficulty"] = difficulty
+    return response
+
+
+def gen_speaking_part_2(topic: str, difficulty):
+    json_format = {
+        "topic": "topic",
+        "question": "question",
+        "prompts": [
+            "prompt_1",
+            "prompt_2",
+            "prompt_3"
+        ],
+        "suffix": "And explain why..."
+    }
+
+    messages = [
+        {
+            "role": "system",
+            "content": 'You are a helpful assistant designed to output JSON on this format: ' + str(json_format)
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Create a question of medium difficulty for IELTS Speaking Part 2 '
+                    'that encourages candidates to narrate a '
+                    'personal experience or story related to the topic '
+                    'of "' + topic + '". Include 3 prompts that '
+                                     'guide the candidate to describe '
+                                     'specific aspects of the experience, '
+                                     'such as details about the situation, '
+                                     'their actions, and the reasons it left a '
+                                     'lasting impression. Make sure that the '
+                                     'generated question does not contain '
+                                     'forbidden subjects in muslim countries.')
+        },
+        {
+            "role": "user",
+            "content": 'The prompts must not be questions. Also include a suffix like the ones in the IELTS exams '
+                       'that start with "And explain why".'
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
+    response["type"] = 2
+    response["difficulty"] = difficulty
+    response["topic"] = topic
+    return response
+
+
+def gen_speaking_part_3(topic: str, difficulty):
+    json_format = {
+        "topic": "topic",
+        "questions": [
+            "Introductory question about the topic.",
+            "Follow up question about the topic",
+            "Follow up question about the topic",
+            "Follow up question about the topic",
+            "Follow up question about the topic"
+        ]
+    }
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                    'You are a helpful assistant designed to output JSON on this format: ' + str(json_format))
+        },
+        {
+            "role": "user",
+            "content": (
+                    'Formulate a set of 5 single questions of hard difficulty for IELTS Speaking Part 3 that encourage candidates to engage in a '
+                    'meaningful discussion on the topic of "' + topic + '". Provide inquiries, ensuring '
+                                                                        'they explore various aspects, perspectives, and implications related to the topic. '
+                                                                        'Make sure that the generated question does not contain forbidden subjects in muslim countries.')
+
+        },
+        {
+            "role": "user",
+            "content": 'They must be 1 single question each and not be double-barreled questions.'
+
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_4_O, messages, token_count, GEN_FIELDS, GEN_QUESTION_TEMPERATURE)
+    # Remove the numbers from the questions only if the string starts with a number
+    response["questions"] = [re.sub(r"^\d+\.\s*", "", question) if re.match(r"^\d+\.", question) else question for
+                             question in response["questions"]]
+    response["type"] = 3
+    response["difficulty"] = difficulty
+    response["topic"] = topic
+    return response
+
+
+def gen_listening_section_1(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=1):
+    if (len(req_exercises) == 0):
+        req_exercises = random.sample(LISTENING_1_EXERCISE_TYPES, 1)
+
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_1_EXERCISES, len(req_exercises))
+
+    processed_conversation = generate_listening_1_conversation(topic)
+
+    exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation),
+                                                          req_exercises,
+                                                          number_of_exercises_q,
+                                                          start_id, difficulty)
+    return {
+        "exercises": exercises,
+        "text": processed_conversation,
+        "difficulty": difficulty
+    }
+
+
+def gen_listening_section_2(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=11):
+    if (len(req_exercises) == 0):
+        req_exercises = random.sample(LISTENING_2_EXERCISE_TYPES, 2)
+
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_2_EXERCISES, len(req_exercises))
+
+    monologue = generate_listening_2_monologue(topic)
+
+    exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
+                                                       start_id, difficulty)
+    return {
+        "exercises": exercises,
+        "text": monologue,
+        "difficulty": difficulty
+    }
+
+
+def gen_listening_section_3(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=21):
+    if (len(req_exercises) == 0):
+        req_exercises = random.sample(LISTENING_3_EXERCISE_TYPES, 1)
+
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_3_EXERCISES, len(req_exercises))
+
+    processed_conversation = generate_listening_3_conversation(topic)
+
+    exercises = generate_listening_conversation_exercises(parse_conversation(processed_conversation), req_exercises,
+                                                          number_of_exercises_q,
+                                                          start_id, difficulty)
+    return {
+        "exercises": exercises,
+        "text": processed_conversation,
+        "difficulty": difficulty
+    }
+
+
+def gen_listening_section_4(topic, difficulty, req_exercises, number_of_exercises_q=queue.Queue(), start_id=31):
+    if (len(req_exercises) == 0):
+        req_exercises = random.sample(LISTENING_EXERCISE_TYPES, 2)
+
+    if (number_of_exercises_q.empty()):
+        number_of_exercises_q = divide_number_into_parts(TOTAL_LISTENING_SECTION_4_EXERCISES, len(req_exercises))
+
+    monologue = generate_listening_4_monologue(topic)
+
+    exercises = generate_listening_monologue_exercises(str(monologue), req_exercises, number_of_exercises_q,
+                                                       start_id, difficulty)
+    return {
+        "exercises": exercises,
+        "text": monologue,
+        "difficulty": difficulty
+    }