From a1ee7e47dacfe96d58cab3151736d4dd532b4601 Mon Sep 17 00:00:00 2001
From: Cristiano Ferreira <cristiano.ferreira@ecrop.dev>
Date: Sun, 28 Jul 2024 14:33:08 +0100
Subject: [PATCH] Custom level can now generate large numbers of multiple-choice questions.

---
 app.py              |  53 ++++++++++++++++---
 helper/exercises.py | 124 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 164 insertions(+), 13 deletions(-)

diff --git a/app.py b/app.py
index a0b26be..1d07c37 100644
--- a/app.py
+++ b/app.py
@@ -361,9 +361,11 @@ def get_writing_task_1_general_question():
     except Exception as e:
         return str(e)
 
+
 def add_newline_before_hyphen(s):
     return s.replace(" -", "\n-")
 
+
 @app.route('/writing_task2', methods=['POST'])
 @jwt_required()
 def grade_writing_task_2():
@@ -501,7 +503,7 @@ def get_writing_task_2_general_question():
                 "content": (
                         'Craft a comprehensive question of ' + difficulty + 'difficulty like the ones for IELTS Writing Task 2 General Training that directs the candidate '
                                                                             'to delve into an in-depth analysis of contrasting perspectives on the topic of "' + topic + '". '
-                                                                            'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints.')
+                                                                                                                                                                         'The candidate should be asked to discuss the strengths and weaknesses of both viewpoints.')
             },
             {
                 "role": "user",
@@ -1535,18 +1537,53 @@ def get_custom_level():
         exercise_mc_qty = int(request.args.get('exercise_' + str(i) + '_mc_qty', -1))
 
         if exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_4.value:
-            response["exercises"]["exercise_" + str(i)] = generate_level_mc(exercise_id, exercise_qty)
+            response["exercises"]["exercise_" + str(i)] = {}
+            response["exercises"]["exercise_" + str(i)]["questions"] = []
             response["exercises"]["exercise_" + str(i)]["type"] = "multipleChoice"
-            exercise_id = exercise_id + exercise_qty
+            while exercise_qty > 0:
+                # Work in batches of at most 15 questions per generator call.
+                qty = min(exercise_qty, 15)
+
+                # Questions collected so far are passed in as all_questions.
+                response["exercises"]["exercise_" + str(i)]["questions"].extend(
+                    generate_level_mc(exercise_id, qty,
+                                      response["exercises"]["exercise_" + str(i)]["questions"])["questions"])
+                # Move the id counter and the remaining count past this batch.
+                exercise_id += qty
+                exercise_qty -= qty
+
         elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_BLANK_SPACE.value:
-            response["exercises"]["exercise_" + str(i)] = gen_multiple_choice_blank_space_utas(exercise_qty,
-                                                                                               exercise_id)
+            response["exercises"]["exercise_" + str(i)] = {}
+            response["exercises"]["exercise_" + str(i)]["questions"] = []
             response["exercises"]["exercise_" + str(i)]["type"] = "multipleChoice"
-            exercise_id = exercise_id + exercise_qty
+            while exercise_qty > 0:
+                # Work in batches of at most 15 questions per generator call.
+                qty = min(exercise_qty, 15)
+
+                # Questions collected so far are passed in as all_exams.
+                response["exercises"]["exercise_" + str(i)]["questions"].extend(
+                    gen_multiple_choice_blank_space_utas(qty, exercise_id,
+                    response["exercises"]["exercise_" + str(i)]["questions"])["questions"])
+                # Advance ids by the batch just generated (qty), not by the remaining total.
+                exercise_id = exercise_id + qty
+                exercise_qty = exercise_qty - qty
+
         elif exercise_type == CustomLevelExerciseTypes.MULTIPLE_CHOICE_UNDERLINED.value:
-            response["exercises"]["exercise_" + str(i)] = gen_multiple_choice_underlined_utas(exercise_qty, exercise_id)
+            response["exercises"]["exercise_" + str(i)] = {}
+            response["exercises"]["exercise_" + str(i)]["questions"] = []
             response["exercises"]["exercise_" + str(i)]["type"] = "multipleChoice"
-            exercise_id = exercise_id + exercise_qty
+            while exercise_qty > 0:
+                # Work in batches of at most 15 questions per generator call.
+                qty = min(exercise_qty, 15)
+
+                # Questions collected so far are passed in as all_exams.
+                response["exercises"]["exercise_" + str(i)]["questions"].extend(
+                    gen_multiple_choice_underlined_utas(qty, exercise_id,
+                    response["exercises"]["exercise_" + str(i)]["questions"])["questions"])
+                # Advance ids by the batch just generated (qty), not by the remaining total.
+                exercise_id = exercise_id + qty
+                exercise_qty = exercise_qty - qty
+
         elif exercise_type == CustomLevelExerciseTypes.BLANK_SPACE_TEXT.value:
             response["exercises"]["exercise_" + str(i)] = gen_blank_space_text_utas(exercise_qty, exercise_id,
                                                                                     exercise_text_size)
diff --git a/helper/exercises.py b/helper/exercises.py
index 8eddcb3..53321c4 100644
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -1297,6 +1297,48 @@ def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, s
     return current_exercise, seen_keys
 
 
+def replace_blank_space_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
+    """Return (exercise, seen_keys), swapping in a fresh blank-space question while a duplicate is found.
+
+    Entries of all_exams may be exam dicts holding a "questions" list or bare question dicts.
+    seen_keys stores (prompt, sorted option texts) per accepted question; current_exam is only passed on.
+    """
+    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+    if key in seen_keys:
+        # Recurse into this helper so the replacement is checked (and regenerated) the same way.
+        return replace_blank_space_exercise_if_exists_utas(
+            all_exams, generate_single_mc_blank_space_level_question(), current_exam, seen_keys)
+    seen_keys.add(key)
+    for exam in all_exams:
+        for exercise in exam.get("questions", [exam]):  # a bare question dict is compared directly
+            if exercise["prompt"] == current_exercise["prompt"] and any(
+                    exercise["options"][0]["text"] == option["text"] for option in current_exercise["options"]):
+                return replace_blank_space_exercise_if_exists_utas(
+                    all_exams, generate_single_mc_blank_space_level_question(), current_exam, seen_keys)
+    return current_exercise, seen_keys
+
+
+def replace_underlined_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
+    """Return (exercise, seen_keys), swapping in a fresh underlined question while a duplicate is found.
+
+    Entries of all_exams may be exam dicts holding a "questions" list or bare question dicts.
+    seen_keys stores (prompt, sorted option texts) per accepted question; current_exam is only passed on.
+    """
+    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
+    if key in seen_keys:
+        # Recurse into this helper so the replacement is checked (and regenerated) the same way.
+        return replace_underlined_exercise_if_exists_utas(
+            all_exams, generate_single_mc_underlined_level_question(), current_exam, seen_keys)
+    seen_keys.add(key)
+    for exam in all_exams:
+        for exercise in exam.get("questions", [exam]):  # a bare question dict is compared directly
+            if exercise["prompt"] == current_exercise["prompt"] and any(
+                    exercise["options"][0]["text"] == option["text"] for option in current_exercise["options"]):
+                return replace_underlined_exercise_if_exists_utas(
+                    all_exams, generate_single_mc_underlined_level_question(), current_exam, seen_keys)
+    return current_exercise, seen_keys
+
+
 def generate_single_mc_level_question():
     messages = [
         {
@@ -1322,6 +1364,64 @@ def generate_single_mc_level_question():
     return question
 
 
+def generate_single_mc_blank_space_level_question():
+    """Request a single blank-space multiple choice question via make_openai_call and return its result."""
+    system_prompt = (
+        'You are a helpful assistant designed to output JSON on this format: '
+        '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+        '"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
+        '"solution": "A", "variant": "text"}'
+    )
+    user_prompt = (
+        'Generate 1 multiple choice blank space question of 4 options for an english level exam, it can be easy, '
+        'intermediate or advanced.'
+    )
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    # NOTE(review): ["options"] is presumably the list of keys the reply must contain - confirm.
+    return make_openai_call(
+        GPT_4_O,
+        messages,
+        count_total_tokens(messages),
+        ["options"],
+        GEN_QUESTION_TEMPERATURE)
+
+
+def generate_single_mc_underlined_level_question():
+    """Request one underlined-error multiple choice question via make_openai_call and return its result."""
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                'You are a helpful assistant designed to output JSON on this format: '
+                '{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+                '"Happy"}, {"id": "D", "text": "Jump"}], "prompt": "Which of the following is a conjunction?", '
+                '"solution": "A", "variant": "text"}')
+        },
+        {
+            "role": "user",
+            # Ask for the underlined type here; the follow-up message describes that same format.
+            "content": ('Generate 1 multiple choice underlined question of 4 options for an english level exam, it can be easy, '
+                        'intermediate or advanced.')
+        },
+        {
+            "role": "user",
+            "content": (
+                'The type of multiple choice is the prompt has wrong words or group of words and the options are to '
+                'find the wrong word or group of words that are underlined in the prompt. \nExample:\n'
+                'Prompt: "I <u>complain</u> about my boss <u>all the time</u>, but my colleagues <u>thinks</u> the boss <u>is</u> nice."\n'
+                'Options:\na: "complain"\nb: "all the time"\nc: "thinks"\nd: "is"')
+        }
+    ]
+    token_count = count_total_tokens(messages)
+    question = make_openai_call(GPT_4_O, messages, token_count, ["options"],
+                                GEN_QUESTION_TEMPERATURE)
+
+    return question
+
+
 def parse_conversation(conversation_data):
     conversation_list = conversation_data.get('conversation', [])
     readable_text = []
@@ -1364,12 +1464,12 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
                                 GEN_QUESTION_TEMPERATURE)
 
     if len(question["questions"]) != quantity:
-        return gen_multiple_choice_level(quantity, start_id)
+        return gen_multiple_choice_blank_space_utas(quantity, start_id, all_exams)  # keep all_exams so the retry still de-duplicates
     else:
         if all_exams is not None:
             seen_keys = set()
             for i in range(len(question["questions"])):
-                question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_exams, question["questions"][i],
+                question["questions"][i], seen_keys = replace_blank_space_exercise_if_exists_utas(all_exams, question["questions"][i],
                                                                                       question,
                                                                                       seen_keys)
         response = fix_exercise_ids(question, start_id)
@@ -1377,7 +1477,7 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
         return response
 
 
-def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
+def gen_multiple_choice_underlined_utas(quantity: int, start_id: int, all_exams=None):
     json_format = {
         "questions": [
             {
@@ -1441,8 +1541,16 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
                                 GEN_QUESTION_TEMPERATURE)
 
     if len(question["questions"]) != quantity:
-        return gen_multiple_choice_level(quantity, start_id)
+        return gen_multiple_choice_underlined_utas(quantity, start_id, all_exams)  # keep all_exams so the retry still de-duplicates
     else:
+        if all_exams is not None:
+            seen_keys = set()
+            for i in range(len(question["questions"])):
+                question["questions"][i], seen_keys = replace_underlined_exercise_if_exists_utas(all_exams,
+                                                                                                  question["questions"][
+                                                                                                      i],
+                                                                                                  question,
+                                                                                                  seen_keys)
         response = fix_exercise_ids(question, start_id)
         response["questions"] = randomize_mc_options_order(response["questions"])
         return response
@@ -1603,7 +1711,7 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
         return response
 
 
-def generate_level_mc(start_id: int, quantity: int):
+def generate_level_mc(start_id: int, quantity: int, all_questions=None):
     json_format = {
         "questions": [
             {
@@ -1654,6 +1762,12 @@ def generate_level_mc(start_id: int, quantity: int):
     question = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
                                 GEN_QUESTION_TEMPERATURE)
 
+    if all_questions is not None:
+        seen_keys = set()
+        for i in range(len(question["questions"])):
+            question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_questions, question["questions"][i],
+                                                                                  question,
+                                                                                  seen_keys)
     response = fix_exercise_ids(question, start_id)
     response["questions"] = randomize_mc_options_order(response["questions"])
     return response