From 97f30ea8814d54d311e877f65a9d3bd7c9008d05 Mon Sep 17 00:00:00 2001
From: Cristiano Ferreira <cristiano.ferreira@flowinn.biz>
Date: Sat, 3 Feb 2024 15:58:51 +0000
Subject: [PATCH] Check for duplicate exercises in level exam generation.

---
 helper/exercises.py       | 74 +++++++++++++++++++++++++++++----------
 helper/firebase_helper.py |  9 +++++
 2 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/helper/exercises.py b/helper/exercises.py
index 28adc11..2f03a02 100644
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -5,6 +5,7 @@ import re
 import uuid
 
 from helper.api_messages import QuestionType
+from helper.firebase_helper import get_all
 from helper.openai_interface import make_openai_instruct_call
 from helper.token_counter import count_tokens
 from helper.constants import *
@@ -670,44 +671,79 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
 
 def gen_multiple_choice_level(quantity: int, start_id=1):
     gen_multiple_choice_for_text = "Generate " + str(
-        quantity) + " multiple choice questions of 4 options for an english level exam, 7 easy questions, 8 intermediate " \
-                    "questions and 10 advanced questions. Ensure that the questions cover a range of topics such as " \
+        quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
+                    "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
                     "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation."
     token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
     mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                              None,
                                              GEN_QUESTION_TEMPERATURE)
-
     split_mc_questions = mc_questions.split('13')
 
-    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": " \
-                         "[{\"id\": \"A\", \"text\": " \
-                         "\"And\"}, {\"id\": \"B\", \"text\": \"Cat\"}, {\"id\": \"C\", \"text\": " \
-                         "\"Happy\"}, {\"id\": \"D\", \"text\": \"Jump\"}], " \
-                         "\"prompt\": \"Which of the following is a conjunction?\", " \
-                         "\"solution\": \"A\", \"variant\": \"text\"}]}." \
-                         " \nThe questions: '" + split_mc_questions[0] + "'"
-
+    parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": '
+                         '[{"id": "A", "text": '
+                         '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+                         '"Happy"}, {"id": "D", "text": "Jump"}], '
+                         '"prompt": "Which of the following is a conjunction?", '
+                         '"solution": "A", "variant": "text"}]}. '
+                         '\nThe questions: "' + split_mc_questions[0] + '"')
     token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
     question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                          ["questions"],
                                          GEN_QUESTION_TEMPERATURE)
-
-    parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": " \
-                         "[{\"id\": \"A\", \"text\": " \
-                         "\"And\"}, {\"id\": \"B\", \"text\": \"Cat\"}, {\"id\": \"C\", \"text\": " \
-                         "\"Happy\"}, {\"id\": \"D\", \"text\": \"Jump\"}], " \
-                         "\"prompt\": \"Which of the following is a conjunction?\", " \
-                         "\"solution\": \"A\", \"variant\": \"text\"}]}. " \
-                         "\nThe questions: '" + '13' + split_mc_questions[1] + "'"
+    parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": '
+                         '[{"id": "A", "text": '
+                         '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
+                         '"Happy"}, {"id": "D", "text": "Jump"}], '
+                         '"prompt": "Which of the following is a conjunction?", '
+                         '"solution": "A", "variant": "text"}]}. '
+                         '\nThe questions: "' + '13' + split_mc_questions[1] + '"')
     token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
     question_2 = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
                                            ["questions"],
                                            GEN_QUESTION_TEMPERATURE)
     question["questions"].extend(question_2["questions"])
+    # Swap out any generated question that already appears in a stored "level" exam.
+    all_exams = get_all("level")
+    for i in range(len(question["questions"])):
+        question["questions"][i] = replace_exercise_if_exists(all_exams, question["questions"][i])
     return {
         "id": str(uuid.uuid4()),
         "prompt": "Select the appropriate option.",
         "questions": fix_exercise_ids(question, start_id)["questions"],
         "type": "multipleChoice",
     }
+
+def replace_exercise_if_exists(all_exams, current_exercise, max_attempts=5):
+    """Return current_exercise, regenerated up to max_attempts times while a stored exam already contains it."""
+    stored = [exercise for exam in all_exams for exercise in exam.to_dict().get("exercises", [])]  # flatten once
+    for _ in range(max_attempts):  # bounded: a model that keeps repeating itself must not retry forever
+        if not any(exercise["prompt"] == current_exercise["prompt"] and
+                   any(exercise["options"][0]["text"] == option["text"] for option in current_exercise["options"])
+                   for exercise in stored):
+            return current_exercise
+        current_exercise = generate_single_mc_level_question()
+    return current_exercise  # attempts exhausted: best effort, this last candidate was not re-checked
+
+
+def generate_single_mc_level_question():
+    """Ask the instruct model for one multiple choice level-exam question.
+
+    Two calls are made: the first writes a question as free text, the second
+    asks the model to parse that text into the single-question JSON layout.
+    """
+    generation_prompt = ("Generate 1 multiple choice question of 4 options for an english level exam, it can "
+                         "be easy, intermediate or advanced.")
+    generation_tokens = count_tokens(generation_prompt)["n_tokens"] - 300
+    raw_question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, generation_prompt, generation_tokens, None,
+                                             GEN_QUESTION_TEMPERATURE)
+    # Example object that shows the model which keys a single question carries.
+    json_example = ('{"id": "9", "options": [{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
+                    '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
+                    '"prompt": "Which of the following is a conjunction?", "solution": "A", "variant": "text"}')
+    parsing_prompt = ('Parse the question into this json format: ' + json_example +
+                      '. \nThe questions: "' + raw_question + '"')
+    parsing_tokens = count_tokens(parsing_prompt, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
+    # ["options"] sits where the batch parser passes ["questions"]; presumably required keys - TODO confirm.
+    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parsing_prompt, parsing_tokens, ["options"],
+                                     GEN_QUESTION_TEMPERATURE)
diff --git a/helper/firebase_helper.py b/helper/firebase_helper.py
index 9617d6e..8e56fb5 100644
--- a/helper/firebase_helper.py
+++ b/helper/firebase_helper.py
@@ -77,5 +77,14 @@ def save_to_db_with_id(collection: str, item, id: str):
     else:
         return (False, None)
 
+def get_all(collection: str):
+    """Fetch all documents of a Firestore collection with an unfiltered ``get()``."""
+    client = firestore.client()
 
+    # No filter, ordering or limit is applied here: get() is called directly
+    # on the collection reference.
+    documents = client.collection(collection).get()
+
+    # Handed back untouched; the duplicate check in exercises.py calls to_dict() on each item.
+    return documents