Check for duplicate exercises in level exam generation.

This commit is contained in:
Cristiano Ferreira
2024-02-04 22:37:57 +00:00
parent 81d7167cbf
commit 45a4dbe018

View File

@@ -673,7 +673,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
gen_multiple_choice_for_text = "Generate " + str(
quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
"questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
"verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation."
"verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
"every question only has 1 correct answer."
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
None,
@@ -705,8 +706,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
question["questions"].extend(question_2["questions"])
all_exams = get_all("level")
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i] = replace_exercise_if_exists(all_exams, question["questions"][i])
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -714,16 +716,26 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
"type": "multipleChoice",
}
def replace_exercise_if_exists(all_exams, current_exercise):
def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
# Check if the key is in the set
if key in seen_keys:
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
else:
seen_keys.add(key)
for exam in all_exams:
exam_dict = exam.to_dict()
if any(
exercise["prompt"] == current_exercise["prompt"] and
any(exercise["options"][0]["text"] == current_option["text"] for current_option in current_exercise["options"])
for exercise in exam_dict.get("exercises", [])
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
current_exercise["options"])
for exercise in exam_dict.get("exercises", [])[0]["questions"]
):
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question())
return current_exercise
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
return current_exercise, seen_keys
def generate_single_mc_level_question():