From 97f30ea8814d54d311e877f65a9d3bd7c9008d05 Mon Sep 17 00:00:00 2001 From: Cristiano Ferreira Date: Sat, 3 Feb 2024 15:58:51 +0000 Subject: [PATCH] Verify for duplicate exercises in level exam generation. --- helper/exercises.py | 74 +++++++++++++++++++++++++++++---------- helper/firebase_helper.py | 9 +++++ 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/helper/exercises.py b/helper/exercises.py index 28adc11..2f03a02 100644 --- a/helper/exercises.py +++ b/helper/exercises.py @@ -5,6 +5,7 @@ import re import uuid from helper.api_messages import QuestionType +from helper.firebase_helper import get_all from helper.openai_interface import make_openai_instruct_call from helper.token_counter import count_tokens from helper.constants import * @@ -670,44 +671,79 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, def gen_multiple_choice_level(quantity: int, start_id=1): gen_multiple_choice_for_text = "Generate " + str( - quantity) + " multiple choice questions of 4 options for an english level exam, 7 easy questions, 8 intermediate " \ - "questions and 10 advanced questions. Ensure that the questions cover a range of topics such as " \ + quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \ + "questions and some advanced questions. Ensure that the questions cover a range of topics such as " \ "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation." token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300 mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, None, GEN_QUESTION_TEMPERATURE) - split_mc_questions = mc_questions.split('13') - parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": " \ - "[{\"id\": \"A\", \"text\": " \ - "\"And\"}, {\"id\": \"B\", \"text\": \"Cat\"}, {\"id\": \"C\", \"text\": " \ - "\"Happy\"}, {\"id\": \"D\", \"text\": \"Jump\"}], " \ - "\"prompt\": \"Which of the following is a conjunction?\", " \ - "\"solution\": \"A\", \"variant\": \"text\"}]}." \ - " \nThe questions: '" + split_mc_questions[0] + "'" - + parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": ' + '[{"id": "A", "text": ' + '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' + '"Happy"}, {"id": "D", "text": "Jump"}], ' + '"prompt": "Which of the following is a conjunction?", ' + '"solution": "A", "variant": "text"}]}. ' + '\nThe questions: "' + split_mc_questions[0] + '"') token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"] question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, ["questions"], GEN_QUESTION_TEMPERATURE) - - parse_mc_questions = "Parse the questions into this json format: {\"questions\": [{\"id\": \"9\", \"options\": " \ - "[{\"id\": \"A\", \"text\": " \ - "\"And\"}, {\"id\": \"B\", \"text\": \"Cat\"}, {\"id\": \"C\", \"text\": " \ - "\"Happy\"}, {\"id\": \"D\", \"text\": \"Jump\"}], " \ - "\"prompt\": \"Which of the following is a conjunction?\", " \ - "\"solution\": \"A\", \"variant\": \"text\"}]}. " \ - "\nThe questions: '" + '13' + split_mc_questions[1] + "'" + parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": ' + '[{"id": "A", "text": ' + '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' + '"Happy"}, {"id": "D", "text": "Jump"}], ' + '"prompt": "Which of the following is a conjunction?", ' + '"solution": "A", "variant": "text"}]}. ' + '\nThe questions: "' + '13' + split_mc_questions[1] + '"') token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"] question_2 = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count, ["questions"], GEN_QUESTION_TEMPERATURE) question["questions"].extend(question_2["questions"]) + + all_exams = get_all("level") + for i in range(len(question["questions"])): + question["questions"][i] = replace_exercise_if_exists(all_exams, question["questions"][i]) return { "id": str(uuid.uuid4()), "prompt": "Select the appropriate option.", "questions": fix_exercise_ids(question, start_id)["questions"], "type": "multipleChoice", } + +def replace_exercise_if_exists(all_exams, current_exercise): + for exam in all_exams: + exam_dict = exam.to_dict() + if any( + exercise["prompt"] == current_exercise["prompt"] and + any(exercise["options"][0]["text"] == current_option["text"] for current_option in current_exercise["options"]) + for exercise in exam_dict.get("exercises", []) + ): + return replace_exercise_if_exists(all_exams, generate_single_mc_level_question()) + return current_exercise + + +def generate_single_mc_level_question(): + gen_multiple_choice_for_text = "Generate 1 multiple choice question of 4 options for an english level exam, it can " \ + "be easy, intermediate or advanced." + token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300 + mc_question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count, + None, + GEN_QUESTION_TEMPERATURE) + + parse_mc_question = ('Parse the question into this json format: {"id": "9", "options": ' + '[{"id": "A", "text": ' + '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' + '"Happy"}, {"id": "D", "text": "Jump"}], ' + '"prompt": "Which of the following is a conjunction?", ' + '"solution": "A", "variant": "text"}. ' + '\nThe questions: "' + mc_question + '"') + + token_count = count_tokens(parse_mc_question, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"] + question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_question, token_count, + ["options"], + GEN_QUESTION_TEMPERATURE) + return question diff --git a/helper/firebase_helper.py b/helper/firebase_helper.py index 9617d6e..8e56fb5 100644 --- a/helper/firebase_helper.py +++ b/helper/firebase_helper.py @@ -77,5 +77,14 @@ def save_to_db_with_id(collection: str, item, id: str): else: return (False, None) +def get_all(collection: str): + db = firestore.client() + collection_ref = db.collection(collection) + all_exercises = ( + collection_ref + .get() + ) + + return all_exercises