Check for duplicate exercises in level exam generation.
This commit is contained in:
@@ -673,7 +673,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
gen_multiple_choice_for_text = "Generate " + str(
|
||||
quantity) + " multiple choice questions of 4 options for an english level exam, some easy questions, some intermediate " \
|
||||
"questions and some advanced questions. Ensure that the questions cover a range of topics such as " \
|
||||
"verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation."
|
||||
"verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation. Make sure " \
|
||||
"every question only has 1 correct answer."
|
||||
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
|
||||
mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
|
||||
None,
|
||||
@@ -681,23 +682,23 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
split_mc_questions = mc_questions.split('13')
|
||||
|
||||
parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": '
|
||||
'[{"id": "A", "text": '
|
||||
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
|
||||
'"Happy"}, {"id": "D", "text": "Jump"}], '
|
||||
'"prompt": "Which of the following is a conjunction?", '
|
||||
'"solution": "A", "variant": "text"}]}. '
|
||||
'\nThe questions: "' + split_mc_questions[0] + '"')
|
||||
'[{"id": "A", "text": '
|
||||
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
|
||||
'"Happy"}, {"id": "D", "text": "Jump"}], '
|
||||
'"prompt": "Which of the following is a conjunction?", '
|
||||
'"solution": "A", "variant": "text"}]}. '
|
||||
'\nThe questions: "' + split_mc_questions[0] + '"')
|
||||
token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
|
||||
question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
|
||||
["questions"],
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
parse_mc_questions = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": '
|
||||
'[{"id": "A", "text": '
|
||||
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
|
||||
'"Happy"}, {"id": "D", "text": "Jump"}], '
|
||||
'"prompt": "Which of the following is a conjunction?", '
|
||||
'"solution": "A", "variant": "text"}]}. '
|
||||
'\nThe questions: "' + '13' + split_mc_questions[1] + '"')
|
||||
'[{"id": "A", "text": '
|
||||
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
|
||||
'"Happy"}, {"id": "D", "text": "Jump"}], '
|
||||
'"prompt": "Which of the following is a conjunction?", '
|
||||
'"solution": "A", "variant": "text"}]}. '
|
||||
'\nThe questions: "' + '13' + split_mc_questions[1] + '"')
|
||||
token_count = count_tokens(parse_mc_questions, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
|
||||
question_2 = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_mc_questions, token_count,
|
||||
["questions"],
|
||||
@@ -705,8 +706,9 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
question["questions"].extend(question_2["questions"])
|
||||
|
||||
all_exams = get_all("level")
|
||||
seen_keys = set()
|
||||
for i in range(len(question["questions"])):
|
||||
question["questions"][i] = replace_exercise_if_exists(all_exams, question["questions"][i])
|
||||
question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question, seen_keys)
|
||||
return {
|
||||
"id": str(uuid.uuid4()),
|
||||
"prompt": "Select the appropriate option.",
|
||||
@@ -714,16 +716,26 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
|
||||
"type": "multipleChoice",
|
||||
}
|
||||
|
||||
def replace_exercise_if_exists(all_exams, current_exercise):
|
||||
|
||||
def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_keys):
|
||||
# Extracting relevant fields for comparison
|
||||
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
|
||||
# Check if the key is in the set
|
||||
if key in seen_keys:
|
||||
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
|
||||
else:
|
||||
seen_keys.add(key)
|
||||
|
||||
for exam in all_exams:
|
||||
exam_dict = exam.to_dict()
|
||||
if any(
|
||||
exercise["prompt"] == current_exercise["prompt"] and
|
||||
any(exercise["options"][0]["text"] == current_option["text"] for current_option in current_exercise["options"])
|
||||
for exercise in exam_dict.get("exercises", [])
|
||||
exercise["prompt"] == current_exercise["prompt"] and
|
||||
any(exercise["options"][0]["text"] == current_option["text"] for current_option in
|
||||
current_exercise["options"])
|
||||
for exercise in exam_dict.get("exercises", [])[0]["questions"]
|
||||
):
|
||||
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question())
|
||||
return current_exercise
|
||||
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
|
||||
return current_exercise, seen_keys
|
||||
|
||||
|
||||
def generate_single_mc_level_question():
|
||||
@@ -731,8 +743,8 @@ def generate_single_mc_level_question():
|
||||
"be easy, intermediate or advanced."
|
||||
token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
|
||||
mc_question = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
|
||||
None,
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
None,
|
||||
GEN_QUESTION_TEMPERATURE)
|
||||
|
||||
parse_mc_question = ('Parse the question into this json format: {"id": "9", "options": '
|
||||
'[{"id": "A", "text": '
|
||||
|
||||
Reference in New Issue
Block a user