Check for duplicate exercises in level exam generation.
This commit is contained in:
@@ -5,6 +5,7 @@ import re
|
|||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from helper.api_messages import QuestionType
|
from helper.api_messages import QuestionType
|
||||||
|
from helper.firebase_helper import get_all
|
||||||
from helper.openai_interface import make_openai_instruct_call
|
from helper.openai_interface import make_openai_instruct_call
|
||||||
from helper.token_counter import count_tokens
|
from helper.token_counter import count_tokens
|
||||||
from helper.constants import *
|
from helper.constants import *
|
||||||
@@ -670,44 +671,79 @@ def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int,
|
|||||||
|
|
||||||
def _parse_mc_level_questions(raw_questions: str):
    """Ask the instruct model to turn raw question text into the ``{"questions": [...]}`` JSON shape."""
    parse_prompt = ('Parse the questions into this json format: {"questions": [{"id": "9", "options": '
                    '[{"id": "A", "text": '
                    '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
                    '"Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}]}. '
                    '\nThe questions: "' + raw_questions + '"')
    token_count = count_tokens(parse_prompt, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    # NOTE(review): ["questions"] is presumably the list of keys the helper
    # requires in the parsed response -- confirm in helper.openai_interface.
    return make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, parse_prompt, token_count,
                                     ["questions"],
                                     GEN_QUESTION_TEMPERATURE)


def gen_multiple_choice_level(quantity: int, start_id=1):
    """Generate a multiple choice exercise for an English level exam.

    The raw questions are generated in one completion and then parsed to JSON
    in up to two requests: the text before the first "13" marker, and the text
    from that marker onwards. Each parsed question is passed through
    ``replace_exercise_if_exists`` together with every exam returned by
    ``get_all("level")``.

    Args:
        quantity: Number of questions requested in the generation prompt.
        start_id: Passed to ``fix_exercise_ids`` together with the parsed
            questions (presumably the id given to the first question).

    Returns:
        A dict with a fresh UUID ``id``, a fixed ``prompt``, the ``questions``
        list returned by ``fix_exercise_ids`` and ``type`` "multipleChoice".
    """
    generation_prompt = ("Generate " + str(quantity) +
                         " multiple choice questions of 4 options for an english level exam, "
                         "some easy questions, some intermediate "
                         "questions and some advanced questions. Ensure that the questions cover a range of topics such as "
                         "verb tense, subject-verb agreement, pronoun usage, sentence structure, and punctuation.")
    # NOTE(review): the "- 300" offset is kept from the original code; its
    # meaning depends on how make_openai_instruct_call uses token_count.
    token_count = count_tokens(generation_prompt)["n_tokens"] - 300
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, generation_prompt, token_count,
                                             None,
                                             GEN_QUESTION_TEMPERATURE)

    # Split only at the FIRST "13": a plain split('13') dropped everything
    # after a second occurrence (e.g. "13" inside an option text) and raised
    # IndexError when the marker was missing (fewer than 13 questions).
    first_chunk, marker, remainder = mc_questions.partition('13')

    question = _parse_mc_level_questions(first_chunk)
    if marker:
        question_2 = _parse_mc_level_questions(marker + remainder)
        question["questions"].extend(question_2["questions"])

    all_exams = get_all("level")
    question["questions"] = [
        replace_exercise_if_exists(all_exams, parsed_question)
        for parsed_question in question["questions"]
    ]
    return {
        "id": str(uuid.uuid4()),
        "prompt": "Select the appropriate option.",
        "questions": fix_exercise_ids(question, start_id)["questions"],
        "type": "multipleChoice",
    }
|
||||||
|
|
||||||
|
def _iter_stored_mc_questions(exam_dict):
    """Yield each entry of an exam's "exercises" list and any questions nested inside it."""
    for exercise in exam_dict.get("exercises") or []:
        yield exercise
        # gen_multiple_choice_level returns its questions under a "questions"
        # key; presumably stored level exams keep that shape -- verify against
        # the saved schema.
        yield from exercise.get("questions") or []


def _is_duplicate_mc_question(stored, candidate):
    """Tell whether ``candidate`` has the stored prompt and contains the stored first option text."""
    prompt = candidate.get("prompt")
    stored_options = stored.get("options") or []
    if prompt is None or stored.get("prompt") != prompt or not stored_options:
        return False
    first_text = stored_options[0].get("text")
    if first_text is None:
        return False
    return any(option.get("text") == first_text for option in candidate.get("options") or [])


def replace_exercise_if_exists(all_exams, current_exercise, max_attempts: int = 5):
    """Return ``current_exercise``, or a regenerated question when it already exists in ``all_exams``.

    A stored question counts as a duplicate when its prompt equals the
    candidate's prompt and the text of its first option appears among the
    candidate's options. Stored entries without a prompt or options are never
    treated as duplicates (direct indexing used to raise on them).

    Args:
        all_exams: Iterable of objects exposing ``to_dict()``.
        current_exercise: Question dict with "prompt" and "options" keys.
        max_attempts: Upper bound on regenerations. The original recursion was
            unbounded, and every retry costs calls to the generation helper.

    Returns:
        The first candidate that is not a duplicate, or the last generated
        candidate once ``max_attempts`` regenerations have been used (best
        effort: that one may still be a duplicate).
    """
    # Convert the stored exams once; the same list is reused for every retry.
    stored_questions = [
        stored
        for exam in all_exams
        for stored in _iter_stored_mc_questions(exam.to_dict())
    ]

    candidate = current_exercise
    attempts = 0
    while attempts < max_attempts and any(
            _is_duplicate_mc_question(stored, candidate) for stored in stored_questions):
        candidate = generate_single_mc_level_question()
        attempts += 1
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def generate_single_mc_level_question():
    """Generate one level-exam multiple choice question and return it parsed.

    Two instruct-model calls are made: the first produces the question as free
    text, the second is asked to rewrite that text in the JSON shape shown in
    the parsing prompt. The result of the second call is returned as-is.
    """
    generation_prompt = ("Generate 1 multiple choice question of 4 options for an english level exam, it can "
                         "be easy, intermediate or advanced.")
    # NOTE(review): the "- 300" offset mirrors the other generators in this
    # file; its meaning depends on make_openai_instruct_call.
    generation_tokens = count_tokens(generation_prompt)["n_tokens"] - 300
    raw_question = make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        generation_prompt,
        generation_tokens,
        None,
        GEN_QUESTION_TEMPERATURE,
    )

    # Example object the model is asked to imitate.
    json_example = ('{"id": "9", "options": '
                    '[{"id": "A", "text": "And"}, {"id": "B", "text": "Cat"}, '
                    '{"id": "C", "text": "Happy"}, {"id": "D", "text": "Jump"}], '
                    '"prompt": "Which of the following is a conjunction?", '
                    '"solution": "A", "variant": "text"}')
    parsing_prompt = ('Parse the question into this json format: ' + json_example +
                      '. \nThe questions: "' + raw_question + '"')

    parsing_tokens = count_tokens(parsing_prompt, model_name=GPT_3_5_TURBO_INSTRUCT)["n_tokens"]
    # NOTE(review): ["options"] is presumably the list of keys the helper
    # requires in the parsed response -- confirm in helper.openai_interface.
    return make_openai_instruct_call(
        GPT_3_5_TURBO_INSTRUCT,
        parsing_prompt,
        parsing_tokens,
        ["options"],
        GEN_QUESTION_TEMPERATURE,
    )
|
||||||
|
|||||||
@@ -77,5 +77,14 @@ def save_to_db_with_id(collection: str, item, id: str):
|
|||||||
else:
|
else:
|
||||||
return (False, None)
|
return (False, None)
|
||||||
|
|
||||||
|
def get_all(collection: str):
    """Fetch everything stored in the Firestore collection named ``collection``.

    Returns whatever ``.get()`` on the collection reference yields; the caller
    in this commit iterates the result and calls ``to_dict()`` on each item.
    """
    return firestore.client().collection(collection).get()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user