Added new ideaMatch exercise type.

This commit is contained in:
Cristiano Ferreira
2024-07-18 23:20:06 +01:00
parent 358f240d16
commit 4c41942dfe
2 changed files with 195 additions and 98 deletions

View File

@@ -18,6 +18,7 @@ GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']

View File

@@ -7,7 +7,6 @@ import uuid
import nltk
from wonderwords import RandomWord
from helper.api_messages import QuestionType
from helper.constants import *
from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_call, count_total_tokens
@@ -243,6 +242,7 @@ def build_write_blanks_solutions_listening(words: [], start_id):
)
return solutions
def get_perfect_answer(question: str, size: int):
messages = [
{
@@ -278,20 +278,20 @@ def generate_reading_passage_1_text(topic: str):
"role": "user",
"content": (
'Generate an extensive text for IELTS Reading Passage 1, of at least 800 words, on the topic '
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
},
{
"role": "system",
"content": ('The generated text should be fairly easy to understand.')
"content": ('The generated text should be fairly easy to understand and have multiple paragraphs.')
},
]
token_count = count_total_tokens(messages)
@@ -310,25 +310,26 @@ def generate_reading_passage_2_text(topic: str):
"role": "user",
"content": (
'Generate an extensive text for IELTS Reading Passage 2, of at least 800 words, on the topic '
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
},
{
"role": "system",
"content": ('The generated text should be fairly hard to understand.')
"content": ('The generated text should be fairly hard to understand and have multiple paragraphs.')
},
]
token_count = count_total_tokens(messages)
return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_reading_passage_3_text(topic: str):
messages = [
{
@@ -341,21 +342,22 @@ def generate_reading_passage_3_text(topic: str):
"role": "user",
"content": (
'Generate an extensive text for IELTS Reading Passage 3, of at least 800 words, on the topic '
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
'of "' + topic + '". The passage should offer '
'a substantial amount of information, '
'analysis, or narrative relevant to the chosen '
'subject matter. This text passage aims to '
'serve as the primary reading section of an '
'IELTS test, providing an in-depth and '
'comprehensive exploration of the topic. '
'Make sure that the generated text does not '
'contain forbidden subjects in muslim countries.')
},
{
"role": "system",
"content": ('The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people over the specified topic .')
'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.')
},
]
token_count = count_total_tokens(messages)
@@ -464,8 +466,8 @@ def generate_listening_3_conversation(topic: str):
"content": (
'Compose an authentic and elaborate conversation between up to four individuals in the everyday '
'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. '
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
'Make sure that the generated conversation does not contain forbidden subjects in '
'muslim countries.')
}
]
@@ -507,7 +509,7 @@ def generate_listening_4_monologue(topic: str):
"content": (
'Generate a comprehensive and complex monologue on the academic subject '
'of: "' + topic + '". Make sure that the generated monologue does not contain forbidden subjects in '
'muslim countries.')
'muslim countries.')
}
]
@@ -547,6 +549,10 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises
@@ -673,15 +679,15 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
},
{
"role": "user",
"content": ('Summarize this text: "'+ text + '"')
"content": ('Summarize this text: "' + text + '"')
}
]
token_count = count_total_tokens(messages)
response = make_openai_call(GPT_4_O, messages, token_count,
["summary"],
GEN_QUESTION_TEMPERATURE)
["summary"],
GEN_QUESTION_TEMPERATURE)
messages = [
{
@@ -693,15 +699,16 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
{
"role": "user",
"content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
'expressions, from this:\n' + response["summary"])
'expressions, from this:\n' + response[
"summary"])
}
]
token_count = count_total_tokens(messages)
words_response = make_openai_call(GPT_4_O, messages, token_count,
["summary"],
GEN_QUESTION_TEMPERATURE)
["summary"],
GEN_QUESTION_TEMPERATURE)
response["words"] = words_response["words"]
replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id)
options_words = add_random_words_and_shuffle(response["words"], 1)
@@ -732,18 +739,19 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
'Ensure that your statements accurately represent '
'information or inferences from the text, and '
'provide a variety of responses, including, at '
'least one of each True, False, and Not Given, '
'as appropriate.\n\nReference text:\n\n ' + text)
'Generate ' + str(
quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
'Ensure that your statements accurately represent '
'information or inferences from the text, and '
'provide a variety of responses, including, at '
'least one of each True, False, and Not Given, '
'as appropriate.\n\nReference text:\n\n ' + text)
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count,["prompts"],
questions = make_openai_call(GPT_4_O, messages, token_count, ["prompts"],
GEN_QUESTION_TEMPERATURE)["prompts"]
if len(questions) > quantity:
questions = remove_excess_questions(questions, len(questions) - quantity)
@@ -777,7 +785,7 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
}
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
@@ -802,13 +810,14 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
{
"role": "user",
"content": (
'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs))
'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(
paragraphs))
}
]
token_count = count_total_tokens(messages)
headings = make_openai_call(GPT_4_O, messages, token_count,["headings"],
headings = make_openai_call(GPT_4_O, messages, token_count, ["headings"],
GEN_QUESTION_TEMPERATURE)["headings"]
options = []
@@ -838,6 +847,83 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
}
def gen_idea_match_exercise(text: str, quantity: int, start_id):
    """Generate an idea-matching reading exercise from a passage.

    Asks the model to extract ``quantity`` ideas/opinions and their authors
    from ``text``, then builds a ``matchSentences`` exercise where the
    learner pairs each idea with its author.

    Args:
        text: The reading passage to extract ideas from.
        quantity: Number of idea/author pairs to request.
        start_id: First numeric id to assign to the generated sentences.

    Returns:
        A dict describing the exercise: a uuid ``id``, lettered author
        ``options``, shuffled idea ``sentences`` with solutions, a prompt,
        and ``type`` set to ``"matchSentences"``.
    """
    messages = [
        {
            "role": "system",
            # Pin the response shape so the "ideas" field can be parsed reliably.
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}')
        },
        {
            "role": "user",
            "content": (
                'From the text extract ' + str(quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))
        }
    ]
    token_count = count_total_tokens(messages)
    ideas = make_openai_call(GPT_4_O, messages, token_count, ["ideas"], GEN_QUESTION_TEMPERATURE)["ideas"]
    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": build_options(ideas),
        "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
        "sentences": build_sentences(ideas, start_id),
        "type": "matchSentences"
    }
def build_options(ideas):
    """Build the lettered answer options for an idea-match exercise.

    Each idea's ``"from"`` field (the author/institution) becomes one
    option, labelled A, B, C, … in the order the ideas were given.

    Args:
        ideas: List of dicts, each with a ``"from"`` key.

    Returns:
        List of ``{"id": <letter>, "sentence": <author>}`` dicts.
    """
    letters = iter(string.ascii_uppercase)
    return [{"id": next(letters), "sentence": entry["from"]} for entry in ideas]
def build_sentences(ideas, start_id):
    """Build the shuffled idea sentences for an idea-match exercise.

    Each idea keeps the letter matching its position in the original list
    (so it lines up with the option letters from ``build_options``), then
    the sentences are shuffled and given sequential ids from ``start_id``.

    Args:
        ideas: List of dicts, each with an ``"idea"`` key.
        start_id: First id assigned after shuffling.

    Returns:
        Shuffled list of ``{"solution", "sentence", "id"}`` dicts.
    """
    letters = iter(string.ascii_uppercase)
    entries = [{"solution": next(letters), "sentence": item["idea"]} for item in ideas]
    # Shuffle first so sentence order does not reveal the answers; ids are
    # assigned afterwards so they stay sequential in display order.
    random.shuffle(entries)
    for offset, entry in enumerate(entries):
        entry["id"] = start_id + offset
    return entries
def assign_letters_to_paragraphs(paragraphs):
result = []
letters = iter(string.ascii_uppercase)
@@ -861,14 +947,15 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
{
"role": "user",
"content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(n_options) + ' options '
'of for this conversation:\n"' + text + '"')
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
n_options) + ' options '
'of for this conversation:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -892,14 +979,15 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s
"role": "user",
"content": (
'Generate ' + str(
quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(n_options) + ' options '
'of for this monologue:\n"' + text + '"')
quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
n_options) + ' options '
'of for this monologue:\n"' + text + '"')
}
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["questions"], GEN_QUESTION_TEMPERATURE)
question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)
return {
"id": str(uuid.uuid4()),
"prompt": "Select the appropriate option.",
@@ -927,7 +1015,7 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti
]
token_count = count_total_tokens(messages)
questions = make_openai_call(GPT_4_O, messages, token_count,["questions"],
questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"][:quantity]
return {
@@ -993,7 +1081,6 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [
@@ -1008,7 +1095,7 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
}
]
words = make_openai_call(GPT_4_O, word_messages, token_count,["words"],
words = make_openai_call(GPT_4_O, word_messages, token_count, ["words"],
GEN_QUESTION_TEMPERATURE)["words"][:quantity]
replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id)
return {
@@ -1149,11 +1236,11 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
@@ -1163,8 +1250,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
["questions"],
GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity:
return gen_multiple_choice_level(quantity, start_id)
@@ -1204,6 +1291,7 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
return current_exercise, seen_keys
def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
@@ -1220,7 +1308,8 @@ def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, s
current_exercise["options"])
for exercise in exam.get("questions", [])
):
return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam,
seen_keys)
return current_exercise, seen_keys
@@ -1243,8 +1332,8 @@ def generate_single_mc_level_question():
]
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,["options"],
GEN_QUESTION_TEMPERATURE)
question = make_openai_call(GPT_4_O, messages, token_count, ["options"],
GEN_QUESTION_TEMPERATURE)
return question
@@ -1273,11 +1362,11 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
"role": "system",
"content": (
'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": '
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
'[{"id": "A", "text": '
'"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": '
'"Happy"}, {"id": "D", "text": "Jump"}], '
'"prompt": "Which of the following is a conjunction?", '
'"solution": "A", "variant": "text"}]}')
},
{
"role": "user",
@@ -1287,8 +1376,8 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
["questions"],
GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity:
return gen_multiple_choice_level(quantity, start_id)
@@ -1296,8 +1385,8 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams
seen_keys = set()
for i in range(len(question["questions"])):
question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_exams, question["questions"][i],
question,
seen_keys)
question,
seen_keys)
return fix_exercise_ids(question, start_id)
@@ -1331,13 +1420,14 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
]
}
gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (' multiple choice questions of 4 options for an english '
'level exam, some easy questions, some intermediate '
'questions and some advanced questions.Ensure that '
'the questions cover a range of topics such as verb '
'tense, subject-verb agreement, pronoun usage, '
'sentence structure, and punctuation. Make sure '
'every question only has 1 correct answer.')
gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (
' multiple choice questions of 4 options for an english '
'level exam, some easy questions, some intermediate '
'questions and some advanced questions.Ensure that '
'the questions cover a range of topics such as verb '
'tense, subject-verb agreement, pronoun usage, '
'sentence structure, and punctuation. Make sure '
'every question only has 1 correct answer.')
messages = [
{
@@ -1360,14 +1450,15 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
token_count = count_total_tokens(messages)
question = make_openai_call(GPT_4_O, messages, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)
["questions"],
GEN_QUESTION_TEMPERATURE)
if len(question["questions"]) != quantity:
return gen_multiple_choice_level(quantity, start_id)
else:
return fix_exercise_ids(question, start_id)["questions"]
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
json_format = {
"question": {
@@ -1406,10 +1497,11 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
{
"role": "user",
"content": (
'From the generated text choose ' + str(quantity) + ' words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is '
'incremented for each word. The ids must be ordered throughout the text and the words must be '
'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
'From the generated text choose ' + str(
quantity) + ' words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is '
'incremented for each word. The ids must be ordered throughout the text and the words must be '
'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
}
]
@@ -1420,14 +1512,14 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
return question["question"]
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
passage = generate_reading_passage_1_text(topic)
short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id+sa_quantity, mc_quantity)
mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
return {
"exercises": {
"shortAnswer":short_answer,
"shortAnswer": short_answer,
"multipleChoice": mc_exercises,
},
"text": {
@@ -1436,6 +1528,7 @@ def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic
}
}
def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
@@ -1458,8 +1551,10 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
token_count = count_total_tokens(messages)
return make_openai_call(GPT_4_O, messages, token_count,
["questions"],
GEN_QUESTION_TEMPERATURE)["questions"]
["questions"],
GEN_QUESTION_TEMPERATURE)["questions"]
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
json_format = {
"questions": [
@@ -1497,7 +1592,8 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
},
{
"role": "user",
"content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
"content": 'Generate ' + str(
mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
},
{
"role": "user",
@@ -1513,4 +1609,4 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
if len(question["questions"]) != mc_quantity:
return gen_multiple_choice_level(mc_quantity, start_id)
else:
return fix_exercise_ids(question, start_id)["questions"]
return fix_exercise_ids(question, start_id)["questions"]