Added new ideaMatch exercise type.

This commit is contained in:
Cristiano Ferreira
2024-07-18 23:20:06 +01:00
parent 358f240d16
commit 4c41942dfe
2 changed files with 195 additions and 98 deletions

View File

@@ -18,6 +18,7 @@ GEN_FIELDS = ['topic']
GEN_TEXT_FIELDS = ['title'] GEN_TEXT_FIELDS = ['title']
LISTENING_GEN_FIELDS = ['transcript', 'exercise'] LISTENING_GEN_FIELDS = ['transcript', 'exercise']
READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch'] READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm'] LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill', LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm'] 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']

View File

@@ -7,7 +7,6 @@ import uuid
import nltk import nltk
from wonderwords import RandomWord from wonderwords import RandomWord
from helper.api_messages import QuestionType
from helper.constants import * from helper.constants import *
from helper.firebase_helper import get_all from helper.firebase_helper import get_all
from helper.openai_interface import make_openai_call, count_total_tokens from helper.openai_interface import make_openai_call, count_total_tokens
@@ -243,6 +242,7 @@ def build_write_blanks_solutions_listening(words: [], start_id):
) )
return solutions return solutions
def get_perfect_answer(question: str, size: int): def get_perfect_answer(question: str, size: int):
messages = [ messages = [
{ {
@@ -291,7 +291,7 @@ def generate_reading_passage_1_text(topic: str):
}, },
{ {
"role": "system", "role": "system",
"content": ('The generated text should be fairly easy to understand.') "content": ('The generated text should be fairly easy to understand and have multiple paragraphs.')
}, },
] ]
token_count = count_total_tokens(messages) token_count = count_total_tokens(messages)
@@ -323,12 +323,13 @@ def generate_reading_passage_2_text(topic: str):
}, },
{ {
"role": "system", "role": "system",
"content": ('The generated text should be fairly hard to understand.') "content": ('The generated text should be fairly hard to understand and have multiple paragraphs.')
}, },
] ]
token_count = count_total_tokens(messages) token_count = count_total_tokens(messages)
return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
def generate_reading_passage_3_text(topic: str): def generate_reading_passage_3_text(topic: str):
messages = [ messages = [
{ {
@@ -355,7 +356,8 @@ def generate_reading_passage_3_text(topic: str):
{ {
"role": "system", "role": "system",
"content": ('The generated text should be very hard to understand and include different points, theories, ' "content": ('The generated text should be very hard to understand and include different points, theories, '
'subtle differences of opinions from people over the specified topic .') 'subtle differences of opinions from people, correctly sourced to the person who said it, '
'over the specified topic and have multiple paragraphs.')
}, },
] ]
token_count = count_total_tokens(messages) token_count = count_total_tokens(messages)
@@ -547,6 +549,10 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id) question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question) exercises.append(question)
print("Added paragraph match: " + str(question)) print("Added paragraph match: " + str(question))
elif req_exercise == "ideaMatch":
question = gen_idea_match_exercise(passage, number_of_exercises, start_id)
exercises.append(question)
print("Added idea match: " + str(question))
start_id = start_id + number_of_exercises start_id = start_id + number_of_exercises
@@ -693,7 +699,8 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
{ {
"role": "user", "role": "user",
"content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not ' "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
'expressions, from this:\n' + response["summary"]) 'expressions, from this:\n' + response[
"summary"])
} }
] ]
@@ -732,7 +739,8 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic
{ {
"role": "user", "role": "user",
"content": ( "content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. ' 'Generate ' + str(
quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
'Ensure that your statements accurately represent ' 'Ensure that your statements accurately represent '
'information or inferences from the text, and ' 'information or inferences from the text, and '
'provide a variety of responses, including, at ' 'provide a variety of responses, including, at '
@@ -802,7 +810,8 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
{ {
"role": "user", "role": "user",
"content": ( "content": (
'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs)) 'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(
paragraphs))
} }
] ]
@@ -838,6 +847,83 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
} }
def gen_idea_match_exercise(text: str, quantity: int, start_id):
    """Build an "idea match" reading exercise from a passage.

    Asks the model to extract ideas/opinions from ``text`` together with who
    each one is attributed to, then shapes them into a ``matchSentences``
    exercise: the sources become the lettered options and the ideas become
    the numbered sentences to match against them.

    Args:
        text: Passage the ideas are extracted from.
        quantity: Number of ideas requested from the model; also the maximum
            number of ideas kept in the exercise.
        start_id: Id given to the first sentence; the remaining sentences are
            numbered consecutively from it.

    Returns:
        A dict with the keys ``id``, ``allowRepetition``, ``options``,
        ``prompt``, ``sentences`` and ``type``.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}')
        },
        {
            "role": "user",
            "content": (
                'From the text extract ' + str(quantity)
                + ' ideas, theories, opinions and who they are from. The text: ' + str(text))
        }
    ]
    token_count = count_total_tokens(messages)

    response = make_openai_call(GPT_4_O, messages, token_count, ["ideas"], GEN_QUESTION_TEMPERATURE)
    # The model is only *asked* for `quantity` ideas and may return more.
    # The caller advances start_id by the requested amount, so any surplus
    # would produce sentence ids that overlap the next exercise.
    ideas = response["ideas"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": build_options(ideas),
        "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
        "sentences": build_sentences(ideas, start_id),
        "type": "matchSentences"
    }
def build_options(ideas):
    """Return one lettered option per idea, labelled with the idea's source.

    Letters are handed out in order ("A", "B", ...) following the order of
    ``ideas``; each option's ``sentence`` is the idea's ``"from"`` value.
    """
    letter_source = iter(string.ascii_uppercase)
    return [
        {"id": next(letter_source), "sentence": entry["from"]}
        for entry in ideas
    ]
def build_sentences(ideas, start_id):
    """Return the ideas as shuffled, consecutively numbered sentences.

    Each idea is paired with the letter matching its position in ``ideas``
    (its ``solution``). The list is then shuffled, and ids are assigned in
    the shuffled order starting at ``start_id``.
    """
    letter_source = iter(string.ascii_uppercase)
    entries = [
        {"solution": next(letter_source), "sentence": item["idea"]}
        for item in ideas
    ]

    random.shuffle(entries)

    # Ids are assigned after shuffling so they stay sequential on screen.
    for number, entry in enumerate(entries, start=start_id):
        entry["id"] = number
    return entries
def assign_letters_to_paragraphs(paragraphs): def assign_letters_to_paragraphs(paragraphs):
result = [] result = []
letters = iter(string.ascii_uppercase) letters = iter(string.ascii_uppercase)
@@ -861,7 +947,8 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
{ {
"role": "user", "role": "user",
"content": ( "content": (
'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(n_options) + ' options ' 'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
n_options) + ' options '
'of for this conversation:\n"' + text + '"') 'of for this conversation:\n"' + text + '"')
} }
@@ -892,7 +979,8 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s
"role": "user", "role": "user",
"content": ( "content": (
'Generate ' + str( 'Generate ' + str(
quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(n_options) + ' options ' quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
n_options) + ' options '
'of for this monologue:\n"' + text + '"') 'of for this monologue:\n"' + text + '"')
} }
@@ -993,7 +1081,6 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"], questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
GEN_QUESTION_TEMPERATURE)["notes"][:quantity] GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)]) formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
word_messages = [ word_messages = [
@@ -1204,6 +1291,7 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys) return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
return current_exercise, seen_keys return current_exercise, seen_keys
def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys): def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
# Extracting relevant fields for comparison # Extracting relevant fields for comparison
key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options']))) key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
@@ -1220,7 +1308,8 @@ def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, s
current_exercise["options"]) current_exercise["options"])
for exercise in exam.get("questions", []) for exercise in exam.get("questions", [])
): ):
return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam, seen_keys) return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam,
seen_keys)
return current_exercise, seen_keys return current_exercise, seen_keys
@@ -1331,7 +1420,8 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
] ]
} }
gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (' multiple choice questions of 4 options for an english ' gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (
' multiple choice questions of 4 options for an english '
'level exam, some easy questions, some intermediate ' 'level exam, some easy questions, some intermediate '
'questions and some advanced questions.Ensure that ' 'questions and some advanced questions.Ensure that '
'the questions cover a range of topics such as verb ' 'the questions cover a range of topics such as verb '
@@ -1368,6 +1458,7 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
else: else:
return fix_exercise_ids(question, start_id)["questions"] return fix_exercise_ids(question, start_id)["questions"]
def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)): def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
json_format = { json_format = {
"question": { "question": {
@@ -1406,7 +1497,8 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
{ {
"role": "user", "role": "user",
"content": ( "content": (
'From the generated text choose ' + str(quantity) + ' words (cannot be sequential words) to replace ' 'From the generated text choose ' + str(
quantity) + ' words (cannot be sequential words) to replace '
'once with {{id}} where id starts on ' + str(start_id) + ' and is ' 'once with {{id}} where id starts on ' + str(start_id) + ' and is '
'incremented for each word. The ids must be ordered throughout the text and the words must be ' 'incremented for each word. The ids must be ordered throughout the text and the words must be '
'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.') 'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
@@ -1420,8 +1512,8 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
return question["question"] return question["question"]
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
passage = generate_reading_passage_1_text(topic) passage = generate_reading_passage_1_text(topic)
short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity) short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity) mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
@@ -1436,6 +1528,7 @@ def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic
} }
} }
def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int): def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]} json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
@@ -1460,6 +1553,8 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
return make_openai_call(GPT_4_O, messages, token_count, return make_openai_call(GPT_4_O, messages, token_count,
["questions"], ["questions"],
GEN_QUESTION_TEMPERATURE)["questions"] GEN_QUESTION_TEMPERATURE)["questions"]
def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
json_format = { json_format = {
"questions": [ "questions": [
@@ -1497,7 +1592,8 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
}, },
{ {
"role": "user", "role": "user",
"content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text "content": 'Generate ' + str(
mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
}, },
{ {
"role": "user", "role": "user",