diff --git a/helper/constants.py b/helper/constants.py index 883ae9a..fdd45e4 100644 --- a/helper/constants.py +++ b/helper/constants.py @@ -18,6 +18,7 @@ GEN_FIELDS = ['topic'] GEN_TEXT_FIELDS = ['title'] LISTENING_GEN_FIELDS = ['transcript', 'exercise'] READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch'] +READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch'] LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm'] LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm'] diff --git a/helper/exercises.py b/helper/exercises.py index c3022bd..8e299e4 100644 --- a/helper/exercises.py +++ b/helper/exercises.py @@ -7,7 +7,6 @@ import uuid import nltk from wonderwords import RandomWord -from helper.api_messages import QuestionType from helper.constants import * from helper.firebase_helper import get_all from helper.openai_interface import make_openai_call, count_total_tokens @@ -243,6 +242,7 @@ def build_write_blanks_solutions_listening(words: [], start_id): ) return solutions + def get_perfect_answer(question: str, size: int): messages = [ { @@ -278,20 +278,20 @@ def generate_reading_passage_1_text(topic: str): "role": "user", "content": ( 'Generate an extensive text for IELTS Reading Passage 1, of at least 800 words, on the topic ' - 'of "' + topic + '". The passage should offer ' - 'a substantial amount of information, ' - 'analysis, or narrative relevant to the chosen ' - 'subject matter. This text passage aims to ' - 'serve as the primary reading section of an ' - 'IELTS test, providing an in-depth and ' - 'comprehensive exploration of the topic. ' - 'Make sure that the generated text does not ' - 'contain forbidden subjects in muslim countries.') + 'of "' + topic + '". 
The passage should offer ' + 'a substantial amount of information, ' + 'analysis, or narrative relevant to the chosen ' + 'subject matter. This text passage aims to ' + 'serve as the primary reading section of an ' + 'IELTS test, providing an in-depth and ' + 'comprehensive exploration of the topic. ' + 'Make sure that the generated text does not ' + 'contain forbidden subjects in muslim countries.') }, { "role": "system", - "content": ('The generated text should be fairly easy to understand.') + "content": ('The generated text should be fairly easy to understand and have multiple paragraphs.') }, ] token_count = count_total_tokens(messages) @@ -310,25 +310,26 @@ def generate_reading_passage_2_text(topic: str): "role": "user", "content": ( 'Generate an extensive text for IELTS Reading Passage 2, of at least 800 words, on the topic ' - 'of "' + topic + '". The passage should offer ' - 'a substantial amount of information, ' - 'analysis, or narrative relevant to the chosen ' - 'subject matter. This text passage aims to ' - 'serve as the primary reading section of an ' - 'IELTS test, providing an in-depth and ' - 'comprehensive exploration of the topic. ' - 'Make sure that the generated text does not ' - 'contain forbidden subjects in muslim countries.') + 'of "' + topic + '". The passage should offer ' + 'a substantial amount of information, ' + 'analysis, or narrative relevant to the chosen ' + 'subject matter. This text passage aims to ' + 'serve as the primary reading section of an ' + 'IELTS test, providing an in-depth and ' + 'comprehensive exploration of the topic. 
' + 'Make sure that the generated text does not ' + 'contain forbidden subjects in muslim countries.') }, { "role": "system", - "content": ('The generated text should be fairly hard to understand.') + "content": ('The generated text should be fairly hard to understand and have multiple paragraphs.') }, ] token_count = count_total_tokens(messages) return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE) + def generate_reading_passage_3_text(topic: str): messages = [ { @@ -341,21 +342,22 @@ def generate_reading_passage_3_text(topic: str): "role": "user", "content": ( 'Generate an extensive text for IELTS Reading Passage 3, of at least 800 words, on the topic ' - 'of "' + topic + '". The passage should offer ' - 'a substantial amount of information, ' - 'analysis, or narrative relevant to the chosen ' - 'subject matter. This text passage aims to ' - 'serve as the primary reading section of an ' - 'IELTS test, providing an in-depth and ' - 'comprehensive exploration of the topic. ' - 'Make sure that the generated text does not ' - 'contain forbidden subjects in muslim countries.') + 'of "' + topic + '". The passage should offer ' + 'a substantial amount of information, ' + 'analysis, or narrative relevant to the chosen ' + 'subject matter. This text passage aims to ' + 'serve as the primary reading section of an ' + 'IELTS test, providing an in-depth and ' + 'comprehensive exploration of the topic. 
' + 'Make sure that the generated text does not ' + 'contain forbidden subjects in muslim countries.') }, { "role": "system", "content": ('The generated text should be very hard to understand and include different points, theories, ' - 'subtle differences of opinions from people over the specified topic .') + 'subtle differences of opinions from people, correctly sourced to the person who said it, ' + 'over the specified topic and have multiple paragraphs.') }, ] token_count = count_total_tokens(messages) @@ -464,8 +466,8 @@ def generate_listening_3_conversation(topic: str): "content": ( 'Compose an authentic and elaborate conversation between up to four individuals in the everyday ' 'social context of "' + topic + '". Please include random names and genders for the characters in your dialogue. ' - 'Make sure that the generated conversation does not contain forbidden subjects in ' - 'muslim countries.') + 'Make sure that the generated conversation does not contain forbidden subjects in ' + 'muslim countries.') } ] @@ -507,7 +509,7 @@ def generate_listening_4_monologue(topic: str): "content": ( 'Generate a comprehensive and complex monologue on the academic subject ' 'of: "' + topic + '". 
Make sure that the generated monologue does not contain forbidden subjects in ' - 'muslim countries.') + 'muslim countries.') } ] @@ -547,6 +549,10 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id) exercises.append(question) print("Added paragraph match: " + str(question)) + elif req_exercise == "ideaMatch": + question = gen_idea_match_exercise(passage, number_of_exercises, start_id) + exercises.append(question) + print("Added idea match: " + str(question)) start_id = start_id + number_of_exercises @@ -673,15 +679,15 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu }, { "role": "user", - "content": ('Summarize this text: "'+ text + '"') + "content": ('Summarize this text: "' + text + '"') } ] token_count = count_total_tokens(messages) response = make_openai_call(GPT_4_O, messages, token_count, - ["summary"], - GEN_QUESTION_TEMPERATURE) + ["summary"], + GEN_QUESTION_TEMPERATURE) messages = [ { @@ -693,15 +699,16 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu { "role": "user", "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not ' - 'expressions, from this:\n' + response["summary"]) + 'expressions, from this:\n' + response[ + "summary"]) } ] token_count = count_total_tokens(messages) words_response = make_openai_call(GPT_4_O, messages, token_count, - ["summary"], - GEN_QUESTION_TEMPERATURE) + ["summary"], + GEN_QUESTION_TEMPERATURE) response["words"] = words_response["words"] replaced_summary = replace_first_occurrences_with_placeholders(response["summary"], response["words"], start_id) options_words = add_random_words_and_shuffle(response["words"], 1) @@ -732,18 +739,19 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic { "role": "user", "content": ( - 'Generate ' + str(quantity) + ' ' + difficulty + 
' difficulty statements based on the provided text. ' - 'Ensure that your statements accurately represent ' - 'information or inferences from the text, and ' - 'provide a variety of responses, including, at ' - 'least one of each True, False, and Not Given, ' - 'as appropriate.\n\nReference text:\n\n ' + text) + 'Generate ' + str( + quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. ' + 'Ensure that your statements accurately represent ' + 'information or inferences from the text, and ' + 'provide a variety of responses, including, at ' + 'least one of each True, False, and Not Given, ' + 'as appropriate.\n\nReference text:\n\n ' + text) } ] token_count = count_total_tokens(messages) - questions = make_openai_call(GPT_4_O, messages, token_count,["prompts"], + questions = make_openai_call(GPT_4_O, messages, token_count, ["prompts"], GEN_QUESTION_TEMPERATURE)["prompts"] if len(questions) > quantity: questions = remove_excess_questions(questions, len(questions) - quantity) @@ -777,7 +785,7 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty): } ] token_count = count_total_tokens(messages) - questions = make_openai_call(GPT_4_O, messages, token_count,["questions"], + questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)["questions"][:quantity] return { @@ -802,13 +810,14 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id): { "role": "user", "content": ( - 'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs)) + 'For every paragraph of the list generate a minimum 5 word heading for it. 
def gen_idea_match_exercise(text: str, quantity: int, start_id):
    """Build a "match the idea to its author" reading exercise.

    Asks the model to extract `quantity` ideas/theories/opinions (and who
    voiced them) from `text`, then turns the authors into lettered options
    and the ideas into shuffled, sequentially-numbered sentences whose
    solution is the matching author's letter.

    :param text: the reading passage to extract ideas/opinions from
    :param quantity: how many ideas/opinions to request from the model
    :param start_id: first sequential id to assign to the sentences
    :return: an exercise dict in the "matchSentences" format
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}')
        },
        {
            "role": "user",
            "content": (
                'From the text extract ' + str(quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))

        }
    ]
    token_count = count_total_tokens(messages)

    ideas = make_openai_call(GPT_4_O, messages, token_count, ["ideas"], GEN_QUESTION_TEMPERATURE)["ideas"]
    # NOTE(review): the model is asked for `quantity` ideas but the count is
    # not enforced here — presumably acceptable downstream; confirm.
    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": build_options(ideas),
        "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
        "sentences": build_sentences(ideas, start_id),
        "type": "matchSentences"
    }
def build_options(ideas):
    """Turn the authors of `ideas` into lettered answer options.

    Each distinct author (the "from" field) gets exactly one option with a
    sequential uppercase letter id, in order of first appearance. Duplicate
    authors are collapsed into a single option so the same name never
    appears under two different letters.

    :param ideas: list of {"idea": ..., "from": ...} dicts
    :return: list of {"id": letter, "sentence": author} option dicts
    """
    letters = iter(string.ascii_uppercase)
    options = []
    assigned = {}  # author -> letter, in order of first appearance
    for idea in ideas:
        author = idea["from"]
        if author not in assigned:
            assigned[author] = next(letters)
            options.append({
                "id": assigned[author],
                "sentence": author
            })
    return options


def build_sentences(ideas, start_id):
    """Turn `ideas` into shuffled exercise sentences with solution letters.

    The solution letter of every idea is the letter its author receives in
    build_options (same first-appearance ordering), so ideas voiced by the
    same author share one solution letter. Sentences are shuffled and then
    numbered sequentially from `start_id`.

    :param ideas: list of {"idea": ..., "from": ...} dicts
    :param start_id: first sequential id to assign after shuffling
    :return: list of {"id", "sentence", "solution"} dicts
    """
    letters = iter(string.ascii_uppercase)
    assigned = {}  # author -> letter; mirrors the mapping in build_options
    sentences = []
    for idea in ideas:
        author = idea["from"]
        if author not in assigned:
            assigned[author] = next(letters)
        sentences.append({
            "solution": assigned[author],
            "sentence": idea["idea"]
        })

    random.shuffle(sentences)
    for i, sentence in enumerate(sentences, start=start_id):
        sentence["id"] = i
    return sentences
GEN_QUESTION_TEMPERATURE) + question = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE) return { "id": str(uuid.uuid4()), "prompt": "Select the appropriate option.", @@ -927,7 +1015,7 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti ] token_count = count_total_tokens(messages) - questions = make_openai_call(GPT_4_O, messages, token_count,["questions"], + questions = make_openai_call(GPT_4_O, messages, token_count, ["questions"], GEN_QUESTION_TEMPERATURE)["questions"][:quantity] return { @@ -993,7 +1081,6 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"], GEN_QUESTION_TEMPERATURE)["notes"][:quantity] - formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)]) word_messages = [ @@ -1008,7 +1095,7 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: } ] - words = make_openai_call(GPT_4_O, word_messages, token_count,["words"], + words = make_openai_call(GPT_4_O, word_messages, token_count, ["words"], GEN_QUESTION_TEMPERATURE)["words"][:quantity] replaced_notes = replace_first_occurrences_with_placeholders_notes(questions, words, start_id) return { @@ -1149,11 +1236,11 @@ def gen_multiple_choice_level(quantity: int, start_id=1): "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": ' - '[{"id": "A", "text": ' - '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' - '"Happy"}, {"id": "D", "text": "Jump"}], ' - '"prompt": "Which of the following is a conjunction?", ' - '"solution": "A", "variant": "text"}]}') + '[{"id": "A", "text": ' + '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' + '"Happy"}, {"id": "D", "text": "Jump"}], ' + '"prompt": "Which of the following is a conjunction?", ' + '"solution": "A", "variant": "text"}]}') }, { 
"role": "user", @@ -1163,8 +1250,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1): token_count = count_total_tokens(messages) question = make_openai_call(GPT_4_O, messages, token_count, - ["questions"], - GEN_QUESTION_TEMPERATURE) + ["questions"], + GEN_QUESTION_TEMPERATURE) if len(question["questions"]) != quantity: return gen_multiple_choice_level(quantity, start_id) @@ -1204,6 +1291,7 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys) return current_exercise, seen_keys + def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys): # Extracting relevant fields for comparison key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options']))) @@ -1220,7 +1308,8 @@ def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, s current_exercise["options"]) for exercise in exam.get("questions", []) ): - return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam, seen_keys) + return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam, + seen_keys) return current_exercise, seen_keys @@ -1243,8 +1332,8 @@ def generate_single_mc_level_question(): ] token_count = count_total_tokens(messages) - question = make_openai_call(GPT_4_O, messages, token_count,["options"], - GEN_QUESTION_TEMPERATURE) + question = make_openai_call(GPT_4_O, messages, token_count, ["options"], + GEN_QUESTION_TEMPERATURE) return question @@ -1273,11 +1362,11 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams "role": "system", "content": ( 'You are a helpful assistant designed to output JSON on this format: {"questions": [{"id": "9", "options": ' - '[{"id": "A", "text": ' - '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' - '"Happy"}, {"id": 
"D", "text": "Jump"}], ' - '"prompt": "Which of the following is a conjunction?", ' - '"solution": "A", "variant": "text"}]}') + '[{"id": "A", "text": ' + '"And"}, {"id": "B", "text": "Cat"}, {"id": "C", "text": ' + '"Happy"}, {"id": "D", "text": "Jump"}], ' + '"prompt": "Which of the following is a conjunction?", ' + '"solution": "A", "variant": "text"}]}') }, { "role": "user", @@ -1287,8 +1376,8 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams token_count = count_total_tokens(messages) question = make_openai_call(GPT_4_O, messages, token_count, - ["questions"], - GEN_QUESTION_TEMPERATURE) + ["questions"], + GEN_QUESTION_TEMPERATURE) if len(question["questions"]) != quantity: return gen_multiple_choice_level(quantity, start_id) @@ -1296,8 +1385,8 @@ def gen_multiple_choice_blank_space_utas(quantity: int, start_id: int, all_exams seen_keys = set() for i in range(len(question["questions"])): question["questions"][i], seen_keys = replace_exercise_if_exists_utas(all_exams, question["questions"][i], - question, - seen_keys) + question, + seen_keys) return fix_exercise_ids(question, start_id) @@ -1331,13 +1420,14 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int): ] } - gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (' multiple choice questions of 4 options for an english ' - 'level exam, some easy questions, some intermediate ' - 'questions and some advanced questions.Ensure that ' - 'the questions cover a range of topics such as verb ' - 'tense, subject-verb agreement, pronoun usage, ' - 'sentence structure, and punctuation. 
Make sure ' - 'every question only has 1 correct answer.') + gen_multiple_choice_for_text = 'Generate ' + str(quantity) + ( + ' multiple choice questions of 4 options for an english ' + 'level exam, some easy questions, some intermediate ' + 'questions and some advanced questions.Ensure that ' + 'the questions cover a range of topics such as verb ' + 'tense, subject-verb agreement, pronoun usage, ' + 'sentence structure, and punctuation. Make sure ' + 'every question only has 1 correct answer.') messages = [ { @@ -1360,14 +1450,15 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int): token_count = count_total_tokens(messages) question = make_openai_call(GPT_4_O, messages, token_count, - ["questions"], - GEN_QUESTION_TEMPERATURE) + ["questions"], + GEN_QUESTION_TEMPERATURE) if len(question["questions"]) != quantity: return gen_multiple_choice_level(quantity, start_id) else: return fix_exercise_ids(question, start_id)["questions"] + def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)): json_format = { "question": { @@ -1406,10 +1497,11 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran { "role": "user", "content": ( - 'From the generated text choose ' + str(quantity) + ' words (cannot be sequential words) to replace ' - 'once with {{id}} where id starts on ' + str(start_id) + ' and is ' - 'incremented for each word. The ids must be ordered throughout the text and the words must be ' - 'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.') + 'From the generated text choose ' + str( + quantity) + ' words (cannot be sequential words) to replace ' + 'once with {{id}} where id starts on ' + str(start_id) + ' and is ' + 'incremented for each word. The ids must be ordered throughout the text and the words must be ' + 'replaced only once. 
Put the removed words and respective ids on the words array of the json in the correct order.') } ] @@ -1420,14 +1512,14 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran return question["question"] -def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)): +def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)): passage = generate_reading_passage_1_text(topic) short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity) - mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id+sa_quantity, mc_quantity) + mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity) return { "exercises": { - "shortAnswer":short_answer, + "shortAnswer": short_answer, "multipleChoice": mc_exercises, }, "text": { @@ -1436,6 +1528,7 @@ def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic } } + def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int): json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]} @@ -1458,8 +1551,10 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int): token_count = count_total_tokens(messages) return make_openai_call(GPT_4_O, messages, token_count, - ["questions"], - GEN_QUESTION_TEMPERATURE)["questions"] + ["questions"], + GEN_QUESTION_TEMPERATURE)["questions"] + + def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): json_format = { "questions": [ @@ -1497,7 +1592,8 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): }, { "role": "user", - "content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text + "content": 'Generate ' + str( + mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text }, { "role": "user", @@ 
-1513,4 +1609,4 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int): if len(question["questions"]) != mc_quantity: return gen_multiple_choice_level(mc_quantity, start_id) else: - return fix_exercise_ids(question, start_id)["questions"] \ No newline at end of file + return fix_exercise_ids(question, start_id)["questions"]