Added new ideaMatch exercise type.

2024-07-18 23:20:06 +01:00
parent 358f240d16
commit 4c41942dfe
2 changed files with 195 additions and 98 deletions
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -18,6 +18,7 @@ GEN_FIELDS = ['topic']
 GEN_TEXT_FIELDS = ['title']
 LISTENING_GEN_FIELDS = ['transcript', 'exercise']
 READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
 READING_3_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch', 'ideaMatch']
 LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']
 LISTENING_1_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksFill',
                              'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm', 'writeBlanksForm']
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -7,7 +7,6 @@ import uuid
 import nltk
 from wonderwords import RandomWord
 from helper.api_messages import QuestionType
 from helper.constants import *
 from helper.firebase_helper import get_all
 from helper.openai_interface import make_openai_call, count_total_tokens
@@ -243,6 +242,7 @@ def build_write_blanks_solutions_listening(words: [], start_id):
        )
    return solutions
 def get_perfect_answer(question: str, size: int):
    messages = [
        {
@@ -291,7 +291,7 @@ def generate_reading_passage_1_text(topic: str):
        },
        {
            "role": "system",
-            "content": ('The generated text should be fairly easy to understand.')
+            "content": ('The generated text should be fairly easy to understand and have multiple paragraphs.')
        },
    ]
    token_count = count_total_tokens(messages)
@@ -323,12 +323,13 @@ def generate_reading_passage_2_text(topic: str):
        },
        {
            "role": "system",
-            "content": ('The generated text should be fairly hard to understand.')
+            "content": ('The generated text should be fairly hard to understand and have multiple paragraphs.')
        },
    ]
    token_count = count_total_tokens(messages)
    return make_openai_call(GPT_4_O, messages, token_count, GEN_TEXT_FIELDS, GEN_QUESTION_TEMPERATURE)
 def generate_reading_passage_3_text(topic: str):
    messages = [
        {
@@ -355,7 +356,8 @@ def generate_reading_passage_3_text(topic: str):
        {
            "role": "system",
            "content": ('The generated text should be very hard to understand and include different points, theories, '
-                        'subtle differences of opinions from people over the specified topic .')
+                        'subtle differences of opinions from people, correctly sourced to the person who said it, '
                        'over the specified topic and have multiple paragraphs.')
        },
    ]
    token_count = count_total_tokens(messages)
@@ -547,6 +549,10 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
            question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
            exercises.append(question)
            print("Added paragraph match: " + str(question))
        elif req_exercise == "ideaMatch":
            question = gen_idea_match_exercise(passage, number_of_exercises, start_id)
            exercises.append(question)
            print("Added idea match: " + str(question))
        start_id = start_id + number_of_exercises
@@ -693,7 +699,8 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
        {
            "role": "user",
            "content": ('Select ' + str(quantity) + ' ' + difficulty + ' difficulty words, it must be words and not '
-                                                                       'expressions, from this:\n' + response["summary"])
+                                                                       'expressions, from this:\n' + response[
                            "summary"])
        }
    ]
@@ -732,7 +739,8 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic
        {
            "role": "user",
            "content": (
-                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
+                    'Generate ' + str(
                quantity) + ' ' + difficulty + ' difficulty statements based on the provided text. '
                                               'Ensure that your statements accurately represent '
                                               'information or inferences from the text, and '
                                               'provide a variety of responses, including, at '
@@ -802,7 +810,8 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
        {
            "role": "user",
            "content": (
-                    'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(paragraphs))
+                    'For every paragraph of the list generate a minimum 5 word heading for it. The paragraphs are these: ' + str(
                paragraphs))
        }
    ]
@@ -838,6 +847,83 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    }
 def gen_idea_match_exercise(text: str, quantity: int, start_id):
    """Generate an exercise that matches ideas/opinions to their authors.

    The model is asked to extract ``quantity`` ideas from ``text`` together
    with who each one is from. The extracted list is then turned into lettered
    author options (``build_options``) and shuffled, numbered idea sentences
    (``build_sentences``).

    Args:
        text: The reading passage to extract ideas from.
        quantity: How many ideas to request; also the maximum number kept.
        start_id: Id given to the first sentence; later sentences count up
            from it.

    Returns:
        A dict with a fresh uuid ``"id"``, ``"allowRepetition"`` set to False,
        the ``"options"`` and ``"sentences"`` lists, the ``"prompt"`` text and
        ``"type"`` set to ``"matchSentences"``.
    """
    messages = [
        {
            "role": "system",
            "content": (
                'You are a helpful assistant designed to output JSON on this format: '
                '{"ideas": [ '
                '{"idea": "some idea or opinion", "from": "person, institution whose idea or opinion this is"}, '
                '{"idea": "some other idea or opinion", "from": "person, institution whose idea or opinion this is"}'
                ']}')
        },
        {
            "role": "user",
            "content": (
                    'From the text extract ' + str(quantity) + ' ideas, theories, opinions and who they are from. The text: ' + str(text))
        }
    ]
    token_count = count_total_tokens(messages)

    # The model may return more ideas than requested. The caller advances
    # start_id by exactly `quantity` after this exercise, so any extra idea
    # would get an id that collides with the next exercise; cap the list.
    ideas = make_openai_call(GPT_4_O, messages, token_count, ["ideas"], GEN_QUESTION_TEMPERATURE)["ideas"][:quantity]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": build_options(ideas),
        "prompt": "Choose the correct author for the ideas/opinions from the list of authors below.",
        "sentences": build_sentences(ideas, start_id),
        "type": "matchSentences"
    }
 def build_options(ideas):
    """Return the lettered author options for an idea-match exercise.

    Each entry of ``ideas`` contributes one option, in order: the entry's
    ``"from"`` value becomes the option's ``"sentence"`` and the next
    uppercase ASCII letter (starting at "A") becomes its ``"id"``.

    NOTE(review): authors are not de-duplicated here, so two ideas with the
    same ``"from"`` value produce two options with identical text — confirm
    that this is intended.
    """
    alphabet = iter(string.ascii_uppercase)
    # next() keeps the one-letter-per-idea pairing and fails loudly
    # (StopIteration) if there are more ideas than letters.
    return [{"id": next(alphabet), "sentence": entry["from"]} for entry in ideas]
 def build_sentences(ideas, start_id):
    """Return the shuffled, numbered idea sentences for an idea-match exercise.

    Each entry of ``ideas`` becomes a dict whose ``"sentence"`` is the entry's
    ``"idea"`` text and whose ``"solution"`` is the uppercase letter for the
    entry's position ("A" for the first, "B" for the second, ...). The list is
    shuffled in place with ``random.shuffle``, then every item receives a
    consecutive ``"id"`` starting at ``start_id`` in the shuffled order.
    """
    alphabet = iter(string.ascii_uppercase)
    shuffled = [
        {"solution": next(alphabet), "sentence": entry["idea"]}
        for entry in ideas
    ]
    # Letters are assigned before shuffling so each solution still points at
    # the idea's original position.
    random.shuffle(shuffled)
    for number, item in enumerate(shuffled, start=start_id):
        item["id"] = number
    return shuffled
 def assign_letters_to_paragraphs(paragraphs):
    result = []
    letters = iter(string.ascii_uppercase)
@@ -861,7 +947,8 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int
        {
            "role": "user",
            "content": (
-                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(n_options) + ' options '
+                    'Generate ' + str(quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
                n_options) + ' options '
                             'of for this conversation:\n"' + text + '"')
        }
@@ -892,7 +979,8 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s
            "role": "user",
            "content": (
                    'Generate ' + str(
-                quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(n_options) + ' options '
+                quantity) + ' ' + difficulty + ' difficulty multiple choice questions of ' + str(
                n_options) + ' options '
                             'of for this monologue:\n"' + text + '"')
        }
@@ -993,7 +1081,6 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:
    questions = make_openai_call(GPT_4_O, messages, token_count, ["notes"],
                                 GEN_QUESTION_TEMPERATURE)["notes"][:quantity]
    formatted_phrases = "\n".join([f"{i + 1}. {phrase}" for i, phrase in enumerate(questions)])
    word_messages = [
@@ -1204,6 +1291,7 @@ def replace_exercise_if_exists(all_exams, current_exercise, current_exam, seen_k
            return replace_exercise_if_exists(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
    return current_exercise, seen_keys
 def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, seen_keys):
    # Extracting relevant fields for comparison
    key = (current_exercise['prompt'], tuple(sorted(option['text'] for option in current_exercise['options'])))
@@ -1220,7 +1308,8 @@ def replace_exercise_if_exists_utas(all_exams, current_exercise, current_exam, s
                    current_exercise["options"])
                for exercise in exam.get("questions", [])
        ):
-            return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam, seen_keys)
+            return replace_exercise_if_exists_utas(all_exams, generate_single_mc_level_question(), current_exam,
                                                   seen_keys)
    return current_exercise, seen_keys
@@ -1331,7 +1420,8 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
        ]
    }
-    gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (' multiple choice questions of 4 options for an english '
+    gen_multiple_choice_for_text = 'Generate ' + str(quantity) + (
        ' multiple choice questions of 4 options for an english '
        'level exam, some easy questions, some intermediate '
        'questions and some advanced questions.Ensure that '
        'the questions cover a range of topics such as verb '
@@ -1368,6 +1458,7 @@ def gen_multiple_choice_underlined_utas(quantity: int, start_id: int):
    else:
        return fix_exercise_ids(question, start_id)["questions"]
 def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=random.choice(mti_topics)):
    json_format = {
        "question": {
@@ -1406,7 +1497,8 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
        {
            "role": "user",
            "content": (
-                'From the generated text choose ' + str(quantity) + ' words (cannot be sequential words) to replace '
+                    'From the generated text choose ' + str(
                quantity) + ' words (cannot be sequential words) to replace '
                            'once with {{id}} where id starts on ' + str(start_id) + ' and is '
                                                                                     'incremented for each word. The ids must be ordered throughout the text and the words must be '
                                                                                     'replaced only once. Put the removed words and respective ids on the words array of the json in the correct order.')
@@ -1420,8 +1512,8 @@ def gen_blank_space_text_utas(quantity: int, start_id: int, size: int, topic=ran
    return question["question"]
 def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
 def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic=random.choice(mti_topics)):
    passage = generate_reading_passage_1_text(topic)
    short_answer = gen_short_answer_utas(passage["text"], start_id, sa_quantity)
    mc_exercises = gen_text_multiple_choice_utas(passage["text"], start_id + sa_quantity, mc_quantity)
@@ -1436,6 +1528,7 @@ def gen_reading_passage_utas(start_id, sa_quantity: int, mc_quantity: int, topic
        }
    }
 def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
    json_format = {"questions": [{"id": 1, "question": "question", "possible_answers": ["answer_1", "answer_2"]}]}
@@ -1460,6 +1553,8 @@ def gen_short_answer_utas(text: str, start_id: int, sa_quantity: int):
    return make_openai_call(GPT_4_O, messages, token_count,
                            ["questions"],
                            GEN_QUESTION_TEMPERATURE)["questions"]
 def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
    json_format = {
        "questions": [
@@ -1497,7 +1592,8 @@ def gen_text_multiple_choice_utas(text: str, start_id: int, mc_quantity: int):
        },
        {
            "role": "user",
-            "content": 'Generate ' + str(mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
+            "content": 'Generate ' + str(
                mc_quantity) + ' multiple choice questions of 4 options for this text:\n' + text
        },
        {
            "role": "user",