Fix reading exercise with more than 3 words.

2024-03-24 00:41:55 +00:00
parent 73532d5fed
commit 6aba83f3bb
2 changed files with 160 additions and 117 deletions
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -16,6 +16,68 @@ from wonderwords import RandomWord
 nltk.download('words')


+def gen_reading_passage_1(topic, req_exercises, difficulty):
+    if (len(req_exercises) == 0):  # no exercise types requested: pick two at random
+        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
+
+    number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_1_EXERCISES, len(req_exercises))
+
+    passage = generate_reading_passage(QuestionType.READING_PASSAGE_1, topic)
+    start_id = 1  # presumably the first question id of this passage; forwarded to generate_reading_exercises
+    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
+    if contains_empty_dict(exercises):  # {} marks an exercise that failed the word-limit check
+        return gen_reading_passage_1(topic, req_exercises, difficulty)  # NOTE(review): regenerates everything; no retry cap
+    return {
+        "exercises": exercises,
+        "text": {
+            "content": passage["text"],
+            "title": passage["title"]
+        },
+        "difficulty": difficulty
+    }
+
+
+def gen_reading_passage_2(topic, req_exercises, difficulty):
+    if (len(req_exercises) == 0):  # no exercise types requested: pick two at random
+        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
+
+    number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_2_EXERCISES, len(req_exercises))
+
+    passage = generate_reading_passage(QuestionType.READING_PASSAGE_2, topic)
+    start_id = 14  # presumably the first question id of this passage; forwarded to generate_reading_exercises
+    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
+    if contains_empty_dict(exercises):  # {} marks an exercise that failed the word-limit check
+        return gen_reading_passage_2(topic, req_exercises, difficulty)  # retry passage 2 itself, not passage 1
+    return {
+        "exercises": exercises,
+        "text": {
+            "content": passage["text"],
+            "title": passage["title"]
+        },
+        "difficulty": difficulty
+    }
+
+
+def gen_reading_passage_3(topic, req_exercises, difficulty):
+    if (len(req_exercises) == 0):  # no exercise types requested: pick two at random
+        req_exercises = random.sample(READING_EXERCISE_TYPES, 2)
+
+    number_of_exercises_q = divide_number_into_parts(TOTAL_READING_PASSAGE_3_EXERCISES, len(req_exercises))
+
+    passage = generate_reading_passage(QuestionType.READING_PASSAGE_3, topic)
+    start_id = 27  # presumably the first question id of this passage; forwarded to generate_reading_exercises
+    exercises = generate_reading_exercises(passage["text"], req_exercises, number_of_exercises_q, start_id, difficulty)
+    if contains_empty_dict(exercises):  # {} marks an exercise that failed the word-limit check
+        return gen_reading_passage_3(topic, req_exercises, difficulty)  # retry passage 3 itself, not passage 1
+    return {
+        "exercises": exercises,
+        "text": {
+            "content": passage["text"],
+            "title": passage["title"]
+        },
+        "difficulty": difficulty
+    }
+
 def divide_number_into_parts(number, parts):
    if number < parts:
        return None
@@ -309,8 +371,12 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
            print("Added trueFalse: " + str(question))
        elif req_exercise == "writeBlanks":
            question = gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
-            exercises.append(question)
-            print("Added write blanks: " + str(question))
+            if answer_word_limit_ok(question):
+                exercises.append(question)
+                print("Added write blanks: " + str(question))
+            else:
+                exercises.append({})
+                print("Did not add write blanks because it did not respect word limit")
        elif req_exercise == "paragraphMatch":
            question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
            exercises.append(question)
@@ -321,13 +387,26 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
    return exercises


-def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
+def answer_word_limit_ok(question):
+    # True only when no option in any solution has more than three whitespace-separated words
+    return not any(len(option.split()) > 3
+                   for solution in question["solutions"]
+                   for option in solution["solution"])
+
+
+def contains_empty_dict(arr):
+    return any(elem == {} for elem in arr)  # {} is what generate_reading_exercises appends for a rejected exercise
+
+
+def generate_listening_conversation_exercises(conversation: str, req_exercises: list, number_of_exercises_q, start_id,
+                                              difficulty):
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
-            question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id, difficulty)
+            question = gen_multiple_choice_exercise_listening_conversation(conversation, number_of_exercises, start_id,
+                                                                           difficulty)
            exercises.append(question)
            print("Added multiple choice: " + str(question))
        elif req_exercise == "writeBlanksQuestions":
@@ -351,25 +430,30 @@ def generate_listening_conversation_exercises(conversation: str, req_exercises:
    return exercises


-def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id, difficulty):
+def generate_listening_monologue_exercises(monologue: str, req_exercises: list, number_of_exercises_q, start_id,
+                                           difficulty):
    exercises = []
    for req_exercise in req_exercises:
        number_of_exercises = number_of_exercises_q.get()

        if req_exercise == "multipleChoice":
-            question = gen_multiple_choice_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty)
+            question = gen_multiple_choice_exercise_listening_monologue(monologue, number_of_exercises, start_id,
+                                                                        difficulty)
            exercises.append(question)
            print("Added multiple choice: " + str(question))
        elif req_exercise == "writeBlanksQuestions":
-            question = gen_write_blanks_questions_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty)
+            question = gen_write_blanks_questions_exercise_listening_monologue(monologue, number_of_exercises, start_id,
+                                                                               difficulty)
            exercises.append(question)
            print("Added write blanks questions: " + str(question))
        elif req_exercise == "writeBlanksFill":
-            question = gen_write_blanks_notes_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty)
+            question = gen_write_blanks_notes_exercise_listening_monologue(monologue, number_of_exercises, start_id,
+                                                                           difficulty)
            exercises.append(question)
            print("Added write blanks notes: " + str(question))
        elif req_exercise == "writeBlanksForm":
-            question = gen_write_blanks_form_exercise_listening_monologue(monologue, number_of_exercises, start_id, difficulty)
+            question = gen_write_blanks_form_exercise_listening_monologue(monologue, number_of_exercises, start_id,
+                                                                          difficulty)
            exercises.append(question)
            print("Added write blanks form: " + str(question))

@@ -379,13 +463,14 @@ def generate_listening_monologue_exercises(monologue: str, req_exercises: list,


 def gen_multiple_choice_exercise(text: str, quantity: int, start_id, difficulty):
-    gen_multiple_choice_for_text = "Generate " + str(quantity) + " " + difficulty + " difficulty multiple choice questions for this text: " \
-                                                                 "'" + text + "'\n" \
-                                                                              "Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
-                                                                              "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
-                                                                              "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
-                                                                              "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
-                                                                              "\"solution\": \"C\", \"variant\": \"text\"}]"
+    gen_multiple_choice_for_text = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty multiple choice questions for this text: " \
+                                       "'" + text + "'\n" \
+                                                    "Use this format: \"questions\": [{\"id\": \"9\", \"options\": [{\"id\": \"A\", \"text\": " \
+                                                    "\"Economic benefits\"}, {\"id\": \"B\", \"text\": \"Government regulations\"}, {\"id\": \"C\", \"text\": " \
+                                                    "\"Concerns about climate change\"}, {\"id\": \"D\", \"text\": \"Technological advancement\"}], " \
+                                                    "\"prompt\": \"What is the main reason for the shift towards renewable energy sources?\", " \
+                                                    "\"solution\": \"C\", \"variant\": \"text\"}]"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
@@ -416,7 +501,7 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu

    gen_words_to_replace = "Select " + str(
        quantity) + " " + difficulty + " difficulty words, it must be words and not expressions, from the summary and respond in this " \
-                    "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
+                                       "JSON format: { \"words\": [\"word_1\", \"word_2\"] }. The summary is: " + text_summary
    token_count = count_tokens(gen_words_to_replace)["n_tokens"]
    words_to_replace = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_words_to_replace, token_count,
                                                 ["words"],
@@ -442,13 +527,13 @@ def gen_summary_fill_blanks_exercise(text: str, quantity: int, start_id, difficu
 def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, difficulty):
    gen_true_false_not_given = "Generate " + str(
        quantity) + " " + difficulty + " difficulty statements in JSON format (True, False, or Not Given) " \
-                    "based on the provided text. Ensure that your statements " \
-                    "accurately represent information or inferences from the " \
-                    "text, and provide a variety of responses, including, at least one of each True, " \
-                    "False, and Not Given, as appropriate, in the JSON structure " \
-                    "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
-                    "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
-                    "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text
+                                       "based on the provided text. Ensure that your statements " \
+                                       "accurately represent information or inferences from the " \
+                                       "text, and provide a variety of responses, including, at least one of each True, " \
+                                       "False, and Not Given, as appropriate, in the JSON structure " \
+                                       "{\"prompts\":[{\"prompt\": \"statement_1\", \"solution\": " \
+                                       "\"true/false/not_given\"}, {\"prompt\": \"statement_2\", " \
+                                       "\"solution\": \"true/false/not_given\"}]}. Reference text: " + text

    token_count = count_tokens(gen_true_false_not_given)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_true_false_not_given, token_count,
@@ -469,10 +554,11 @@ def gen_true_false_not_given_exercise(text: str, quantity: int, start_id, diffic


 def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
-    gen_short_answer_questions = "Generate " + str(quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
-                                                               "(max 3 words per answer), about this text: '" + text + "'. " \
-                                                                                                                       "Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \
-                                                                                                                       "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"
+    gen_short_answer_questions = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers, " \
+                                       "must have maximum 3 words per answer, about this text: '" + text + "'. " \
+                                                                                                           "Provide your answer in this JSON format: {\"questions\": [{\"question\": question, " \
+                                                                                                           "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}"

    token_count = count_tokens(gen_short_answer_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_short_answer_questions, token_count,
@@ -492,9 +578,9 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
 def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    paragraphs = assign_letters_to_paragraphs(text)
    heading_prompt = (
-                'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
-                '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
-                'The paragraphs are these: ' + str(paragraphs))
+            'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
+            '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
+            'The paragraphs are these: ' + str(paragraphs))

    token_count = count_tokens(heading_prompt)["n_tokens"]
    headings = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count,
@@ -519,13 +605,13 @@ def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
        })

    return {
-            "id": str(uuid.uuid4()),
-            "allowRepetition": False,
-            "options": options,
-            "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
-            "sentences": sentences[:quantity],
-            "type": "matchSentences"
-        }
+        "id": str(uuid.uuid4()),
+        "allowRepetition": False,
+        "options": options,
+        "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
+        "sentences": sentences[:quantity],
+        "type": "matchSentences"
+    }


 def assign_letters_to_paragraphs(paragraphs):
@@ -539,7 +625,7 @@ def assign_letters_to_paragraphs(paragraphs):
 def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " " + difficulty + " difficulty multiple choice questions of 4 options of for this conversation: " \
-                    "'" + text + "'"
+                                       "'" + text + "'"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
@@ -562,8 +648,9 @@ def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int


 def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_multiple_choice_for_text = "Generate " + str(quantity) + " " + difficulty + " difficulty multiple choice questions for this monologue: " \
-                                                                 "'" + text + "'"
+    gen_multiple_choice_for_text = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty multiple choice questions for this monologue: " \
+                                       "'" + text + "'"
    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"]
    mc_questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_multiple_choice_for_text, token_count,
                                             None,
@@ -586,11 +673,12 @@ def gen_multiple_choice_exercise_listening_monologue(text: str, quantity: int, s


 def gen_write_blanks_questions_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_questions = "Generate " + str(quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
-                                                               "(max 3 words per answer), about a monologue and" \
-                                                               "respond in this JSON format: {\"questions\": [{\"question\": question, " \
-                                                               "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
-                                                               "The monologue is this: '" + text + "'"
+    gen_write_blanks_questions = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
+                                       "(max 3 words per answer), about a monologue and" \
+                                       "respond in this JSON format: {\"questions\": [{\"question\": question, " \
+                                       "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
+                                       "The monologue is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
@@ -608,11 +696,12 @@ def gen_write_blanks_questions_exercise_listening_conversation(text: str, quanti


 def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_questions = "Generate " + str(quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
-                                                               "(max 3 words per answer), about a monologue and" \
-                                                               "respond in this JSON format: {\"questions\": [{\"question\": question, " \
-                                                               "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
-                                                               "The monologue is this: '" + text + "'"
+    gen_write_blanks_questions = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty short answer questions, and the possible answers " \
+                                       "(max 3 words per answer), about a monologue and" \
+                                       "respond in this JSON format: {\"questions\": [{\"question\": question, " \
+                                       "\"possible_answers\": [\"answer_1\", \"answer_2\"]}]}." \
+                                       "The monologue is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_questions)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_questions, token_count,
@@ -630,8 +719,9 @@ def gen_write_blanks_questions_exercise_listening_monologue(text: str, quantity:


 def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_notes = "Generate " + str(quantity) + " " + difficulty + " difficulty notes taken from the conversation and and respond in this " \
-                                                           "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
+    gen_write_blanks_notes = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty notes taken from the conversation and and respond in this " \
+                                       "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
@@ -655,8 +745,9 @@ def gen_write_blanks_notes_exercise_listening_conversation(text: str, quantity:


 def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_notes = "Generate " + str(quantity) + " " + difficulty + " difficulty notes taken from the monologue and respond in this " \
-                                                           "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"
+    gen_write_blanks_notes = "Generate " + str(
+        quantity) + " " + difficulty + " difficulty notes taken from the monologue and respond in this " \
+                                       "JSON format: { \"notes\": [\"note_1\", \"note_2\"] }. The monologue is this: '" + text + "'"

    token_count = count_tokens(gen_write_blanks_notes)["n_tokens"]
    questions = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_notes, token_count,
@@ -680,8 +771,9 @@ def gen_write_blanks_notes_exercise_listening_monologue(text: str, quantity: int


 def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_form = "Generate a form with " + str(quantity) + " " + difficulty + " difficulty key-value pairs about the conversation. " \
-                                                                      "The conversation is this: '" + text + "'"
+    gen_write_blanks_form = "Generate a form with " + str(
+        quantity) + " " + difficulty + " difficulty key-value pairs about the conversation. " \
+                                       "The conversation is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
    form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
                                     None,
@@ -703,8 +795,9 @@ def gen_write_blanks_form_exercise_listening_conversation(text: str, quantity: i


 def gen_write_blanks_form_exercise_listening_monologue(text: str, quantity: int, start_id, difficulty):
-    gen_write_blanks_form = "Generate a form with " + str(quantity) + " " + difficulty + " difficulty key-value pairs about the monologue. " \
-                                                                      "The monologue is this: '" + text + "'"
+    gen_write_blanks_form = "Generate a form with " + str(
+        quantity) + " " + difficulty + " difficulty key-value pairs about the monologue. " \
+                                       "The monologue is this: '" + text + "'"
    token_count = count_tokens(gen_write_blanks_form)["n_tokens"]
    form = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, gen_write_blanks_form, token_count,
                                     None,
@@ -739,8 +832,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):

    token_count = count_tokens(gen_multiple_choice_for_text)["n_tokens"] - 300
    mc_questions = make_openai_call(GPT_4_PREVIEW, messages, token_count,
-                                             None,
-                                             GEN_QUESTION_TEMPERATURE)
+                                    None,
+                                    GEN_QUESTION_TEMPERATURE)
    if not '25' in mc_questions:
        return gen_multiple_choice_level(quantity, start_id)
    else:
@@ -775,7 +868,8 @@ def gen_multiple_choice_level(quantity: int, start_id=1):
        all_exams = get_all("level")
        seen_keys = set()
        for i in range(len(question["questions"])):
-            question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i], question,
+            question["questions"][i], seen_keys = replace_exercise_if_exists(all_exams, question["questions"][i],
+                                                                             question,
                                                                             seen_keys)
        return {
            "id": str(uuid.uuid4()),