Add paragraphMatch.

2024-03-19 23:05:44 +00:00
parent bed07ca819
commit 6e65732e94
3 changed files with 77 additions and 6 deletions
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -16,7 +16,7 @@ GRADING_FIELDS = ['comment', 'overall', 'task_response']
 GEN_FIELDS = ['topic']
 GEN_TEXT_FIELDS = ['title']
 LISTENING_GEN_FIELDS = ['transcript', 'exercise']
-READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse']
+READING_EXERCISE_TYPES = ['fillBlanks', 'writeBlanks', 'trueFalse', 'paragraphMatch']
 LISTENING_EXERCISE_TYPES = ['multipleChoice', 'writeBlanksQuestions', 'writeBlanksFill', 'writeBlanksForm']

 TOTAL_READING_PASSAGE_1_EXERCISES = 13
--- a/helper/exercises.py
+++ b/helper/exercises.py
@@ -1,4 +1,6 @@
 import queue
+import string
+
 import nltk
 import random
 import re
@@ -309,6 +311,10 @@ def generate_reading_exercises(passage: str, req_exercises: list, number_of_exer
            question = gen_write_blanks_exercise(passage, number_of_exercises, start_id, difficulty)
            exercises.append(question)
            print("Added write blanks: " + str(question))
+        elif req_exercise == "paragraphMatch":
+            question = gen_paragraph_match_exercise(passage, number_of_exercises, start_id)
+            exercises.append(question)
+            print("Added paragraph match: " + str(question))

        start_id = start_id + number_of_exercises

@@ -483,6 +489,53 @@ def gen_write_blanks_exercise(text: str, quantity: int, start_id, difficulty):
    }


+def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
+    """Build a "match the heading to the paragraph" reading exercise.
+
+    The passage is split into lettered paragraphs, the model is asked for one
+    heading per paragraph, and the headings are shuffled to form the questions.
+
+    :param text: passage whose paragraphs are separated by newlines.
+    :param quantity: maximum number of headings (questions) to return.
+    :param start_id: id given to the first question; later ids count up from it.
+    :return: exercise dict of type "matchSentences"; "options" holds the lettered
+        paragraphs and "sentences" holds the headings, each with the letter of
+        its paragraph as "solution".
+    """
+    paragraphs = assign_letters_to_paragraphs(text)
+    heading_prompt = (
+        'For every paragraph of the list generate a minimum 5 word heading for it. Provide your answer in this JSON format: '
+        '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
+        'The paragraphs are these: ' + str(paragraphs))
+
+    token_count = count_tokens(heading_prompt)["n_tokens"]
+    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count,
+                                         ["headings"],
+                                         GEN_QUESTION_TEMPERATURE)
+    # make_openai_instruct_call returns "" when it gives up on a blacklisted
+    # response, so the result must not be indexed blindly.
+    headings = response.get("headings", []) if isinstance(response, dict) else []
+
+    options = [
+        {"id": paragraph["letter"], "sentence": paragraph["paragraph"]}
+        for paragraph in paragraphs
+    ]
+
+    # zip() stops at the shorter list, so a reply with too few headings yields
+    # fewer questions instead of an IndexError.
+    matched = []
+    for paragraph, heading in zip(paragraphs, headings):
+        # The prompt asks for {"heading": "..."} items; unwrap them so that
+        # "sentence" below is plain text rather than a nested dict.
+        if isinstance(heading, dict):
+            heading = heading.get("heading", "")
+        matched.append({"heading": heading, "letter": paragraph["letter"]})
+
+    # Shuffle so the order of the questions does not give away the answers.
+    random.shuffle(matched)
+    sentences = [
+        {"id": question_id, "sentence": item["heading"], "solution": item["letter"]}
+        for question_id, item in enumerate(matched, start=start_id)
+    ]
+
+    return {
+        "id": str(uuid.uuid4()),
+        "allowRepetition": False,
+        "options": options,
+        "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
+        "sentences": sentences[:quantity],
+        "type": "matchSentences"
+    }
+
+
+def _paragraph_label(index):
+    """Return the spreadsheet-style label for a 0-based index (A..Z, AA, AB, ...)."""
+    label = ""
+    index += 1
+    while index > 0:
+        index, remainder = divmod(index - 1, 26)
+        label = string.ascii_uppercase[remainder] + label
+    return label
+
+
+def assign_letters_to_paragraphs(paragraphs):
+    """Split a passage on newlines and label each paragraph A, B, C, ...
+
+    Lines that are empty after stripping are skipped so that blank separator
+    lines do not become lettered paragraphs. Labels continue as AA, AB, ...
+    after Z instead of running out of letters.
+
+    :param paragraphs: passage text with one paragraph per line.
+    :return: list of {'paragraph': str, 'letter': str} dicts in passage order.
+    """
+    result = []
+    for line in paragraphs.split("\n"):
+        paragraph = line.strip()
+        if not paragraph:
+            continue
+        result.append({'paragraph': paragraph, 'letter': _paragraph_label(len(result))})
+    return result
+
+
 def gen_multiple_choice_exercise_listening_conversation(text: str, quantity: int, start_id, difficulty):
    gen_multiple_choice_for_text = "Generate " + str(
        quantity) + " " + difficulty + " difficulty multiple choice questions of 4 options of for this conversation: " \
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -60,6 +60,10 @@ def process_response(input_string, quotation_check_field):
                    re.search(r"'" + quotation_check_field + "':\s*\[([^\]]+)]", result, re.DOTALL | re.MULTILINE):
                json_obj = json.loads(parse_string(result))
                return json_obj
+            else:
+                if "title" in result:
+                    parsed_string = result.replace("\n\n", "\n")
+                    parsed_string = parsed_string.replace("\n", "**paragraph**")
                else:
                    parsed_string = result.replace("\n\n", " ")
                    parsed_string = parsed_string.replace("\n", " ")
@@ -177,9 +181,11 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
        try_count = try_count + 1
        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
    elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
+        try_count = 0
        return ""

    if fields_to_check is None:
+        try_count = 0
        return response.replace("\n\n", " ").strip()

    response = remove_special_characters_from_beginning(response)
@@ -189,13 +195,13 @@ def make_openai_instruct_call(model, message: str, token_count, fields_to_check,
        response = response + "}"
    try:
        processed_response = process_response(response, fields_to_check[0])
-
-        if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT:
+        reparagraphed_response = replace_expression_in_object(processed_response, "**paragraph**", "\n")
+        if check_fields(reparagraphed_response, fields_to_check) is False and try_count < TRY_LIMIT:
            try_count = try_count + 1
            return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
        else:
            try_count = 0
-            return processed_response
+            return reparagraphed_response
    except Exception as e:
        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)

@@ -300,3 +306,15 @@ def remove_special_characters_from_beginning(string):
        return cleaned_string[:-1]
    else:
        return cleaned_string
+
+
+def replace_expression_in_object(obj, expression, replacement):
+    """Replace `expression` with `replacement` in every string nested in `obj`.
+
+    Dicts and lists are walked recursively. Dicts are updated in place and
+    returned; lists are rebuilt; strings are returned with the replacement
+    applied. Any other value is returned unchanged.
+
+    :param obj: a string, list, dict, or any other value.
+    :param expression: substring to search for.
+    :param replacement: text that takes the place of each occurrence.
+    :return: `obj` with the replacement applied throughout.
+    """
+    if isinstance(obj, str):
+        # Handling str here also covers strings that sit directly inside a list.
+        return obj.replace(expression, replacement)
+    if isinstance(obj, list):
+        return [replace_expression_in_object(item, expression, replacement) for item in obj]
+    if isinstance(obj, dict):
+        # Only existing keys are reassigned, so iterating while updating is safe.
+        for key in obj:
+            obj[key] = replace_expression_in_object(obj[key], expression, replacement)
+    return obj