def gen_paragraph_match_exercise(text: str, quantity: int, start_id):
    """Build a "match the heading to its paragraph" reading exercise.

    The passage is split into lettered paragraphs, the instruct model is asked
    for one heading per paragraph, and the headings are shuffled so that the
    learner has to match each heading back to its paragraph letter.

    Args:
        text: The reading passage; paragraphs are separated by newlines.
        quantity: Maximum number of heading sentences to include.
        start_id: Id given to the first heading sentence; following sentences
            are numbered consecutively.

    Returns:
        A dict with the keys ``id``, ``allowRepetition``, ``options`` (one
        entry per paragraph), ``prompt``, ``sentences`` (at most ``quantity``
        shuffled headings with their solution letter) and ``type``.
    """
    paragraphs = assign_letters_to_paragraphs(text)
    heading_prompt = (
        'For every paragraph of the list generate a minimum 5 word heading for it. '
        'Provide your answer in this JSON format: '
        '{"headings": [ {"heading": "first paragraph heading"}, {"heading": "second paragraph heading"}]}\n'
        'The paragraphs are these: ' + str(paragraphs))

    token_count = count_tokens(heading_prompt)["n_tokens"]
    # NOTE(review): assumes the call returns a dict containing "headings";
    # confirm how callers should react when it returns something else.
    headings = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, heading_prompt, token_count,
                                         ["headings"],
                                         GEN_QUESTION_TEMPERATURE)["headings"]

    # Options always list every paragraph in passage order.
    options = [
        {"id": paragraph["letter"], "sentence": paragraph["paragraph"]}
        for paragraph in paragraphs
    ]

    # zip() pairs headings with paragraphs positionally and simply stops at the
    # shorter list, so a model answer with too few headings no longer raises
    # IndexError; paragraphs without a heading just get no sentence.
    matched = []
    for paragraph, raw_heading in zip(paragraphs, headings):
        paragraph["heading"] = _heading_text(raw_heading)
        matched.append(paragraph)

    # Shuffle so the heading order does not give away the paragraph order.
    random.shuffle(matched)
    sentences = [
        {"id": sentence_id, "sentence": paragraph["heading"], "solution": paragraph["letter"]}
        for sentence_id, paragraph in enumerate(matched, start=start_id)
    ]

    return {
        "id": str(uuid.uuid4()),
        "allowRepetition": False,
        "options": options,
        "prompt": "Choose the correct heading for paragraphs from the list of headings below.",
        "sentences": sentences[:quantity],
        "type": "matchSentences"
    }


def _heading_text(raw_heading):
    """Return the heading string from a model item (``{"heading": ...}`` or plain text)."""
    if isinstance(raw_heading, dict):
        return str(raw_heading.get("heading", ""))
    return str(raw_heading)


def _paragraph_label(index):
    """Return a spreadsheet-style label for a 0-based index: A..Z, AA, AB, ..."""
    label = ""
    remaining = index + 1
    while remaining:
        remaining, offset = divmod(remaining - 1, len(string.ascii_uppercase))
        label = string.ascii_uppercase[offset] + label
    return label


def assign_letters_to_paragraphs(paragraphs):
    """Split a passage on newlines and tag every paragraph with a letter.

    Blank lines are ignored so that they do not consume a letter, and passages
    with more than 26 paragraphs continue with ``AA``, ``AB``, ... instead of
    raising ``StopIteration``.

    Args:
        paragraphs: The passage text, one paragraph per line.

    Returns:
        A list of ``{'paragraph': <stripped text>, 'letter': <label>}`` dicts
        in passage order; empty when the passage has no non-blank lines.
    """
    if not paragraphs:
        return []

    stripped = (line.strip() for line in paragraphs.split("\n"))
    return [
        {'paragraph': paragraph, 'letter': _paragraph_label(position)}
        for position, paragraph in enumerate(p for p in stripped if p)
    ]
def replace_expression_in_object(obj, expression, replacement):
    """Replace ``expression`` with ``replacement`` in every string inside ``obj``.

    The structure is walked recursively: strings are rewritten, lists are
    rebuilt element by element, and dict values are updated in place. Strings
    that sit directly inside lists (at any nesting depth) are rewritten too.
    Values of any other type are left untouched.

    Args:
        obj: A string, list, dict, or any other value (e.g. parsed JSON).
        expression: The substring to search for.
        replacement: The text that replaces each occurrence of ``expression``.

    Returns:
        The processed value. A dict is mutated and returned as the same
        object; strings and lists are returned as new objects.
    """
    if isinstance(obj, str):
        return obj.replace(expression, replacement)
    if isinstance(obj, list):
        return [replace_expression_in_object(item, expression, replacement) for item in obj]
    if isinstance(obj, dict):
        # Only values are reassigned (the key set is unchanged), so updating
        # the dict while iterating over its items is safe.
        for key, value in obj.items():
            obj[key] = replace_expression_in_object(value, expression, replacement)
    return obj