From 63823a01de403cf1af6fece40ba270df2178f1fe Mon Sep 17 00:00:00 2001 From: Cristiano Ferreira Date: Wed, 3 Jan 2024 17:40:48 +0000 Subject: [PATCH] Add misspelled pairs to writing grading. --- app.py | 2 ++ helper/speech_to_text_helper.py | 29 ++++++++++++++++++++++++++++- requirements.txt | Bin 460 -> 574 bytes 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index fac3057..643e465 100644 --- a/app.py +++ b/app.py @@ -204,6 +204,7 @@ def grade_writing_task_1(): response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["comment"], GEN_QUESTION_TEMPERATURE) + response['misspelled_pairs'] = get_misspelled_pairs(answer) return response else: return { @@ -256,6 +257,7 @@ def grade_writing_task_2(): response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["comment"], GEN_QUESTION_TEMPERATURE) + response['misspelled_pairs'] = get_misspelled_pairs(answer) return response else: return { diff --git a/helper/speech_to_text_helper.py b/helper/speech_to_text_helper.py index 6db79bb..b94c84b 100644 --- a/helper/speech_to_text_helper.py +++ b/helper/speech_to_text_helper.py @@ -1,8 +1,11 @@ +import string import whisper import os import nltk import boto3 import random +from spellchecker import SpellChecker + nltk.download('words') from nltk.corpus import words from helper.constants import * @@ -100,4 +103,28 @@ def divide_text(text, max_length=3000): divisions.append(text[current_position:next_position]) current_position = next_position - return divisions \ No newline at end of file + return divisions + + +def get_misspelled_pairs(text): + spell = SpellChecker() + + # Remove punctuation from the text + translator = str.maketrans("", "", string.punctuation) + text_without_punctuation = text.translate(translator) + + # Split the text into words + words = text_without_punctuation.split() + + # Find misspelled words + misspelled = spell.unknown(words) + + # Create a list to store misspelled word pairs + misspelled_pairs = [] + + # Generate misspelled word pairs with their corrections + for word in misspelled: + correction = spell.correction(word) + misspelled_pairs.append({"misspelled": word, "correction": correction}) + + return misspelled_pairs \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c00a2002f404897aa1824b03ed26ff03bed46150..d61d479d8e8cb926176d7313a0468430fdd0a1cb 100644 GIT binary patch delta 122 zcmX@ZypLtW8O9=B1}=sIhDwHFAeqXL1BA&889-JtLpG2sVgSh(0a=9%r9iTnp#+FQ cVkJO51q_)Cc?{`bemPKACe##&E?g!804HM?-~a#s delta 7 OcmdnTa)x=s8AbpN0s{{K