Add misspelled pairs to writing grading.

2024-01-03 17:40:48 +00:00
parent 9b3997f65e
commit 63823a01de
3 changed files with 30 additions and 1 deletions
--- a/helper/speech_to_text_helper.py
+++ b/helper/speech_to_text_helper.py
@@ -1,8 +1,11 @@
+import string
 import whisper
 import os
 import nltk
 import boto3
 import random
+from spellchecker import SpellChecker
+
 nltk.download('words')
 from nltk.corpus import words
 from helper.constants import *
@@ -100,4 +103,28 @@ def divide_text(text, max_length=3000):
            divisions.append(text[current_position:next_position])
            current_position = next_position

-    return divisions
+    return divisions
+
+
+def get_misspelled_pairs(text):
+    spell = SpellChecker()
+
+    # Remove punctuation from the text
+    translator = str.maketrans("", "", string.punctuation)
+    text_without_punctuation = text.translate(translator)
+
+    # Split the text into words
+    words = text_without_punctuation.split()
+
+    # Find misspelled words
+    misspelled = spell.unknown(words)
+
+    # Create a list to store misspelled word pairs
+    misspelled_pairs = []
+
+    # Generate misspelled word pairs with their corrections
+    for word in misspelled:
+        correction = spell.correction(word)
+        misspelled_pairs.append({"misspelled": word, "correction": correction})
+
+    return misspelled_pairs