Add misspelled pairs to writing grading.

This commit is contained in:
Cristiano Ferreira
2024-01-03 17:40:48 +00:00
parent 9b3997f65e
commit 63823a01de
3 changed files with 30 additions and 1 deletions

View File

@@ -1,8 +1,11 @@
import string
import whisper
import os
import nltk
import boto3
import random
from spellchecker import SpellChecker
nltk.download('words')
from nltk.corpus import words
from helper.constants import *
@@ -100,4 +103,28 @@ def divide_text(text, max_length=3000):
divisions.append(text[current_position:next_position])
current_position = next_position
return divisions
return divisions
def get_misspelled_pairs(text):
    """Find misspelled words in ``text`` and pair each with a suggestion.

    Hyphens and slashes are treated as word separators, every other ASCII
    punctuation character is removed, and the remaining whitespace-separated
    tokens are checked with ``SpellChecker``.

    Args:
        text: The text to check. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``{"misspelled": word, "correction": suggestion}`` dicts,
        one per unknown word reported by the checker, sorted by the
        misspelled word so the output is stable between runs.
        NOTE(review): depending on the installed pyspellchecker version,
        ``correction`` may be ``None`` when the checker has no candidate;
        confirm callers tolerate that.
    """
    if not text:
        return []

    # Word-joining punctuation becomes a space so that "well-known" or
    # "and/or" are checked as separate words instead of being fused into a
    # non-word ("wellknown", "andor") and reported as a misspelling.
    separators = "-/"
    removable = "".join(ch for ch in string.punctuation if ch not in separators)
    translator = str.maketrans(separators, " " * len(separators), removable)

    tokens = text.translate(translator).split()
    if not tokens:
        # Nothing to check; avoid loading the spell-checker dictionary.
        return []

    spell = SpellChecker()
    # unknown() yields an unordered collection; sort it for a deterministic
    # result order.
    return [
        {"misspelled": word, "correction": spell.correction(word)}
        for word in sorted(spell.unknown(tokens))
    ]