Improve spellchecking for writing

This commit is contained in:
Cristiano Ferreira
2024-01-11 19:10:56 +00:00
parent a40ce04ad2
commit 61f876b3e4
4 changed files with 17 additions and 30 deletions

View File

@@ -1,10 +1,8 @@
import string
import whisper
import os
import nltk
import boto3
import random
from spellchecker import SpellChecker
nltk.download('words')
from nltk.corpus import words
@@ -103,28 +101,4 @@ def divide_text(text, max_length=3000):
divisions.append(text[current_position:next_position])
current_position = next_position
return divisions
def get_misspelled_pairs(text):
    """Pair every word the spell checker does not recognise with its suggestion.

    Characters listed in ``string.punctuation`` are removed from ``text``,
    the remainder is split on whitespace, and the resulting tokens are handed
    to ``SpellChecker.unknown``. Each word it reports is then looked up with
    ``SpellChecker.correction``.

    Args:
        text: The string to check.

    Returns:
        A list of dicts of the form
        ``{"misspelled": <word>, "correction": <suggestion>}``, one per word
        reported by ``SpellChecker.unknown``.
    """
    checker = SpellChecker()

    # Delete punctuation in a single pass, then tokenise on whitespace.
    strip_punctuation = str.maketrans("", "", string.punctuation)
    tokens = text.translate(strip_punctuation).split()

    # NOTE(review): the suggestion is stored exactly as the checker returns
    # it; whether it can be empty/None depends on the library version.
    return [
        {"misspelled": unknown_word, "correction": checker.correction(unknown_word)}
        for unknown_word in checker.unknown(tokens)
    ]
return divisions