Improve spellchecking for writing

2024-01-11 19:10:56 +00:00
parent a40ce04ad2
commit 61f876b3e4
4 changed files with 17 additions and 30 deletions
--- a/app.py
+++ b/app.py
@@ -9,7 +9,6 @@ from helper.file_helper import delete_files_older_than_one_day
 from helper.firebase_helper import *
 from helper.heygen_api import create_videos_and_save_to_db
 from helper.speech_to_text_helper import *
 from helper.token_counter import count_tokens
 from helper.openai_interface import *
 import os
 import re
@@ -17,7 +16,6 @@ import logging
 from dotenv import load_dotenv
 from heygen.AvatarEnum import AvatarEnum
 from templates.question_templates import *
 load_dotenv()
@@ -250,7 +248,8 @@ def grade_writing_task_2():
            message = (
                    "Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect "
                    "answer and an elaborated comment where you deep dive into what is wrong and right about the answer."
-                    "Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', 'comment': 'comment about answer quality', 'overall': 7.0, "
+                    "Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', "
                    "'comment': 'comment about answer quality', 'overall': 7.0, "
                    "'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, "
                    "'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' "
                                                                                                "and the answer was '" + answer + "'")
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -5,6 +5,9 @@ import re
 from dotenv import load_dotenv
 from helper.constants import GPT_3_5_TURBO_INSTRUCT
 from helper.token_counter import count_tokens
 load_dotenv()
 openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -229,3 +232,14 @@ def extract_existing_sections_from_body(my_dict, keys_to_extract):
        return list(filter(
            lambda item: 'code' in item and item['code'] in keys_to_extract and 'grade' in item and 'name' in item,
            my_dict['sections']))
 def get_misspelled_pairs(text):
     """Ask the OpenAI instruct model which words in ``text`` are misspelled.

     A prompt containing a sample JSON payload and ``text`` is sent through
     ``make_openai_instruct_call``; the ``misspelled_words`` list of the reply is
     then filtered so that only genuine corrections remain.

     Args:
         text: The text to spell-check. It is embedded verbatim in the prompt.

     Returns:
         A list of ``{"misspelled": ..., "correction": ...}`` items taken from the
         reply, excluding items whose correction equals the flagged word and items
         that do not carry both keys.
     """
     # The sample payload must be balanced JSON (note the closing brace of the outer
     # object): the model is asked to mirror it, so a broken sample invites broken output.
     message = ('From the given text, extract the misspelled words and put them in the json with the correct word that '
                'should be on the text instead. Sample JSON: '
                '{"misspelled_words":[{"misspelled": "piza", "correction": "pizza"}]} \n The text: "' + text + '"')
     token_count = count_tokens(message)["n_tokens"]
     # NOTE(review): ["misspelled_words"] is presumably the list of fields the helper
     # validates in the reply and 0.2 the sampling temperature - confirm against
     # make_openai_instruct_call.
     response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["misspelled_words"], 0.2)

     # Keep only well-formed items whose correction actually differs from the flagged
     # word; model output is not guaranteed to contain both keys on every item.
     return [
         item for item in response["misspelled_words"]
         if isinstance(item, dict)
         and "misspelled" in item
         and "correction" in item
         and item["misspelled"] != item["correction"]
     ]
--- a/helper/speech_to_text_helper.py
+++ b/helper/speech_to_text_helper.py
@@ -1,10 +1,8 @@
 import string
 import whisper
 import os
 import nltk
 import boto3
 import random
 from spellchecker import SpellChecker
 nltk.download('words')
 from nltk.corpus import words
@@ -103,28 +101,4 @@ def divide_text(text, max_length=3000):
            divisions.append(text[current_position:next_position])
            current_position = next_position
-    return divisions
+    return divisions
 def get_misspelled_pairs(text):
     """Find misspelled words in ``text`` with pyspellchecker and suggest corrections.

     Punctuation is deleted from ``text``, the remainder is split on whitespace and
     every word unknown to ``SpellChecker`` is paired with its suggested correction.

     Args:
         text: The text to spell-check.

     Returns:
         A list of ``{"misspelled": word, "correction": suggestion}`` dicts, ordered
         by misspelled word. Words with no suggestion, or whose suggestion is the
         word itself, are left out.
     """
     spell = SpellChecker()

     # Remove punctuation from the text
     # NOTE(review): deleting (rather than replacing) punctuation also fuses
     # contractions and hyphenated words ("don't" -> "dont"), which may then be
     # reported as misspelled - confirm whether that is intended.
     translator = str.maketrans("", "", string.punctuation)
     tokens = text.translate(translator).split()

     misspelled_pairs = []
     # spell.unknown() yields an unordered collection; sort it so the result is
     # stable from one run to the next.
     for word in sorted(spell.unknown(tokens)):
         correction = spell.correction(word)
         # Newer pyspellchecker releases return None when no candidate exists and
         # older ones echo the word back; neither is a usable correction.
         if correction is None or correction == word:
             continue
         misspelled_pairs.append({"misspelled": word, "correction": correction})

     return misspelled_pairs
--- a/requirements.txt
+++ b/requirements.txt