Improve spellchecking for writing
This commit is contained in:
5
app.py
5
app.py
@@ -9,7 +9,6 @@ from helper.file_helper import delete_files_older_than_one_day
|
|||||||
from helper.firebase_helper import *
|
from helper.firebase_helper import *
|
||||||
from helper.heygen_api import create_videos_and_save_to_db
|
from helper.heygen_api import create_videos_and_save_to_db
|
||||||
from helper.speech_to_text_helper import *
|
from helper.speech_to_text_helper import *
|
||||||
from helper.token_counter import count_tokens
|
|
||||||
from helper.openai_interface import *
|
from helper.openai_interface import *
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -17,7 +16,6 @@ import logging
|
|||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from heygen.AvatarEnum import AvatarEnum
|
|
||||||
from templates.question_templates import *
|
from templates.question_templates import *
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -250,7 +248,8 @@ def grade_writing_task_2():
|
|||||||
message = (
|
message = (
|
||||||
"Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect "
|
"Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect "
|
||||||
"answer and an elaborated comment where you deep dive into what is wrong and right about the answer."
|
"answer and an elaborated comment where you deep dive into what is wrong and right about the answer."
|
||||||
"Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', 'comment': 'comment about answer quality', 'overall': 7.0, "
|
"Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', "
|
||||||
|
"'comment': 'comment about answer quality', 'overall': 7.0, "
|
||||||
"'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, "
|
"'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, "
|
||||||
"'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' "
|
"'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' "
|
||||||
"and the answer was '" + answer + "'")
|
"and the answer was '" + answer + "'")
|
||||||
|
|||||||
@@ -5,6 +5,9 @@ import re
|
|||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from helper.constants import GPT_3_5_TURBO_INSTRUCT
|
||||||
|
from helper.token_counter import count_tokens
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||||
|
|
||||||
@@ -229,3 +232,14 @@ def extract_existing_sections_from_body(my_dict, keys_to_extract):
|
|||||||
return list(filter(
|
return list(filter(
|
||||||
lambda item: 'code' in item and item['code'] in keys_to_extract and 'grade' in item and 'name' in item,
|
lambda item: 'code' in item and item['code'] in keys_to_extract and 'grade' in item and 'name' in item,
|
||||||
my_dict['sections']))
|
my_dict['sections']))
|
||||||
|
|
||||||
|
|
||||||
|
def get_misspelled_pairs(text):
    """Ask the OpenAI instruct model for the misspelled words in ``text``.

    A prompt is built that asks for a JSON object shaped like
    ``{"misspelled_words": [{"misspelled": ..., "correction": ...}]}``.
    The prompt is sent through ``make_openai_instruct_call`` and the
    ``"misspelled_words"`` entries of the response are filtered.

    Args:
        text: The text to spell-check.

    Returns:
        A list of dicts, each holding a ``"misspelled"`` and a
        ``"correction"`` key whose values differ. The list is empty when
        ``text`` is empty or blank.
    """
    # Nothing to check: avoid a model round-trip for empty/blank input.
    if not text or not text.strip():
        return []

    # The sample JSON must be well-formed (balanced braces); it is the
    # template the model is asked to imitate.
    message = ('From the given text, extract the misspelled words and put them in the json with the correct word that '
               'should be on the text instead. Sample JSON: '
               '{"misspelled_words":[{"misspelled": "piza", "correction": "pizza"}]} \n The text: "' + text + '"')
    token_count = count_tokens(message)["n_tokens"]
    # NOTE(review): the list argument presumably names the fields the helper
    # expects in the parsed response, and 0.2 looks like the sampling
    # temperature - confirm against make_openai_instruct_call.
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["misspelled_words"], 0.2)

    # Keep only well-formed entries whose correction actually differs from
    # the reported word; model output may omit a key on individual items.
    return [
        item for item in response["misspelled_words"]
        if isinstance(item, dict)
        and "misspelled" in item
        and "correction" in item
        and item["misspelled"] != item["correction"]
    ]
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
import string
|
|
||||||
import whisper
|
import whisper
|
||||||
import os
|
import os
|
||||||
import nltk
|
import nltk
|
||||||
import boto3
|
import boto3
|
||||||
import random
|
import random
|
||||||
from spellchecker import SpellChecker
|
|
||||||
|
|
||||||
nltk.download('words')
|
nltk.download('words')
|
||||||
from nltk.corpus import words
|
from nltk.corpus import words
|
||||||
@@ -103,28 +101,4 @@ def divide_text(text, max_length=3000):
|
|||||||
divisions.append(text[current_position:next_position])
|
divisions.append(text[current_position:next_position])
|
||||||
current_position = next_position
|
current_position = next_position
|
||||||
|
|
||||||
return divisions
|
return divisions
|
||||||
|
|
||||||
|
|
||||||
def get_misspelled_pairs(text):
    """Pair each unrecognised word in ``text`` with a suggested correction.

    ASCII punctuation (``string.punctuation``) is stripped from ``text``,
    the remainder is split on whitespace, and the resulting tokens are
    passed to ``SpellChecker.unknown``. Every token reported back is paired
    with the result of ``SpellChecker.correction``.

    Args:
        text: The text to spell-check.

    Returns:
        A list of ``{"misspelled": word, "correction": suggestion}`` dicts,
        one per token reported by ``unknown``, in its iteration order.
    """
    checker = SpellChecker()

    # Drop punctuation in a single pass, then tokenise on whitespace.
    punctuation_free = text.translate(str.maketrans("", "", string.punctuation))
    unknown_tokens = checker.unknown(punctuation_free.split())

    return [
        {"misspelled": token, "correction": checker.correction(token)}
        for token in unknown_tokens
    ]
|
|
||||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Reference in New Issue
Block a user