diff --git a/app.py b/app.py index 1eb1c57..5a9c441 100644 --- a/app.py +++ b/app.py @@ -9,7 +9,6 @@ from helper.file_helper import delete_files_older_than_one_day from helper.firebase_helper import * from helper.heygen_api import create_videos_and_save_to_db from helper.speech_to_text_helper import * -from helper.token_counter import count_tokens from helper.openai_interface import * import os import re @@ -17,7 +16,6 @@ import logging from dotenv import load_dotenv -from heygen.AvatarEnum import AvatarEnum from templates.question_templates import * load_dotenv() @@ -250,7 +248,8 @@ def grade_writing_task_2(): message = ( "Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect " "answer and an elaborated comment where you deep dive into what is wrong and right about the answer." - "Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', 'comment': 'comment about answer quality', 'overall': 7.0, " + "Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', " + "'comment': 'comment about answer quality', 'overall': 7.0, " "'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, " "'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' " "and the answer was '" + answer + "'") diff --git a/helper/openai_interface.py b/helper/openai_interface.py index f96afe8..d871b84 100644 --- a/helper/openai_interface.py +++ b/helper/openai_interface.py @@ -5,6 +5,9 @@ import re from dotenv import load_dotenv +from helper.constants import GPT_3_5_TURBO_INSTRUCT +from helper.token_counter import count_tokens + load_dotenv() openai.api_key = os.getenv("OPENAI_API_KEY") @@ -237,3 +240,14 @@ def extract_existing_sections_from_body(my_dict, keys_to_extract): return list(filter( lambda item: 'code' in item and item['code'] in keys_to_extract and 'grade' in item and 'name' in item, my_dict['sections'])) + + +def get_misspelled_pairs(text): + message = ('From the given text, extract the misspelled words and put them in the json with the correct word that ' + 'should be on the text instead. Sample JSON: ' + '{"misspelled_words":[{"misspelled": "piza", "correction": "pizza"}] \n The text: "' + text + '"') + token_count = count_tokens(message)["n_tokens"] + response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["misspelled_words"], 0.2) + # Filter out items with the same value for misspelled and correction + filtered_data = [item for item in response["misspelled_words"] if item['misspelled'] != item['correction']] + return filtered_data diff --git a/helper/speech_to_text_helper.py b/helper/speech_to_text_helper.py index b94c84b..e536cf8 100644 --- a/helper/speech_to_text_helper.py +++ b/helper/speech_to_text_helper.py @@ -1,10 +1,8 @@ -import string import whisper import os import nltk import boto3 import random -from spellchecker import SpellChecker nltk.download('words') from nltk.corpus import words @@ -103,28 +101,4 @@ def divide_text(text, max_length=3000): divisions.append(text[current_position:next_position]) current_position = next_position - return divisions - - -def get_misspelled_pairs(text): - spell = SpellChecker() - - # Remove punctuation from the text - translator = str.maketrans("", "", string.punctuation) - text_without_punctuation = text.translate(translator) - - # Split the text into words - words = text_without_punctuation.split() - - # Find misspelled words - misspelled = spell.unknown(words) - - # Create a list to store misspelled word pairs - misspelled_pairs = [] - - # Generate misspelled word pairs with their corrections - for word in misspelled: - correction = spell.correction(word) - misspelled_pairs.append({"misspelled": word, "correction": correction}) - - return misspelled_pairs \ No newline at end of file + return divisions \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8793ac3..58bd20d 100644 Binary files a/requirements.txt and b/requirements.txt differ