This commit is contained in:
Pedro Fonseca
2024-01-12 17:40:32 +00:00
4 changed files with 17 additions and 30 deletions

5
app.py
View File

@@ -9,7 +9,6 @@ from helper.file_helper import delete_files_older_than_one_day
from helper.firebase_helper import * from helper.firebase_helper import *
from helper.heygen_api import create_videos_and_save_to_db from helper.heygen_api import create_videos_and_save_to_db
from helper.speech_to_text_helper import * from helper.speech_to_text_helper import *
from helper.token_counter import count_tokens
from helper.openai_interface import * from helper.openai_interface import *
import os import os
import re import re
@@ -17,7 +16,6 @@ import logging
from dotenv import load_dotenv from dotenv import load_dotenv
from heygen.AvatarEnum import AvatarEnum
from templates.question_templates import * from templates.question_templates import *
load_dotenv() load_dotenv()
@@ -250,7 +248,8 @@ def grade_writing_task_2():
message = ( message = (
"Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect " "Grade this Writing Task 2 answer according to ielts grading system and provide an example of a perfect "
"answer and an elaborated comment where you deep dive into what is wrong and right about the answer." "answer and an elaborated comment where you deep dive into what is wrong and right about the answer."
"Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', 'comment': 'comment about answer quality', 'overall': 7.0, " "Provide your answer on the following json format: {'perfect_answer': 'example perfect answer', "
"'comment': 'comment about answer quality', 'overall': 7.0, "
"'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, " "'task_response': {'Task Achievement': 0.0, 'Coherence and Cohesion': 0.0, 'Lexical Resource': 0.0, "
"'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' " "'Grammatical Range and Accuracy': 0.0}}\n The question was '" + question + "' "
"and the answer was '" + answer + "'") "and the answer was '" + answer + "'")

View File

@@ -5,6 +5,9 @@ import re
from dotenv import load_dotenv from dotenv import load_dotenv
from helper.constants import GPT_3_5_TURBO_INSTRUCT
from helper.token_counter import count_tokens
load_dotenv() load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY") openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -237,3 +240,14 @@ def extract_existing_sections_from_body(my_dict, keys_to_extract):
return list(filter( return list(filter(
lambda item: 'code' in item and item['code'] in keys_to_extract and 'grade' in item and 'name' in item, lambda item: 'code' in item and item['code'] in keys_to_extract and 'grade' in item and 'name' in item,
my_dict['sections'])) my_dict['sections']))
def get_misspelled_pairs(text):
    """Ask the OpenAI instruct model for the misspelled words in ``text``.

    A prompt embedding ``text`` is sent through ``make_openai_instruct_call``
    and the ``misspelled_words`` list of the response is filtered before
    being returned.

    Args:
        text: The text to check. It is concatenated into the prompt, so it
            must be a ``str``.

    Returns:
        A list of the ``{"misspelled": ..., "correction": ...}`` entries from
        the response whose correction differs from the misspelled word.
        Entries that are not dicts or that lack either key are dropped.
    """
    # The sample JSON shown to the model must be well-formed (note the closing
    # brace after the list); otherwise the model is given an unbalanced object
    # to imitate.
    message = ('From the given text, extract the misspelled words and put them in the json with the correct word that '
               'should be on the text instead. Sample JSON: '
               '{"misspelled_words":[{"misspelled": "piza", "correction": "pizza"}]} \n The text: "' + text + '"')
    token_count = count_tokens(message)["n_tokens"]
    # NOTE(review): the trailing 0.2 is presumably the sampling temperature and
    # the list the required response keys -- confirm against
    # make_openai_instruct_call.
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["misspelled_words"], 0.2)

    # Model output is not guaranteed to follow the requested schema, so skip
    # malformed entries instead of raising on them, and drop "corrections"
    # that are identical to the original word.
    return [
        item for item in response["misspelled_words"]
        if isinstance(item, dict)
        and "misspelled" in item
        and "correction" in item
        and item["misspelled"] != item["correction"]
    ]

View File

@@ -1,10 +1,8 @@
import string
import whisper import whisper
import os import os
import nltk import nltk
import boto3 import boto3
import random import random
from spellchecker import SpellChecker
nltk.download('words') nltk.download('words')
from nltk.corpus import words from nltk.corpus import words
@@ -104,27 +102,3 @@ def divide_text(text, max_length=3000):
current_position = next_position current_position = next_position
return divisions return divisions
def get_misspelled_pairs(text):
    """Pair every unknown word in ``text`` with the spell checker's correction.

    ASCII punctuation is stripped from ``text``, the remainder is split on
    whitespace, and the words that ``SpellChecker.unknown`` reports are each
    passed to ``SpellChecker.correction``.

    Args:
        text: The string to spell-check.

    Returns:
        A list of ``{"misspelled": word, "correction": correction}`` dicts,
        one per word reported by ``SpellChecker.unknown``.
    """
    checker = SpellChecker()

    # Delete every character listed in string.punctuation in a single pass.
    stripped_text = text.translate(str.maketrans("", "", string.punctuation))
    tokens = stripped_text.split()

    return [
        {"misspelled": unknown_word, "correction": checker.correction(unknown_word)}
        for unknown_word in checker.unknown(tokens)
    ]

Binary file not shown.