Add verification for words in speaking grading.

This commit is contained in:
Cristiano Ferreira
2023-09-05 20:31:30 +01:00
parent eb6e9b4ef7
commit 8e043104ad
2 changed files with 30 additions and 19 deletions

38
app.py
View File

@@ -6,7 +6,7 @@ from firebase_admin import credentials
from helper.api_messages import QuestionType, get_grading_messages, get_question_gen_messages, get_question_tips from helper.api_messages import QuestionType, get_grading_messages, get_question_gen_messages, get_question_tips
from helper.file_helper import delete_files_older_than_one_day from helper.file_helper import delete_files_older_than_one_day
from helper.firebase_helper import download_firebase_file, upload_file_firebase from helper.firebase_helper import download_firebase_file, upload_file_firebase
from helper.speech_to_text_helper import speech_to_text, text_to_speech from helper.speech_to_text_helper import speech_to_text, text_to_speech, has_words
from helper.token_counter import count_tokens from helper.token_counter import count_tokens
from helper.openai_interface import make_openai_call from helper.openai_interface import make_openai_call
import os import os
@@ -200,16 +200,18 @@ def grade_speaking_task_1():
download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
answer = speech_to_text(sound_file_name) answer = speech_to_text(sound_file_name)
if has_words(answer):
messages = get_grading_messages(QuestionType.SPEAKING_1, question, answer) messages = get_grading_messages(QuestionType.SPEAKING_1, question, answer)
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE) response = make_openai_call(GPT_3_5_TURBO, messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
os.remove(sound_file_name) os.remove(sound_file_name)
return response return response
else:
raise Exception("The audio recorded does not contain any english words.")
except Exception as e: except Exception as e:
os.remove(sound_file_name) os.remove(sound_file_name)
return str(e) return str(e), 400
@app.route('/speaking_task_1', methods=['GET']) @app.route('/speaking_task_1', methods=['GET'])
@@ -237,16 +239,18 @@ def grade_speaking_task_2():
download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name) download_firebase_file(FIREBASE_BUCKET, answer_firebase_path, sound_file_name)
answer = speech_to_text(sound_file_name) answer = speech_to_text(sound_file_name)
if has_words(answer):
messages = get_grading_messages(QuestionType.SPEAKING_2, question, answer) messages = get_grading_messages(QuestionType.SPEAKING_2, question, answer)
token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'], token_count = reduce(lambda count, item: count + count_tokens(item)['n_tokens'],
map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0) map(lambda x: x["content"], filter(lambda x: "content" in x, messages)), 0)
response = make_openai_call(GPT_3_5_TURBO, messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE) response = make_openai_call(GPT_3_5_TURBO, messages, token_count, GRADING_FIELDS, GRADING_TEMPERATURE)
os.remove(sound_file_name) os.remove(sound_file_name)
return response return response
else:
raise Exception("The audio recorded does not contain any english words.")
except Exception as e: except Exception as e:
os.remove(sound_file_name) os.remove(sound_file_name)
return str(e) return str(e), 400
@app.route('/speaking_task_2', methods=['GET']) @app.route('/speaking_task_2', methods=['GET'])

View File

@@ -1,7 +1,9 @@
import whisper import whisper
import os import os
import gtts import gtts
from playsound import playsound import nltk
nltk.download('words')
from nltk.corpus import words
def speech_to_text(file_path): def speech_to_text(file_path):
if os.path.exists(file_path): if os.path.exists(file_path):
@@ -14,4 +16,9 @@ def speech_to_text(file_path):
def text_to_speech(text: str, file_name: str): def text_to_speech(text: str, file_name: str):
tts = gtts.gTTS(text) tts = gtts.gTTS(text)
tts.save(file_name) tts.save(file_name)
# Lazily-built cache of the NLTK word list; populated on first use so the
# (large) corpus is converted to a set only once per process.
_ENGLISH_VOCABULARY = None


def _english_vocabulary():
    """Return the NLTK ``words`` corpus as a frozenset, building it once."""
    global _ENGLISH_VOCABULARY
    if _ENGLISH_VOCABULARY is None:
        _ENGLISH_VOCABULARY = frozenset(words.words())
    return _ENGLISH_VOCABULARY


def has_words(text: str) -> bool:
    """Tell whether *text* contains at least one word from the NLTK word list.

    The text is split on whitespace; each token is lower-cased and has
    leading/trailing punctuation removed before being looked up, so a
    transcript such as ``"Hello."`` is matched as ``"hello"``.

    Args:
        text: Text to inspect (e.g. a speech transcript). ``None`` or an
            empty/whitespace-only string is treated as containing no words.

    Returns:
        True if any cleaned token is present in the word list, else False.
    """
    # Function-scope import keeps this block self-contained.
    import string

    if not text:
        return False

    # Strip punctuation only at the token edges so interior characters
    # (apostrophes, hyphens) are left as written.
    candidates = [
        cleaned
        for cleaned in (
            token.strip(string.punctuation).lower() for token in text.split()
        )
        if cleaned
    ]
    if not candidates:
        # Nothing word-like to check; avoid loading the vocabulary at all.
        return False

    vocabulary = _english_vocabulary()
    return any(candidate in vocabulary for candidate in candidates)