Add corrections for speaking.

This commit is contained in:
Cristiano Ferreira
2024-01-12 19:45:58 +00:00
parent 5aedd1864d
commit de4042efac
2 changed files with 21 additions and 0 deletions

10
app.py
View File

@@ -345,6 +345,8 @@ def grade_speaking_task_1():
token_count,
None,
GEN_QUESTION_TEMPERATURE)
response['transcript'] = answer
response['corrections'] = get_speaking_corrections(answer)
return response
else:
return {
@@ -411,6 +413,8 @@ def grade_speaking_task_2():
token_count,
None,
GEN_QUESTION_TEMPERATURE)
response['transcript'] = answer
response['corrections'] = get_speaking_corrections(answer)
return response
else:
return {
@@ -473,11 +477,13 @@ def grade_speaking_task_3():
try:
data = request.get_json()
answers = data.get('answers')
text_answers = []
perfect_answers = []
for item in answers:
sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name)
answer_text = speech_to_text(sound_file_name)
text_answers.append(answer_text)
item["answer"] = answer_text
os.remove(sound_file_name)
if not has_10_words(answer_text):
@@ -522,6 +528,10 @@ def grade_speaking_task_3():
GEN_QUESTION_TEMPERATURE)
for i, answer in enumerate(perfect_answers, start=1):
response['perfect_answer_' + str(i)] = answer
for i, answer in enumerate(text_answers, start=1):
response['transcript_' + str(i)] = answer
response['corrections_' + str(i)] = get_speaking_corrections(answer)
return response
except Exception as e:
return str(e), 400

View File

@@ -251,3 +251,14 @@ def get_misspelled_pairs(text):
# Filter out items with the same value for misspelled and correction
filtered_data = [item for item in response["misspelled_words"] if item['misspelled'] != item['correction']]
return filtered_data
def get_speaking_corrections(text):
    """Ask the instruct model for wrong/correct pairs found in a transcript.

    Builds a prompt around ``text``, sends it through
    ``make_openai_instruct_call`` and returns the entries of the
    ``"corrections"`` list whose ``wrong`` and ``correct`` values differ.

    Args:
        text: The speech transcription to check. ``None``, empty or
            whitespace-only input yields an empty list without a model call.

    Returns:
        A list of dicts, each holding at least the keys ``"wrong"`` and
        ``"correct"``. Entries that are malformed or whose two values are
        identical are dropped. An empty list is returned when the response
        carries no usable ``"corrections"`` list.
    """
    # Nothing to correct in an empty transcript; also avoids a TypeError when
    # concatenating None into the prompt below.
    if not text or not text.strip():
        return []

    # The sample JSON must itself be valid (note the closing brace) so the
    # model is not shown a malformed template to imitate.
    message = ('Given the provided transcription, identify and extract any inaccuracies, including incorrect words and '
               'expressions. Put them in the json with the correct words and expressions that should be on the transcription '
               'instead. Sample JSON: {"corrections":[{"wrong": "wrong_word", "correct": "correct_word"}]} '
               '\n The text: "' + text + '"')
    token_count = count_tokens(message)["n_tokens"]
    # NOTE(review): ["corrections"] looks like the list of expected response
    # fields and 0.2 like the sampling temperature - confirm against
    # make_openai_instruct_call.
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["corrections"], 0.2)

    # Model output is not guaranteed to follow the requested schema, so
    # tolerate a missing or non-list "corrections" value instead of raising.
    corrections = response.get("corrections") if isinstance(response, dict) else None
    if not isinstance(corrections, list):
        return []

    # Keep only well-formed entries where the suggested text actually differs
    # from the transcribed text.
    return [
        item for item in corrections
        if isinstance(item, dict)
        and "wrong" in item
        and "correct" in item
        and item["wrong"] != item["correct"]
    ]