From de4042efac238bfcd4bfca1ce0764ce188f31b09 Mon Sep 17 00:00:00 2001 From: Cristiano Ferreira Date: Fri, 12 Jan 2024 19:45:58 +0000 Subject: [PATCH] Add corrections for speaking. --- app.py | 10 ++++++++++ helper/openai_interface.py | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/app.py b/app.py index 5a9c441..8ba47da 100644 --- a/app.py +++ b/app.py @@ -345,6 +345,8 @@ def grade_speaking_task_1(): token_count, None, GEN_QUESTION_TEMPERATURE) + response['transcript'] = answer + response['corrections'] = get_speaking_corrections(answer) return response else: return { @@ -411,6 +413,8 @@ def grade_speaking_task_2(): token_count, None, GEN_QUESTION_TEMPERATURE) + response['transcript'] = answer + response['corrections'] = get_speaking_corrections(answer) return response else: return { @@ -473,11 +477,13 @@ def grade_speaking_task_3(): try: data = request.get_json() answers = data.get('answers') + text_answers = [] perfect_answers = [] for item in answers: sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4()) download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name) answer_text = speech_to_text(sound_file_name) + text_answers.append(answer_text) item["answer"] = answer_text os.remove(sound_file_name) if not has_10_words(answer_text): @@ -522,6 +528,10 @@ def grade_speaking_task_3(): GEN_QUESTION_TEMPERATURE) for i, answer in enumerate(perfect_answers, start=1): response['perfect_answer_' + str(i)] = answer + + for i, answer in enumerate(text_answers, start=1): + response['transcript_' + str(i)] = answer + response['corrections_' + str(i)] = get_speaking_corrections(answer) return response except Exception as e: return str(e), 400 diff --git a/helper/openai_interface.py b/helper/openai_interface.py index d871b84..3b332d3 100644 --- a/helper/openai_interface.py +++ b/helper/openai_interface.py @@ -251,3 +251,14 @@ def get_misspelled_pairs(text): # Filter out items with the same value for misspelled and correction filtered_data = [item 
for item in response["misspelled_words"] if item['misspelled'] != item['correction']] return filtered_data + +def get_speaking_corrections(text): + message = ('Given the provided transcription, identify and extract any inaccuracies, including incorrect words and ' + 'expressions. Put them in the json with the correct words and expressions that should be on the transcription ' + 'instead. Sample JSON: {"corrections":[{"wrong": "wrong_word", "correct": "correct_word"}] ' + '\n The text: "' + text + '"') + token_count = count_tokens(message)["n_tokens"] + response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["corrections"], 0.2) + # Filter out items whose 'wrong' and 'correct' values are identical (nothing to correct) + filtered_data = [item for item in response["corrections"] if item['wrong'] != item['correct']] + return filtered_data