From de4042efac238bfcd4bfca1ce0764ce188f31b09 Mon Sep 17 00:00:00 2001
From: Cristiano Ferreira <cristiano.ferreira@flowinn.biz>
Date: Fri, 12 Jan 2024 19:45:58 +0000
Subject: [PATCH] Add corrections for speaking.

---
 app.py                     | 10 ++++++++++
 helper/openai_interface.py | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/app.py b/app.py
index 5a9c441..8ba47da 100644
--- a/app.py
+++ b/app.py
@@ -345,6 +345,8 @@ def grade_speaking_task_1():
                                                                    token_count,
                                                                    None,
                                                                    GEN_QUESTION_TEMPERATURE)
+            response['transcript'] = answer
+            response['corrections'] = get_speaking_corrections(answer)
             return response
         else:
             return {
@@ -411,6 +413,8 @@ def grade_speaking_task_2():
                                                                    token_count,
                                                                    None,
                                                                    GEN_QUESTION_TEMPERATURE)
+            response['transcript'] = answer
+            response['corrections'] = get_speaking_corrections(answer)
             return response
         else:
             return {
@@ -473,11 +477,13 @@ def grade_speaking_task_3():
     try:
         data = request.get_json()
         answers = data.get('answers')
+        text_answers = []
         perfect_answers = []
         for item in answers:
             sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
             download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name)
             answer_text = speech_to_text(sound_file_name)
+            text_answers.append(answer_text)
             item["answer"] = answer_text
             os.remove(sound_file_name)
             if not has_10_words(answer_text):
@@ -522,6 +528,10 @@ def grade_speaking_task_3():
                                              GEN_QUESTION_TEMPERATURE)
         for i, answer in enumerate(perfect_answers, start=1):
             response['perfect_answer_' + str(i)] = answer
+
+        for i, answer in enumerate(text_answers, start=1):
+            response['transcript_' + str(i)] = answer
+            response['corrections_' + str(i)] = get_speaking_corrections(answer)
         return response
     except Exception as e:
         return str(e), 400
diff --git a/helper/openai_interface.py b/helper/openai_interface.py
index d871b84..3b332d3 100644
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -251,3 +251,14 @@ def get_misspelled_pairs(text):
     # Filter out items with the same value for misspelled and correction
     filtered_data = [item for item in response["misspelled_words"] if item['misspelled'] != item['correction']]
     return filtered_data
+
+def get_speaking_corrections(text):
+    """Ask the model for wrong/correct phrase pairs found in a speaking transcript and return those that differ."""
+    message = ('Given the provided transcription, identify and extract any inaccuracies, including incorrect words and '
+               'expressions. Put them in the json with the correct words and expressions that should be on the transcription '
+               'instead. Sample JSON: {"corrections":[{"wrong": "wrong_word", "correct": "correct_word"}]} '
+               '\n The text: "' + text + '"')
+    token_count = count_tokens(message)["n_tokens"]
+    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["corrections"], 0.2)
+    # Drop no-op entries where the suggested "correct" text is identical to the "wrong" text.
+    return [item for item in response["corrections"] if item['wrong'] != item['correct']]