Add corrections for speaking.

2024-01-12 19:45:58 +00:00
parent 5aedd1864d
commit de4042efac
2 changed files with 21 additions and 0 deletions
--- a/app.py
+++ b/app.py
@@ -345,6 +345,8 @@ def grade_speaking_task_1():
                                                                   token_count,
                                                                   None,
                                                                   GEN_QUESTION_TEMPERATURE)
+            response['transcript'] = answer
+            response['corrections'] = get_speaking_corrections(answer)
            return response
        else:
            return {
@@ -411,6 +413,8 @@ def grade_speaking_task_2():
                                                                   token_count,
                                                                   None,
                                                                   GEN_QUESTION_TEMPERATURE)
+            response['transcript'] = answer
+            response['corrections'] = get_speaking_corrections(answer)
            return response
        else:
            return {
@@ -473,11 +477,13 @@ def grade_speaking_task_3():
    try:
        data = request.get_json()
        answers = data.get('answers')
+        text_answers = []
        perfect_answers = []
        for item in answers:
            sound_file_name = AUDIO_FILES_PATH + str(uuid.uuid4())
            download_firebase_file(FIREBASE_BUCKET, item["answer"], sound_file_name)
            answer_text = speech_to_text(sound_file_name)
+            text_answers.append(answer_text)
            item["answer"] = answer_text
            os.remove(sound_file_name)
            if not has_10_words(answer_text):
@@ -522,6 +528,10 @@ def grade_speaking_task_3():
                                             GEN_QUESTION_TEMPERATURE)
        for i, answer in enumerate(perfect_answers, start=1):
            response['perfect_answer_' + str(i)] = answer
+
+        for i, answer in enumerate(text_answers, start=1):
+            response['transcript_' + str(i)] = answer
+            response['corrections_' + str(i)] = get_speaking_corrections(answer)
        return response
    except Exception as e:
        return str(e), 400
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -251,3 +251,14 @@ def get_misspelled_pairs(text):
    # Filter out items with the same value for misspelled and correction
    filtered_data = [item for item in response["misspelled_words"] if item['misspelled'] != item['correction']]
    return filtered_data
+
+def get_speaking_corrections(text):
+    """Ask the instruct model for corrections to a speaking transcript.
+
+    Builds a prompt around ``text``, sends it through
+    ``make_openai_instruct_call`` requesting a ``"corrections"`` field, and
+    returns the entries of that list whose ``"wrong"`` and ``"correct"``
+    values differ.
+
+    :param text: transcript text to be checked; it is embedded verbatim in
+        the prompt between double quotes.
+    :return: list of ``{"wrong": ..., "correct": ...}`` items taken from the
+        model response, without the entries that suggest no actual change.
+    """
+    # The sample JSON must be well-formed (balanced braces) so the model is
+    # shown a valid example of the structure it is expected to produce.
+    message = ('Given the provided transcription, identify and extract any inaccuracies, including incorrect words and '
+               'expressions. Put them in the json with the correct words and expressions that should be on the transcription '
+               'instead. Sample JSON: {"corrections":[{"wrong": "wrong_word", "correct": "correct_word"}]} '
+               '\n The text: "' + text + '"')
+    token_count = count_tokens(message)["n_tokens"]
+    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["corrections"], 0.2)
+    # Drop no-op entries where the proposed replacement equals the original.
+    return [item for item in response["corrections"] if item['wrong'] != item['correct']]