From 94c2b5a052d26159300435c6146c22acdc9110c8 Mon Sep 17 00:00:00 2001
From: Pedro Fonseca <pedro.fonseca@ecrop.dev>
Date: Sun, 24 Mar 2024 23:21:38 +0000
Subject: [PATCH] Adding bullet points to grading_summary endpoint

---
 helper/openai_interface.py      | 29 ++++++++++++++++++++++++++---
 helper/speech_to_text_helper.py | 10 ++++++++--
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/helper/openai_interface.py b/helper/openai_interface.py
index a6f07d3..8b9f02b 100644
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -40,6 +40,10 @@ tools = [{
                     "type": "string",
                     "description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.",
                 },
+                "bullet_points": {
+                    "type": "string",
+                    "description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. ",
+                },
             },
             "required": ["evaluation", "suggestions"],
         },
@@ -214,9 +218,11 @@ def calculate_grading_summary(body):
 
     for section in extracted_sections:
         openai_response_dict = calculate_section_grade_summary(section)
+
         ret = ret + [{'code': section['code'], 'name': section['name'], 'grade': section['grade'],
                       'evaluation': openai_response_dict['evaluation'],
-                      'suggestions': openai_response_dict['suggestions']}]
+                      'suggestions': openai_response_dict['suggestions'],
+                      'bullet_points': parse_bullet_points(openai_response_dict['bullet_points'], section['grade'])}]
 
     return {'sections': ret}
 
@@ -236,7 +242,8 @@ def calculate_section_grade_summary(section):
          "content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."},
         {"role": "user",
          "content": "Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, be one paragraph long. "},
-        {"role": "user", "content": "Please save the evaluation comment and suggestions generated."}
+        {"role": "user", "content": "Please save the evaluation comment and suggestions generated."},
+        {"role": "user", "content": "Offer bullet points to improve the english speaking ability."},
     ]
 
     if section['code'] == "level":
@@ -266,7 +273,7 @@ def parse_openai_response(response):
             response['choices'][0]['message']['tool_calls'][0]['function']['arguments']:
         return json.loads(response['choices'][0]['message']['tool_calls'][0]['function']['arguments'])
     else:
-        return {'evaluation': "", 'suggestions': ""}
+        return {'evaluation': "", 'suggestions': "", 'bullet_points': []}
 
 
 def extract_existing_sections_from_body(my_dict, keys_to_extract):
@@ -276,6 +283,21 @@ def extract_existing_sections_from_body(my_dict, keys_to_extract):
             my_dict['sections']))
 
 
+def parse_bullet_points(bullet_points_str, grade):
+    """Turn newline-separated bullet text into a list of sentences ending in '.'.
+
+    Returns [] when bullet_points_str is not a str or grade is not below 9.
+    Only a leading '-' marker is stripped, so hyphenated words stay intact,
+    and blank lines (paragraph separators) produce no entries.
+    """
+    max_grade_for_suggestions = 9
+    if not (isinstance(bullet_points_str, str) and grade < max_grade_for_suggestions):
+        return []
+    # lstrip (not replace) keeps inner hyphens such as "well-known".
+    cleaned_lines = [line.strip().lstrip('-').strip() for line in bullet_points_str.split('\n')]
+    return [line if line.endswith('.') else line + '.' for line in cleaned_lines if line]
+
+
 def get_fixed_text(text):
     message = ('Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
                'is wrong. Sample JSON: {"fixed_text": "fixed test with no '
@@ -284,6 +306,7 @@ def get_fixed_text(text):
     response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
     return response["fixed_text"]
 
+
 def get_speaking_corrections(text):
     message = ('Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
                'replace what is wrong. Sample JSON: {"fixed_text": "fixed '
diff --git a/helper/speech_to_text_helper.py b/helper/speech_to_text_helper.py
index fc262b8..be6067b 100644
--- a/helper/speech_to_text_helper.py
+++ b/helper/speech_to_text_helper.py
@@ -8,6 +8,7 @@ nltk.download('words')
 from nltk.corpus import words
 from helper.constants import *
 
+
 def speech_to_text(file_path):
     if os.path.exists(file_path):
         model = whisper.load_model("base")
@@ -17,6 +18,7 @@ def speech_to_text(file_path):
         print("File not found:", file_path)
         raise Exception("File " + file_path + " not found.")
 
+
 def text_to_speech(text: str, file_name: str):
     # Initialize the Amazon Polly client
     client = boto3.client(
@@ -53,6 +55,7 @@ def text_to_speech(text: str, file_name: str):
 
     print("Speech segments saved to " + file_name)
 
+
 def conversation_text_to_speech(conversation: list, file_name: str):
     # Initialize the Amazon Polly client
     client = boto3.client(
@@ -66,7 +69,7 @@ def conversation_text_to_speech(conversation: list, file_name: str):
     # Iterate through the text segments, convert to audio segments, and store them
     for segment in conversation:
         response = client.synthesize_speech(
-            Engine="neural", 
+            Engine="neural",
             Text=segment["text"],
             OutputFormat="mp3",
             VoiceId=segment["voice"]
@@ -89,17 +92,20 @@ def conversation_text_to_speech(conversation: list, file_name: str):
 
     print("Speech segments saved to " + file_name)
 
+
 def has_words(text: str):
     english_words = set(words.words())
     words_in_input = text.split()
     return any(word.lower() in english_words for word in words_in_input)
 
+
 def has_x_words(text: str, quantity):
     english_words = set(words.words())
     words_in_input = text.split()
     english_word_count = sum(1 for word in words_in_input if word.lower() in english_words)
     return english_word_count >= quantity
 
+
 def divide_text(text, max_length=3000):
     if len(text) <= max_length:
         return [text]
@@ -119,4 +125,4 @@ def divide_text(text, max_length=3000):
             divisions.append(text[current_position:next_position])
             current_position = next_position
 
-    return divisions
\ No newline at end of file
+    return divisions