From 94c2b5a052d26159300435c6146c22acdc9110c8 Mon Sep 17 00:00:00 2001 From: Pedro Fonseca Date: Sun, 24 Mar 2024 23:21:38 +0000 Subject: [PATCH] Adding bullet points to grading_summary endpoint --- helper/openai_interface.py | 29 ++++++++++++++++++++++++++--- helper/speech_to_text_helper.py | 10 ++++++++-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/helper/openai_interface.py b/helper/openai_interface.py index a6f07d3..8b9f02b 100644 --- a/helper/openai_interface.py +++ b/helper/openai_interface.py @@ -40,6 +40,10 @@ tools = [{ "type": "string", "description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.", }, + "bullet_points": { + "type": "string", + "description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. ", + }, }, "required": ["evaluation", "suggestions"], }, @@ -214,9 +218,11 @@ def calculate_grading_summary(body): for section in extracted_sections: openai_response_dict = calculate_section_grade_summary(section) + ret = ret + [{'code': section['code'], 'name': section['name'], 'grade': section['grade'], 'evaluation': openai_response_dict['evaluation'], - 'suggestions': openai_response_dict['suggestions']}] + 'suggestions': openai_response_dict['suggestions'], + 'bullet_points': parse_bullet_points(openai_response_dict['bullet_points'], section['grade'])}] return {'sections': ret} @@ -236,7 +242,8 @@ def calculate_section_grade_summary(section): "content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."}, {"role": "user", "content": "Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, be one paragraph long. 
def parse_bullet_points(bullet_points_str, grade, max_grade_for_suggestions=9):
    """Turn a newline-separated bullet-point string into a list of sentences.

    Args:
        bullet_points_str: Raw text with one bullet point per line. Any
            value that is not a ``str`` yields an empty list.
        grade: Grade obtained for the section. Bullet points are only
            returned when it is strictly below ``max_grade_for_suggestions``.
        max_grade_for_suggestions: Grade at or above which no bullet
            points are returned. Defaults to 9.

    Returns:
        A list of non-empty strings, one per bullet point, with the leading
        list marker removed and terminal punctuation guaranteed. Returns an
        empty list when the input is not a string or the grade is too high.
    """
    import re  # function-scope import keeps this block self-contained

    if not isinstance(bullet_points_str, str) or not grade < max_grade_for_suggestions:
        return []

    # Only a *leading* marker is removed ("-", "*", the bullet character,
    # "1." or "1)"), so hyphens inside the sentence such as "well-known"
    # are preserved. The number form requires following whitespace so a
    # sentence starting with "1.5 hours" is left intact.
    leading_marker = re.compile(r'^(?:[-*\u2022]+|\d+[.)](?=\s))\s*')

    bullets = []
    for raw_line in bullet_points_str.splitlines():
        text = leading_marker.sub('', raw_line.strip()).strip()
        if not text:
            # Blank separator lines (or a bare marker) are not bullet points.
            continue
        if not text.endswith(('.', '!', '?')):
            text += '.'
        bullets.append(text)
    return bullets
# Lazily-built, lower-cased vocabulary shared by has_words / has_x_words.
_ENGLISH_WORDS = None


def _english_words():
    """Return the vocabulary from ``words.words()`` as a cached frozenset.

    The set is built on first use and reused afterwards, instead of being
    rebuilt from the corpus on every call. Entries are lower-cased because
    callers lower-case their tokens before the membership test, so both
    sides of the comparison are normalized the same way.
    """
    global _ENGLISH_WORDS
    if _ENGLISH_WORDS is None:
        _ENGLISH_WORDS = frozenset(entry.lower() for entry in words.words())
    return _ENGLISH_WORDS


def has_words(text: str):
    """Return True if at least one whitespace-separated token of *text*
    is found (case-insensitively) in the word list."""
    vocabulary = _english_words()
    return any(token.lower() in vocabulary for token in text.split())


def has_x_words(text: str, quantity):
    """Return True if at least *quantity* whitespace-separated tokens of
    *text* are found (case-insensitively) in the word list."""
    vocabulary = _english_words()
    english_word_count = sum(1 for token in text.split() if token.lower() in vocabulary)
    return english_word_count >= quantity
next_position - return divisions \ No newline at end of file + return divisions