"""OpenAI helpers: JSON-ish response parsing, retrying calls, IELTS grading summaries."""
import json
import os
import re

import openai
from dotenv import load_dotenv

from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
from helper.token_counter import count_tokens

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

MAX_TOKENS = 4097
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5
TRY_LIMIT = 2

# Kept for backward compatibility with code that may import it. The call
# helpers below now count attempts locally, so a failed request can no longer
# leave a stale counter behind for the next one.
try_count = 0

# GRADING SUMMARY
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
grade_top_limit = 9

tools = [{
    "type": "function",
    "function": {
        "name": "save_evaluation_and_suggestions",
        "description": "Saves the evaluation and suggestions requested by input.",
        "parameters": {
            "type": "object",
            "properties": {
                "evaluation": {
                    "type": "string",
                    "description": "A comment on the IELTS section grade obtained in the specific section and what it could mean without suggestions.",
                },
                "suggestions": {
                    "type": "string",
                    "description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.",
                },
            },
            "required": ["evaluation", "suggestions"],
        },
    }
}]


###

def process_response(input_string, quotation_check_field):
    """Parse the JSON-like object embedded in a model response.

    Args:
        input_string: Raw text returned by the model.
        quotation_check_field: Field name used to detect a response that uses
            single quotes (``'field': '...'`` or ``'field': [...]``).

    Returns:
        The decoded object; ``input_string`` unchanged when it contains no
        ``{``; ``None`` when decoding fails (the failure is printed).
    """
    # NOTE(review): in the collapsed source the trailing `else` could bind to
    # either the `if` or the `try`; it is read here as the `if`'s else.
    if '{' not in input_string:
        return input_string

    try:
        # Drop any chatter that precedes the first '{'.
        result = input_string[input_string.index('{'):]

        # The field name is data, not a pattern: escape it before embedding.
        field = re.escape(quotation_check_field)
        flags = re.DOTALL | re.MULTILINE
        single_quoted = (
            re.search(r"'" + field + r"':\s*'(.*?)'", result, flags)
            or re.search(r"'" + field + r"':\s*\[([^\]]+)]", result, flags)
        )
        if single_quoted:
            return json.loads(parse_string(result))

        if "title" in result:
            # Keep paragraph breaks as a marker; callers may turn
            # "**paragraph**" back into newlines after decoding.
            parsed_string = result.replace("\n\n", "\n").replace("\n", "**paragraph**")
        else:
            parsed_string = result.replace("\n\n", " ").replace("\n", " ")

        # json.loads rejects trailing commas.
        parsed_string = re.sub(r',\s*]', ']', parsed_string)
        parsed_string = re.sub(r',\s*}', '}', parsed_string)

        if '[' not in parsed_string and ']' not in parsed_string:
            parsed_string = parse_string_2(parsed_string)
        return json.loads(parsed_string)
    except Exception as e:
        print(f"Invalid JSON string! Exception: {e}")
        print(f"String: {input_string}")
        return None


# ---------------------------------------------------------------------------
# NOTE(review): the text of this file between `pattern = r"(?` in
# parse_string and `>= TRY_LIMIT: return ""` in make_openai_call was lost in
# the reviewed copy (it reads `pattern = r"(?= TRY_LIMIT: return ""`).
# The rest of parse_string, all of parse_string_2 and check_fields, and the
# request part of make_openai_call are therefore reconstructions inferred
# from their call sites. Restore the originals from version control before
# relying on them.
# ---------------------------------------------------------------------------

def parse_string(to_parse: str):
    """Rewrite a single-quoted JSON-like string so ``json.loads`` accepts it."""
    parsed_string = to_parse.replace("\"", "\\\"")
    # NOTE(review): reconstructed from here on; the original pattern is only
    # known to start with `(?`. Quotes that touch a word character on both
    # sides (apostrophes, e.g. "it's") are left alone.
    pattern = r"(?<!\w)'|'(?!\w)"
    return re.sub(pattern, '"', parsed_string)


def parse_string_2(to_parse: str):
    """Prepare a bracket-free JSON-like string for ``json.loads``.

    NOTE(review): the original body was lost. This placeholder returns its
    input unchanged, so well-formed JSON still decodes and malformed JSON
    falls through to the caller's error handling.
    """
    return to_parse


def check_fields(obj, fields):
    """Return True when ``obj`` is a dict containing every name in ``fields``.

    NOTE(review): reconstructed; callers only show that the result is
    compared with ``False``. ``None`` and non-dict values yield False.
    """
    return isinstance(obj, dict) and all(field in obj for field in fields)


def make_openai_call(model, messages, token_count, fields_to_check, temperature):
    """Call the chat completion endpoint, retrying up to ``TRY_LIMIT`` times.

    A response is retried when it contains a blacklisted word or, if
    ``fields_to_check`` is given, when the parsed object lacks a field.

    Returns:
        ``""`` if every attempt contained blacklisted words; the stripped
        text if ``fields_to_check`` is None; the parsed object once all
        fields are present; otherwise the raw text of the last attempt.
    """
    result = ""
    for attempt in range(TRY_LIMIT + 1):
        retries_left = attempt < TRY_LIMIT

        # NOTE(review): request reconstructed -- the original call was lost.
        # Parameters mirror make_openai_instruct_call and the otherwise
        # unused TOP_P / FREQUENCY_PENALTY constants; confirm.
        result = openai.ChatCompletion.create(
            model=model,
            max_tokens=int(MAX_TOKENS - token_count - 300),
            temperature=float(temperature),
            top_p=float(TOP_P),
            frequency_penalty=float(FREQUENCY_PENALTY),
            messages=messages,
        )["choices"][0]["message"]["content"]

        if has_blacklisted_words(result):
            if retries_left:
                continue
            return ""

        if fields_to_check is None:
            return result.replace("\n\n", " ").strip()

        processed_response = process_response(result, fields_to_check[0])
        # A valid parse wins even on the last attempt; previously the raw
        # text was returned whenever the retry budget was used up.
        if check_fields(processed_response, fields_to_check):
            return processed_response
        if not retries_left:
            return result
    return result


def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
    """Call the completion endpoint, retrying up to ``TRY_LIMIT`` times.

    Args:
        model: Completion model name.
        message: Prompt text.
        token_count: Number of tokens in ``message``.
        fields_to_check: Field names the parsed object must contain, or None
            to return plain text.
        temperature: Sampling temperature; ``None`` falls back to 0.7, the
            value that used to be hard-coded regardless of this argument.

    Returns:
        ``""`` if every attempt contained blacklisted words or failed to
        parse; the stripped text if ``fields_to_check`` is None; otherwise
        the parsed object with "**paragraph**" markers turned into newlines
        (returned even if fields are missing on the last attempt).
    """
    sampling_temperature = 0.7 if temperature is None else float(temperature)

    for attempt in range(TRY_LIMIT + 1):
        retries_left = attempt < TRY_LIMIT

        response = openai.Completion.create(
            model=model,
            prompt=message,
            max_tokens=int(MAX_TOKENS - token_count - 300),
            temperature=sampling_temperature,
        )["choices"][0]["text"]

        if has_blacklisted_words(response):
            if retries_left:
                continue
            return ""

        if fields_to_check is None:
            return response.replace("\n\n", " ").strip()

        # Completions often omit the opening `{"` and/or the closing `}`.
        response = remove_special_characters_from_beginning(response)
        if not response.startswith(("{", '"')):
            response = "{\"" + response
        if not response.endswith("}"):
            response = response + "}"

        try:
            processed_response = process_response(response, fields_to_check[0])
            reparagraphed_response = replace_expression_in_object(
                processed_response, "**paragraph**", "\n")
            fields_ok = check_fields(reparagraphed_response, fields_to_check)
        except Exception as e:
            # Failures count against the retry budget; retrying here used to
            # recurse without any limit.
            print(f"Could not process completion: {e}")
            if retries_left:
                continue
            return ""

        if fields_ok or not retries_left:
            return reparagraphed_response
    return ""


# GRADING SUMMARY
def calculate_grading_summary(body):
    """Build an evaluation and suggestions for every known section in ``body``.

    Returns:
        ``{'sections': [...]}`` with one entry (code, name, grade,
        evaluation, suggestions) per extracted section.
    """
    summaries = []
    for section in extract_existing_sections_from_body(body, section_keys) or []:
        openai_response_dict = calculate_section_grade_summary(section)
        summaries.append({
            'code': section['code'],
            'name': section['name'],
            'grade': section['grade'],
            'evaluation': openai_response_dict['evaluation'],
            'suggestions': openai_response_dict['suggestions'],
        })
    return {'sections': summaries}


def calculate_section_grade_summary(section):
    """Ask the chat model for an evaluation and suggestions for one section.

    Args:
        section: Mapping with at least ``code``, ``name`` and ``grade``.

    Returns:
        The dict produced by ``parse_openai_response``.
    """
    messages = [
        {
            "role": "user",
            "content": "You are a IELTS test section grade evaluator. You will receive a IELTS test section name and the grade obtained in the section. You should offer a evaluation comment on this grade and separately suggestions on how to possibly get a better grade.",
        },
        {
            "role": "user",
            "content": "Section: " + str(section['name']) + " Grade: " + str(section['grade']),
        },
        {"role": "user", "content": "Speak in third person."},
        {"role": "user",
         "content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."},
        {"role": "user",
         "content": "Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, be one paragraph long. "},
        {"role": "user", "content": "Please save the evaluation comment and suggestions generated."}
    ]

    # Section-specific context goes right after the "Section: ... Grade: ..."
    # message (index 2), ahead of the style instructions.
    if section['code'] == "level":
        messages[2:2] = [{
            "role": "user",
            "content": "This section is comprised of multiple choice questions that measure the user's overall english level. These multiple choice questions are about knowledge on vocabulary, syntax, grammar rules, and contextual usage. The grade obtained measures the ability in these areas and english language overall."
        }]
    elif section['code'] == "speaking":
        messages[2:2] = [{
            "role": "user",
            "content": "This section is s designed to assess the English language proficiency of individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."
        }]

    res = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        max_tokens=chat_config['max_tokens'],
        temperature=chat_config['temperature'],
        tools=tools,
        messages=messages)

    return parse_openai_response(res)


def parse_openai_response(response):
    """Extract the first tool call's arguments from a chat completion.

    Returns:
        A dict that always has ``evaluation`` and ``suggestions`` keys; they
        are empty strings when the response has no usable tool call or its
        arguments are not a JSON object.
    """
    summary = {'evaluation': "", 'suggestions': ""}
    try:
        arguments = response['choices'][0]['message']['tool_calls'][0]['function']['arguments']
        parsed = json.loads(arguments) if arguments else None
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing keys, empty lists, tool_calls=None or invalid JSON.
        parsed = None
    if isinstance(parsed, dict):
        summary.update(parsed)
    return summary


def extract_existing_sections_from_body(my_dict, keys_to_extract):
    """Return the sections of ``my_dict`` that can be graded.

    A section is kept when it is a dict with ``code``, ``grade`` and
    ``name`` keys and its code is in ``keys_to_extract``. An empty list is
    returned when ``my_dict`` has no non-empty ``sections`` list.
    """
    if 'sections' not in my_dict or not isinstance(my_dict['sections'], list):
        return []
    return [
        item for item in my_dict['sections']
        if isinstance(item, dict)
        and 'code' in item and item['code'] in keys_to_extract
        and 'grade' in item and 'name' in item
    ]


def get_fixed_text(text):
    """Return ``text`` with its errors corrected by the instruct model."""
    message = ('Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
               'is wrong. Sample JSON: {"fixed_text": "fixed test with no '
               'misspelling errors"}] \n The text: "' + text + '"')
    token_count = count_tokens(message)["n_tokens"]
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]


def get_speaking_corrections(text):
    """Return the transcription ``text`` with its errors corrected by the instruct model."""
    message = ('Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
               'replace what is wrong. Sample JSON: {"fixed_text": "fixed '
               'transcription with no misspelling errors"}] \n The text: "' + text + '"')
    token_count = count_tokens(message)["n_tokens"]
    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]


def has_blacklisted_words(text: str):
    """Return True when any entry of ``BLACKLISTED_WORDS`` occurs in the lower-cased ``text``."""
    text_lower = text.lower()
    return any(word in text_lower for word in BLACKLISTED_WORDS)


def remove_special_characters_from_beginning(string):
    """Strip leading newlines, one leading quote and one trailing double quote.

    The quote test runs on the newline-stripped text, so a quote that
    follows leading newlines is removed too.
    """
    cleaned_string = string.lstrip('\n')
    if cleaned_string.startswith(("'", '"')):
        cleaned_string = cleaned_string[1:]
    if cleaned_string.endswith('"'):
        cleaned_string = cleaned_string[:-1]
    return cleaned_string


def replace_expression_in_object(obj, expression, replacement):
    """Replace ``expression`` with ``replacement`` in every string inside ``obj``.

    Strings are handled at any depth, including inside lists. Dicts are
    updated in place and returned; lists are rebuilt; any other value is
    returned unchanged.
    """
    if isinstance(obj, str):
        return obj.replace(expression, replacement)
    if isinstance(obj, list):
        return [replace_expression_in_object(item, expression, replacement) for item in obj]
    if isinstance(obj, dict):
        for key, value in obj.items():
            obj[key] = replace_expression_in_object(value, expression, replacement)
    return obj