Initial update to the most recent OpenAI API version.
This commit is contained in:
@@ -1,15 +1,14 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
import openai
|
||||
from openai import OpenAI
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
|
||||
from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
|
||||
from helper.token_counter import count_tokens
|
||||
|
||||
load_dotenv()
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
||||
|
||||
MAX_TOKENS = 4097
|
||||
TOP_P = 0.9
|
||||
@@ -50,105 +49,20 @@ tools = [{
|
||||
}]
|
||||
|
||||
|
||||
###
|
||||
|
||||
def process_response(input_string, quotation_check_field):
    """Extract and parse the JSON object embedded in a model response.

    Args:
        input_string: Raw text returned by the model.
        quotation_check_field: Field name whose value is inspected for
            single-quoted-string / list formatting to pick a parsing strategy.

    Returns:
        The parsed dict when a JSON object could be recovered, the original
        string unchanged when it contains no '{', or None when every parsing
        attempt fails (the failure is logged to stdout).
    """
    if '{' not in input_string:
        return input_string
    try:
        # Everything from the first '{' onward is treated as the JSON payload;
        # any model chatter before it is discarded.
        result = input_string[input_string.index('{'):]
        # Single-quoted scalar or list value for the checked field: normalise
        # quoting via parse_string() before loading.
        # (Patterns are raw strings — the original non-raw '\s'/'\[' escapes
        # raise SyntaxWarning on modern Python.)
        if re.search(r"'" + quotation_check_field + r"':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE) or \
                re.search(r"'" + quotation_check_field + r"':\s*\[([^\]]+)]", result, re.DOTALL | re.MULTILINE):
            return json.loads(parse_string(result))
        # Otherwise flatten newlines. "title" responses keep paragraph
        # boundaries as the **paragraph** placeholder so callers can restore
        # them later (see replace_expression_in_object).
        if "title" in result:
            parsed_string = result.replace("\n\n", "\n").replace("\n", "**paragraph**")
        else:
            parsed_string = result.replace("\n\n", " ").replace("\n", " ")
        # Drop trailing commas before closing brackets/braces.
        parsed_string = re.sub(r',\s*]', ']', parsed_string)
        parsed_string = re.sub(r',\s*}', '}', parsed_string)
        # No list syntax at all: values are probably unquoted — quote them.
        if parsed_string.find('[') == -1 and parsed_string.find(']') == -1:
            parsed_string = parse_string_2(parsed_string)
        return json.loads(parsed_string)
    except Exception as e:
        # Log once (the original printed the exception twice) and fall through
        # to an explicit None so callers can distinguish failure.
        print(f"Invalid JSON string! Exception: {e}")
        print(f"String: {input_string}")
        return None
|
||||
|
||||
|
||||
def parse_string(to_parse: str):
    """Rewrite a pseudo-JSON string with single-quoted keys/values into
    double-quoted text that json.loads can handle.

    Double quotes already present in the input are preserved by temporarily
    escaping them, then restoring them as single quotes after the swap.
    """
    # Protect pre-existing double quotes so they survive the quote swap.
    protected = to_parse.replace("\"", "\\\"")
    # Single quotes at word boundaries act as string delimiters; apostrophes
    # inside words (e.g. "don't") are left untouched.
    swapped = re.sub(r"(?<!\w)'|'(?!\w)", '"', protected)
    # Restore the protected quotes (now as single quotes) and flatten
    # paragraph breaks.
    restored = swapped.replace("\\\"", "'").replace("\n\n", " ")
    # Strip trailing commas before closing brackets and braces.
    restored = re.sub(r',\s*]', ']', restored)
    return re.sub(r',\s*}', '}', restored)
|
||||
|
||||
|
||||
def parse_string_2(to_parse: str):
    """Wrap unquoted values of a brace-delimited key/value string in double
    quotes so it becomes loadable JSON.

    Splits on separators that directly follow a closing double quote, so
    even-indexed pieces are keys and odd-indexed pieces are values.
    """
    # Work on the interior of the object only.
    interior = to_parse.replace("{", "").replace("}", "")
    pieces = re.split(r'(?<="),|(?<="):', interior)

    # Alternating key/value layout: values sit at the odd indices.
    values = pieces[1::2]

    quoted = ['"' + value.replace('"', "").strip() + '"' for value in values]

    # Substitute each raw value with its quoted form in the original text.
    for raw, fixed in zip(values, quoted):
        to_parse = to_parse.replace(raw, fixed)

    # Normalise spacing after colons.
    return to_parse.replace(":", ": ")
|
||||
|
||||
|
||||
def remove_special_chars_and_escapes(input_string):
    """Strip literal escape sequences and punctuation from *input_string*.

    Escaped double quotes become apostrophes and paragraph breaks become a
    single space before cleaning; the result keeps only letters, digits and
    whitespace.
    """
    text = input_string.replace("\\\"", "'").replace("\n\n", " ")
    # One pass removes literal \n / \r / \t sequences and every remaining
    # non-alphanumeric, non-whitespace character.
    return re.sub(r'(\\[nrt])|[^a-zA-Z0-9\s]', '', text)
|
||||
|
||||
|
||||
def check_fields(obj, fields):
    """Return True when every name in *fields* is present in *obj*."""
    for required in fields:
        if required not in obj:
            return False
    return True
|
||||
|
||||
|
||||
def make_openai_call(model, messages, token_count, fields_to_check, temperature):
|
||||
global try_count
|
||||
result = openai.ChatCompletion.create(
|
||||
result = client.chat.completions.create(
|
||||
model=model,
|
||||
max_tokens=int(MAX_TOKENS - token_count - 300),
|
||||
temperature=float(temperature),
|
||||
top_p=float(TOP_P),
|
||||
frequency_penalty=float(FREQUENCY_PENALTY),
|
||||
messages=messages
|
||||
)["choices"][0]["message"]["content"]
|
||||
|
||||
messages=messages,
|
||||
response_format={"type": "json_object"}
|
||||
)
|
||||
result = result.choices[0].message.content
|
||||
if has_blacklisted_words(result) and try_count < TRY_LIMIT:
|
||||
try_count = try_count + 1
|
||||
return make_openai_call(model, messages, token_count, fields_to_check, temperature)
|
||||
@@ -156,57 +70,22 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
|
||||
return ""
|
||||
|
||||
if fields_to_check is None:
|
||||
return result.replace("\n\n", " ").strip()
|
||||
return json.loads(result)
|
||||
|
||||
processed_response = process_response(result, fields_to_check[0])
|
||||
|
||||
if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT:
|
||||
if check_fields(result, fields_to_check) is False and try_count < TRY_LIMIT:
|
||||
try_count = try_count + 1
|
||||
return make_openai_call(model, messages, token_count, fields_to_check, temperature)
|
||||
elif try_count >= TRY_LIMIT:
|
||||
try_count = 0
|
||||
return result
|
||||
return json.loads(result)
|
||||
else:
|
||||
try_count = 0
|
||||
return processed_response
|
||||
return json.loads(result)
|
||||
|
||||
|
||||
def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
    """Call the legacy OpenAI completions endpoint and coerce the reply into JSON.

    Retries (recursively, via the module-global ``try_count``) when the reply
    contains blacklisted words or is missing required fields.

    NOTE(review): this still uses the pre-1.0 ``openai.Completion`` API while
    the rest of the file is migrating to the ``client``-based API — confirm
    this path is still needed.

    Args:
        model: OpenAI model identifier for the completion endpoint.
        message: Prompt text sent as-is.
        token_count: Tokens already consumed by the prompt; used to size the
            completion budget.
        fields_to_check: Keys the parsed JSON reply must contain, or None to
            return the raw text reply.
        temperature: NOTE(review): accepted but ignored — the call below
            hard-codes temperature=0.7.

    Returns:
        A dict with the required fields, a plain string when
        ``fields_to_check`` is None, or "" when the retry limit is exhausted.
    """
    global try_count
    response = openai.Completion.create(
        model=model,
        prompt=message,
        # 4097 is the model context window; 300 tokens kept as headroom.
        max_tokens=int(4097 - token_count - 300),
        temperature=0.7
    )["choices"][0]["text"]

    # Retry on blacklisted content; give up with "" once the limit is hit.
    if has_blacklisted_words(response) and try_count < TRY_LIMIT:
        try_count = try_count + 1
        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
    elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
        try_count = 0
        return ""

    # No schema to enforce: return the flattened raw text.
    if fields_to_check is None:
        try_count = 0
        return response.replace("\n\n", " ").strip()

    # Repair common truncation: missing leading '{' / '"' or trailing '}'.
    response = remove_special_characters_from_beginning(response)
    if response[0] != "{" and response[0] != '"':
        response = "{\"" + response
    if not response.endswith("}"):
        response = response + "}"
    try:
        processed_response = process_response(response, fields_to_check[0])
        # Restore paragraph breaks that process_response encoded as
        # **paragraph** placeholders.
        reparagraphed_response = replace_expression_in_object(processed_response, "**paragraph**", "\n")
        if check_fields(reparagraphed_response, fields_to_check) is False and try_count < TRY_LIMIT:
            try_count = try_count + 1
            return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
        else:
            try_count = 0
            return reparagraphed_response
    except Exception as e:
        # NOTE(review): this retry does not increment try_count, so a
        # persistently unparseable reply recurses without bound — confirm.
        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
    # Unreachable: every branch above returns; kept for safety.
    return ""
|
||||
|
||||
|
||||
# GRADING SUMMARY
|
||||
@@ -254,7 +133,7 @@ def calculate_section_grade_summary(section):
|
||||
messages[2:2] = [{"role": "user",
|
||||
"content": "This section is s designed to assess the English language proficiency of individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."}]
|
||||
|
||||
res = openai.ChatCompletion.create(
|
||||
res = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
max_tokens=chat_config['max_tokens'],
|
||||
temperature=chat_config['temperature'],
|
||||
@@ -298,20 +177,32 @@ def parse_bullet_points(bullet_points_str, grade):
|
||||
|
||||
|
||||
def get_fixed_text(text):
    """Ask the chat model to correct errors in *text* and return the fix.

    Uses the JSON-mode chat-completions path; the superseded instruct-model
    variant (duplicate token_count/response assignments left over from the
    API migration) has been removed. The "fixed test" typo in the system
    prompt sample is corrected to "fixed text".

    Args:
        text: The text to correct.

    Returns:
        The corrected text from the model's "fixed_text" field.
    """
    messages = [
        {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
                                       '{"fixed_text": "fixed text with no misspelling errors"}')
         },
        {"role": "user", "content": (
            'Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
            'is wrong. \n The text: "' + text + '"')
         }
    ]
    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]
|
||||
|
||||
|
||||
def get_speaking_corrections(text):
    """Ask the chat model to correct a speaking transcription.

    Uses the JSON-mode chat-completions path; the superseded instruct-model
    variant (duplicate token_count/response assignments left over from the
    API migration) has been removed.

    Args:
        text: The transcription to correct.

    Returns:
        The corrected transcription from the model's "fixed_text" field.
    """
    messages = [
        {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
                                       '{"fixed_text": "fixed transcription with no misspelling errors"}')
         },
        {"role": "user", "content": (
            'Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
            'replace what is wrong. \n The text: "' + text + '"')
         }
    ]
    token_count = count_total_tokens(messages)
    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]
|
||||
|
||||
|
||||
@@ -340,3 +231,9 @@ def replace_expression_in_object(obj, expression, replacement):
|
||||
elif isinstance(obj[key], dict):
|
||||
obj[key] = replace_expression_in_object(obj[key], expression, replacement)
|
||||
return obj
|
||||
|
||||
def count_total_tokens(messages):
    """Sum the token counts of every chat message's content field."""
    return sum(count_tokens(entry["content"])["n_tokens"] for entry in messages)
|
||||
|
||||
Reference in New Issue
Block a user