Initial updates to the most recent OpenAI API version.

2024-05-19 14:37:50 +01:00
parent 070e8808b1
commit e568aff4e4
4 changed files with 162 additions and 205 deletions
--- a/helper/constants.py
+++ b/helper/constants.py
@@ -7,6 +7,8 @@ GRADING_TEMPERATURE = 0.1
 TIPS_TEMPERATURE = 0.2
 GEN_QUESTION_TEMPERATURE = 0.7
 GPT_3_5_TURBO = "gpt-3.5-turbo"
+GPT_4_TURBO = "gpt-4-turbo"
+GPT_4_O = "gpt-4o"
 GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
 GPT_3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"
 GPT_4_PREVIEW = "gpt-4-turbo-preview"
--- a/helper/openai_interface.py
+++ b/helper/openai_interface.py
@@ -1,15 +1,14 @@
 import json
 import os
-import re

-import openai
+from openai import OpenAI
 from dotenv import load_dotenv

-from helper.constants import GPT_3_5_TURBO_INSTRUCT, BLACKLISTED_WORDS
+from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
 from helper.token_counter import count_tokens

 load_dotenv()
-openai.api_key = os.getenv("OPENAI_API_KEY")
+client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

 MAX_TOKENS = 4097
 TOP_P = 0.9
@@ -50,105 +49,20 @@ tools = [{
 }]


-###
-
-def process_response(input_string, quotation_check_field):
-    if '{' in input_string:
-        try:
-            # Find the index of the first occurrence of '{'
-            index = input_string.index('{')
-            # Extract everything after the first '{' (inclusive)
-            result = input_string[index:]
-            if re.search(r"'" + quotation_check_field + "':\s*'(.*?)'", result, re.DOTALL | re.MULTILINE) or \
-                    re.search(r"'" + quotation_check_field + "':\s*\[([^\]]+)]", result, re.DOTALL | re.MULTILINE):
-                json_obj = json.loads(parse_string(result))
-                return json_obj
-            else:
-                if "title" in result:
-                    parsed_string = result.replace("\n\n", "\n")
-                    parsed_string = parsed_string.replace("\n", "**paragraph**")
-                else:
-                    parsed_string = result.replace("\n\n", " ")
-                    parsed_string = parsed_string.replace("\n", " ")
-                parsed_string = re.sub(r',\s*]', ']', parsed_string)
-                parsed_string = re.sub(r',\s*}', '}', parsed_string)
-                if (parsed_string.find('[') == -1) and (parsed_string.find(']') == -1):
-                    parsed_string = parse_string_2(parsed_string)
-                    return json.loads(parsed_string)
-
-                return json.loads(parsed_string)
-        except Exception as e:
-            print(f"Invalid JSON string! Exception: {e}")
-            print(f"String: {input_string}")
-            print(f"Exception: {e}")
-    else:
-        return input_string
-
-
-def parse_string(to_parse: str):
-    parsed_string = to_parse.replace("\"", "\\\"")
-    pattern = r"(?<!\w)'|'(?!\w)"
-    parsed_string = re.sub(pattern, '"', parsed_string)
-    parsed_string = parsed_string.replace("\\\"", "'")
-    parsed_string = parsed_string.replace("\n\n", " ")
-    parsed_string = re.sub(r',\s*]', ']', parsed_string)
-    parsed_string = re.sub(r',\s*}', '}', parsed_string)
-    return parsed_string
-
-
-def parse_string_2(to_parse: str):
-    keys_and_values_str = to_parse.replace("{", "").replace("}", "")
-    split_pattern = r'(?<="),|(?<="):'
-    keys_and_values = re.split(split_pattern, keys_and_values_str)
-
-    keys = []
-    values = []
-
-    for idx, x in enumerate(keys_and_values):
-        if (idx % 2) == 0:
-            keys.append(x)
-        else:
-            values.append(x)
-
-    parsed_values = []
-
-    for value in values:
-        parsed_values.append(("\"" + value.replace("\"", "").strip() + "\""))
-
-    for ind, parsed_value in enumerate(parsed_values):
-        to_parse = to_parse.replace(values[ind], parsed_values[ind])
-
-    to_parse = to_parse.replace(":", ": ")
-    return to_parse
-
-
-def remove_special_chars_and_escapes(input_string):
-    parsed_string = input_string.replace("\\\"", "'")
-    parsed_string = parsed_string.replace("\n\n", " ")
-    # Define a regular expression pattern to match special characters and escapes
-    pattern = r'(\\[nrt])|[^a-zA-Z0-9\s]'
-
-    # Use re.sub() to replace the matched patterns with an empty string
-    cleaned_string = re.sub(pattern, '', parsed_string)
-
-    return cleaned_string
-
-
 def check_fields(obj, fields):
    return all(field in obj for field in fields)


 def make_openai_call(model, messages, token_count, fields_to_check, temperature):
    global try_count
-    result = openai.ChatCompletion.create(
+    result = client.chat.completions.create(
        model=model,
        max_tokens=int(MAX_TOKENS - token_count - 300),
        temperature=float(temperature),
-        top_p=float(TOP_P),
-        frequency_penalty=float(FREQUENCY_PENALTY),
-        messages=messages
-    )["choices"][0]["message"]["content"]
-
+        messages=messages,
+        response_format={"type": "json_object"}
+    )
+    result = result.choices[0].message.content
    if has_blacklisted_words(result) and try_count < TRY_LIMIT:
        try_count = try_count + 1
        return make_openai_call(model, messages, token_count, fields_to_check, temperature)
@@ -156,57 +70,22 @@ def make_openai_call(model, messages, token_count, fields_to_check, temperature)
        return ""

    if fields_to_check is None:
-        return result.replace("\n\n", " ").strip()
+        return json.loads(result)

-    processed_response = process_response(result, fields_to_check[0])
-
-    if check_fields(processed_response, fields_to_check) is False and try_count < TRY_LIMIT:
+    if check_fields(result, fields_to_check) is False and try_count < TRY_LIMIT:
        try_count = try_count + 1
        return make_openai_call(model, messages, token_count, fields_to_check, temperature)
    elif try_count >= TRY_LIMIT:
        try_count = 0
-        return result
+        return json.loads(result)
    else:
        try_count = 0
-        return processed_response
+        return json.loads(result)


 def make_openai_instruct_call(model, message: str, token_count, fields_to_check, temperature):
    global try_count
-    response = openai.Completion.create(
-        model=model,
-        prompt=message,
-        max_tokens=int(4097 - token_count - 300),
-        temperature=0.7
-    )["choices"][0]["text"]
-
-    if has_blacklisted_words(response) and try_count < TRY_LIMIT:
-        try_count = try_count + 1
-        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
-    elif has_blacklisted_words(response) and try_count >= TRY_LIMIT:
-        try_count = 0
-        return ""
-
-    if fields_to_check is None:
-        try_count = 0
-        return response.replace("\n\n", " ").strip()
-
-    response = remove_special_characters_from_beginning(response)
-    if response[0] != "{" and response[0] != '"':
-        response = "{\"" + response
-    if not response.endswith("}"):
-        response = response + "}"
-    try:
-        processed_response = process_response(response, fields_to_check[0])
-        reparagraphed_response = replace_expression_in_object(processed_response, "**paragraph**", "\n")
-        if check_fields(reparagraphed_response, fields_to_check) is False and try_count < TRY_LIMIT:
-            try_count = try_count + 1
-            return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
-        else:
-            try_count = 0
-            return reparagraphed_response
-    except Exception as e:
-        return make_openai_instruct_call(model, message, token_count, fields_to_check, temperature)
+    return ""


 # GRADING SUMMARY
@@ -254,7 +133,7 @@ def calculate_section_grade_summary(section):
        messages[2:2] = [{"role": "user",
                          "content": "This section is s designed to assess the English language proficiency of individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."}]

-    res = openai.ChatCompletion.create(
+    res = client.chat.completions.create(
        model="gpt-3.5-turbo",
        max_tokens=chat_config['max_tokens'],
        temperature=chat_config['temperature'],
@@ -298,20 +177,32 @@ def parse_bullet_points(bullet_points_str, grade):


 def get_fixed_text(text):
-    message = ('Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
-               'is wrong. Sample JSON: {"fixed_text": "fixed test with no '
-               'misspelling errors"}] \n The text: "' + text + '"')
-    token_count = count_tokens(message)["n_tokens"]
-    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
+    messages = [
+        {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
+                                       '{"fixed_text": "fixed test with no misspelling errors"}')
+         },
+        {"role": "user", "content": (
+                'Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
+                'is wrong. \n The text: "' + text + '"')
+         }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]


 def get_speaking_corrections(text):
-    message = ('Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
-               'replace what is wrong. Sample JSON: {"fixed_text": "fixed '
-               'transcription with no misspelling errors"}] \n The text: "' + text + '"')
-    token_count = count_tokens(message)["n_tokens"]
-    response = make_openai_instruct_call(GPT_3_5_TURBO_INSTRUCT, message, token_count, ["fixed_text"], 0.2)
+    messages = [
+        {"role": "system", "content": ('You are a helpful assistant designed to output JSON on this format: '
+                                       '{"fixed_text": "fixed transcription with no misspelling errors"}')
+         },
+        {"role": "user", "content": (
+                'Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
+                'replace what is wrong. \n The text: "' + text + '"')
+         }
+    ]
+    token_count = count_total_tokens(messages)
+    response = make_openai_call(GPT_3_5_TURBO, messages, token_count, ["fixed_text"], 0.2)
    return response["fixed_text"]


@@ -340,3 +231,9 @@ def replace_expression_in_object(obj, expression, replacement):
            elif isinstance(obj[key], dict):
                obj[key] = replace_expression_in_object(obj[key], expression, replacement)
    return obj
+
+def count_total_tokens(messages):
+    total_tokens = 0
+    for message in messages:
+        total_tokens += count_tokens(message["content"])["n_tokens"]
+    return total_tokens