Files
encoach_backend/helper/openai_interface.py
2024-06-10 19:39:08 +01:00

251 lines
10 KiB
Python

import json
import os
import re
from dotenv import load_dotenv
from openai import OpenAI
from helper.constants import BLACKLISTED_WORDS, GPT_3_5_TURBO
from helper.token_counter import count_tokens
# Load OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()
# Shared OpenAI client for every call in this module (openai>=1.x style).
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
# Context-window ceiling used to size completions (prompt + completion tokens).
MAX_TOKENS = 4097
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5
# Maximum number of automatic retries for blacklisted/incomplete replies.
TRY_LIMIT = 2
# Shared retry counter mutated by make_openai_call.
# NOTE(review): module-level mutable state — concurrent requests would share
# and clobber this counter; confirm the app only calls this sequentially.
try_count = 0
# GRADING SUMMARY
# Request settings for the grade-summary completion call.
chat_config = {'max_tokens': 1000, 'temperature': 0.2}
# Section codes that calculate_grading_summary knows how to summarize.
section_keys = ['reading', 'listening', 'writing', 'speaking', 'level']
# Highest attainable IELTS band score.
grade_top_limit = 9
# Tool (function-calling) schema the model uses to hand back its evaluation,
# suggestions, and optional bullet points as structured JSON arguments.
tools = [{
    "type": "function",
    "function": {
        "name": "save_evaluation_and_suggestions",
        "description": "Saves the evaluation and suggestions requested by input.",
        "parameters": {
            "type": "object",
            "properties": {
                "evaluation": {
                    "type": "string",
                    "description": "A comment on the IELTS section grade obtained in the specific section and what it could mean without suggestions.",
                },
                "suggestions": {
                    "type": "string",
                    "description": "A small paragraph text with suggestions on how to possibly get a better grade than the one obtained.",
                },
                "bullet_points": {
                    "type": "string",
                    "description": "Text with four bullet points to improve the english speaking ability. Only include text for the bullet points separated by a paragraph. ",
                },
            },
            "required": ["evaluation", "suggestions"],
        },
    }
}]
def check_fields(obj, fields):
    """Return True when every name in *fields* is present in *obj*.

    Uses the `in` operator, so this is a key test for dicts and a substring
    test when *obj* is a raw string (as make_openai_call passes it).
    """
    for field in fields:
        if field not in obj:
            return False
    return True
def make_openai_call(model, messages, token_count, fields_to_check, temperature, check_blacklisted=True):
    """Call the chat-completions API and return the parsed JSON reply.

    Retries up to TRY_LIMIT times (tracked in the module-level ``try_count``)
    when the reply contains a blacklisted word or lacks required fields.
    NOTE(review): ``try_count`` is shared module state — concurrent callers
    would interfere with each other's retry budget; confirm usage is serial.

    :param model: model name passed straight to the API.
    :param messages: chat messages for the request.
    :param token_count: prompt token count; the completion budget is
        MAX_TOKENS - token_count - 300 (300 left as request overhead headroom).
    :param fields_to_check: names that must appear in the raw reply text
        (substring check via check_fields), or None to skip the check.
    :param temperature: sampling temperature.
    :param check_blacklisted: when True, retry on blacklisted words and
        return "" once the retry budget is exhausted.
    :return: the reply parsed with json.loads, or "" when blacklist retries
        are exhausted.
    """
    global try_count
    result = client.chat.completions.create(
        model=model,
        max_tokens=int(MAX_TOKENS - token_count - 300),
        temperature=float(temperature),
        messages=messages,
        response_format={"type": "json_object"}
    )
    result = result.choices[0].message.content
    if check_blacklisted:
        found_blacklisted_word = get_found_blacklisted_words(result)
        if found_blacklisted_word is not None and try_count < TRY_LIMIT:
            from app import app  # local import avoids a circular dependency
            app.logger.warning("Result contains blacklisted words: " + str(found_blacklisted_word))
            try_count = try_count + 1
            # Propagate check_blacklisted so the retry keeps the caller's policy
            # (previously it was dropped and silently reset to the default True).
            return make_openai_call(model, messages, token_count, fields_to_check, temperature, check_blacklisted)
        elif found_blacklisted_word is not None and try_count >= TRY_LIMIT:
            # Reset the shared counter so later calls get a fresh retry budget
            # (previously it stayed at TRY_LIMIT and starved future calls).
            try_count = 0
            return ""
    if fields_to_check is None:
        try_count = 0
        return json.loads(result)
    if check_fields(result, fields_to_check) is False and try_count < TRY_LIMIT:
        try_count = try_count + 1
        return make_openai_call(model, messages, token_count, fields_to_check, temperature, check_blacklisted)
    else:
        # Either the fields are present or the retry budget is spent; in both
        # cases the original returned the parsed reply after resetting the counter.
        try_count = 0
        return json.loads(result)
# GRADING SUMMARY
def calculate_grading_summary(body):
    """Build a per-section grading summary for every known section in *body*.

    Each entry keeps the section's code/name/grade and adds the model's
    evaluation, suggestions, and parsed bullet points.
    """
    summaries = []
    for sec in extract_existing_sections_from_body(body, section_keys):
        ai_summary = calculate_section_grade_summary(sec)
        summaries.append({
            'code': sec['code'],
            'name': sec['name'],
            'grade': sec['grade'],
            'evaluation': ai_summary['evaluation'],
            'suggestions': ai_summary['suggestions'],
            'bullet_points': parse_bullet_points(ai_summary['bullet_points'], sec['grade']),
        })
    return {'sections': summaries}
def calculate_section_grade_summary(section):
    """Ask the model to evaluate one section's grade via a tool call.

    :param section: dict with at least 'code', 'name', and 'grade' keys.
    :return: dict from parse_openai_response with 'evaluation',
        'suggestions', and (possibly) 'bullet_points'.
    """
    messages = [
        {
            "role": "user",
            "content": "You are a IELTS test section grade evaluator. You will receive a IELTS test section name and the grade obtained in the section. You should offer a evaluation comment on this grade and separately suggestions on how to possibly get a better grade.",
        },
        {
            "role": "user",
            "content": "Section: " + str(section['name']) + " Grade: " + str(section['grade']),
        },
        {"role": "user", "content": "Speak in third person."},
        {"role": "user",
         "content": "Don't offer suggestions in the evaluation comment. Only in the suggestions section."},
        {"role": "user",
         "content": "Your evaluation comment on the grade should enunciate the grade, be insightful, be speculative, be one paragraph long. "},
        {"role": "user", "content": "Please save the evaluation comment and suggestions generated."},
        {"role": "user", "content": f"Offer bullet points to improve the english {str(section['name'])} ability."},
    ]
    # Insert extra section-specific context right after the grade message
    # (slice assignment at index 2 splices the list in place).
    if section['code'] == "level":
        messages[2:2] = [{
            "role": "user",
            "content": "This section is comprised of multiple choice questions that measure the user's overall english level. These multiple choice questions are about knowledge on vocabulary, syntax, grammar rules, and contextual usage. The grade obtained measures the ability in these areas and english language overall."
        }]
    elif section['code'] == "speaking":
        messages[2:2] = [{"role": "user",
                          "content": "This section is s designed to assess the English language proficiency of individuals who want to study or work in English-speaking countries. The speaking section evaluates a candidate's ability to communicate effectively in spoken English."}]
    # Uses the module-level `tools` schema so the reply arrives as a
    # save_evaluation_and_suggestions tool call rather than free text.
    res = client.chat.completions.create(
        model="gpt-3.5-turbo",
        max_tokens=chat_config['max_tokens'],
        temperature=chat_config['temperature'],
        tools=tools,
        messages=messages)
    return parse_openai_response(res)
def parse_openai_response(response):
    """Extract the tool-call arguments from a chat-completion response.

    The openai>=1.x client (used by this module) returns typed objects, so
    the previous dict-style lookups (`'choices' in response`, subscripting)
    never matched and the empty fallback was always returned. Attribute
    access reads the same fields from the typed response.

    :param response: a ChatCompletion-style object.
    :return: the tool call's JSON arguments as a dict, or a default dict
        with empty 'evaluation'/'suggestions'/'bullet_points' when the
        response carries no usable tool call.
    """
    try:
        tool_calls = response.choices[0].message.tool_calls
        arguments = tool_calls[0].function.arguments if tool_calls else None
        if arguments:
            return json.loads(arguments)
    except (AttributeError, IndexError, TypeError, json.JSONDecodeError):
        pass  # malformed/unexpected response: fall through to the safe default
    return {'evaluation': "", 'suggestions': "", 'bullet_points': []}
def extract_existing_sections_from_body(my_dict, keys_to_extract):
    """Return the sections from *my_dict* whose code is in *keys_to_extract*.

    Only entries that carry 'code', 'grade', and 'name' are kept. Returns an
    empty list when 'sections' is missing or not a list (the original fell
    through and returned None, which crashed iterating callers).
    """
    sections = my_dict.get('sections')
    if not isinstance(sections, list):
        return []
    return [item for item in sections
            if 'code' in item and item['code'] in keys_to_extract
            and 'grade' in item and 'name' in item]
def parse_bullet_points(bullet_points_str, grade):
    """Split a bullet-point blob into a list of sentence-terminated lines.

    :param bullet_points_str: newline-separated bullet text ('-' markers).
    :param grade: suggestions are suppressed at the top grade (9).
    :return: cleaned lines each ending with '.', or [] when the input is not
        a string or the grade needs no suggestions.
    """
    max_grade_for_suggestions = 9
    if not isinstance(bullet_points_str, str) or grade >= max_grade_for_suggestions:
        return []
    # Strip only the leading bullet marker; the original replaced every '-'
    # in the line, mangling hyphenated words like "well-known".
    cleaned_lines = [line.strip().lstrip('-').strip()
                     for line in bullet_points_str.split('\n')]
    # Terminate non-empty lines with '.' (empty lines are kept as-is).
    return [line + '.' if line and not line.endswith('.') else line
            for line in cleaned_lines]
def get_fixed_text(text):
    """Ask the model to fix misspellings in *text* and return the fixed text.

    The prompt forces a JSON reply shaped as {"fixed_text": ...}; blacklist
    checking is disabled for this call.
    """
    system_prompt = ('You are a helpful assistant designed to output JSON on this format: '
                     '{"fixed_text": "fixed test with no misspelling errors"}')
    user_prompt = (
        'Fix the errors in the given text and put it in a JSON. Do not complete the answer, only replace what '
        'is wrong. \n The text: "' + text + '"')
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    prompt_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_3_5_TURBO, messages, prompt_tokens, ["fixed_text"], 0.2, False)
    return reply["fixed_text"]
def get_speaking_corrections(text):
    """Ask the model to fix misspellings in a speaking transcription.

    Same contract as get_fixed_text but phrased for transcriptions; returns
    the corrected text from the model's {"fixed_text": ...} JSON reply.
    """
    system_prompt = ('You are a helpful assistant designed to output JSON on this format: '
                     '{"fixed_text": "fixed transcription with no misspelling errors"}')
    user_prompt = (
        'Fix the errors in the provided transcription and put it in a JSON. Do not complete the answer, only '
        'replace what is wrong. \n The text: "' + text + '"')
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    prompt_tokens = count_total_tokens(messages)
    reply = make_openai_call(GPT_3_5_TURBO, messages, prompt_tokens, ["fixed_text"], 0.2, False)
    return reply["fixed_text"]
def has_blacklisted_words(text: str) -> bool:
    """Return True when *text* contains any blacklisted word as a whole word.

    Delegates to get_found_blacklisted_words so both helpers use the same
    word-boundary matching; the previous plain substring test also flagged
    innocent words that merely contain a blacklisted term.
    """
    return get_found_blacklisted_words(text) is not None
def get_found_blacklisted_words(text: str):
    """Return the first blacklisted word found as a whole word in *text*.

    Matching is case-insensitive and uses \\b word boundaries; returns None
    when no blacklisted word is present.
    """
    lowered = text.lower()
    hits = (word for word in BLACKLISTED_WORDS
            if re.search(r'\b' + re.escape(word) + r'\b', lowered))
    return next(hits, None)
def remove_special_characters_from_beginning(string):
    """Strip leading newlines and one layer of wrapping quotes from *string*.

    Fixes two defects in the original: the leading-quote check tested the
    raw input instead of the newline-stripped value (so "\\n'abc" kept its
    quote), and a trailing single quote was never removed even though a
    leading one was.
    """
    cleaned_string = string.lstrip('\n')
    if cleaned_string[:1] in ("'", '"'):
        cleaned_string = cleaned_string[1:]
    if cleaned_string.endswith(('"', "'")):
        cleaned_string = cleaned_string[:-1]
    return cleaned_string
def replace_expression_in_object(obj, expression, replacement):
    """Recursively replace *expression* with *replacement* in all strings.

    Dicts are mutated in place (and returned, as before); lists and strings
    are handled recursively. The original only recursed into dicts, so plain
    strings inside lists were returned unchanged — e.g. {'k': ['foo']} never
    had 'foo' replaced. Non-container, non-string values pass through as-is.
    """
    if isinstance(obj, dict):
        for key, value in obj.items():
            obj[key] = replace_expression_in_object(value, expression, replacement)
        return obj
    if isinstance(obj, list):
        return [replace_expression_in_object(item, expression, replacement) for item in obj]
    if isinstance(obj, str):
        return obj.replace(expression, replacement)
    return obj
def count_total_tokens(messages):
    """Sum the token counts of every message's content via count_tokens."""
    return sum(count_tokens(message["content"])["n_tokens"] for message in messages)