API for writing task 2, v1.

This commit is contained in:
Cristiano Ferreira
2023-06-19 23:38:47 +01:00
parent adb07a56ff
commit 48a1197d56
14 changed files with 281 additions and 329 deletions

17
helper/generate_jwt.py Normal file
View File

@@ -0,0 +1,17 @@
import jwt
import os
from dotenv import load_dotenv

# Load environment variables (JWT_SECRET_KEY) from a local .env file.
load_dotenv()

# Payload (claims) to embed in the token.
payload = {'sub': 'test'}

# Signing key; fail fast with a clear message instead of letting
# jwt.encode raise an opaque error when the key is missing.
secret_key = os.getenv("JWT_SECRET_KEY")
if not secret_key:
    raise SystemExit("JWT_SECRET_KEY is not set; add it to your .env file.")

# Sign the payload with HMAC-SHA256 and print the resulting token.
jwt_token = jwt.encode(payload, secret_key, algorithm='HS256')
print(jwt_token)

View File

@@ -0,0 +1,5 @@
import secrets

# Print a cryptographically strong 256-bit key, hex-encoded (64 chars),
# suitable for use as the JWT signing secret.
print(secrets.token_hex(32))

View File

@@ -0,0 +1,58 @@
import json
import openai
import os
from dotenv import load_dotenv

# Load OPENAI_API_KEY from a local .env file and configure the client.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Context-window size for gpt-3.5-turbo; used to size max_tokens per call.
MAX_TOKENS = 4097
# Sampling parameters: low temperature for near-deterministic grading.
TEMPERATURE = 0.1
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5
# Retry budget for responses that come back missing required fields.
TRY_LIMIT = 1
# Mutable module-level retry counter shared by make_openai_call's recursion.
try_count = 0
def process_response(input_string):
    """Parse a model response that should contain a JSON object.

    Tries to parse *input_string* as-is first, so valid JSON that happens
    to contain apostrophes is not corrupted (the original applied a
    blanket single-to-double-quote replacement, which broke such input).
    Only if that fails, apply the legacy fix-ups — single quotes to
    double quotes and blank lines collapsed — and retry.

    Returns the parsed object, or an empty dict if parsing fails.
    """
    try:
        return json.loads(input_string)
    except json.JSONDecodeError:
        pass
    # Fallback: the model sometimes emits pseudo-JSON with single quotes.
    parsed_string = input_string.replace("'", "\"").replace("\n\n", " ")
    try:
        return json.loads(parsed_string)
    except json.JSONDecodeError:
        print("Invalid JSON string!")
        return {}
def check_fields(obj):
    """Return True when *obj* carries every key the grading output needs."""
    required = ("overall", "task_response", "comment")
    return all(key in obj for key in required)
def make_openai_call(messages, token_count):
    """Call the chat completion API and return a parsed grading dict.

    Retries up to TRY_LIMIT times when the response is missing required
    fields; once retries are exhausted, returns the raw response text so
    the caller can inspect it.

    Parameters
    ----------
    messages : list
        Chat messages to send to the model.
    token_count : int
        Tokens already consumed by *messages*; used to size max_tokens.
    """
    global try_count
    result = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # Reserve a 500-token safety margin below the model's context limit.
        max_tokens=int(MAX_TOKENS - token_count - 500),
        temperature=float(TEMPERATURE),
        top_p=float(TOP_P),
        frequency_penalty=float(FREQUENCY_PENALTY),
        messages=messages
    )
    content = result["choices"][0]["message"]["content"]
    processed_response = process_response(content)
    # Bug fix: check validity FIRST. The original tested the retry budget
    # before validity, so a *successful* retry whose try_count had already
    # reached TRY_LIMIT returned the raw string instead of the parsed dict.
    if check_fields(processed_response):
        try_count = 0
        return processed_response
    if try_count < TRY_LIMIT:
        try_count += 1
        return make_openai_call(messages, token_count)
    # Retry budget exhausted: hand back the raw text for inspection.
    try_count = 0
    return content

89
helper/token_counter.py Normal file
View File

@@ -0,0 +1,89 @@
# This is a work in progress. There are still bugs. Once it is production-ready this will become a full repo.
import os
def count_tokens(text, model_name="gpt-3.5-turbo", debug=False):
    """
    Count the number of tokens in a given text string without using the OpenAI API.
    This function tries three methods in the following order:
    1. tiktoken (preferred): Accurate token counting similar to the OpenAI API.
    2. nltk: Token counting using the Natural Language Toolkit library.
    3. split: Simple whitespace-based token counting as a fallback.
    Usage:
    ------
    text = "Your text here"
    result = count_tokens(text, model_name="gpt-3.5-turbo", debug=True)
    print(result)
    Required libraries:
    -------------------
    - tiktoken: Install with 'pip install tiktoken'
    - nltk: Install with 'pip install nltk'
    Parameters:
    -----------
    text : str
        The text string for which you want to count tokens.
    model_name : str, optional
        The OpenAI model for which you want to count tokens (default: "gpt-3.5-turbo").
    debug : bool, optional
        Set to True to print error messages (default: False).
    Returns:
    --------
    result : dict
        A dictionary containing the number of tokens and the method used for counting.
    """
    # Try using tiktoken (matches the tokenization the OpenAI API uses).
    try:
        import tiktoken
        encoding = tiktoken.encoding_for_model(model_name)
        return {"n_tokens": len(encoding.encode(text)), "method": "tiktoken"}
    except Exception as e:
        if debug:
            print(f"Error using tiktoken: {e}")
    # Try using nltk.
    try:
        import nltk
        # quiet=True: the original printed the full download report on
        # every single call, even when punkt was already present.
        nltk.download("punkt", quiet=True)
        tokens = nltk.word_tokenize(text)
        return {"n_tokens": len(tokens), "method": "nltk"}
    except Exception as e:
        if debug:
            print(f"Error using nltk: {e}")
    # If nltk and tiktoken fail, use a simple split-based method.
    return {"n_tokens": len(text.split()), "method": "split"}
class TokenBuffer:
    """Sliding window of text capped at a maximum token count.

    Each update() appends a chunk; once the running token total exceeds
    *max_tokens*, the oldest chunks are evicted whole until back under
    the cap.
    """

    def __init__(self, max_tokens=2048):
        self.max_tokens = max_tokens
        self.buffer = ""          # concatenated text currently held
        self.token_lengths = []   # token count of each chunk, oldest first
        self.token_count = 0      # running total of token_lengths
        self._char_lengths = []   # character length of each chunk, oldest first

    def update(self, text, model_name="gpt-3.5-turbo", debug=False):
        """Append *text* to the buffer and evict oldest chunks while over the cap."""
        new_tokens = count_tokens(text, model_name=model_name, debug=debug)["n_tokens"]
        self.token_count += new_tokens
        self.buffer += text
        self.token_lengths.append(new_tokens)
        self._char_lengths.append(len(text))
        while self.token_count > self.max_tokens and self.token_lengths:
            # Bug fix: the original trimmed by splitting the buffer on
            # spaces `removed_tokens` times, which is only correct when
            # one token == one whitespace-separated word (false for the
            # tiktoken and nltk counters). Track each chunk's character
            # length and slice it off exactly instead.
            self.token_count -= self.token_lengths.pop(0)
            self.buffer = self.buffer[self._char_lengths.pop(0):]

    def get_buffer(self):
        """Return the text currently held in the window."""
        return self.buffer