Api for writing task 2 v1.
This commit is contained in:
17
helper/generate_jwt.py
Normal file
17
helper/generate_jwt.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import jwt
import os

from dotenv import load_dotenv

# Pull JWT_SECRET_KEY (and any other settings) from the local .env file.
load_dotenv()

# Payload (claims) embedded in the token; 'sub' identifies the subject.
payload = {'sub': 'test'}

# Signing key from the environment.  Fail fast with an actionable message
# instead of letting jwt.encode() raise a confusing error on a None key.
secret_key = os.getenv("JWT_SECRET_KEY")
if not secret_key:
    raise RuntimeError(
        "JWT_SECRET_KEY is not set; generate one with helper/generate_jwt_secret.py"
    )

# Sign the payload with HMAC-SHA256 and print the compact JWT string.
jwt_token = jwt.encode(payload, secret_key, algorithm='HS256')

print(jwt_token)
|
||||
5
helper/generate_jwt_secret.py
Normal file
5
helper/generate_jwt_secret.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import secrets

# 256 bits of CSPRNG randomness rendered as 64 hex characters --
# a suitable signing key for HS256 JWTs.
jwt_secret_key = secrets.token_hex(32)

print(jwt_secret_key)
|
||||
58
helper/process_response.py
Normal file
58
helper/process_response.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import json
import openai
import os

from dotenv import load_dotenv

# Load OPENAI_API_KEY from the local .env file into the environment.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Completion-request knobs (see make_openai_call).
# MAX_TOKENS is presumably the gpt-3.5-turbo context window -- the call
# budgets max_tokens = MAX_TOKENS - prompt tokens - 500 safety margin.
MAX_TOKENS = 4097
# Low temperature for mostly deterministic output.
TEMPERATURE = 0.1
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5

# Number of automatic retries when the model's reply fails validation
# (invalid JSON or missing required fields).
TRY_LIMIT = 1

# Module-level retry counter, mutated by make_openai_call across its
# recursive calls.  NOTE(review): shared mutable state -- not safe if this
# module is ever called from multiple threads.
try_count = 0
|
||||
def process_response(input_string):
    """Parse a model reply that is expected to contain a JSON object.

    The string is parsed as-is first; only if that fails is the legacy
    clean-up applied (single -> double quotes, blank lines collapsed) and
    the parse retried.  Trying the raw string first matters because the
    blanket quote substitution corrupts valid JSON whose values contain
    apostrophes.

    Parameters
    ----------
    input_string : str
        Raw message content returned by the model.

    Returns
    -------
    dict
        The decoded JSON object, or ``{}`` when nothing parseable is found.
    """
    try:
        # Happy path: the model returned valid JSON already.
        return json.loads(input_string)
    except json.JSONDecodeError:
        pass

    # Fallback: repair common pseudo-JSON (single-quoted keys/values,
    # paragraph breaks inside the payload) and try once more.
    parsed_string = input_string.replace("'", "\"").replace("\n\n", " ")
    try:
        return json.loads(parsed_string)
    except json.JSONDecodeError:
        print("Invalid JSON string!")
        return {}
|
||||
|
||||
|
||||
def check_fields(obj):
    """Return True when *obj* carries every field the grader requires."""
    required = ("overall", "task_response", "comment")
    return all(field in obj for field in required)
|
||||
|
||||
|
||||
def make_openai_call(messages, token_count):
    """Call the chat model and return its (ideally JSON) reply.

    Parameters
    ----------
    messages : list
        Chat messages in the OpenAI chat-completion format.
    token_count : int
        Tokens already consumed by the prompt; the completion budget is
        MAX_TOKENS - token_count - 500 (500 kept as a safety margin).

    Returns
    -------
    dict or str
        The parsed, validated JSON dict on success; once TRY_LIMIT is
        exhausted the raw message string is returned instead -- callers
        must handle both types.
    """
    global try_count
    # NOTE(review): openai.ChatCompletion is the pre-1.0 OpenAI SDK API;
    # confirm the pinned openai package version before upgrading.
    result = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        max_tokens=int(MAX_TOKENS - token_count - 500),
        temperature=float(TEMPERATURE),
        top_p=float(TOP_P),
        frequency_penalty=float(FREQUENCY_PENALTY),
        messages=messages
    )
    processed_response = process_response(result["choices"][0]["message"]["content"])
    if check_fields(processed_response) is False and try_count < TRY_LIMIT:
        # Malformed reply and retries remain: recurse with the same prompt.
        try_count = try_count + 1
        return make_openai_call(messages, token_count)
    elif try_count >= TRY_LIMIT:
        # Out of retries: give up and hand back the raw content string.
        # NOTE(review): this branch is also reached when the reply is VALID
        # but try_count already equals TRY_LIMIT -- confirm that returning
        # the raw string (not the parsed dict) is intended in that case.
        try_count = 0
        return result["choices"][0]["message"]["content"]
    else:
        # Valid reply: reset the counter and return the parsed dict.
        try_count = 0
        return processed_response
|
||||
|
||||
|
||||
89
helper/token_counter.py
Normal file
89
helper/token_counter.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# This is a work in progress. There are still bugs. Once it is production-ready this will become a full repo.
|
||||
import os
|
||||
|
||||
|
||||
def count_tokens(text, model_name="gpt-3.5-turbo", debug=False):
    """Estimate how many tokens *text* contains, without calling the API.

    Three strategies are attempted, in decreasing order of fidelity:

    1. ``tiktoken`` -- the same BPE encoding the OpenAI API uses.
    2. ``nltk``     -- word tokenization via the Punkt models.
    3. ``str.split`` -- whitespace splitting as a last-resort fallback.

    Usage:
    ------
    result = count_tokens("Your text here", model_name="gpt-3.5-turbo", debug=True)

    Required libraries (optional -- the fallback needs neither):
    -------------------
    - tiktoken: 'pip install tiktoken'
    - nltk: 'pip install nltk'

    Parameters
    ----------
    text : str
        The text whose tokens should be counted.
    model_name : str, optional
        OpenAI model whose encoding tiktoken should use
        (default: "gpt-3.5-turbo").
    debug : bool, optional
        When True, print the error that made a strategy fall through
        (default: False).

    Returns
    -------
    dict
        ``{"n_tokens": <int>, "method": "tiktoken" | "nltk" | "split"}``
    """
    # Strategy 1: tiktoken -- accurate counts matching the API.
    try:
        import tiktoken
        encoder = tiktoken.encoding_for_model(model_name)
        return {"n_tokens": len(encoder.encode(text)), "method": "tiktoken"}
    except Exception as exc:
        if debug:
            print(f"Error using tiktoken: {exc}")

    # Strategy 2: nltk word tokenization.
    # NOTE: nltk.download("punkt") runs on every call; it is a no-op once
    # the models are cached, but may hit the network the first time.
    try:
        import nltk
        nltk.download("punkt")
        return {"n_tokens": len(nltk.word_tokenize(text)), "method": "nltk"}
    except Exception as exc:
        if debug:
            print(f"Error using nltk: {exc}")

    # Strategy 3: naive whitespace split -- always succeeds.
    return {"n_tokens": len(text.split()), "method": "split"}
|
||||
|
||||
|
||||
class TokenBuffer:
    """Sliding-window text buffer bounded by a token budget.

    Appended chunks are kept while their cumulative token count fits in
    ``max_tokens``; once the budget is exceeded, the OLDEST chunks are
    evicted whole.  Evicting whole chunks fixes the previous trimming
    strategy (``buffer.split(" ", removed_tokens)[-1]``), which assumed a
    token equals a whitespace-separated word and silently corrupted the
    buffer whenever the token counter (tiktoken/nltk) disagreed with a
    plain whitespace split, or when chunks contained no spaces.
    """

    def __init__(self, max_tokens=2048):
        # Token budget for the whole buffer.
        self.max_tokens = max_tokens
        # Concatenation of the surviving chunks (kept for get_buffer()).
        self.buffer = ""
        # Text chunks in arrival order, aligned index-for-index with
        # token_lengths.
        self.chunks = []
        # Token count of each surviving chunk, oldest first.
        self.token_lengths = []
        # Running sum of token_lengths.
        self.token_count = 0

    def update(self, text, model_name="gpt-3.5-turbo", debug=False):
        """Append *text*, then evict oldest chunks until within budget."""
        new_tokens = count_tokens(text, model_name=model_name, debug=debug)["n_tokens"]
        self.chunks.append(text)
        self.token_lengths.append(new_tokens)
        self.token_count += new_tokens

        # Drop whole chunks, oldest first, until the budget is respected.
        # (A single oversized chunk is also dropped once a newer one
        # pushes the total over budget.)
        while self.token_count > self.max_tokens and self.chunks:
            self.token_count -= self.token_lengths.pop(0)
            self.chunks.pop(0)

        self.buffer = "".join(self.chunks)

    def get_buffer(self):
        """Return the currently buffered text."""
        return self.buffer
|
||||
Reference in New Issue
Block a user