API for writing task 2, v1.

This commit is contained in:
Cristiano Ferreira
2023-06-19 23:38:47 +01:00
parent adb07a56ff
commit 48a1197d56
14 changed files with 281 additions and 329 deletions

17
helper/generate_jwt.py Normal file
View File

@@ -0,0 +1,17 @@
import jwt
import os
from dotenv import load_dotenv

# Load environment variables (JWT_SECRET_KEY) from a local .env file.
load_dotenv()

# Payload (claims) to embed in the token.
payload = {'sub': 'test'}

# Signing key; fail fast with a clear message instead of letting
# jwt.encode raise an opaque error when the key is missing.
secret_key = os.getenv("JWT_SECRET_KEY")
if not secret_key:
    raise SystemExit("JWT_SECRET_KEY is not set; add it to your .env file.")

# Sign the payload with HMAC-SHA256 and print the resulting token.
jwt_token = jwt.encode(payload, secret_key, algorithm='HS256')
print(jwt_token)

View File

@@ -0,0 +1,5 @@
import secrets

# Print a cryptographically strong 256-bit key, hex-encoded (64 chars),
# suitable for use as the JWT signing secret.
print(secrets.token_hex(32))

View File

@@ -0,0 +1,58 @@
import json
import openai
import os
from dotenv import load_dotenv

# Load OPENAI_API_KEY from a local .env file and configure the client.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Context-window size for gpt-3.5-turbo; used to size max_tokens per call.
MAX_TOKENS = 4097
# Sampling parameters: low temperature for near-deterministic grading.
TEMPERATURE = 0.1
TOP_P = 0.9
FREQUENCY_PENALTY = 0.5
# Retry budget for responses that come back missing required fields.
TRY_LIMIT = 1
# Mutable module-level retry counter shared by make_openai_call's recursion.
try_count = 0
def process_response(input_string):
    """Parse a model response that should contain a JSON object.

    Tries to parse *input_string* as-is first, so valid JSON that happens
    to contain apostrophes is not corrupted (the original applied a
    blanket single-to-double-quote replacement, which broke such input).
    Only if that fails, apply the legacy fix-ups — single quotes to
    double quotes and blank lines collapsed — and retry.

    Returns the parsed object, or an empty dict if parsing fails.
    """
    try:
        return json.loads(input_string)
    except json.JSONDecodeError:
        pass
    # Fallback: the model sometimes emits pseudo-JSON with single quotes.
    parsed_string = input_string.replace("'", "\"").replace("\n\n", " ")
    try:
        return json.loads(parsed_string)
    except json.JSONDecodeError:
        print("Invalid JSON string!")
        return {}
def check_fields(obj):
    """Return True when *obj* carries every key the grading output needs."""
    required = ("overall", "task_response", "comment")
    return all(key in obj for key in required)
def make_openai_call(messages, token_count):
    """Call the chat completion API and return a parsed grading dict.

    Retries up to TRY_LIMIT times when the response is missing required
    fields; once retries are exhausted, returns the raw response text so
    the caller can inspect it.

    Parameters
    ----------
    messages : list
        Chat messages to send to the model.
    token_count : int
        Tokens already consumed by *messages*; used to size max_tokens.
    """
    global try_count
    result = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # Reserve a 500-token safety margin below the model's context limit.
        max_tokens=int(MAX_TOKENS - token_count - 500),
        temperature=float(TEMPERATURE),
        top_p=float(TOP_P),
        frequency_penalty=float(FREQUENCY_PENALTY),
        messages=messages
    )
    content = result["choices"][0]["message"]["content"]
    processed_response = process_response(content)
    # Bug fix: check validity FIRST. The original tested the retry budget
    # before validity, so a *successful* retry whose try_count had already
    # reached TRY_LIMIT returned the raw string instead of the parsed dict.
    if check_fields(processed_response):
        try_count = 0
        return processed_response
    if try_count < TRY_LIMIT:
        try_count += 1
        return make_openai_call(messages, token_count)
    # Retry budget exhausted: hand back the raw text for inspection.
    try_count = 0
    return content

89
helper/token_counter.py Normal file
View File

@@ -0,0 +1,89 @@
# This is a work in progress. There are still bugs. Once it is production-ready this will become a full repo.
import os
def count_tokens(text, model_name="gpt-3.5-turbo", debug=False):
    """
    Count the number of tokens in a given text string without using the OpenAI API.
    This function tries three methods in the following order:
    1. tiktoken (preferred): Accurate token counting similar to the OpenAI API.
    2. nltk: Token counting using the Natural Language Toolkit library.
    3. split: Simple whitespace-based token counting as a fallback.
    Usage:
    ------
    text = "Your text here"
    result = count_tokens(text, model_name="gpt-3.5-turbo", debug=True)
    print(result)
    Required libraries:
    -------------------
    - tiktoken: Install with 'pip install tiktoken'
    - nltk: Install with 'pip install nltk'
    Parameters:
    -----------
    text : str
        The text string for which you want to count tokens.
    model_name : str, optional
        The OpenAI model for which you want to count tokens (default: "gpt-3.5-turbo").
    debug : bool, optional
        Set to True to print error messages (default: False).
    Returns:
    --------
    result : dict
        A dictionary containing the number of tokens and the method used for counting.
    """
    # Try using tiktoken (matches the tokenization the OpenAI API uses).
    try:
        import tiktoken
        encoding = tiktoken.encoding_for_model(model_name)
        return {"n_tokens": len(encoding.encode(text)), "method": "tiktoken"}
    except Exception as e:
        if debug:
            print(f"Error using tiktoken: {e}")
    # Try using nltk.
    try:
        import nltk
        # quiet=True: the original printed the full download report on
        # every single call, even when punkt was already present.
        nltk.download("punkt", quiet=True)
        tokens = nltk.word_tokenize(text)
        return {"n_tokens": len(tokens), "method": "nltk"}
    except Exception as e:
        if debug:
            print(f"Error using nltk: {e}")
    # If nltk and tiktoken fail, use a simple split-based method.
    return {"n_tokens": len(text.split()), "method": "split"}
class TokenBuffer:
    """Sliding window of text capped at a maximum token count.

    Each update() appends a chunk; once the running token total exceeds
    *max_tokens*, the oldest chunks are evicted whole until back under
    the cap.
    """

    def __init__(self, max_tokens=2048):
        self.max_tokens = max_tokens
        self.buffer = ""          # concatenated text currently held
        self.token_lengths = []   # token count of each chunk, oldest first
        self.token_count = 0      # running total of token_lengths
        self._char_lengths = []   # character length of each chunk, oldest first

    def update(self, text, model_name="gpt-3.5-turbo", debug=False):
        """Append *text* to the buffer and evict oldest chunks while over the cap."""
        new_tokens = count_tokens(text, model_name=model_name, debug=debug)["n_tokens"]
        self.token_count += new_tokens
        self.buffer += text
        self.token_lengths.append(new_tokens)
        self._char_lengths.append(len(text))
        while self.token_count > self.max_tokens and self.token_lengths:
            # Bug fix: the original trimmed by splitting the buffer on
            # spaces `removed_tokens` times, which is only correct when
            # one token == one whitespace-separated word (false for the
            # tiktoken and nltk counters). Track each chunk's character
            # length and slice it off exactly instead.
            self.token_count -= self.token_lengths.pop(0)
            self.buffer = self.buffer[self._char_lengths.pop(0):]

    def get_buffer(self):
        """Return the text currently held in the window."""
        return self.buffer