Async release
This commit is contained in:
11
app/helpers/__init__.py
Normal file
11
app/helpers/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .io import IOHelper
|
||||
from .text_helper import TextHelper
|
||||
from .token_counter import count_tokens
|
||||
from .exercises_helper import ExercisesHelper
|
||||
|
||||
# Names re-exported by the helpers package.
__all__ = [
    "IOHelper",
    "TextHelper",
    "count_tokens",
    "ExercisesHelper",
]
|
||||
195
app/helpers/exercises_helper.py
Normal file
195
app/helpers/exercises_helper.py
Normal file
@@ -0,0 +1,195 @@
|
||||
import queue
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
from wonderwords import RandomWord
|
||||
|
||||
from .text_helper import TextHelper
|
||||
|
||||
|
||||
class ExercisesHelper:
    """Static helpers for assembling exercise data: ids, placeholders and solutions."""

    # Line terminator appended to generated blank texts. This is a literal
    # backslash followed by "n" (two characters), not a newline character.
    _LINE_BREAK = "\\n"

    @staticmethod
    def _placeholder(index):
        """Return the ``{{index}}`` marker used for blanks."""
        return '{{' + str(index) + '}}'

    @staticmethod
    def _replace_first_word(text, word, placeholder):
        """Replace the first whole-word, case-insensitive match of ``word`` in ``text``."""
        pattern = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
        return pattern.sub(placeholder, text, count=1)

    @staticmethod
    def divide_number_into_parts(number, parts):
        """Split ``number`` into ``parts`` integer shares that differ by at most one.

        Returns:
            A ``queue.Queue`` with the shares, larger shares first, or ``None``
            when ``parts`` is not positive or ``number`` is smaller than
            ``parts``.
        """
        # parts <= 0 would otherwise divide by zero or yield an empty queue.
        if parts <= 0 or number < parts:
            return None

        part_size, remaining = divmod(number, parts)

        shares = queue.Queue()
        for i in range(parts):
            # The first ``remaining`` shares absorb the remainder.
            shares.put(part_size + 1 if i < remaining else part_size)
        return shares

    @staticmethod
    def fix_exercise_ids(exercise, start_id):
        """Renumber ``exercise["questions"]`` in place with consecutive string ids.

        The first question gets ``str(start_id)``. Returns the same
        ``exercise`` object.
        """
        for question_id, question in enumerate(exercise["questions"], start=start_id):
            question["id"] = str(question_id)
        return exercise

    @staticmethod
    def replace_first_occurrences_with_placeholders(text: str, words_to_replace: list, start_id):
        """Replace the first occurrence of each word in ``text`` with a numbered placeholder.

        Words are processed in order; the n-th word receives the placeholder
        ``{{start_id + n}}``. Matching is whole-word and case-insensitive.
        """
        for i, word in enumerate(words_to_replace, start=start_id):
            text = ExercisesHelper._replace_first_word(
                text, word, ExercisesHelper._placeholder(i)
            )
        return text

    @staticmethod
    def replace_first_occurrences_with_placeholders_notes(notes: list, words_to_replace: list, start_id):
        """Replace one word per note with a numbered placeholder.

        The i-th note has the first occurrence of ``words_to_replace[i]``
        replaced by ``{{start_id + i}}``.

        Raises:
            IndexError: if there are more notes than words.
        """
        replaced_notes = []
        for i, note in enumerate(notes):
            placeholder = ExercisesHelper._placeholder(start_id + i)
            replaced_notes.append(
                ExercisesHelper._replace_first_word(note, words_to_replace[i], placeholder)
            )
        return replaced_notes

    @staticmethod
    def add_random_words_and_shuffle(word_array, num_random_words):
        """Return ``word_array`` plus ``num_random_words`` random words, shuffled.

        The random words come from ``wonderwords.RandomWord``; the input list
        is not modified.
        """
        random_words_selected = RandomWord().random_words(num_random_words)
        combined_array = word_array + random_words_selected
        random.shuffle(combined_array)
        return combined_array

    @staticmethod
    def fillblanks_build_solutions_array(words, start_id):
        """Build ``{"id", "solution"}`` entries for ``words``, ids starting at ``start_id``."""
        return [
            {"id": str(i), "solution": word}
            for i, word in enumerate(words, start=start_id)
        ]

    @staticmethod
    def remove_excess_questions(questions: list, quantity):
        """Drop up to ``quantity`` questions whose ``solution`` is the string ``'true'``.

        Questions are scanned from the end, so the last matching ones are
        removed first. The relative order of the kept questions is preserved.
        """
        removed = 0
        kept = []
        for item in reversed(questions):
            if removed < quantity and item.get('solution') == 'true':
                removed += 1
            else:
                kept.append(item)
        kept.reverse()
        return kept

    @staticmethod
    def build_write_blanks_text(questions: list, start_id):
        """Concatenate each question text followed by its numbered placeholder.

        Every entry is terminated with a literal backslash-n sequence.
        """
        return "".join(
            q["question"] + ExercisesHelper._placeholder(i) + ExercisesHelper._LINE_BREAK
            for i, q in enumerate(questions, start=start_id)
        )

    @staticmethod
    def build_write_blanks_text_form(form: list, start_id):
        """Blank out one random word in the value part of each ``label: value`` entry.

        For every entry the text after the first ``:`` is taken, a random word
        longer than one character is chosen from it, and its first occurrence
        is replaced with ``{{id}}``.

        Returns:
            A tuple ``(text, replaced_words)``: the rewritten entries, each
            terminated with a literal backslash-n sequence, and the chosen
            words in entry order.

        Raises:
            AttributeError: if an entry contains no ``:``.
            IndexError: if the value part has no word longer than one character.
        """
        lines = []
        replaced_words = []
        for i, entry in enumerate(form, start=start_id):
            placeholder = ExercisesHelper._placeholder(i)
            # Capture everything after the first ':' (leading whitespace skipped).
            match = re.search(r'(?<=:)\s*(.*)', entry)
            original_string = match.group(1)

            # Candidate words: ignore single-letter words.
            candidates = [
                word for word in re.findall(r'\b\w+\b', original_string) if len(word) > 1
            ]
            selected_word = random.choice(candidates)

            replaced_string = ExercisesHelper._replace_first_word(
                original_string, selected_word, placeholder
            )
            # Splice into the matched span only, so identical text elsewhere
            # in the entry (e.g. in the label) is left untouched.
            lines.append(
                entry[:match.start(1)] + replaced_string + entry[match.end(1):]
                + ExercisesHelper._LINE_BREAK
            )
            replaced_words.append(selected_word)
        return "".join(lines), replaced_words

    @staticmethod
    def build_write_blanks_solutions(questions: list, start_id):
        """Build solution entries from each question's ``possible_answers``.

        A single string answer is wrapped in a one-element list; any other
        value is used as is.
        """
        solutions = []
        for i, q in enumerate(questions, start=start_id):
            answers = q["possible_answers"]
            solutions.append(
                {"id": str(i), "solution": [answers] if isinstance(answers, str) else answers}
            )
        return solutions

    @staticmethod
    def build_write_blanks_solutions_listening(words: list, start_id):
        """Build solution entries from ``words``, wrapping plain strings in a list."""
        return [
            {"id": str(i), "solution": [word] if isinstance(word, str) else word}
            for i, word in enumerate(words, start=start_id)
        ]

    @staticmethod
    def answer_word_limit_ok(question):
        """Return True when no option of any solution has more than three words."""
        return all(
            len(option.split()) <= 3
            for solution in question["solutions"]
            for option in solution["solution"]
        )

    @staticmethod
    def assign_letters_to_paragraphs(paragraphs):
        """Label qualifying paragraphs with consecutive uppercase letters.

        ``paragraphs`` is split on blank lines; only chunks for which
        ``TextHelper.has_x_words(chunk, 10)`` is true are kept.

        Raises:
            StopIteration: if more than 26 paragraphs qualify.
        """
        result = []
        letters = iter(string.ascii_uppercase)
        for paragraph in paragraphs.split("\n\n"):
            if TextHelper.has_x_words(paragraph, 10):
                result.append({'paragraph': paragraph.strip(), 'letter': next(letters)})
        return result

    @staticmethod
    def contains_empty_dict(arr):
        """Return True if any element of ``arr`` equals ``{}``."""
        return any(elem == {} for elem in arr)

    @staticmethod
    def fix_writing_overall(overall: float, task_response: dict):
        """Clamp an overall score that falls outside the range of the partial scores.

        If ``overall`` is above the maximum or below the minimum of
        ``task_response`` values, the mean of those values rounded to zero
        decimals is returned; otherwise ``overall`` is returned unchanged.
        An empty ``task_response`` leaves ``overall`` unchanged.
        """
        scores = list(task_response.values())
        if not scores:
            # Nothing to compare against.
            return overall
        if overall > max(scores) or overall < min(scores):
            return round(sum(scores) / len(scores), 0)
        return overall
|
||||
20
app/helpers/io.py
Normal file
20
app/helpers/io.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import datetime
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class IOHelper:
    """File-system housekeeping helpers."""

    @staticmethod
    def delete_files_older_than_one_day(directory: str):
        """Delete regular files in ``directory`` last modified more than one day ago.

        Files whose name contains ``"placeholder"`` are never deleted, and
        sub-directories are not descended into. One line is printed for each
        deleted file.
        """
        current_time = datetime.datetime.now()
        max_age = datetime.timedelta(days=1)

        # The context manager closes the directory iterator deterministically.
        with os.scandir(directory) as entries:
            for entry in entries:
                if not entry.is_file():
                    continue

                file_path = Path(entry)
                if "placeholder" in file_path.name:
                    continue

                file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
                # Compare the full timedelta: ``.days`` truncates, so testing
                # ``days > 1`` would only match files at least two days old.
                if current_time - file_modified_time > max_age:
                    file_path.unlink()
                    print(f"Deleted file: {file_path}")
|
||||
28
app/helpers/text_helper.py
Normal file
28
app/helpers/text_helper.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from nltk.corpus import words
|
||||
|
||||
|
||||
class TextHelper:
    """Heuristics for checking whether text contains English words."""

    # Words counted by ``_has_common_words``.
    _COMMON_WORDS = frozenset(
        {"the", "be", "to", "of", "and", "a", "in", "that", "have", "i"}
    )

    # Lazily-built cache of the NLTK word list. Building the set from the
    # corpus on every call is wasteful, so it is built once on first use.
    _english_vocabulary = None

    @classmethod
    def _vocabulary(cls):
        """Return the cached set of NLTK corpus words, loading it on first use."""
        if cls._english_vocabulary is None:
            cls._english_vocabulary = frozenset(words.words())
        return cls._english_vocabulary

    @classmethod
    def has_words(cls, text: str):
        """Return True if ``text`` passes the common-word check and has a corpus word.

        Tokens are whitespace-separated and lower-cased before lookup.
        """
        if not cls._has_common_words(text):
            return False
        english_words = cls._vocabulary()
        return any(word.lower() in english_words for word in text.split())

    @classmethod
    def has_x_words(cls, text: str, quantity):
        """Return True if ``text`` passes the common-word check and has at least
        ``quantity`` tokens found in the corpus word list.

        Tokens are whitespace-separated and lower-cased before lookup.
        """
        if not cls._has_common_words(text):
            return False
        english_words = cls._vocabulary()
        english_word_count = sum(
            1 for word in text.split() if word.lower() in english_words
        )
        return english_word_count >= quantity

    @staticmethod
    def _has_common_words(text: str):
        """Return True if at least 10 tokens of ``text`` are in the common-word set.

        Tokens are whitespace-separated and lower-cased; repeated tokens count
        each time they appear.
        """
        common_count = sum(
            1 for word in text.split() if word.lower() in TextHelper._COMMON_WORDS
        )
        return common_count >= 10
|
||||
89
app/helpers/token_counter.py
Normal file
89
app/helpers/token_counter.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# This is a work in progress. There are still bugs. Once it is production-ready this will become a full repo.
|
||||
|
||||
import tiktoken
|
||||
import nltk
|
||||
|
||||
|
||||
def count_tokens(text, model_name="gpt-3.5-turbo", debug=False):
    """
    Count the tokens in ``text`` locally, without calling the OpenAI API.

    Three strategies are attempted in order; the first that succeeds wins:
        1. tiktoken - uses the encoding registered for ``model_name``.
        2. nltk     - ``nltk.word_tokenize``.
        3. split    - plain whitespace splitting, used as the last resort.

    Example:
        result = count_tokens("Your text here", model_name="gpt-3.5-turbo", debug=True)
        print(result)

    Required libraries:
        - tiktoken ('pip install tiktoken')
        - nltk ('pip install nltk')

    Parameters:
        text : str
            Text whose tokens are counted.
        model_name : str, optional
            OpenAI model used to select the tiktoken encoding
            (default: "gpt-3.5-turbo").
        debug : bool, optional
            When True, print the error raised by a strategy that failed
            (default: False).

    Returns:
        dict
            ``{"n_tokens": <int>, "method": <"tiktoken" | "nltk" | "split">}``.
    """
    # Strategy 1: tiktoken. Any failure falls through to the next strategy.
    try:
        token_ids = tiktoken.encoding_for_model(model_name).encode(text)
    except Exception as e:
        if debug:
            print(f"Error using tiktoken: {e}")
    else:
        return {"n_tokens": len(token_ids), "method": "tiktoken"}

    # Strategy 2: nltk. Per the original note, nltk.download("punkt") was
    # moved to server.py's @asynccontextmanager.
    try:
        nltk_tokens = nltk.word_tokenize(text)
    except Exception as e:
        if debug:
            print(f"Error using nltk: {e}")
    else:
        return {"n_tokens": len(nltk_tokens), "method": "nltk"}

    # Strategy 3: both libraries failed, fall back to whitespace splitting.
    return {"n_tokens": len(text.split()), "method": "split"}
|
||||
|
||||
|
||||
class TokenBuffer:
    """Rolling text buffer that evicts the oldest chunks once a token budget is exceeded."""

    def __init__(self, max_tokens=2048):
        """Create an empty buffer holding at most ``max_tokens`` tokens."""
        self.max_tokens = max_tokens
        self.buffer = ""
        # Token count of each chunk still in the buffer, oldest first.
        self.token_lengths = []
        self.token_count = 0
        # Character length of each chunk, parallel to ``token_lengths``.
        self._chunk_sizes = []

    def update(self, text, model_name="gpt-3.5-turbo", debug=False):
        """Append ``text`` and evict the oldest chunks until the budget is met.

        Token counts come from ``count_tokens``. Eviction is per chunk: each
        evicted chunk's exact text is removed from the front of the buffer.
        A single chunk larger than ``max_tokens`` is therefore evicted
        entirely.
        """
        new_tokens = count_tokens(text, model_name=model_name, debug=debug)["n_tokens"]
        self.token_count += new_tokens
        self.buffer += text
        self.token_lengths.append(new_tokens)
        self._chunk_sizes.append(len(text))

        # The emptiness guard prevents popping from an empty list when
        # ``max_tokens`` is negative.
        while self.token_count > self.max_tokens and self.token_lengths:
            self.token_count -= self.token_lengths.pop(0)
            # Drop the evicted chunk by its recorded length. Splitting on
            # spaces would assume one token per space-separated word and
            # could cut into a newer chunk.
            self.buffer = self.buffer[self._chunk_sizes.pop(0):]

    def get_buffer(self):
        """Return the text currently held in the buffer."""
        return self.buffer
|
||||
Reference in New Issue
Block a user