Fastapi refactor update

This commit is contained in:
Carlos-Mesquita
2024-10-01 19:31:01 +01:00
parent f92a803d96
commit 2a032c5aba
132 changed files with 22856 additions and 10309 deletions

View File

@@ -1,95 +1,114 @@
import datetime
from pathlib import Path
import base64
import io
import os
import shutil
import subprocess
from typing import Optional
import numpy as np
import pypandoc
from PIL import Image
import aiofiles
class FileHelper:
@staticmethod
def delete_files_older_than_one_day(directory: str):
current_time = datetime.datetime.now()
for entry in os.scandir(directory):
if entry.is_file():
file_path = Path(entry)
file_name = file_path.name
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
time_difference = current_time - file_modified_time
if time_difference.days > 1 and "placeholder" not in file_name:
file_path.unlink()
print(f"Deleted file: {file_path}")
# Supposedly pandoc covers a wide range of file extensions only tested with docx
@staticmethod
def convert_file_to_pdf(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
'-V', 'geometry:paperwidth=5.5in',
'-V', 'geometry:paperheight=8.5in',
'-V', 'geometry:margin=0.5in',
'-V', 'pagestyle=empty'
])
@staticmethod
def convert_file_to_html(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'html', outputfile=output_path)
@staticmethod
def pdf_to_png(path_id: str):
to_png = f"pdftoppm -png exercises.pdf page"
result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
if result.returncode != 0:
raise Exception(
f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
@staticmethod
def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
with Image.open(io.BytesIO(image_bytes)) as img:
img_gray = img.convert('L')
img_array = np.array(img_gray)
non_white_pixels = np.sum(img_array < 255)
return non_white_pixels <= image_threshold
@classmethod
async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
async with aiofiles.open(image_path, "rb") as image_file:
image_bytes = await image_file.read()
if cls.is_page_blank(image_bytes, image_threshold):
return None
return base64.b64encode(image_bytes).decode('utf-8')
@classmethod
def b64_pngs(cls, path_id: str, files: list[str]):
png_messages = []
for filename in files:
b64_string = cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
if b64_string:
png_messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{b64_string}"
}
})
return png_messages
@staticmethod
def remove_directory(path):
try:
if os.path.exists(path):
if os.path.isdir(path):
shutil.rmtree(path)
except Exception as e:
print(f"An error occurred while trying to remove {path}: {str(e)}")
import base64
import io
import os
import shutil
import subprocess
import uuid
import datetime
from pathlib import Path
from typing import Optional, Tuple
import aiofiles
import numpy as np
import pypandoc
from PIL import Image
class FileHelper:
@staticmethod
def delete_files_older_than_one_day(directory: str):
current_time = datetime.datetime.now()
for entry in os.scandir(directory):
if entry.is_file():
file_path = Path(entry)
file_name = file_path.name
file_modified_time = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
time_difference = current_time - file_modified_time
if time_difference.days > 1 and "placeholder" not in file_name:
file_path.unlink()
print(f"Deleted file: {file_path}")
# Supposedly pandoc covers a wide range of file extensions only tested with docx
@staticmethod
def convert_file_to_pdf(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
'-V', 'geometry:paperwidth=5.5in',
'-V', 'geometry:paperheight=8.5in',
'-V', 'geometry:margin=0.5in',
'-V', 'pagestyle=empty'
])
@staticmethod
def convert_file_to_html(input_path: str, output_path: str):
pypandoc.convert_file(input_path, 'html', outputfile=output_path)
@staticmethod
def pdf_to_png(path_id: str):
to_png = f"pdftoppm -png exercises.pdf page"
result = subprocess.run(to_png, shell=True, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
if result.returncode != 0:
raise Exception(
f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")
@staticmethod
def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
with Image.open(io.BytesIO(image_bytes)) as img:
img_gray = img.convert('L')
img_array = np.array(img_gray)
non_white_pixels = np.sum(img_array < 255)
return non_white_pixels <= image_threshold
@classmethod
async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
async with aiofiles.open(image_path, "rb") as image_file:
image_bytes = await image_file.read()
if cls.is_page_blank(image_bytes, image_threshold):
return None
return base64.b64encode(image_bytes).decode('utf-8')
@classmethod
async def b64_pngs(cls, path_id: str, files: list[str]):
png_messages = []
for filename in files:
b64_string = await cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
if b64_string:
png_messages.append({
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{b64_string}"
}
})
return png_messages
@staticmethod
def remove_directory(path):
try:
if os.path.exists(path):
if os.path.isdir(path):
shutil.rmtree(path)
except Exception as e:
print(f"An error occurred while trying to remove {path}: {str(e)}")
@staticmethod
def remove_file(file_path):
try:
if os.path.exists(file_path):
if os.path.isfile(file_path):
os.remove(file_path)
except Exception as e:
print(f"An error occurred while trying to remove the file {file_path}: {str(e)}")
@staticmethod
def save_upload(file) -> Tuple[str, str]:
ext = file.filename.split('.')[-1]
path_id = str(uuid.uuid4())
os.makedirs(f'./tmp/{path_id}', exist_ok=True)
tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
file.save(tmp_filename)
return ext, path_id