115 lines
4.0 KiB
Python
115 lines
4.0 KiB
Python
import base64
|
|
import io
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import uuid
|
|
import datetime
|
|
from pathlib import Path
|
|
from typing import Optional, Tuple
|
|
|
|
import aiofiles
|
|
import numpy as np
|
|
import pypandoc
|
|
from PIL import Image
|
|
|
|
|
|
class FileHelper:
    """Stateless helpers for uploaded files and their conversions.

    Per-upload working files live under ``./tmp/<path_id>/``, resolved
    relative to the process's current working directory.
    """

    @staticmethod
    def delete_files_older_than_one_day(directory: str):
        """Delete regular files in *directory* last modified over 24 hours ago.

        Only direct children are considered; sub-directories are left alone.
        Files whose name contains ``"placeholder"`` are never deleted.

        Args:
            directory: Path of the directory to clean up.
        """
        one_day_seconds = datetime.timedelta(days=1).total_seconds()
        # Compare raw timestamps so that a file aged between 24h and 48h is
        # removed too (``timedelta.days > 1`` only fires at two full days).
        cutoff = datetime.datetime.now().timestamp() - one_day_seconds

        # The context manager closes the directory iterator deterministically.
        with os.scandir(directory) as entries:
            for entry in entries:
                if not entry.is_file() or "placeholder" in entry.name:
                    continue
                file_path = Path(entry.path)
                if file_path.stat().st_mtime < cutoff:
                    file_path.unlink()
                    print(f"Deleted file: {file_path}")

    @staticmethod
    def convert_file_to_pdf(input_path: str, output_path: str):
        """Convert *input_path* to a PDF at *output_path* using pandoc.

        Pandoc supposedly handles a wide range of input formats, but this has
        only been tested with docx.

        The page is laid out as 5.5in x 8.5in with 0.5in margins, and
        ``pagestyle=empty`` is passed to the PDF engine.

        Args:
            input_path: Path of the document to convert.
            output_path: Path the PDF is written to.
        """
        pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
            '-V', 'geometry:paperwidth=5.5in',
            '-V', 'geometry:paperheight=8.5in',
            '-V', 'geometry:margin=0.5in',
            '-V', 'pagestyle=empty',
        ])

    @staticmethod
    def convert_file_to_html(input_path: str, output_path: str):
        """Convert *input_path* to HTML at *output_path* using pandoc.

        Args:
            input_path: Path of the document to convert.
            output_path: Path the HTML is written to.
        """
        pypandoc.convert_file(input_path, 'html', outputfile=output_path)

    @staticmethod
    def pdf_to_png(path_id: str):
        """Render ``exercises.pdf`` in ``./tmp/<path_id>`` to PNG files.

        Runs ``pdftoppm -png exercises.pdf page`` inside the upload's working
        directory.

        Args:
            path_id: Identifier of the upload's working directory.

        Raises:
            Exception: If ``pdftoppm`` is not installed or exits non-zero.
        """
        command = ["pdftoppm", "-png", "exercises.pdf", "page"]
        to_png = " ".join(command)  # human-readable form for error messages

        # Without a shell a missing binary would surface as FileNotFoundError;
        # check up front so callers keep getting the same generic failure.
        if shutil.which(command[0]) is None:
            raise Exception(
                f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> "
                f"```cmd {command[0]}: command not found```")

        # Argument list + no shell: nothing is ever interpreted by /bin/sh.
        result = subprocess.run(command, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(
                f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")

    @staticmethod
    def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
        """Return whether an encoded image is (almost) entirely white.

        The image is converted to 8-bit grayscale and every pixel below 255
        counts as non-white.

        Args:
            image_bytes: Encoded image data readable by Pillow.
            image_threshold: Maximum number of non-white pixels still
                considered blank.

        Returns:
            True if the non-white pixel count is at most *image_threshold*.
        """
        with Image.open(io.BytesIO(image_bytes)) as img:
            grayscale = np.array(img.convert('L'))

        non_white_pixels = int(np.count_nonzero(grayscale < 255))
        # Plain Python bool rather than numpy.bool_, matching the annotation.
        return bool(non_white_pixels <= image_threshold)

    @classmethod
    async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
        """Read an image file and return it base64-encoded, or None if blank.

        Args:
            image_path: Path of the image file to read.
            image_threshold: Forwarded to :meth:`is_page_blank`.

        Returns:
            The file's bytes as a base64 UTF-8 string, or None when
            :meth:`is_page_blank` reports the image as blank.
        """
        async with aiofiles.open(image_path, "rb") as image_file:
            image_bytes = await image_file.read()

        if cls.is_page_blank(image_bytes, image_threshold):
            return None

        return base64.b64encode(image_bytes).decode('utf-8')

    @classmethod
    async def b64_pngs(cls, path_id: str, files: list[str]):
        """Build ``image_url`` message entries for the non-blank PNG files.

        Args:
            path_id: Identifier of the upload's working directory.
            files: File names inside ``./tmp/<path_id>`` to encode, in order.

        Returns:
            A list of dicts of the form
            ``{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}``,
            one per file that is not blank.
        """
        png_messages = []
        for filename in files:
            b64_string = await cls._encode_image(os.path.join(f'./tmp/{path_id}', filename))
            if b64_string:
                png_messages.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{b64_string}"
                    }
                })
        return png_messages

    @staticmethod
    def remove_directory(path):
        """Best-effort recursive removal of the directory at *path*.

        Does nothing when *path* is missing or is not a directory. Errors are
        reported on stdout instead of being raised.

        Args:
            path: Directory to remove.
        """
        try:
            # isdir() is already False for a missing path.
            if os.path.isdir(path):
                shutil.rmtree(path)
        except Exception as e:  # deliberate: cleanup must never raise
            print(f"An error occurred while trying to remove {path}: {str(e)}")

    @staticmethod
    def remove_file(file_path):
        """Best-effort removal of the regular file at *file_path*.

        Does nothing when *file_path* is missing or is not a regular file.
        Errors are reported on stdout instead of being raised.

        Args:
            file_path: File to remove.
        """
        try:
            # isfile() is already False for a missing path.
            if os.path.isfile(file_path):
                os.remove(file_path)
        except Exception as e:  # deliberate: cleanup must never raise
            print(f"An error occurred while trying to remove the file {file_path}: {str(e)}")

    @staticmethod
    def save_upload(file) -> Tuple[str, str]:
        """Store an uploaded file in a fresh ``./tmp/<uuid>/`` directory.

        The file is written as ``uploaded.<ext>``, where ``<ext>`` is the text
        after the last ``.`` of ``file.filename`` (the whole name when it has
        no dot), restricted to alphanumeric characters.

        Args:
            file: Upload object exposing a ``filename`` attribute and a
                ``save(path)`` method.

        Returns:
            A ``(ext, path_id)`` tuple: the sanitised extension and the
            generated directory identifier.
        """
        raw_ext = file.filename.split('.')[-1]
        # The filename is client-supplied: keep path separators and other
        # special characters out of the path built below.
        ext = "".join(ch for ch in raw_ext if ch.isalnum())

        path_id = str(uuid.uuid4())
        os.makedirs(f'./tmp/{path_id}', exist_ok=True)

        tmp_filename = f'./tmp/{path_id}/uploaded.{ext}'
        file.save(tmp_filename)
        return ext, path_id