import base64
import datetime
import io
import os
import shutil
import subprocess
import uuid
from pathlib import Path
from typing import Optional, Tuple

import aiofiles
import numpy as np
import pypandoc
from PIL import Image
from fastapi import UploadFile


class FileHelper:
    """Stateless helpers for temp-file housekeeping, document conversion and image encoding.

    Several methods work inside a per-request directory ``./tmp/<path_id>``
    relative to the current working directory.
    """

    @staticmethod
    def delete_files_older_than_one_day(directory: str):
        """Delete regular files in ``directory`` last modified more than 24 hours ago.

        Files whose name contains ``"placeholder"`` are always kept.
        Sub-directories are neither removed nor descended into.

        Args:
            directory: Directory to scan (non-recursive).
        """
        # Compare against a full timedelta: ``timedelta.days > 1`` would only
        # become true after two whole days, not one.
        cutoff = datetime.datetime.now() - datetime.timedelta(days=1)
        # The context manager closes the scandir iterator even if unlink() fails.
        with os.scandir(directory) as entries:
            for entry in entries:
                if not entry.is_file():
                    continue
                file_path = Path(entry)
                if "placeholder" in file_path.name:
                    continue
                modified_at = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
                if modified_at < cutoff:
                    file_path.unlink()
                    print(f"Deleted file: {file_path}")

    @staticmethod
    def convert_file_to_pdf(input_path: str, output_path: str):
        """Convert ``input_path`` to a PDF at ``output_path`` using pandoc.

        The page geometry is fixed to 5.5in x 8.5in with 0.5in margins and an
        empty page style.

        NOTE: pandoc supposedly covers a wide range of input formats, but this
        has only been tested with docx.
        """
        pypandoc.convert_file(
            input_path,
            'pdf',
            outputfile=output_path,
            extra_args=[
                '-V', 'geometry:paperwidth=5.5in',
                '-V', 'geometry:paperheight=8.5in',
                '-V', 'geometry:margin=0.5in',
                '-V', 'pagestyle=empty',
            ],
        )

    @staticmethod
    def convert_file_to_html(input_path: str, output_path: str):
        """Convert ``input_path`` to HTML at ``output_path`` using pandoc."""
        pypandoc.convert_file(input_path, 'html', outputfile=output_path)

    @staticmethod
    def pdf_to_png(path_id: str):
        """Run ``pdftoppm`` on ``./tmp/<path_id>/exercises.pdf``.

        The command is executed inside ``./tmp/<path_id>`` with the output
        prefix ``page``.

        Args:
            path_id: Name of the working directory under ``./tmp``.

        Raises:
            Exception: If ``pdftoppm`` exits with a non-zero status; the
                message carries the command and its stderr.
            FileNotFoundError: If the ``pdftoppm`` executable or the working
                directory does not exist.
        """
        # Argument list without a shell: nothing here needs shell parsing.
        command = ["pdftoppm", "-png", "exercises.pdf", "page"]
        result = subprocess.run(
            command,
            cwd=f'./tmp/{path_id}',
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            to_png = " ".join(command)
            raise Exception(
                f"Couldn't convert pdf to png. "
                f"Failed to run command '{to_png}' -> ```cmd {result.stderr}```"
            )

    @staticmethod
    def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
        """Return True when an image has at most ``image_threshold`` non-white pixels.

        The image is converted to 8-bit grayscale; every pixel with a value
        below 255 counts as non-white.

        Args:
            image_bytes: Encoded image data readable by PIL.
            image_threshold: Maximum number of non-white pixels still
                considered blank.
        """
        with Image.open(io.BytesIO(image_bytes)) as img:
            gray_pixels = np.array(img.convert('L'))
        non_white_pixels = np.sum(gray_pixels < 255)
        # Cast so callers get a plain bool rather than numpy.bool_.
        return bool(non_white_pixels <= image_threshold)

    @classmethod
    async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
        """Read an image file and return it base64-encoded, or None if the page is blank.

        Args:
            image_path: Path of the image file to read.
            image_threshold: Forwarded to :meth:`is_page_blank`.
        """
        async with aiofiles.open(image_path, "rb") as image_file:
            image_bytes = await image_file.read()
        if cls.is_page_blank(image_bytes, image_threshold):
            return None
        return base64.b64encode(image_bytes).decode('utf-8')

    @classmethod
    async def b64_pngs(cls, path_id: str, files: list[str]):
        """Build ``image_url`` message dicts for the non-blank PNGs in ``./tmp/<path_id>``.

        Args:
            path_id: Name of the working directory under ``./tmp``.
            files: File names inside that directory, processed in order.

        Returns:
            A list of ``{"type": "image_url", "image_url": {"url": <data URL>}}``
            dicts; files detected as blank pages are skipped.
        """
        base_dir = f'./tmp/{path_id}'
        png_messages = []
        for filename in files:
            b64_string = await cls._encode_image(os.path.join(base_dir, filename))
            if b64_string:
                png_messages.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{b64_string}"
                    }
                })
        return png_messages

    @staticmethod
    def remove_directory(path):
        """Best-effort recursive removal of the directory at ``path``.

        Does nothing when ``path`` is missing or not a directory. Failures are
        printed rather than raised.
        """
        try:
            # isdir() is already False for paths that do not exist.
            if os.path.isdir(path):
                shutil.rmtree(path)
        except Exception as e:
            print(f"An error occurred while trying to remove {path}: {str(e)}")

    @staticmethod
    def remove_file(file_path):
        """Best-effort removal of the regular file at ``file_path``.

        Does nothing when ``file_path`` is missing or not a regular file.
        Failures are printed rather than raised.
        """
        try:
            # isfile() is already False for paths that do not exist.
            if os.path.isfile(file_path):
                os.remove(file_path)
        except Exception as e:
            print(f"An error occurred while trying to remove the file {file_path}: {str(e)}")

    @staticmethod
    async def save_upload(file: UploadFile, name: str = "upload",
                          path_id: Optional[str] = None) -> Tuple[str, str]:
        """Persist an uploaded file as ``./tmp/<path_id>/<name>.<ext>``.

        Args:
            file: The incoming upload.
            name: Base name (without extension) for the stored file.
            path_id: Working directory name under ``./tmp``; a fresh UUID4 is
                generated when omitted.

        Returns:
            ``(ext, path_id)``, where ``ext`` is the text after the last dot
            of the uploaded file's base name.
        """
        # The client controls the filename: keep only its final path component
        # so the extension can never carry directory separators into the
        # target path. A missing filename yields an empty extension.
        base_name = Path(file.filename or "").name
        ext = base_name.split('.')[-1]
        if path_id is None:
            path_id = str(uuid.uuid4())
        target_dir = f'./tmp/{path_id}'
        os.makedirs(target_dir, exist_ok=True)
        tmp_filename = f'{target_dir}/{name}.{ext}'
        file_bytes: bytes = await file.read()
        # Distinct handle name: do not shadow the ``file`` parameter.
        async with aiofiles.open(tmp_filename, 'wb') as out_file:
            await out_file.write(file_bytes)
        return ext, path_id

    @staticmethod
    async def encode_image(image_path: str) -> str:
        """Read the file at ``image_path`` and return its contents base64-encoded."""
        async with aiofiles.open(image_path, "rb") as image_file:
            img = await image_file.read()
        return base64.b64encode(img).decode('utf-8')