import datetime
from pathlib import Path
import base64
import io
import os
import shutil
import subprocess
from typing import Optional

import numpy as np
import pypandoc
from PIL import Image
import aiofiles


class FileHelper:
    """Filesystem and document-conversion helpers.

    Groups utilities for pruning old files, converting documents with
    pandoc, rendering a PDF to PNG pages with ``pdftoppm``, and encoding
    the resulting images as base64 ``image_url`` message parts.
    """

    @staticmethod
    def delete_files_older_than_one_day(directory: str):
        """Delete regular files in *directory* last modified over a day ago.

        Files whose name contains ``"placeholder"`` are always kept.
        Sub-directories are not entered and are never removed.

        Args:
            directory: Path of the directory to scan (non-recursive).

        Raises:
            OSError: If the directory cannot be scanned or a file cannot be
                inspected or removed.
        """
        # Compare against a full timedelta: `timedelta.days > 1` would only
        # match files that are at least two days old.
        cutoff = datetime.datetime.now() - datetime.timedelta(days=1)
        # The context manager closes the scandir iterator deterministically.
        with os.scandir(directory) as entries:
            for entry in entries:
                if not entry.is_file():
                    continue
                if "placeholder" in entry.name:
                    continue
                file_path = Path(entry.path)
                modified_at = datetime.datetime.fromtimestamp(file_path.stat().st_mtime)
                if modified_at < cutoff:
                    file_path.unlink()
                    print(f"Deleted file: {file_path}")

    # Pandoc supposedly covers a wide range of input formats, but this
    # conversion has only been tested with docx.
    @staticmethod
    def convert_file_to_pdf(input_path: str, output_path: str):
        """Convert *input_path* to a PDF written at *output_path* via pandoc.

        The page geometry is fixed at 5.5in x 8.5in with 0.5in margins and
        the ``empty`` page style.

        Args:
            input_path: Path of the document to convert.
            output_path: Path of the PDF file to write.
        """
        pypandoc.convert_file(input_path, 'pdf', outputfile=output_path, extra_args=[
            '-V', 'geometry:paperwidth=5.5in',
            '-V', 'geometry:paperheight=8.5in',
            '-V', 'geometry:margin=0.5in',
            '-V', 'pagestyle=empty'
        ])

    @staticmethod
    def convert_file_to_html(input_path: str, output_path: str):
        """Convert *input_path* to HTML written at *output_path* via pandoc.

        Args:
            input_path: Path of the document to convert.
            output_path: Path of the HTML file to write.
        """
        pypandoc.convert_file(input_path, 'html', outputfile=output_path)

    @staticmethod
    def pdf_to_png(path_id: str):
        """Render ``./tmp/<path_id>/exercises.pdf`` to PNG files with pdftoppm.

        The command runs with ``./tmp/<path_id>`` as working directory and
        uses ``page`` as the output file prefix.

        Args:
            path_id: Name of the sub-directory of ``./tmp`` holding the PDF.

        Raises:
            Exception: If ``pdftoppm`` exits with a non-zero status; the
                message includes the command and its stderr output.
            FileNotFoundError: If the working directory or the ``pdftoppm``
                executable does not exist.
        """
        # An argument list (no shell) avoids any shell parsing of the command.
        command = ["pdftoppm", "-png", "exercises.pdf", "page"]
        result = subprocess.run(command, cwd=f'./tmp/{path_id}', capture_output=True, text=True)
        if result.returncode != 0:
            to_png = " ".join(command)
            raise Exception(
                f"Couldn't convert pdf to png. \nFailed to run command '{to_png}' -> ```cmd {result.stderr}```")

    @staticmethod
    def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
        """Return True when the image has at most *image_threshold* non-white pixels.

        The image is converted to 8-bit grayscale and every pixel with a
        value below 255 counts as non-white.

        Args:
            image_bytes: Encoded image data in a format Pillow can open.
            image_threshold: Maximum number of non-white pixels a page may
                have while still being treated as blank.
        """
        with Image.open(io.BytesIO(image_bytes)) as img:
            gray_pixels = np.array(img.convert('L'))
        non_white_pixels = np.count_nonzero(gray_pixels < 255)
        # Wrap in bool() so callers get a built-in bool, not numpy.bool_.
        return bool(non_white_pixels <= image_threshold)

    @classmethod
    def _encode_image_bytes(cls, image_bytes: bytes, image_threshold=10) -> Optional[str]:
        """Return the base64 text of *image_bytes*, or None for a blank page."""
        if cls.is_page_blank(image_bytes, image_threshold):
            return None
        return base64.b64encode(image_bytes).decode('utf-8')

    @classmethod
    async def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
        """Asynchronously read an image file and return it base64-encoded.

        Args:
            image_path: Path of the image file to read.
            image_threshold: Blank-page threshold passed to ``is_page_blank``.

        Returns:
            The base64 string, or None when the page is considered blank.
        """
        async with aiofiles.open(image_path, "rb") as image_file:
            image_bytes = await image_file.read()
        return cls._encode_image_bytes(image_bytes, image_threshold)

    @classmethod
    def b64_pngs(cls, path_id: str, files: list[str]):
        """Build ``image_url`` message parts for the non-blank PNG pages.

        Args:
            path_id: Name of the sub-directory of ``./tmp`` holding the files.
            files: File names, relative to ``./tmp/<path_id>``, to encode.

        Returns:
            A list of dicts of the form
            ``{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}``,
            one per file that is not blank, in the order of *files*.
        """
        png_messages = []
        for filename in files:
            image_path = os.path.join(f'./tmp/{path_id}', filename)
            # Read synchronously: this method is not a coroutine, so it cannot
            # await `_encode_image`; calling it un-awaited would only produce a
            # (truthy) coroutine object instead of the encoded data.
            with open(image_path, "rb") as image_file:
                b64_string = cls._encode_image_bytes(image_file.read())
            if b64_string is None:
                continue
            png_messages.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{b64_string}"
                }
            })
        return png_messages

    @staticmethod
    def remove_directory(path):
        """Recursively remove the directory at *path*, if it is a directory.

        This is a best-effort cleanup: any error is printed and swallowed
        rather than propagated to the caller.

        Args:
            path: Path of the directory to remove.
        """
        try:
            # isdir() is already False for paths that do not exist.
            if os.path.isdir(path):
                shutil.rmtree(path)
        except Exception as e:  # deliberate best-effort: report, don't raise
            print(f"An error occurred while trying to remove {path}: {str(e)}")