import base64
import io
import os
import shutil
import subprocess
import uuid
from typing import Optional, Tuple

import numpy as np
import pypandoc
from PIL import Image

# Root directory under which every upload gets its own ``<path_id>`` folder.
_TMP_ROOT = './tmp'


class FileHelper:
    """Stateless helpers for handling uploaded documents.

    Covers saving an upload into a per-request working directory under
    ``./tmp``, converting it with pandoc, rasterising ``exercises.pdf`` to
    PNG pages, base64-encoding the non-blank pages, and cleaning up.
    """

    @staticmethod
    def convert_file_to_pdf(input_path: str, output_path: str):
        """Convert ``input_path`` to a PDF written at ``output_path``.

        The page geometry is fixed to 5.5in x 8.5in with 0.5in margins and an
        empty page style (passed to pandoc as ``-V`` variables).

        NOTE: pandoc supposedly covers a wide range of input file extensions,
        but this has only been tested with docx.
        """
        pypandoc.convert_file(
            input_path,
            'pdf',
            outputfile=output_path,
            extra_args=[
                '-V', 'geometry:paperwidth=5.5in',
                '-V', 'geometry:paperheight=8.5in',
                '-V', 'geometry:margin=0.5in',
                '-V', 'pagestyle=empty',
            ],
        )

    @staticmethod
    def convert_file_to_html(input_path: str, output_path: str):
        """Convert ``input_path`` to HTML written at ``output_path``."""
        pypandoc.convert_file(input_path, 'html', outputfile=output_path)

    @staticmethod
    def pdf_to_png(path_id: str):
        """Rasterise ``./tmp/<path_id>/exercises.pdf`` to PNG files.

        Runs ``pdftoppm -png exercises.pdf page`` inside the working
        directory, so the generated files use the output prefix ``page``.

        Raises:
            RuntimeError: if ``pdftoppm`` exits with a non-zero status; the
                message carries the command and its stderr.
            FileNotFoundError: if the working directory or the ``pdftoppm``
                executable does not exist.
        """
        # Argument list + no shell: nothing is ever interpreted by a shell.
        command = ['pdftoppm', '-png', 'exercises.pdf', 'page']
        result = subprocess.run(
            command,
            cwd=f'{_TMP_ROOT}/{path_id}',
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            to_png = ' '.join(command)
            raise RuntimeError(
                f"Couldn't convert pdf to png. Failed to run command '{to_png}' -> ```cmd {result.stderr}```")

    @staticmethod
    def is_page_blank(image_bytes: bytes, image_threshold=10) -> bool:
        """Return True when the image is (almost) entirely white.

        The image is converted to 8-bit grayscale and every pixel darker than
        pure white (value < 255) is counted; the page is considered blank
        when that count does not exceed ``image_threshold``.
        """
        with Image.open(io.BytesIO(image_bytes)) as img:
            gray_pixels = np.array(img.convert('L'))
        non_white_pixels = np.count_nonzero(gray_pixels < 255)
        # Explicit bool(): the comparison must yield a real ``bool``, not a
        # numpy scalar, to honour the annotation (identity checks, JSON).
        return bool(non_white_pixels <= image_threshold)

    @classmethod
    def _encode_image(cls, image_path: str, image_threshold=10) -> Optional[str]:
        """Read an image file and return it base64-encoded (UTF-8 text).

        Returns None when ``is_page_blank`` classifies the image as blank.
        """
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()

        if cls.is_page_blank(image_bytes, image_threshold):
            return None
        return base64.b64encode(image_bytes).decode('utf-8')

    @classmethod
    def b64_pngs(cls, path_id: str, files: list[str]) -> list[dict]:
        """Build ``image_url`` message parts for the given PNG files.

        Each name in ``files`` is resolved relative to ``./tmp/<path_id>``.
        Blank pages are skipped; every remaining page becomes a dict of the
        form ``{"type": "image_url", "image_url": {"url": <data URL>}}``.
        """
        work_dir = f'{_TMP_ROOT}/{path_id}'
        png_messages = []
        for filename in files:
            b64_string = cls._encode_image(os.path.join(work_dir, filename))
            if not b64_string:
                continue  # blank page
            png_messages.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{b64_string}"
                }
            })
        return png_messages

    @staticmethod
    def remove_directory(path):
        """Recursively delete ``path`` if it is an existing directory.

        Best-effort: any error is printed and swallowed so cleanup never
        interrupts the caller.
        """
        try:
            # isdir() is already False for a missing path.
            if os.path.isdir(path):
                shutil.rmtree(path)
        except Exception as e:  # deliberate catch-all for best-effort cleanup
            print(f"An error occurred while trying to remove {path}: {e}")

    @staticmethod
    def remove_file(file_path):
        """Delete ``file_path`` if it is an existing regular file.

        Best-effort: any error is printed and swallowed so cleanup never
        interrupts the caller.
        """
        try:
            # isfile() is already False for a missing path.
            if os.path.isfile(file_path):
                os.remove(file_path)
        except Exception as e:  # deliberate catch-all for best-effort cleanup
            print(f"An error occurred while trying to remove the file {file_path}: {e}")

    @staticmethod
    def save_upload(file) -> Tuple[str, str]:
        """Store an uploaded file in a fresh working directory.

        ``file`` must expose a ``filename`` attribute and a ``save(path)``
        method (presumably a werkzeug ``FileStorage`` -- confirm with the
        caller). The file is written to ``./tmp/<path_id>/uploaded.<ext>``
        where ``path_id`` is a new random UUID.

        Returns:
            ``(ext, path_id)`` -- the text after the last ``.`` of the file's
            base name, and the generated directory id.
        """
        # The filename is client-controlled: keep only its base name (either
        # separator style) so the extension can never contain a path
        # separator and escape the working directory. A missing filename is
        # treated as an empty one.
        raw_name = file.filename or ''
        base_name = raw_name.replace('\\', '/').rsplit('/', 1)[-1]
        ext = base_name.split('.')[-1]

        path_id = str(uuid.uuid4())
        work_dir = f'{_TMP_ROOT}/{path_id}'
        os.makedirs(work_dir, exist_ok=True)
        file.save(f'{work_dir}/uploaded.{ext}')
        return ext, path_id