"""Gradio app that renders every page of a PDF into one tall PNG, streaming progress."""

import math
import os
import time

import gradio as gr
import requests
from PIL import Image

# Ensure the correct libraries are installed.
# This runs on every import of this module and must stay ahead of `import fitz`.
# NOTE(review): presumably the uninstall removes a conflicting `fitz` distribution so
# that `import fitz` resolves to PyMuPDF -- confirm before changing or removing.
os.system("pip uninstall -y fitz && pip install --force-reinstall pymupdf requests")
import fitz  # noqa: E402  (deliberately imported after the reinstall above)

# --- Constants for Configuration ---
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
IMAGE_ZOOM_FACTOR = 2  # 2 = 144 DPI
SPACER_HEIGHT = 30  # Vertical space between pages
BACKGROUND_COLOR = (255, 255, 255)  # White


def _load_failure(source_desc, exc):
    """Build the user-facing error for a PDF that could not be loaded."""
    return gr.Error(f"Failed to load PDF from {source_desc}. Error: {exc}")


def _open_pdf_document(pdf_file, pdf_url, progress):
    """Open the PDF from the upload (preferred) or from the URL and return the document.

    Input-validation errors are raised as-is; only genuine open/download failures
    are wrapped in the "Failed to load PDF" message.

    Raises:
        gr.Error: if neither input is usable, the download fails, the response is
            not declared as a PDF, or the document cannot be opened.
    """
    if pdf_file is not None:
        progress(0, desc="Opening uploaded file...")
        # Use `.name` when the object exposes it (what the original code relied on);
        # otherwise treat the value itself as the path.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        source_desc = f"uploaded file '{os.path.basename(pdf_path)}'"
        try:
            return fitz.open(pdf_path)
        except Exception as e:  # UI boundary: surface any open failure to the user
            raise _load_failure(source_desc, e) from e

    url = pdf_url.strip() if pdf_url else ""
    if not url.startswith(('http://', 'https://')):
        raise gr.Error("Please upload a PDF or provide a valid URL.")

    progress(0, desc="Fetching PDF from URL...")
    source_desc = f"URL '{url}'"
    # NOTE(review): the URL is user-supplied and fetched server-side with the whole
    # body held in memory; consider host restrictions and a size cap if this app is
    # exposed publicly.
    try:
        response = requests.get(url, headers=REQUESTS_HEADERS, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        raise _load_failure(source_desc, e) from e

    content_type = response.headers.get('Content-Type', '').lower()
    if 'application/pdf' not in content_type:
        raise gr.Error(f"URL content is not a PDF. Type: '{content_type}'.")

    try:
        return fitz.open(stream=response.content, filetype="pdf")
    except Exception as e:  # UI boundary: surface any parse failure to the user
        raise _load_failure(source_desc, e) from e


def pdf_to_png_streaming(pdf_file, pdf_url, progress=gr.Progress(track_tqdm=True)):
    """
    Converts a PDF (from file or URL) to a single PNG, streaming the output image
    as each page is processed.

    This function is a generator, yielding the progressively built image.

    Args:
        pdf_file: Gradio file object (takes precedence over the URL when given).
        pdf_url: String URL to a PDF.
        progress: Gradio progress object, automatically provided by the UI.

    Yields:
        The shared PIL canvas after each page has been pasted onto it; the last
        yielded value is the completed image.

    Raises:
        gr.Error: if no usable input was given, the PDF cannot be loaded, or it
            contains no pages.
    """
    # --- 1. Get PDF Document Object ---
    doc = _open_pdf_document(pdf_file, pdf_url, progress)

    # The document is closed in `finally` so it is released on errors and when the
    # consumer stops iterating early, not only after a complete run.
    try:
        # --- 2. Pre-scan for Dimensions (for efficient canvas creation) ---
        progress(0, desc="Analyzing PDF layout...")
        page_dims = [page.rect for page in doc]
        if not page_dims:
            raise gr.Error("PDF is valid but contains no pages.")
        page_count = len(page_dims)

        # Calculate final canvas size based on scanned dimensions and zoom factor.
        # Rendered pixmaps have integer sizes that may be rounded up from the
        # fractional page size, so use a per-page ceiling (an upper bound) rather
        # than truncating the float totals, which could clip page edges.
        matrix = fitz.Matrix(IMAGE_ZOOM_FACTOR, IMAGE_ZOOM_FACTOR)
        max_width = max(math.ceil(p.width * matrix.a) for p in page_dims)
        total_height = (
            sum(math.ceil(p.height * matrix.d) for p in page_dims)
            + SPACER_HEIGHT * (page_count - 1)
        )

        # Create the final canvas ONCE
        combined_img = Image.new("RGB", (max_width, total_height), BACKGROUND_COLOR)

        # --- 3. Render, Paste, and Stream (Yield) ---
        current_y = 0
        for page_num, page in enumerate(doc, start=1):
            progress(page_num / page_count, desc=f"Processing Page {page_num} of {page_count}")

            # Render page to a pixmap
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            page_img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

            # Calculate offset to center the page horizontally
            x_offset = (max_width - pix.width) // 2

            # Paste the current page onto the main canvas
            combined_img.paste(page_img, (x_offset, current_y))

            # Update the y-position for the next page
            current_y += pix.height + SPACER_HEIGHT

            # YIELD the progressively built image to update the Gradio UI
            yield combined_img
    finally:
        doc.close()

    print("Streaming conversion complete.")
    # The final yield has already sent the completed image. No return needed.


# --- Gradio Interface Definition using Blocks ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Live PDF to PNG Converter
        **Upload a PDF file OR enter a URL.** The output image will update in real-time as each page is processed.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(label="Upload PDF File")
            url_input = gr.Textbox(
                label="Or Enter PDF URL",
                placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
            )
            submit_btn = gr.Button("Convert to PNG", variant="primary")
        with gr.Column(scale=2):
            png_output = gr.Image(label="Live PNG Output", interactive=False)

    # NOTE(review): placed below the row; the intended nesting of this button
    # should be confirmed against the desired layout.
    clear_btn = gr.ClearButton(components=[pdf_input, url_input, png_output], value="Clear All")

    gr.Examples(
        examples=[
            [None, "https://arxiv.org/pdf/1706.03762.pdf"],  # "Attention Is All You Need" paper
            [None, "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"],
        ],
        inputs=[pdf_input, url_input]
    )

    # Wire the button to the streaming function
    submit_btn.click(
        fn=pdf_to_png_streaming,
        inputs=[pdf_input, url_input],
        outputs=png_output
    )

# Launch the app
if __name__ == "__main__":
    print("Starting Gradio app with streaming...")
    demo.launch()
    print("Gradio app finished.")