import gradio as gr
from PIL import Image
import os
import requests
import time

# Ensure the correct libraries are installed.
# This runs a shell command at import time, before `import fitz` below: it
# uninstalls any distribution named `fitz`, then force-reinstalls `pymupdf`
# and `requests`.
# NOTE(review): presumably needed because an unrelated PyPI package called
# `fitz` can shadow the `fitz` module shipped by PyMuPDF — confirm.
# NOTE(review): the command's exit status is ignored, and `pip` is resolved
# from PATH, which may not belong to the interpreter running this script.
os.system("pip uninstall -y fitz && pip install --force-reinstall pymupdf requests")
import fitz

# --- Constants for Configuration ---
# HTTP headers sent with the PDF download request; only a browser-style
# User-Agent is set.
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Scale factor applied to both axes of the render matrix for every page.
IMAGE_ZOOM_FACTOR = 2  # 2 = 144 DPI
# Gap, in output-image pixels, inserted between consecutive pages on the canvas.
SPACER_HEIGHT = 30     # Vertical space between pages
# RGB fill used for the canvas (visible in the gaps and beside narrower pages).
BACKGROUND_COLOR = (255, 255, 255) # White

def _open_pdf_source(pdf_file, pdf_url, progress):
    """Open the PDF named by the upload or, failing that, by the URL.

    Input validation happens before any I/O so that ``source_desc`` is always
    bound by the time an I/O failure needs to be reported.

    Args:
        pdf_file: Uploaded file (object exposing its path as ``.name``, or a
            plain path string), or None when nothing was uploaded.
        pdf_url: URL string; only consulted when ``pdf_file`` is None.
        progress: Callable used to report status, called as
            ``progress(fraction, desc=...)``.

    Returns:
        The opened ``fitz`` document. The caller is responsible for closing it.

    Raises:
        gr.Error: If neither input is usable, the URL does not serve a PDF,
            or the document cannot be fetched or opened.
    """
    if pdf_file is not None:
        # Accept both an object carrying the temp-file path in ``.name`` and a
        # bare path string.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        source_desc = f"uploaded file '{os.path.basename(pdf_path)}'"
    else:
        # Strip once and use the stripped value for both the check and the
        # request, so surrounding whitespace cannot break the fetch.
        url = (pdf_url or "").strip()
        if not url.startswith(('http://', 'https://')):
            raise gr.Error("Please upload a PDF or provide a valid URL.")
        source_desc = f"URL '{url}'"

    try:
        if pdf_file is not None:
            progress(0, desc="Opening uploaded file...")
            return fitz.open(pdf_path)

        progress(0, desc="Fetching PDF from URL...")
        response = requests.get(url, headers=REQUESTS_HEADERS, timeout=30)
        response.raise_for_status()
        content_type = response.headers.get('Content-Type', '').lower()
        if 'application/pdf' not in content_type:
            raise gr.Error(f"URL content is not a PDF. Type: '{content_type}'.")
        return fitz.open(stream=response.content, filetype="pdf")
    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as exc:
        raise gr.Error(f"Failed to load PDF from {source_desc}. Error: {exc}") from exc


def pdf_to_png_streaming(pdf_file, pdf_url, progress=gr.Progress(track_tqdm=True)):
    """
    Converts a PDF (from file or URL) to a single PNG, streaming the output
    image as each page is processed.

    This function is a generator, yielding the progressively built image.
    Pages are stacked vertically, centered horizontally, and separated by
    SPACER_HEIGHT pixels on a BACKGROUND_COLOR canvas.

    Args:
        pdf_file: Gradio file object (or path string); takes precedence over
            ``pdf_url`` when not None.
        pdf_url: String URL to a PDF.
        progress: Gradio progress object, automatically provided by the UI.

    Yields:
        The same PIL image after each page has been pasted onto it; the last
        yielded value is the complete image.

    Raises:
        gr.Error: If no usable input is given, the PDF cannot be loaded, or
            the document contains no pages.
    """
    # --- 1. Get PDF Document Object ---
    doc = _open_pdf_source(pdf_file, pdf_url, progress)

    # The try/finally guarantees the document is closed even when rendering
    # fails or the consumer stops iterating before the last page.
    try:
        # --- 2. Pre-scan for Dimensions (for efficient canvas creation) ---
        progress(0, desc="Analyzing PDF layout...")
        page_dims = [page.rect for page in doc]
        if not page_dims:
            raise gr.Error("PDF is valid but contains no pages.")
        page_count = doc.page_count

        # Calculate final canvas size based on scanned dimensions and zoom factor.
        # NOTE(review): rendered pixmap sizes are determined per page by
        # PyMuPDF and may differ from these truncated totals by a pixel —
        # TODO confirm whether edge clipping matters here.
        matrix = fitz.Matrix(IMAGE_ZOOM_FACTOR, IMAGE_ZOOM_FACTOR)
        max_width = int(max(p.width for p in page_dims) * matrix.a)
        total_height = (
            int(sum(p.height for p in page_dims) * matrix.d)
            + SPACER_HEIGHT * (page_count - 1)
        )

        # Create the final canvas ONCE
        combined_img = Image.new("RGB", (max_width, total_height), BACKGROUND_COLOR)

        # --- 3. Render, Paste, and Stream (Yield) ---
        current_y = 0
        for page_num, page in enumerate(doc, start=1):
            progress(page_num / page_count, desc=f"Processing Page {page_num} of {page_count}")

            # Render page to a pixmap (no alpha channel, so samples are RGB)
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            page_img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # Center the page horizontally, then paste it at the running y-offset
            x_offset = (max_width - pix.width) // 2
            combined_img.paste(page_img, (x_offset, current_y))

            # Update the y-position for the next page
            current_y += pix.height + SPACER_HEIGHT

            # YIELD the progressively built image to update the Gradio UI
            yield combined_img
    finally:
        doc.close()

    print("Streaming conversion complete.")
    # The final yield has already sent the completed image. No return needed.


# --- Gradio Interface Definition using Blocks ---
# Components are placed in the order they are constructed inside the nested
# context managers, so statement order here defines the page layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page heading and short usage instructions.
    gr.Markdown(
        """
        # Live PDF to PNG Converter
        **Upload a PDF file OR enter a URL.** The output image will update in real-time as each page is processed.
        """
    )
    with gr.Row():
        # Left column: the two alternative inputs and the submit button.
        with gr.Column(scale=1):
            pdf_input = gr.File(label="Upload PDF File")
            url_input = gr.Textbox(
                label="Or Enter PDF URL",
                placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf"
            )
            submit_btn = gr.Button("Convert to PNG", variant="primary")
        # Right column (twice the scale of the left): the read-only output image.
        with gr.Column(scale=2):
            png_output = gr.Image(label="Live PNG Output", interactive=False)

    # Clear button attached to both inputs and the output image.
    clear_btn = gr.ClearButton(components=[pdf_input, url_input, png_output], value="Clear All")

    # Example rows map to (pdf_input, url_input); both supply only a URL.
    gr.Examples(
        examples=[
            [None, "https://arxiv.org/pdf/1706.03762.pdf"], # "Attention Is All You Need" paper
            [None, "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"],
        ],
        inputs=[pdf_input, url_input]
    )
    
    # Wire the button to the streaming function: pdf_to_png_streaming is a
    # generator, and each image it yields is sent to png_output.
    submit_btn.click(
        fn=pdf_to_png_streaming,
        inputs=[pdf_input, url_input],
        outputs=png_output
    )

# Launch the app
# Only runs when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    print("Starting Gradio app with streaming...")
    demo.launch()
    # Printed only after demo.launch() returns.
    print("Gradio app finished.")