# mineru2 / runpod_handler_simple.py
# marcosremar2's picture
# Add RunPod serverless configuration with GitHub integration
# 4112422
import runpod
import base64
import fitz # PyMuPDF
def handler(job):
    """Extract the text of a base64-encoded PDF and return it as simple markdown.

    Args:
        job: Job payload dict. ``job["input"]`` is read for:
            ``pdf_base64``: the PDF file contents, base64-encoded (required).
            ``filename``: title used in the markdown heading and echoed back
            in the result; defaults to ``"document.pdf"``.

    Returns:
        On success, a dict with keys ``markdown`` (heading, extraction note and
        per-page text), ``filename``, ``status`` (``"success"``) and ``pages``
        (the page count). On any failure, a dict with ``error`` (message) and
        ``status`` (``"failed"``). Exceptions are reported in the result
        instead of being raised.
    """
    try:
        # A missing or null "input" is treated like an empty one so the caller
        # gets the explicit "No PDF data provided" message.
        job_input = job.get("input") or {}

        pdf_base64 = job_input.get("pdf_base64")
        filename = job_input.get("filename", "document.pdf")

        if not pdf_base64:
            return {"error": "No PDF data provided", "status": "failed"}

        pdf_data = base64.b64decode(pdf_base64)

        sections = []
        # The context manager closes the document even if a page fails to
        # extract. The page count must be read while the document is still
        # open: asking a closed document for its length raises.
        with fitz.open(stream=pdf_data, filetype="pdf") as doc:
            page_count = len(doc)
            for page_num, page in enumerate(doc, start=1):
                sections.append(f"\n\n--- Page {page_num} ---\n\n")
                sections.append(page.get_text())

        markdown_content = (
            f"# {filename}\n\n"
            "*Extracted using PyMuPDF (simplified version)*\n\n"
            + "".join(sections)
        )

        return {
            "markdown": markdown_content,
            "filename": filename,
            "status": "success",
            "pages": page_count,
        }
    except Exception as e:
        # Top-level boundary: report every failure through the result dict.
        return {
            "error": str(e),
            "status": "failed",
        }
# Entrypoint: register `handler` with the RunPod serverless runtime.
runpod.serverless.start(dict(handler=handler))