Spaces:

mutou0308
/

GSASR

Running on Zero

App Files Files Community

mutou0308 committed on Jun 21

Commit

67816f5

verified ·

1 Parent(s): af3f1a2

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

gradio_demo.py +359 -0

gradio_demo.py ADDED Viewed

	@@ -0,0 +1,359 @@

+import torch
+import numpy as np
+import gradio as gr
+from PIL import Image
+import math
+import torch.nn.functional as F
+import os
+import tempfile
+import time
+import threading
+from utils.hatropeamp import HATNOUP_ROPE_AMP
+from utils.fea2gsropeamp import Fea2GS_ROPE_AMP
+from utils.edsrbaseline import EDSRNOUP
+from utils.hatropeamp import HATNOUP_ROPE_AMP
+from utils.rdn import RDNNOUP
+from utils.swinir import SwinIRNOUP
+from utils.fea2gsropeamp import Fea2GS_ROPE_AMP
+from utils.gaussian_splatting import generate_2D_gaussian_splatting_step
+from utils.split_and_joint_image import split_and_joint_image
+from huggingface_hub import hf_hub_download
+import subprocess
+import sys
+import spaces
# Compute device: CUDA when torch reports it as available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Lock guarding the stop flag, and the flag itself (True means "stop requested")
inference_lock = threading.Lock()
stop_inference = False
+def load_model(
+    pretrained_model_name_or_path: str = "mutou0308/GSASR",
+    model_name: str = "HATL_SA1B",
+    device: str | torch.device = "cuda"
+):
+    enc_path = hf_hub_download(
+            repo_id=pretrained_model_name_or_path, filename=os.path.join('GSASR_enhenced_ultra', model_name,  'encoder.pth')
+        )
+    dec_path = hf_hub_download(
+            repo_id=pretrained_model_name_or_path, filename=os.path.join('GSASR_enhenced_ultra', model_name, 'decoder.pth')
+        )
+    enc_weight = torch.load(enc_path, weights_only=True)['params_ema']
+    dec_weight = torch.load(dec_path, weights_only=True)['params_ema']
+    if model_name in ['EDSR_DIV2K', 'EDSR_DF2K']:
+        encoder = EDSRNOUP()
+        decoder = Fea2GS_ROPE_AMP()
+    elif model_name in ['RDN_DIV2K', 'RDN_DF2K']:
+        encoder = RDNNOUP()
+        decoder = Fea2GS_ROPE_AMP(num_crossattn_blocks = 2)
+    elif model_name in ['SwinIR_DIV2K', 'SwinIR_DF2K']:
+        encoder = SwinIRNOUP()
+        decoder = Fea2GS_ROPE_AMP(num_crossattn_blocks=2, num_crossattn_layers=4, num_gs_seed=256, window_size=16)
+    elif model_name in ['HATL_SA1B']:
+        encoder = HATNOUP_ROPE_AMP()
+        decoder = Fea2GS_ROPE_AMP(channel=192, num_crossattn_blocks=4, num_crossattn_layers=4, num_selfattn_blocks=8, num_selfattn_layers=6,
+                                  num_gs_seed=256, window_size=16)
+    else:
+        raise ValueError(f"args.model-{model_name} must be in ['EDSR_DIV2K', 'EDSR_DF2K', 'RDN_DIV2K', 'RDN_DF2K', 'SwinIR_DIV2K', 'SwinIR_DF2K', 'HATL_SA1B']")
+    encoder.load_state_dict(enc_weight, strict=True)
+    decoder.load_state_dict(dec_weight, strict=True)
+    encoder.eval()
+    decoder.eval()
+    encoder = encoder.to(device)
+    decoder = decoder.to(device)
+    return encoder, decoder
def preprocess(x, denominator=16):
    """Pad a 4-D tensor on the bottom/right so H and W are multiples of ``denominator``.

    Reflect padding is used whenever it is valid. Reflect padding needs the
    pad amount to be smaller than the corresponding input dimension, so for
    very small inputs (e.g. a 5-pixel-high image that needs 11 rows) the
    function falls back to replicate padding instead of raising.

    Args:
        x: Tensor whose shape unpacks as (batch, channels, height, width).
        denominator: Value the padded height and width must be divisible by.

    Returns:
        The padded tensor; the original content sits in the top-left corner.
    """
    _, _, h, w = x.shape
    # (-n) % d is the distance from n up to the next multiple of d (0 if aligned).
    pad_h = (-h) % denominator
    pad_w = (-w) % denominator
    mode = 'reflect' if (pad_h < h and pad_w < w) else 'replicate'
    x_new = F.pad(x, (0, pad_w, 0, pad_h), mode)
    return x_new
def postprocess(x, gt_size_h, gt_size_w):
    """Crop the last two axes of a 4-D array to ``gt_size_h`` x ``gt_size_w``."""
    keep_rows = slice(None, gt_size_h)
    keep_cols = slice(None, gt_size_w)
    return x[:, :, keep_rows, keep_cols]
def should_use_tile(image_height, image_width, threshold=1024):
    """Return True when either image side is strictly larger than ``threshold``."""
    longest_side = image_height if image_height >= image_width else image_width
    return longest_side > threshold
def set_stop_flag():
    """Raise the global stop flag and return the UI updates for the stop click.

    Returns:
        A status message and a ``gr.update`` that makes the target component
        non-interactive.
    """
    global stop_inference
    inference_lock.acquire()
    try:
        stop_inference = True
    finally:
        inference_lock.release()
    status_message = "🛑 Stopping inference..."
    return status_message, gr.update(interactive=False)
def reset_stop_flag():
    """Clear the global stop flag while holding ``inference_lock``."""
    global stop_inference
    inference_lock.acquire()
    try:
        stop_inference = False
    finally:
        inference_lock.release()
def check_stop_flag():
    """Return the current value of the global stop flag, read under the lock."""
    with inference_lock:
        requested = stop_inference
    return requested
def _install_gscuda_once(setup_marker=".setup_complete"):
    """Install the bundled gscuda wheel unless the marker file already exists.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    if os.path.exists(setup_marker):
        return
    print("First run detected, installing dependencies...")
    # Go through the running interpreter so the wheel lands in the environment
    # this process actually uses, not whichever "pip" is first on PATH.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "dist/gscuda-0.0.0-cp310-cp310-linux_x86_64.whl"]
    )
    # Written only after a successful install, so a failed attempt is retried.
    with open(setup_marker, "w") as f:
        f.write("Setup completed")
    print("Setup completed successfully!")


def _image_to_bgr_tensor(image, target_device):
    """Convert an input image to a 1x3xHxW float tensor in BGR order, scaled by 1/255."""
    if isinstance(image, Image.Image):
        # Normalizes grayscale / palette / RGBA inputs to three channels.
        image = image.convert("RGB")
    pixels = np.array(image)
    if pixels.ndim == 2:
        # Plain 2-D arrays have no channel axis; repeat the plane three times.
        pixels = np.stack([pixels] * 3, axis=-1)
    pixels = pixels[:, :, [2, 1, 0]]  # RGB to BGR
    chw = np.transpose(pixels.astype(np.float32) / 255., (2, 0, 1))
    return torch.from_numpy(chw).float().unsqueeze(0).to(target_device)


def _bgr_tensor_to_image(output):
    """Convert a 1x3xHxW BGR tensor with values in [0, 1] to an RGB PIL image."""
    # squeeze(0) removes only the batch axis, so a size-1 height or width survives.
    chw = output.detach().squeeze(0).float().cpu().clamp(0, 1).numpy()
    hwc = np.transpose(chw[[2, 1, 0], :, :], (1, 2, 0))  # BGR to RGB
    return Image.fromarray((hwc * 255.0).round().astype(np.uint8))


@spaces.GPU
def super_resolution_inference(image, scale=4.0):
    """Super-resolve ``image`` by ``scale``, tiling automatically for large inputs.

    Args:
        image: Input image (a PIL image in the Gradio flow), or None.
        scale: Upscaling factor applied to both height and width.

    Returns:
        ``(output_image, message, output_image)``. On failure, interruption or
        missing input both image slots are None and ``message`` explains why.

    NOTE(review): autocast uses device_type='cuda' and rendering is called with
    cuda_rendering=True, so this presumably requires a CUDA device -- confirm.
    """
    interrupted = (None, "❌ Inference interrupted", None)

    try:
        _install_gscuda_once()
    except subprocess.CalledProcessError as e:
        return None, f"❌ Setup failed with error: {e}", None
    if image is None:
        return None, "Please upload an image", None

    # Clear any stale stop request *before* the slow model load, so a stop
    # issued while the weights are loading is still honoured afterwards.
    reset_stop_flag()

    # Fixed parameters
    tile_overlap = 16  # Fixed overlap size
    crop_size = 8      # Fixed crop size
    tile_size = 1024   # Fixed tile size for large images
    try:
        # Load onto the same device the input tensor is sent to. Loading inside
        # the try block turns download/load failures into an error message.
        encoder, decoder = load_model(model_name="HATL_SA1B", device=device)
        if check_stop_flag():
            return interrupted

        img = _image_to_bgr_tensor(image, device)
        if check_stop_flag():
            return interrupted

        # Target size is computed from the unpadded input; padding added
        # for the network is cropped away again in postprocess().
        gt_size = [math.floor(scale * img.shape[2]), math.floor(scale * img.shape[3])]
        use_tile = should_use_tile(img.shape[2], img.shape[3])

        # inference_mode already disables gradient tracking, so no extra
        # no_grad() context is needed inside it.
        with torch.inference_mode(), torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16):
            if check_stop_flag():
                return interrupted
            if use_tile:
                # Use tile processing
                assert tile_size % 16 == 0, f"tile_size-{tile_size} must be divisible by 16"
                assert 2 * tile_overlap < tile_size, "2 * tile_overlap must be less than tile_size"
                assert 2 * crop_size <= tile_overlap, "2 * crop_size must be less than or equal to tile_overlap"
                output = split_and_joint_image(
                    lq=img,
                    scale_factor=scale,
                    split_size=tile_size,
                    overlap_size=tile_overlap,
                    model_g=encoder,
                    model_fea2gs=decoder,
                    crop_size=crop_size,
                    scale_modify=torch.tensor([scale, scale]),
                    default_step_size=1.2,
                    cuda_rendering=True,
                    mode='scale_modify',
                    if_dmax=True,
                    dmax_mode='fix',
                    dmax=0.1
                )
            else:
                # Direct processing without tiles
                lq_pad = preprocess(img, 16)  # denominator=16 for HATL
                gt_size_pad = torch.tensor([math.floor(scale * lq_pad.shape[2]),
                                            math.floor(scale * lq_pad.shape[3])])
                gt_size_pad = gt_size_pad.unsqueeze(0)
                if check_stop_flag():
                    return interrupted
                encoder_output = encoder(lq_pad)  # b,c,h,w
                if check_stop_flag():
                    return interrupted
                scale_vector = torch.tensor(scale, dtype=torch.float32).unsqueeze(0).to(device)
                batch_gs_parameters = decoder(encoder_output, scale_vector)
                gs_parameters = batch_gs_parameters[0, :]
                if check_stop_flag():
                    return interrupted
                # Gaussian rendering
                b_output = generate_2D_gaussian_splatting_step(
                    gs_parameters=gs_parameters,
                    sr_size=gt_size_pad[0],
                    scale=scale,
                    sample_coords=None,
                    scale_modify=torch.tensor([scale, scale]),
                    default_step_size=1.2,
                    cuda_rendering=True,
                    mode='scale_modify',
                    if_dmax=True,
                    dmax_mode='fix',
                    dmax=0.1
                )
                output = b_output.unsqueeze(0)

        if check_stop_flag():
            return interrupted

        # Crop away the padded border, then convert back to a PIL image.
        output = postprocess(output, gt_size[0], gt_size[1])
        output_pil = _bgr_tensor_to_image(output)

        # Generate result information
        original_size = f"{img.shape[3]}x{img.shape[2]}"
        output_size = f"{output_pil.width}x{output_pil.height}"
        tile_info = f"Tile processing enabled (size: {tile_size})" if use_tile else "Direct processing (no tiles)"
        result_info = f"✅ Processing completed successfully!\nOriginal size: {original_size}\nSuper-resolution size: {output_size}\nScale factor: {scale:.2f}x\nProcessing mode: {tile_info}\nAMP acceleration: Force enabled\nOverlap size: {tile_overlap}\nCrop size: {crop_size}"
        return output_pil, result_info, output_pil
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a status message.
        if check_stop_flag():
            return interrupted
        return None, f"❌ Error during processing: {str(e)}", None
def predict(image, scale):
    """Gradio click handler: run super-resolution and prepare a downloadable PNG.

    Args:
        image: Input image from the Gradio image component (may be None).
        scale: Upscaling factor from the slider.

    Returns:
        A 4-tuple matching the click handler's outputs: the result image (or
        None), the path of the saved PNG (or None), a status string, and a
        ``gr.update`` that re-enables the stop button.
    """
    output_image, info, _ = super_resolution_inference(image, scale)
    if output_image is None:
        return None, None, info if info else "❌ Processing failed", gr.update(interactive=True)

    # mkstemp guarantees a unique file, so two requests finishing within the
    # same second at the same scale no longer overwrite each other's result.
    fd, temp_path = tempfile.mkstemp(prefix=f"GSASR_SR_result_{scale}x_", suffix=".png")
    os.close(fd)  # PIL reopens the path itself; release the raw descriptor.
    output_image.save(temp_path, "PNG")
    return output_image, temp_path, "✅ Ready", gr.update(interactive=True)
# Build the Gradio Blocks UI: inputs on the left, results on the right
with gr.Blocks(title="🚀 GSASR (2D Gaussian Splatting Super-Resolution)") as demo:
    gr.Markdown("# **🚀 GSASR (Generalized and efficient 2d gaussian splatting for arbitrary-scale super-resolution)**")
    gr.Markdown("Official demo for GSASR. Please refer to our [paper](https://arxiv.org/pdf/2501.06838), [project page](https://mt-cly.github.io/GSASR.github.io/), and [github](https://github.com/ChrisDud0257/GSASR) for more details.")

    with gr.Row():
        # Left column: source image, scale control, action buttons
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            with gr.Group():
                gr.Markdown("### SR Scale")
                scale_slider = gr.Slider(
                    minimum=1.0,
                    maximum=30.0,
                    value=4.0,
                    step=0.1,
                    label="SR Scale",
                )
            with gr.Row():
                submit_btn = gr.Button("🚀 Start Super-Resolution", variant="primary")
                stop_btn = gr.Button("🛑 Stop Inference", variant="stop")

        # Right column: result image, status line, downloadable file
        with gr.Column():
            output_image = gr.Image(type="pil", label="Super-Resolution Result")
            status_text = gr.Textbox(label="Status", value="✅ Ready", interactive=False)
            with gr.Group():
                gr.Markdown("### 📥 Download Super-Resolution Result")
                download_btn = gr.File(visible=True)

    # Wire the buttons: submit runs predict; stop sets the flag and cancels
    # the pending submit event.
    submit_event = submit_btn.click(
        fn=predict,
        inputs=[input_image, scale_slider],
        outputs=[output_image, download_btn, status_text, stop_btn],
    )
    stop_btn.click(
        fn=set_stop_flag,
        inputs=[],
        outputs=[status_text, stop_btn],
        cancels=[submit_event],
    )

    # Clickable examples: (image path, scale) pairs fed into the two inputs
    gr.Markdown("### 📚 Example Images")
    gr.Markdown("Try these examples with different scales:")
    gr.Examples(
        examples=[
            ["assets/0846x4.png", 1.5],
            ["assets/0892x4.png", 2.8],
            ["assets/0873x4_cropped_120x120.png", 30.0],
        ],
        inputs=[input_image, scale_slider],
        examples_per_page=3,
        cache_examples=False,
        label="Examples",
    )

# Start the server only when executed as a script
if __name__ == "__main__":
    demo.launch(share=True, server_name="0.0.0.0")