# --- LIBRARIES ---
import torch
import gradio as gr
import random
import time
from diffusers import AutoPipelineForText2Image, TextToVideoSDPipeline
import gc
import os
import imageio
# --- AUTHENTICATION FOR HUGGING FACE SPACES ---
# Reads the token from a "Secret" set in the Space's settings.
# This is the secure, recommended way to authenticate on HF Spaces.
try:
    from huggingface_hub import login

    HF_TOKEN = os.environ.get("HF_TOKEN")
    if HF_TOKEN:
        login(token=HF_TOKEN)
        print("✅ Hugging Face authentication successful.")
    else:
        print("⚠️ Hugging Face token not found in Space Secrets. Gated models may not be available.")
except ImportError:
    print("Could not import huggingface_hub. Please ensure it's listed in requirements.txt.")
# --- CONFIGURATION & STATE ---
# NOTE: the loading and UI logic below keys off the substrings "Image",
# "Video", and "Turbo" in these display labels.
available_models = {
    "Fast Image (SDXL Turbo)": "stabilityai/sdxl-turbo",
    "Quality Image (SDXL)": "stabilityai/stable-diffusion-xl-base-1.0",
    "Video (Zeroscope)": "cerspense/zeroscope-v2-576w"
}

# Tracks the single pipeline kept in memory, so switching models frees VRAM first.
model_state = {"current_pipe": None, "loaded_model_name": None}
# --- CORE GENERATION FUNCTION ---
# A generator function: each `yield` pushes a live status update to the UI.
def generate_media(model_key, prompt, negative_prompt, steps, cfg_scale, width, height, seed, num_frames):
    # --- Model Loading Logic ---
    # If the requested model isn't the one currently loaded, swap them.
    if model_state.get("loaded_model_name") != model_key:
        print(f"Switching to {model_key}. Unloading previous model...")
        yield {status_textbox: "Unloading previous model..."}  # UI update
        if model_state.get("current_pipe"):
            del model_state["current_pipe"]
            gc.collect()
            torch.cuda.empty_cache()

        model_id = available_models[model_key]
        print(f"Loading {model_id}...")
        yield {status_textbox: f"Loading {model_id}... This can take a minute."}  # UI update

        # Load the pipeline class that matches the model type.
        if "Image" in model_key:
            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
        elif "Video" in model_key:
            pipe = TextToVideoSDPipeline.from_pretrained(model_id, torch_dtype=torch.float16)

        # Offload larger models to save VRAM, but keep fast models fully on the GPU.
        # enable_model_cpu_offload() manages device placement itself, so .to("cuda")
        # is only called when offloading is NOT used.
        if "Turbo" not in model_key and "Video" not in model_key:
            pipe.enable_model_cpu_offload()
        else:
            pipe.to("cuda")

        model_state["current_pipe"] = pipe
        model_state["loaded_model_name"] = model_key
        print("✅ Model loaded successfully.")

    pipe = model_state["current_pipe"]
    generator = torch.Generator("cuda").manual_seed(int(seed))
    yield {status_textbox: f"Generating with {model_key}..."}  # UI update
    # --- Generation Logic ---
    if "Image" in model_key:
        print("Generating image...")
        if "Turbo" in model_key:  # SDXL Turbo is distilled: one step, no CFG
            num_steps, guidance_scale = 1, 0.0
        else:
            num_steps, guidance_scale = int(steps), float(cfg_scale)
        image = pipe(
            prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=num_steps,
            guidance_scale=guidance_scale, width=int(width), height=int(height), generator=generator
        ).images[0]
        print("✅ Image generation complete.")
        yield {output_image: image, output_video: None, status_textbox: f"Seed used: {seed}"}
    elif "Video" in model_key:
        print("Generating video...")
        # Depending on the diffusers version, .frames may be a flat frame list or
        # batched per prompt (in which case .frames[0] is the frame list).
        video_frames = pipe(prompt=prompt, num_inference_steps=int(steps), height=320, width=576, num_frames=int(num_frames), generator=generator).frames
        video_path = f"/tmp/video_{seed}.mp4"
        imageio.mimsave(video_path, video_frames, fps=12)
        print(f"✅ Video saved to {video_path}")
        yield {output_image: None, output_video: video_path, status_textbox: f"Seed used: {seed}"}
# --- GRADIO USER INTERFACE ---
with gr.Blocks(theme='gradio/soft') as demo:
    gr.Markdown("# The Generative Media Suite")
    gr.Markdown("Create fast images, high-quality images, or short videos. Created by cheeseman182.")
    seed_state = gr.State(-1)
    with gr.Row():
        with gr.Column(scale=2):
            model_selector = gr.Radio(label="Select Model", choices=list(available_models.keys()), value=list(available_models.keys())[0])
            prompt_input = gr.Textbox(label="Prompt", lines=4, placeholder="An astronaut riding a horse on Mars, cinematic...")
            negative_prompt_input = gr.Textbox(label="Negative Prompt", lines=2, value="ugly, blurry, deformed, watermark, text")
            with gr.Accordion("Settings", open=True):
                steps_slider = gr.Slider(1, 100, 30, step=1, label="Inference Steps")
                cfg_slider = gr.Slider(0.0, 15.0, 7.5, step=0.5, label="Guidance Scale (CFG)")
                with gr.Row():
                    width_slider = gr.Slider(256, 1024, 768, step=64, label="Width")
                    height_slider = gr.Slider(256, 1024, 768, step=64, label="Height")
                num_frames_slider = gr.Slider(12, 48, 24, step=4, label="Video Frames", visible=False)
                seed_input = gr.Number(-1, label="Seed (-1 for random)")
            generate_button = gr.Button("Generate", variant="primary")
        with gr.Column(scale=3):
            output_image = gr.Image(label="Image Result", interactive=False, height="60vh", visible=True)
            output_video = gr.Video(label="Video Result", interactive=False, height="60vh", visible=False)
            status_textbox = gr.Textbox(label="Status", interactive=False)
    # --- UI Logic ---
    def update_ui_on_model_change(model_key):
        is_video = "Video" in model_key
        is_turbo = "Turbo" in model_key
        return {
            steps_slider: gr.update(interactive=not is_turbo, value=1 if is_turbo else 30),
            cfg_slider: gr.update(interactive=not is_turbo, value=0.0 if is_turbo else 7.5),
            width_slider: gr.update(visible=not is_video),
            height_slider: gr.update(visible=not is_video),
            num_frames_slider: gr.update(visible=is_video),
            output_image: gr.update(visible=not is_video),
            output_video: gr.update(visible=is_video)
        }
    model_selector.change(update_ui_on_model_change, model_selector, [steps_slider, cfg_slider, width_slider, height_slider, num_frames_slider, output_image, output_video])
    # --- Button Logic ---
    # This chain first resolves the seed (drawing a random one if -1, and casting
    # to int since gr.Number yields floats), then calls the generation function.
    click_event = generate_button.click(
        fn=lambda s: int(s) if s != -1 else random.randint(0, 2**32 - 1),
        inputs=seed_input,
        outputs=seed_state,
        queue=False
    ).then(
        fn=generate_media,
        inputs=[model_selector, prompt_input, negative_prompt_input, steps_slider, cfg_slider, width_slider, height_slider, seed_state, num_frames_slider],
        outputs=[output_image, output_video, status_textbox]
    )
# Launching without extra arguments is the correct way to run on Hugging Face
# Spaces. Generator functions stream their yields through Gradio's queue, which
# recent Gradio versions enable by default.
demo.launch()
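
# For completeness: a Space also needs README.md front matter telling Hugging
# Face how to run the app. A minimal sketch (the title, emoji, and sdk_version
# values here are illustrative assumptions, not taken from this repo):
#
#   ---
#   title: Generative Media Suite
#   emoji: 🎬
#   sdk: gradio
#   sdk_version: 4.0.0
#   app_file: app.py
#   ---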