# --- LIBRARIES ---
import gc
import os
import random

import gradio as gr
import torch
from diffusers import AutoPipelineForText2Image, TextToVideoSDPipeline
from diffusers.utils import export_to_video

# --- AUTHENTICATION FOR HUGGING FACE SPACES ---
# Read the token from a "Secret" set in the Space's settings. Secrets are
# injected as environment variables, which keeps the token out of the repo.
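# The secret must be named HF_TOKEN to match the os.environ lookup below.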
try:
    from huggingface_hub import login
    HF_TOKEN = os.environ.get('HF_TOKEN')
    if HF_TOKEN:
        login(token=HF_TOKEN)
        print("✅ Hugging Face Authentication successful.")
    else:
        print("⚠️ Hugging Face token not found in Space Secrets. Gated models may not be available.")
except ImportError:
    print("Could not import huggingface_hub. Please ensure it's in requirements.txt")

# --- CONFIGURATION & STATE ---
available_models = {
    "Fast Image (SDXL Turbo)": "stabilityai/sdxl-turbo",
    "Quality Image (SDXL)": "stabilityai/stable-diffusion-xl-base-1.0",
    "Video (Zeroscope)": "cerspense/zeroscope-v2-576w"
}
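# Cache for the single pipeline kept in VRAM; generate_media() swaps models
# in and out on demand so only one occupies GPU memory at a time.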
model_state = { "current_pipe": None, "loaded_model_name": None }


# --- CORE GENERATION FUNCTION ---
# This is a generator function, which yields updates to the UI.
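# Each yielded dict maps output components to new values, so the status box
# can update while the model is still loading or generating.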
def generate_media(model_key, prompt, negative_prompt, steps, cfg_scale, width, height, seed, num_frames):
    # --- Model Loading Logic ---
    # If the requested model isn't the one we have loaded, switch them.
    if model_state.get("loaded_model_name") != model_key:
        print(f"Switching to {model_key}. Unloading previous model...")
        yield {status_textbox: "Unloading previous model..."} # UI Update
        if model_state.get("current_pipe"):
            del model_state["current_pipe"]
            gc.collect()
            torch.cuda.empty_cache()

        model_id = available_models[model_key]
        print(f"Loading {model_id}...")
        yield {status_textbox: f"Loading {model_id}... This can take a minute."} # UI Update

        # Load the pipeline class that matches the model type
        if "Image" in model_key:
            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
        else:  # "Video" in model_key
            pipe = TextToVideoSDPipeline.from_pretrained(model_id, torch_dtype=torch.float16)

        # Offload the large SDXL base model to save VRAM; keep the fast Turbo
        # and video models fully on the GPU. enable_model_cpu_offload() manages
        # device placement itself, so don't call .to("cuda") first.
        if "Turbo" not in model_key and "Video" not in model_key:
            pipe.enable_model_cpu_offload()
        else:
            pipe.to("cuda")

        model_state["current_pipe"] = pipe
        model_state["loaded_model_name"] = model_key
        print("✅ Model loaded successfully.")

    pipe = model_state["current_pipe"]
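    # Seeding makes a run reproducible; the cast guards against the seed
    # arriving as a float from the gr.Number input.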
    generator = torch.Generator("cuda").manual_seed(int(seed))
    yield {status_textbox: f"Generating with {model_key}..."} # UI Update

    # --- Generation Logic ---
    if "Image" in model_key:
        print("Generating image...")
        if "Turbo" in model_key: # Special settings for SDXL Turbo
            num_steps, guidance_scale = 1, 0.0
        else:
            num_steps, guidance_scale = int(steps), float(cfg_scale)

        image = pipe(
            prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=num_steps,
            guidance_scale=guidance_scale, width=int(width), height=int(height), generator=generator
        ).images[0]
        print("✅ Image generation complete.")
        yield {output_image: image, output_video: None, status_textbox: f"Seed used: {seed}"}

    elif "Video" in model_key:
        print("Generating video...")
        result = pipe(
            prompt=prompt, num_inference_steps=int(steps), height=320, width=576,
            num_frames=int(num_frames), generator=generator
        )
        # Recent diffusers returns a batch of videos from .frames, so take the
        # first (and only) sequence. export_to_video converts float frames to
        # uint8 before encoding, which imageio.mimsave did not.
        video_frames = result.frames[0]
        video_path = f"/tmp/video_{seed}.mp4"  # /tmp is writable (but ephemeral) on Spaces
        export_to_video(video_frames, video_path, fps=12)
        print(f"✅ Video saved to {video_path}")
        yield {output_image: None, output_video: video_path, status_textbox: f"Seed used: {seed}"}


# --- GRADIO USER INTERFACE ---
with gr.Blocks(theme='gradio/soft') as demo:
    gr.Markdown("# The Generative Media Suite")
    gr.Markdown("Create fast images, high-quality images, or short videos. Created by cheeseman182.")
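    # Holds the resolved seed between the two chained click events below.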
    seed_state = gr.State(-1)

    with gr.Row():
        with gr.Column(scale=2):
            model_selector = gr.Radio(label="Select Model", choices=list(available_models.keys()), value=list(available_models.keys())[0])
            prompt_input = gr.Textbox(label="Prompt", lines=4, placeholder="An astronaut riding a horse on Mars, cinematic...")
            negative_prompt_input = gr.Textbox(label="Negative Prompt", lines=2, value="ugly, blurry, deformed, watermark, text")
            
            with gr.Accordion("Settings", open=True):
                steps_slider = gr.Slider(1, 100, 30, step=1, label="Inference Steps")
                cfg_slider = gr.Slider(0.0, 15.0, 7.5, step=0.5, label="Guidance Scale (CFG)")
                with gr.Row():
                    width_slider = gr.Slider(256, 1024, 768, step=64, label="Width")
                    height_slider = gr.Slider(256, 1024, 768, step=64, label="Height")
                num_frames_slider = gr.Slider(12, 48, 24, step=4, label="Video Frames", visible=False)
                seed_input = gr.Number(-1, label="Seed (-1 for random)", precision=0)
            
            generate_button = gr.Button("Generate", variant="primary")

        with gr.Column(scale=3):
            output_image = gr.Image(label="Image Result", interactive=False, height="60vh", visible=True)
            output_video = gr.Video(label="Video Result", interactive=False, height="60vh", visible=False)
            status_textbox = gr.Textbox(label="Status", interactive=False)

    # --- UI Logic ---
    def update_ui_on_model_change(model_key):
        is_video = "Video" in model_key
        is_turbo = "Turbo" in model_key
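        # Turbo runs at 1 step with CFG disabled, so lock those controls; video
        # mode hides the image sizing sliders and swaps the output widget.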
        return {
            steps_slider: gr.update(interactive=not is_turbo, value=1 if is_turbo else 30),
            cfg_slider: gr.update(interactive=not is_turbo, value=0.0 if is_turbo else 7.5),
            width_slider: gr.update(visible=not is_video),
            height_slider: gr.update(visible=not is_video),
            num_frames_slider: gr.update(visible=is_video),
            output_image: gr.update(visible=not is_video),
            output_video: gr.update(visible=is_video)
        }
    model_selector.change(update_ui_on_model_change, model_selector, [steps_slider, cfg_slider, width_slider, height_slider, num_frames_slider, output_image, output_video])

    # --- Button Logic ---
    # This chain first sets the seed, then calls the main generation function.
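    # queue=False lets the seed step resolve immediately; .then() guarantees
    # seed_state is populated before generate_media reads it.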
    click_event = generate_button.click(
        fn=lambda s: (s if s != -1 else random.randint(0, 2**32 - 1)),
        inputs=seed_input,
        outputs=seed_state,
        queue=False
    ).then(
        fn=generate_media,
        inputs=[model_selector, prompt_input, negative_prompt_input, steps_slider, cfg_slider, width_slider, height_slider, seed_state, num_frames_slider],
        outputs=[output_image, output_video, status_textbox]
    )

# On Spaces, a bare launch() is all that's needed. queue() enables streaming
# of the generator's yields (the default on Gradio 4.x, required explicitly
# on older releases).
demo.queue()
demo.launch()
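# Assumed (not pinned in this file) requirements.txt for this app: torch,
# diffusers, transformers, accelerate, gradio, huggingface_hub.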