GoHugo committed on
Commit 26d384a · verified · 1 Parent(s): 0d7079e

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +578 -0
app.py ADDED
@@ -0,0 +1,578 @@
+ #!/usr/bin/env python3
+ # Copyright (C) 2025 NVIDIA Corporation. All rights reserved.
+ #
+ # This work is licensed under the LICENSE file
+ # located at the root directory.
+
+ import os
+ import gradio as gr
+ import spaces
+ import torch
+ import numpy as np
+ from PIL import Image
+ import tempfile
+ import gc
+ from datetime import datetime
+ from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+ from addit_flux_pipeline import AdditFluxPipeline
+ from addit_flux_transformer import AdditFluxTransformer2DModel
+ from addit_scheduler import AdditFlowMatchEulerDiscreteScheduler
+ from addit_methods import add_object_generated, add_object_real
+
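+ # Note: the addit_* imports above (pipeline, transformer, scheduler, methods)
+ # are local modules expected to sit alongside app.py in this repository,
+ # not pip-installable packages.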
+ # Global variables for model
+ pipe = None
+ device = None
+ original_image_size = None
+
+ # Initialize model at startup
+ print("Initializing ADDIT model...")
+ try:
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     print(f"Using device: {device}")
+
+     # Load transformer
+     my_transformer = AdditFluxTransformer2DModel.from_pretrained(
+         "black-forest-labs/FLUX.1-dev",
+         subfolder="transformer",
+         torch_dtype=torch.bfloat16
+     )
+
+     # Load pipeline
+     pipe = AdditFluxPipeline.from_pretrained(
+         "black-forest-labs/FLUX.1-dev",
+         transformer=my_transformer,
+         torch_dtype=torch.bfloat16
+     ).to(device)
+
+     # Set scheduler
+     pipe.scheduler = AdditFlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config)
+
+     print("Model initialized successfully!")
+
+     print("Initializing SAM model...")
+     sam = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
+
+ except Exception as e:
+     print(f"Error initializing model: {str(e)}")
+     print("The application will start but model functionality will be unavailable.")
+
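+ # The SAM2 predictor loaded above presumably backs the SAM-based localization
+ # choices exposed in the UI (attention_points_sam, attention_box_sam,
+ # attention_mask_sam, grounding_sam).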
+ def validate_inputs(prompt_source, prompt_target, subject_token):
+     """Validate user inputs"""
+     if not prompt_source.strip():
+         return "Source prompt cannot be empty"
+     if not prompt_target.strip():
+         return "Target prompt cannot be empty"
+     if not subject_token.strip():
+         return "Subject token cannot be empty"
+     if subject_token not in prompt_target:
+         return f"Subject token '{subject_token}' must appear in the target prompt"
+     return None
+
+ def resize_and_crop_image(image):
+     """
+     Resize and center crop image to 1024x1024.
+     Returns the processed image, a message about what was done, and original size info.
+     """
+     if image is None:
+         return None, "", None
+
+     original_width, original_height = image.size
+     original_size = (original_width, original_height)
+
+     # If already 1024x1024, no processing needed
+     if original_width == 1024 and original_height == 1024:
+         return image, "", original_size
+
+     # Calculate scaling to make smaller dimension 1024
+     scale = 1024 / min(original_width, original_height)
+     new_width = int(original_width * scale)
+     new_height = int(original_height * scale)
+
+     # Resize image
+     resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+     # Center crop to 1024x1024
+     left = (new_width - 1024) // 2
+     top = (new_height - 1024) // 2
+     right = left + 1024
+     bottom = top + 1024
+
+     cropped_image = resized_image.crop((left, top, right, bottom))
+
+     # Create status message
+     if new_width == 1024 and new_height == 1024:
+         message = "<div style='background-color: #e8f5e8; border: 1px solid #4caf50; border-radius: 5px; padding: 8px; margin-bottom: 10px;'><span style='color: #2e7d32; font-weight: bold;'>✅ Image resized to 1024×1024</span></div>"
+     else:
+         message = "<div style='background-color: #e8f5e8; border: 1px solid #4caf50; border-radius: 5px; padding: 8px; margin-bottom: 10px;'><span style='color: #2e7d32; font-weight: bold;'>✅ Image resized and center cropped to 1024×1024</span></div>"
+
+     return cropped_image, message, original_size
+
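+ # Worked example for resize_and_crop_image: a 2048x1536 upload gives
+ # scale = 1024/1536 ≈ 0.667, so it is resized to 1365x1024 and then center
+ # cropped with left = (1365 - 1024) // 2 = 170 to a 1024x1024 square.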
+ def handle_image_upload(image):
+     """
+     Handle image upload and preprocessing for the Gradio interface.
+
+     This function is called when a user uploads an image to the real images tab.
+     It stores the original image size globally and processes the image to the required dimensions.
+
+     Args:
+         image: PIL.Image object uploaded by the user, or None if no image is uploaded.
+
+     Returns:
+         Tuple containing:
+         - processed_image: PIL.Image object resized and cropped to 1024x1024, or None if no image.
+         - message: HTML-formatted string indicating the processing status, or empty string.
+     """
+     global original_image_size
+
+     if image is None:
+         original_image_size = None
+         return None, ""
+
+     # Store original size
+     original_image_size = image.size
+
+     # Process image
+     processed_image, message, _ = resize_and_crop_image(image)
+     return processed_image, message
+
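+ # @spaces.GPU requests a ZeroGPU worker for each call of the decorated
+ # function when the app runs on Hugging Face Spaces.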
+ @spaces.GPU
+ def process_generated_image(
+     prompt_source,
+     prompt_target,
+     subject_token,
+     seed_src,
+     seed_obj,
+     extended_scale,
+     structure_transfer_step,
+     blend_steps,
+     localization_model,
+     progress=gr.Progress(track_tqdm=True)
+ ):
+     """
+     Process and generate images using ADDIT for the generated images workflow.
+
+     This function generates a source image from a text prompt and then adds an object to it
+     based on the target prompt and subject token using the ADDIT pipeline.
+
+     Args:
+         prompt_source: String describing the source scene without the object to be added.
+         prompt_target: String describing the target scene including the object to be added.
+         subject_token: String token representing the object to add (must appear in target prompt).
+         seed_src: Integer seed for generating the source image.
+         seed_obj: Integer seed for generating the object.
+         extended_scale: Float value (1.0-1.3) controlling the extended attention scale.
+         structure_transfer_step: Integer (0-10) controlling structure transfer strength.
+         blend_steps: String of comma-separated integers for blending steps, or empty string.
+         localization_model: String specifying the localization model to use.
+         progress: Gradio progress tracker for displaying progress updates.
+
+     Returns:
+         Tuple containing:
+         - src_image: PIL.Image of the generated source image, or None if error.
+         - edited_image: PIL.Image with the added object, or None if error.
+         - status_message: String describing the result or error message.
+     """
+     global pipe
+
+     if pipe is None:
+         return None, None, "Model not initialized. Please restart the application."
+
+     # Validate inputs
+     error_msg = validate_inputs(prompt_source, prompt_target, subject_token)
+     if error_msg:
+         return None, None, error_msg
+
+     # Print current time and input information
+     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+     print(f"\n[{current_time}] Starting Generated Image Processing")
+     print(f"Source Prompt: '{prompt_source}'")
+     print(f"Target Prompt: '{prompt_target}'")
+     print(f"Subject Token: '{subject_token}'")
+     print(f"Source Seed: {seed_src}, Object Seed: {seed_obj}")
+     print(f"Extended Scale: {extended_scale}, Structure Transfer Step: {structure_transfer_step}")
+     print(f"Blend Steps: '{blend_steps}', Localization Model: '{localization_model}'")
+
+     try:
+         # Parse blend steps
+         if blend_steps.strip():
+             blend_steps_list = [int(x.strip()) for x in blend_steps.split(',') if x.strip()]
+         else:
+             blend_steps_list = []
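+         # e.g. "15,20" -> [15, 20]; an empty field yields [] (no blending)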
+
+         # Generate images
+         src_image, edited_image = add_object_generated(
+             pipe=pipe,
+             prompt_source=prompt_source,
+             prompt_object=prompt_target,
+             subject_token=subject_token,
+             seed_src=seed_src,
+             seed_obj=seed_obj,
+             show_attention=False,
+             extended_scale=extended_scale,
+             structure_transfer_step=structure_transfer_step,
+             blend_steps=blend_steps_list,
+             localization_model=localization_model,
+             display_output=False
+         )
+
+         return src_image, edited_image, "Images generated successfully!"
+
+     except Exception as e:
+         error_msg = f"Error generating images: {str(e)}"
+         print(error_msg)
+         return None, None, error_msg
+
+ @spaces.GPU
+ def process_real_image(
+     source_image,
+     prompt_source,
+     prompt_target,
+     subject_token,
+     seed_src,
+     seed_obj,
+     extended_scale,
+     structure_transfer_step,
+     blend_steps,
+     localization_model,
+     use_offset,
+     disable_inversion,
+     progress=gr.Progress(track_tqdm=True)
+ ):
+     """
+     Process and edit a real uploaded image using ADDIT to add objects.
+
+     This function takes an uploaded image and adds an object to it based on the target prompt
+     and subject token using the ADDIT pipeline with optional inversion and offset techniques.
+
+     Args:
+         source_image: PIL.Image object of the uploaded source image to edit.
+         prompt_source: String describing the source image content.
+         prompt_target: String describing the desired result including the object to add.
+         subject_token: String token representing the object to add (must appear in target prompt).
+         seed_src: Integer seed for source image processing.
+         seed_obj: Integer seed for object generation.
+         extended_scale: Float value (1.0-1.3) controlling the extended attention scale.
+         structure_transfer_step: Integer (0-10) controlling structure transfer strength.
+         blend_steps: String of comma-separated integers for blending steps, or empty string.
+         localization_model: String specifying the localization model to use.
+         use_offset: Boolean indicating whether to use the offset technique.
+         disable_inversion: Boolean indicating whether to disable DDIM inversion.
+         progress: Gradio progress tracker for displaying progress updates.
+
+     Returns:
+         Tuple containing:
+         - src_image: PIL.Image of the processed source image, or None if error.
+         - edited_image: PIL.Image with the added object, or None if error.
+         - status_message: String describing the result or error message.
+     """
+     global pipe
+
+     if pipe is None:
+         return None, None, "Model not initialized. Please restart the application."
+
+     if source_image is None:
+         return None, None, "Please upload a source image"
+
+     # Validate inputs
+     error_msg = validate_inputs(prompt_source, prompt_target, subject_token)
+     if error_msg:
+         return None, None, error_msg
+
+     # Print current time and input information
+     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+     print(f"\n[{current_time}] Starting Real Image Processing")
+     if original_image_size:
+         print(f"Original uploaded image size: {original_image_size[0]}×{original_image_size[1]}")
+     print(f"Source Image Size: {source_image.size}")
+     print(f"Source Prompt: '{prompt_source}'")
+     print(f"Target Prompt: '{prompt_target}'")
+     print(f"Subject Token: '{subject_token}'")
+     print(f"Source Seed: {seed_src}, Object Seed: {seed_obj}")
+     print(f"Extended Scale: {extended_scale}, Structure Transfer Step: {structure_transfer_step}")
+     print(f"Blend Steps: '{blend_steps}', Localization Model: '{localization_model}'")
+     print(f"Use Offset: {use_offset}, Disable Inversion: {disable_inversion}")
+
+     try:
+         # Resize source image
+         source_image = source_image.resize((1024, 1024))
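+         # Uploads are already preprocessed to 1024x1024 by handle_image_upload;
+         # this plain resize is presumably a safety net for images that bypass
+         # the upload event (e.g. values filled in from gr.Examples).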
+
+         # Parse blend steps
+         if blend_steps.strip():
+             blend_steps_list = [int(x.strip()) for x in blend_steps.split(',') if x.strip()]
+         else:
+             blend_steps_list = []
+
+         # Process image
+         src_image, edited_image = add_object_real(
+             pipe=pipe,
+             source_image=source_image,
+             prompt_source=prompt_source,
+             prompt_object=prompt_target,
+             subject_token=subject_token,
+             seed_src=seed_src,
+             seed_obj=seed_obj,
+             extended_scale=extended_scale,
+             structure_transfer_step=structure_transfer_step,
+             blend_steps=blend_steps_list,
+             localization_model=localization_model,
+             use_offset=use_offset,
+             show_attention=False,
+             use_inversion=not disable_inversion,
+             display_output=False
+         )
+
+         return src_image, edited_image, "Image edited successfully!"
+
+     except Exception as e:
+         error_msg = f"Error processing image: {str(e)}"
+         print(error_msg)
+         return None, None, error_msg
+
+ def create_interface():
+     """Create the Gradio interface"""
+
+     # Show model status in the interface
+     model_status = "Model ready!" if pipe is not None else "Model initialization failed - functionality unavailable"
+
+     with gr.Blocks(title="🎨 Add-it: Training-Free Object Insertion in Images With Pretrained Diffusion Models", theme=gr.themes.Soft()) as demo:
+         gr.HTML(f"""
+         <div style="text-align: center; margin-bottom: 20px;">
+             <h1>🎨 Add-it: Training-Free Object Insertion</h1>
+             <p>Add objects to images using pretrained diffusion models</p>
+             <p><a href="https://research.nvidia.com/labs/par/addit/" target="_blank">🌐 Project Website</a> |
+             <a href="https://arxiv.org/abs/2411.07232" target="_blank">📄 Paper</a> |
+             <a href="https://github.com/NVlabs/addit" target="_blank">💻 Code</a></p>
+             <p style="color: {'green' if pipe is not None else 'red'}; font-weight: bold;">Status: {model_status}</p>
+         </div>
+         """)
+
+         # Main interface
+         with gr.Tabs():
+             # Generated Images Tab
+             with gr.TabItem("🎭 Generated Images"):
+                 gr.Markdown("### Generate a base image and add objects to it")
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gen_prompt_source = gr.Textbox(
+                             label="Source Prompt",
+                             placeholder="A photo of a cat sitting on the couch",
+                             value="A photo of a cat sitting on the couch"
+                         )
+                         gen_prompt_target = gr.Textbox(
+                             label="Target Prompt",
+                             placeholder="A photo of a cat wearing a blue hat sitting on the couch",
+                             value="A photo of a cat wearing a blue hat sitting on the couch"
+                         )
+                         gen_subject_token = gr.Textbox(
+                             label="Subject Token",
+                             placeholder="hat",
+                             value="hat",
+                             info="Single token representing the object to add **(must appear in target prompt)**"
+                         )
+
+                         with gr.Accordion("Advanced Settings", open=False):
+                             gen_seed_src = gr.Number(label="Source Seed", value=1, precision=0)
+                             gen_seed_obj = gr.Number(label="Object Seed", value=42, precision=0)
+                             gen_extended_scale = gr.Slider(
+                                 label="Extended Scale",
+                                 minimum=1.0,
+                                 maximum=1.3,
+                                 value=1.05,
+                                 step=0.01
+                             )
+                             gen_structure_transfer_step = gr.Slider(
+                                 label="Structure Transfer Step",
+                                 minimum=0,
+                                 maximum=10,
+                                 value=2,
+                                 step=1
+                             )
+                             gen_blend_steps = gr.Textbox(
+                                 label="Blend Steps",
+                                 value="15",
+                                 info="Comma-separated list of steps (e.g., '15,20') or empty for no blending"
+                             )
+                             gen_localization_model = gr.Dropdown(
+                                 label="Localization Model",
+                                 choices=[
+                                     "attention_points_sam",
+                                     "attention",
+                                     "attention_box_sam",
+                                     "attention_mask_sam",
+                                     "grounding_sam"
+                                 ],
+                                 value="attention_points_sam"
+                             )
+
+                         gen_submit_btn = gr.Button("🎨 Generate & Edit", variant="primary")
+
+                     with gr.Column(scale=2):
+                         with gr.Row():
+                             gen_src_output = gr.Image(label="Generated Source Image", type="pil")
+                             gen_edited_output = gr.Image(label="Edited Image", type="pil")
+                         gen_status = gr.Textbox(label="Status", interactive=False)
+
+                 gen_submit_btn.click(
+                     fn=process_generated_image,
+                     inputs=[
+                         gen_prompt_source, gen_prompt_target, gen_subject_token,
+                         gen_seed_src, gen_seed_obj, gen_extended_scale,
+                         gen_structure_transfer_step, gen_blend_steps,
+                         gen_localization_model
+                     ],
+                     outputs=[gen_src_output, gen_edited_output, gen_status]
+                 )
+
+                 # Examples for generated images
+                 gr.Examples(
+                     examples=[
+                         ["An empty throne", "A king sitting on a throne", "king"],
+                         ["A photo of a man sitting on a bench", "A photo of a man sitting on a bench with a dog", "dog"],
+                         ["A photo of a cat sitting on the couch", "A photo of a cat wearing a blue hat sitting on the couch", "hat"],
+                         ["A car driving through an empty street", "A pink car driving through an empty street", "car"]
+                     ],
+                     inputs=[
+                         gen_prompt_source, gen_prompt_target, gen_subject_token
+                     ],
+                     label="Example Prompts"
+                 )
+
+             # Real Images Tab
+             with gr.TabItem("📸 Real Images"):
+                 gr.Markdown("### Upload an image and add objects to it")
+                 gr.HTML("<p style='color: orange; font-weight: bold; margin: -15px -10px;'>Note: Images will be automatically resized and center cropped to 1024×1024 pixels.</p>")
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         real_image_status = gr.HTML(visible=False)
+                         real_source_image = gr.Image(label="Source Image", type="pil")
+                         real_prompt_source = gr.Textbox(
+                             label="Source Prompt",
+                             placeholder="A photo of a bed in a dark room",
+                             value="A photo of a bed in a dark room"
+                         )
+                         real_prompt_target = gr.Textbox(
+                             label="Target Prompt",
+                             placeholder="A photo of a dog lying on a bed in a dark room",
+                             value="A photo of a dog lying on a bed in a dark room"
+                         )
+                         real_subject_token = gr.Textbox(
+                             label="Subject Token",
+                             placeholder="dog",
+                             value="dog",
+                             info="Single token representing the object to add **(must appear in target prompt)**"
+                         )
+
+                         with gr.Accordion("Advanced Settings", open=False):
+                             real_seed_src = gr.Number(label="Source Seed", value=1, precision=0)
+                             real_seed_obj = gr.Number(label="Object Seed", value=0, precision=0)
+                             real_extended_scale = gr.Slider(
+                                 label="Extended Scale",
+                                 minimum=1.0,
+                                 maximum=1.3,
+                                 value=1.1,
+                                 step=0.01
+                             )
+                             real_structure_transfer_step = gr.Slider(
+                                 label="Structure Transfer Step",
+                                 minimum=0,
+                                 maximum=10,
+                                 value=4,
+                                 step=1
+                             )
+                             real_blend_steps = gr.Textbox(
+                                 label="Blend Steps",
+                                 value="18",
+                                 info="Comma-separated list of steps (e.g., '15,20') or empty for no blending"
+                             )
+                             real_localization_model = gr.Dropdown(
+                                 label="Localization Model",
+                                 choices=[
+                                     "attention",
+                                     "attention_points_sam",
+                                     "attention_box_sam",
+                                     "attention_mask_sam",
+                                     "grounding_sam"
+                                 ],
+                                 value="attention"
+                             )
+                             real_use_offset = gr.Checkbox(label="Use Offset", value=False)
+                             real_disable_inversion = gr.Checkbox(label="Disable Inversion", value=False)
+
+                         real_submit_btn = gr.Button("🎨 Edit Image", variant="primary")
+
+                     with gr.Column(scale=2):
+                         with gr.Row():
+                             real_src_output = gr.Image(label="Source Image", type="pil")
+                             real_edited_output = gr.Image(label="Edited Image", type="pil")
+                         real_status = gr.Textbox(label="Status", interactive=False)
+
+                 # Handle image upload and preprocessing
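+                 # (the .then() step below toggles the status banner's
+                 # visibility based on whether the upload handler produced
+                 # a message)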
+                 real_source_image.upload(
+                     fn=handle_image_upload,
+                     inputs=[real_source_image],
+                     outputs=[real_source_image, real_image_status]
+                 ).then(
+                     fn=lambda status: gr.update(visible=bool(status.strip()), value=status),
+                     inputs=[real_image_status],
+                     outputs=[real_image_status]
+                 )
+
+                 real_submit_btn.click(
+                     fn=process_real_image,
+                     inputs=[
+                         real_source_image, real_prompt_source, real_prompt_target, real_subject_token,
+                         real_seed_src, real_seed_obj, real_extended_scale,
+                         real_structure_transfer_step, real_blend_steps,
+                         real_localization_model, real_use_offset,
+                         real_disable_inversion
+                     ],
+                     outputs=[real_src_output, real_edited_output, real_status]
+                 )
+
+                 # Examples for real images
+                 gr.Examples(
+                     examples=[
+                         [
+                             "images/bed_dark_room.jpg",
+                             "A photo of a bed in a dark room",
+                             "A photo of a dog lying on a bed in a dark room",
+                             "dog"
+                         ],
+                         [
+                             "images/flower.jpg",
+                             "A photo of a flower",
+                             "A bee standing on a flower",
+                             "bee"
+                         ]
+                     ],
+                     inputs=[
+                         real_source_image, real_prompt_source, real_prompt_target, real_subject_token
+                     ],
+                     label="Example Images & Prompts"
+                 )
+
+         # Tips
+         with gr.Accordion("💡 Tips for Better Results", open=False):
+             gr.Markdown("""
+             - **Prompt Design**: The Target Prompt should be similar to the Source Prompt, but include a description of the new object to insert
+             - **Seed Variation**: Try different values for Object Seed - some prompts may require a few attempts to get satisfying results
+             - **Localization Models**: The most effective options are `attention_points_sam` and `attention`. Use Show Attention to visualize localization performance
+             - **Object Placement Issues**: If the object is not added to the image:
+                 - Try **decreasing** Structure Transfer Step
+                 - Try **increasing** Extended Scale
+             - **Flexibility**: To allow more flexibility in modifying the source image, leave Blend Steps empty to send an empty list
+             """)
+
+     return demo
+
+
+ demo = create_interface()
+ # demo.launch(
+ #     server_name="0.0.0.0",
+ #     server_port=7860,
+ #     share=True,
+ #     mcp_server=False
+ # )
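+ # mcp_server=True additionally exposes the app's functions over the
+ # Model Context Protocol (supported in recent Gradio releases).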
+ demo.launch(mcp_server=True)