Adaptive Video Prompt

app.py CHANGED
@@ -516,7 +516,26 @@ set_seed(args.seed)
 seq_len = args.seq_len
 inferpipe = WanInferencePipeline(args)
 
+ADAPTIVE_PROMPT_TEMPLATES = [
+    "A realistic video of a person speaking and moving their head accordingly but without moving their hands.",
+    "A realistic video of a person speaking and moving their head and eyes accordingly, sometimes looking at the camera and sometimes looking away but without moving their hands.",
+    "A realistic video of a person speaking and sometimes looking directly to the camera and moving their eyes and pupils and head accordingly and turning and looking at the camera and looking away from the camera based on their movements with dynamic and rhythmic and extensive hand gestures that complement their speech. Their hands are clearly visible, independent, and unobstructed. Their facial expressions are expressive and full of emotion, enhancing the delivery. The camera remains steady, capturing sharp, clear movements and a focused, engaging presence."
+]
 
+def slider_value_change(image_path, audio_path, text, num_steps, adaptive_text):
+
+    if adaptive_text:
+
+        if num_steps < 8:
+            text = ADAPTIVE_PROMPT_TEMPLATES[0]
+        elif num_steps < 14:
+            text = ADAPTIVE_PROMPT_TEMPLATES[1]
+        else:
+            text = ADAPTIVE_PROMPT_TEMPLATES[2]
+
+    return update_generate_button(image_path, audio_path, text, num_steps), text
+
+
 def update_generate_button(image_path, audio_path, text, num_steps):
 
     if image_path is None or audio_path is None:
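The new `slider_value_change` buckets the step count into three prompt tiers: under 8 steps gets the cheapest "head only" prompt, 8 to 13 the "head and eyes" prompt, and 14 or more the full-gesture prompt. The bucketing can be exercised in isolation; below is a minimal sketch where `pick_adaptive_prompt` is a hypothetical helper (not in app.py) that mirrors the branches above and assumes `ADAPTIVE_PROMPT_TEMPLATES` from this hunk:

```python
# Hypothetical helper mirroring slider_value_change's branches (not in app.py).
def pick_adaptive_prompt(num_steps: int) -> str:
    if num_steps < 8:
        return ADAPTIVE_PROMPT_TEMPLATES[0]  # head movement only
    elif num_steps < 14:
        return ADAPTIVE_PROMPT_TEMPLATES[1]  # head + eye movement
    else:
        return ADAPTIVE_PROMPT_TEMPLATES[2]  # full hand gestures

# Boundary checks: 7 -> tier 0, 8..13 -> tier 1, 14+ -> tier 2.
assert pick_adaptive_prompt(7) is ADAPTIVE_PROMPT_TEMPLATES[0]
assert pick_adaptive_prompt(8) is ADAPTIVE_PROMPT_TEMPLATES[1]
assert pick_adaptive_prompt(14) is ADAPTIVE_PROMPT_TEMPLATES[2]
```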
@@ -653,12 +672,26 @@ def cleanup(request: gr.Request):
 def start_session(request: gr.Request):
 
     return request.session_hash
+
+def check_box_clicked(adaptive_tick):
+    print("checkbox clicked")
+    return gr.update(interactive=not adaptive_tick)
+
 
 css = """
 #col-container {
     margin: 0 auto;
     max-width: 1560px;
 }
+
+/* editable vs locked, reusing theme variables that adapt to dark/light */
+.stateful textarea:not(:disabled):not([readonly]) {
+    color: var(--color-text) !important;  /* accent in both modes */
+}
+.stateful textarea:disabled,
+.stateful textarea[readonly] {
+    color: var(--body-text-color-subdued) !important;  /* subdued in both modes */
+}
 """
 
 with gr.Blocks(css=css) as demo:
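`check_box_clicked` only flips the Textbox's `interactive` flag; the `.stateful` rules then recolor the underlying `<textarea>`, which Gradio renders as disabled while the component is non-interactive. A minimal sketch of the same lock/unlock styling, assuming nothing beyond stock Gradio:

```python
import gradio as gr

# Reduced version of the CSS above: locked boxes get the subdued theme color.
LOCK_CSS = """
.stateful textarea:disabled, .stateful textarea[readonly] {
    color: var(--body-text-color-subdued) !important;
}
"""

with gr.Blocks(css=LOCK_CSS) as demo:
    editable = gr.Textbox(value="editable", elem_classes=["stateful"], interactive=True)
    locked = gr.Textbox(value="locked", elem_classes=["stateful"], interactive=False)

if __name__ == "__main__":
    demo.launch()
```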
@@ -700,9 +733,11 @@ with gr.Blocks(css=css) as demo:
 
             output_video = gr.Video(label="Avatar", height=512)
             num_steps = gr.Slider(4, 50, value=8, step=1, label="Steps")
+
             time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
             infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
-
+            adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
+            text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value=ADAPTIVE_PROMPT_TEMPLATES[1])
 
         with gr.Column():
 
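Note that the defaults are mutually consistent: the slider starts at 8 steps, which `slider_value_change` maps to the middle tier (8 ≤ 8 < 14), and the locked textbox is seeded with `ADAPTIVE_PROMPT_TEMPLATES[1]` to match.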
@@ -778,8 +813,8 @@ with gr.Blocks(css=css) as demo:
     image_input.upload(fn=preprocess_img, inputs=[image_input, session_state], outputs=[image_input])
     image_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
     audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
-    num_steps.change(fn=
-
+    num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
+    adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
 
 if __name__ == "__main__":
     demo.unload(cleanup)