alexnasa committed on
Commit
1d6436d
·
verified ·
1 Parent(s): 2c1d6c6

Adaptive Video Prompt

Browse files
Files changed (1) hide show
  1. app.py +38 -3
app.py CHANGED
@@ -516,7 +516,26 @@ set_seed(args.seed)
516
  seq_len = args.seq_len
517
  inferpipe = WanInferencePipeline(args)
518
 
 
 
 
 
 
519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
  def update_generate_button(image_path, audio_path, text, num_steps):
521
 
522
  if image_path is None or audio_path is None:
@@ -653,12 +672,26 @@ def cleanup(request: gr.Request):
653
  def start_session(request: gr.Request):
654
 
655
  return request.session_hash
 
 
 
 
 
656
 
657
  css = """
658
  #col-container {
659
  margin: 0 auto;
660
  max-width: 1560px;
661
  }
 
 
 
 
 
 
 
 
 
662
  """
663
 
664
  with gr.Blocks(css=css) as demo:
@@ -700,9 +733,11 @@ with gr.Blocks(css=css) as demo:
700
 
701
  output_video = gr.Video(label="Avatar", height=512)
702
  num_steps = gr.Slider(4, 50, value=8, step=1, label="Steps")
 
703
  time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
704
  infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
705
- text_input = gr.Textbox(label="Video Prompt", lines=6, value="A realistic video of a woman speaking and moving her head accordingly but without moving her hands.")
 
706
 
707
  with gr.Column():
708
 
@@ -778,8 +813,8 @@ with gr.Blocks(css=css) as demo:
778
  image_input.upload(fn=preprocess_img, inputs=[image_input, session_state], outputs=[image_input])
779
  image_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
780
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
781
- num_steps.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
782
-
783
 
784
  if __name__ == "__main__":
785
  demo.unload(cleanup)
 
516
  seq_len = args.seq_len
517
  inferpipe = WanInferencePipeline(args)
518
 
519
+ ADAPTIVE_PROMPT_TEMPLATES = [
520
+ "A realistic video of a person speaking and moving their head accordingly but without moving their hands.",
521
+ "A realistic video of a person speaking and moving their head and eyes accordingly, sometimes looking at the camera and sometimes looking away but without moving their hands.",
522
+ "A realistic video of a person speaking and sometimes looking directly to the camera and moving their eyes and pupils and head accordingly and turning and looking at the camera and looking away from the camera based on their movements with dynamic and rhythmic and extensive hand gestures that complement their speech. Their hands are clearly visible, independent, and unobstructed. Their facial expressions are expressive and full of emotion, enhancing the delivery. The camera remains steady, capturing sharp, clear movements and a focused, engaging presence."
523
+ ]
524
 
525
+ def slider_value_change(image_path, audio_path, text, num_steps, adaptive_text):
526
+
527
+ if adaptive_text:
528
+
529
+ if num_steps < 8:
530
+ text = ADAPTIVE_PROMPT_TEMPLATES[0]
531
+ elif num_steps < 14:
532
+ text = ADAPTIVE_PROMPT_TEMPLATES[1]
533
+ else:
534
+ text = ADAPTIVE_PROMPT_TEMPLATES[2]
535
+
536
+ return update_generate_button(image_path, audio_path, text, num_steps), text
537
+
538
+
539
  def update_generate_button(image_path, audio_path, text, num_steps):
540
 
541
  if image_path is None or audio_path is None:
 
672
  def start_session(request: gr.Request):
673
 
674
  return request.session_hash
675
+
676
+ def check_box_clicked(adapative_tick):
677
+ print("checkbox clicked")
678
+ return gr.update(interactive=not adapative_tick)
679
+
680
 
681
  css = """
682
  #col-container {
683
  margin: 0 auto;
684
  max-width: 1560px;
685
  }
686
+
687
+ /* editable vs locked, reusing theme variables that adapt to dark/light */
688
+ .stateful textarea:not(:disabled):not([readonly]) {
689
+ color: var(--color-text) !important; /* accent in both modes */
690
+ }
691
+ .stateful textarea:disabled,
692
+ .stateful textarea[readonly]{
693
+ color: var(--body-text-color-subdued) !important; /* subdued in both modes */
694
+ }
695
  """
696
 
697
  with gr.Blocks(css=css) as demo:
 
733
 
734
  output_video = gr.Video(label="Avatar", height=512)
735
  num_steps = gr.Slider(4, 50, value=8, step=1, label="Steps")
736
+
737
  time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
738
  infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
739
+ adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
740
+ text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
741
 
742
  with gr.Column():
743
 
 
813
  image_input.upload(fn=preprocess_img, inputs=[image_input, session_state], outputs=[image_input])
814
  image_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
815
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
816
+ num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
817
+ adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
818
 
819
  if __name__ == "__main__":
820
  demo.unload(cleanup)