alexnasa committed on
Commit
1d6436d
·
verified ·
1 Parent(s): 2c1d6c6

Adaptive Video Prompt

Browse files
Files changed (1) hide show
  1. app.py +38 -3
app.py CHANGED
@@ -516,7 +516,26 @@ set_seed(args.seed)
516
  seq_len = args.seq_len
517
  inferpipe = WanInferencePipeline(args)
518
 
 
 
 
 
 
519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
  def update_generate_button(image_path, audio_path, text, num_steps):
521
 
522
  if image_path is None or audio_path is None:
@@ -653,12 +672,26 @@ def cleanup(request: gr.Request):
653
  def start_session(request: gr.Request):
654
 
655
  return request.session_hash
 
 
 
 
 
656
 
657
  css = """
658
  #col-container {
659
  margin: 0 auto;
660
  max-width: 1560px;
661
  }
 
 
 
 
 
 
 
 
 
662
  """
663
 
664
  with gr.Blocks(css=css) as demo:
@@ -700,9 +733,11 @@ with gr.Blocks(css=css) as demo:
700
 
701
  output_video = gr.Video(label="Avatar", height=512)
702
  num_steps = gr.Slider(4, 50, value=8, step=1, label="Steps")
 
703
  time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
704
  infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
705
- text_input = gr.Textbox(label="Video Prompt", lines=6, value="A realistic video of a woman speaking and moving her head accordingly but without moving her hands.")
 
706
 
707
  with gr.Column():
708
 
@@ -778,8 +813,8 @@ with gr.Blocks(css=css) as demo:
778
  image_input.upload(fn=preprocess_img, inputs=[image_input, session_state], outputs=[image_input])
779
  image_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
780
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
781
- num_steps.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
782
-
783
 
784
  if __name__ == "__main__":
785
  demo.unload(cleanup)
 
516
  seq_len = args.seq_len
517
  inferpipe = WanInferencePipeline(args)
518
 
519
+ ADAPTIVE_PROMPT_TEMPLATES = [
520
+ "A realistic video of a person speaking and moving their head accordingly but without moving their hands.",
521
+ "A realistic video of a person speaking and moving their head and eyes accordingly, sometimes looking at the camera and sometimes looking away but without moving their hands.",
522
+ "A realistic video of a person speaking and sometimes looking directly to the camera and moving their eyes and pupils and head accordingly and turning and looking at the camera and looking away from the camera based on their movements with dynamic and rhythmic and extensive hand gestures that complement their speech. Their hands are clearly visible, independent, and unobstructed. Their facial expressions are expressive and full of emotion, enhancing the delivery. The camera remains steady, capturing sharp, clear movements and a focused, engaging presence."
523
+ ]
524
 
525
+ def slider_value_change(image_path, audio_path, text, num_steps, adaptive_text):
526
+
527
+ if adaptive_text:
528
+
529
+ if num_steps < 8:
530
+ text = ADAPTIVE_PROMPT_TEMPLATES[0]
531
+ elif num_steps < 14:
532
+ text = ADAPTIVE_PROMPT_TEMPLATES[1]
533
+ else:
534
+ text = ADAPTIVE_PROMPT_TEMPLATES[2]
535
+
536
+ return update_generate_button(image_path, audio_path, text, num_steps), text
537
+
538
+
539
  def update_generate_button(image_path, audio_path, text, num_steps):
540
 
541
  if image_path is None or audio_path is None:
 
672
  def start_session(request: gr.Request):
673
 
674
  return request.session_hash
675
+
676
+ def check_box_clicked(adapative_tick):
677
+ print("checkbox clicked")
678
+ return gr.update(interactive=not adapative_tick)
679
+
680
 
681
  css = """
682
  #col-container {
683
  margin: 0 auto;
684
  max-width: 1560px;
685
  }
686
+
687
+ /* editable vs locked, reusing theme variables that adapt to dark/light */
688
+ .stateful textarea:not(:disabled):not([readonly]) {
689
+ color: var(--color-text) !important; /* accent in both modes */
690
+ }
691
+ .stateful textarea:disabled,
692
+ .stateful textarea[readonly]{
693
+ color: var(--body-text-color-subdued) !important; /* subdued in both modes */
694
+ }
695
  """
696
 
697
  with gr.Blocks(css=css) as demo:
 
733
 
734
  output_video = gr.Video(label="Avatar", height=512)
735
  num_steps = gr.Slider(4, 50, value=8, step=1, label="Steps")
736
+
737
  time_required = gr.Text(value="⌚ Zero GPU Required: --", show_label=False)
738
  infer_btn = gr.Button("🦜 Avatar Me", variant="primary")
739
+ adaptive_text = gr.Checkbox(label="Adaptive Video Prompt", value=True)
740
+ text_input = gr.Textbox(show_label=False, lines=6, elem_classes=["stateful"], interactive=False, value= ADAPTIVE_PROMPT_TEMPLATES[1])
741
 
742
  with gr.Column():
743
 
 
813
  image_input.upload(fn=preprocess_img, inputs=[image_input, session_state], outputs=[image_input])
814
  image_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
815
  audio_input.change(fn=update_generate_button, inputs=[image_input, audio_input, text_input, num_steps], outputs=[time_required])
816
+ num_steps.change(fn=slider_value_change, inputs=[image_input, audio_input, text_input, num_steps, adaptive_text], outputs=[time_required, text_input])
817
+ adaptive_text.change(fn=check_box_clicked, inputs=[adaptive_text], outputs=[text_input])
818
 
819
  if __name__ == "__main__":
820
  demo.unload(cleanup)