Spaces:

alvinichi
/

imageToVideo

Running

App Files Community

alvinichi committed on Apr 17

Commit

4642fe2

1 Parent(s): da92cbc

update

Browse files

Files changed (3) hide show

README.md +16 -16
app.py +78 -99
requirements.txt +3 -4

README.md CHANGED Viewed

@@ -1,30 +1,30 @@
 ---
-title: Image to Video Generator
-emoji: 🎬
-colorFrom: blue
 colorTo: purple
 sdk: gradio
 sdk_version: 4.0.2
 app_file: app.py
 pinned: false
 ---
-# Ứng dụng tạo video từ hình ảnh
-Ứng dụng này cho phép người dùng tải lên hình ảnh và tạo video chuyển động từ hình ảnh đó sử dụng mô hình AI.
-## Cách sử dụng
-1. Tải lên một hình ảnh
-2. Nhập mô tả cho chuyển động bạn muốn tạo
 3. Điều chỉnh các tham số (tùy chọn)
 4. Nhấn "Tạo video"
-5. Đợi quá trình xử lý hoàn tất và tải xuống video
-## Mô hình được sử dụng
-Ứng dụng này sử dụng mô hình AnimateDiff để tạo hoạt ảnh từ hình ảnh tĩnh.
-## Tham khảo
-- [AnimateDiff GitHub](https://github.com/guoyww/AnimateDiff)
-- [HuggingFace Diffusers](https://huggingface.co/docs/diffusers/index)

 ---
+title: Animate Person From Image
+emoji: 🎭
+colorFrom: pink
 colorTo: purple
 sdk: gradio
 sdk_version: 4.0.2
 app_file: app.py
 pinned: false
 ---
+# Ứng dụng tạo video người chuyển động từ ảnh
+Ứng dụng này sử dụng AI để tạo video người chuyển động từ một ảnh tĩnh.
+## Cách sử dụng
+1. Tải lên ảnh chứa người
+2. Nhập mô tả cho kiểu chuyển động mong muốn
 3. Điều chỉnh các tham số (tùy chọn)
 4. Nhấn "Tạo video"
+## Các tham số
+- **Mô tả chuyển động**: Mô tả bằng lời cách người sẽ chuyển động
+- **Mức độ chuyển động**: Điều chỉnh cường độ chuyển động (1-255)
+- **FPS**: Số khung hình mỗi giây của video kết quả
+## Mẹo sử dụng
+- Ảnh nên có nền đơn giản để có kết quả tốt nhất
+- Người trong ảnh nên ở tư thế tự nhiên, không quá phức tạp
+- Thử nghiệm với các mô tả khác nhau để có hiệu quả tốt nhất

app.py CHANGED Viewed

@@ -1,130 +1,109 @@
 import gradio as gr
-import torch
 import os
-from PIL import Image
 import numpy as np
-import imageio
-import time
-# Kiểm tra device
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Hàm tạo video đơn giản
-def generate_video(image, effect_type, num_frames=16, effect_strength=5.0):
     if image is None:
-        return None, "Vui lòng tải lên một hình ảnh"
-    # Đảm bảo hình ảnh là định dạng RGB
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-    # Thay đổi kích thước hình ảnh về 512x512
-    image = image.resize((512, 512))
     try:
-        # Tạo chuỗi khung hình với hiệu ứng khác nhau
-        frames = []
-        if effect_type == "zoom-in":
-            # Hiệu ứng zoom-in
-            for i in range(num_frames):
-                zoom_factor = 1.0 + (i / num_frames) * (effect_strength / 10)
-                img_copy = image.copy()
-                size = int(img_copy.width / zoom_factor)
-                left = (img_copy.width - size) // 2
-                top = (img_copy.height - size) // 2
-                right = left + size
-                bottom = top + size
-                cropped = img_copy.crop((left, top, right, bottom))
-                frame = cropped.resize((512, 512))
-                frames.append(np.array(frame))
-        elif effect_type == "pan-right":
-            # Hiệu ứng pan từ trái sang phải
-            width, height = image.size
-            for i in range(num_frames):
-                offset = int((i / num_frames) * width * (effect_strength / 10))
-                img_copy = image.copy()
-                # Tạo hiệu ứng pan
-                if offset > 0:
-                    # Lấy phần hình ảnh từ offset đến cuối
-                    right_part = img_copy.crop((offset, 0, width, height))
-                    # Lấy phần còn lại từ đầu
-                    left_part = img_copy.crop((0, 0, offset, height))
-                    # Tạo hình ảnh mới
-                    new_img = Image.new('RGB', (width, height))
-                    new_img.paste(right_part, (0, 0))
-                    new_img.paste(left_part, (width - offset, 0))
-                    frames.append(np.array(new_img))
-                else:
-                    frames.append(np.array(img_copy))
-        elif effect_type == "fade":
-            # Hiệu ứng fade in/out
-            base_frame = np.array(image)
-            for i in range(num_frames):
-                # Tính toán độ mờ
-                if i < num_frames / 2:
-                    # Fade in
-                    alpha = i / (num_frames / 2)
-                else:
-                    # Fade out
-                    alpha = 2.0 - (i / (num_frames / 2))
-                # Áp dụng độ mờ
-                frame = (base_frame * alpha * (effect_strength / 5)).astype(np.uint8)
-                frames.append(frame)
         else:
-            # Mặc định, chỉ thêm hiệu ứng đơn giản
-            for i in range(num_frames):
-                brightness = 1.0 + (effect_strength / 10) * np.sin(2 * np.pi * i / num_frames)
-                frame = (np.array(image) * brightness).clip(0, 255).astype(np.uint8)
-                frames.append(frame)
-        # Tạo video từ các khung hình
-        output_path = f"output_video_{int(time.time())}.mp4"
-        imageio.mimsave(output_path, frames, fps=8)
-        return output_path, "Video đã được tạo thành công!"
     except Exception as e:
         return None, f"Lỗi: {str(e)}"
 # Tạo giao diện Gradio
-with gr.Blocks(title="Ứng dụng tạo video từ hình ảnh") as demo:
-    gr.Markdown("# Tạo video từ hình ảnh")
-    gr.Markdown("Tải lên một hình ảnh và chọn hiệu ứng để tạo video.")
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(type="pil", label="Tải lên hình ảnh")
-            effect_input = gr.Radio(
-                ["zoom-in", "pan-right", "fade", "pulse"],
-                label="Chọn hiệu ứng",
-                value="zoom-in"
             )
-            num_frames = gr.Slider(minimum=10, maximum=30, value=16, step=2, label="Số khung hình")
-            effect_strength = gr.Slider(minimum=1.0, maximum=10.0, value=5.0, step=0.5, label="Độ mạnh của hiệu ứng")
             submit_btn = gr.Button("Tạo video")
         with gr.Column():
             output_video = gr.Video(label="Video kết quả")
             output_message = gr.Textbox(label="Thông báo")
     submit_btn.click(
-        fn=generate_video,
-        inputs=[image_input, effect_input, num_frames, effect_strength],
         outputs=[output_video, output_message]
     )
-    gr.Markdown("### Hướng dẫn")
-    gr.Markdown("- **Zoom-in**: Hiệu ứng phóng to hình ảnh")
-    gr.Markdown("- **Pan-right**: Hiệu ứng di chuyển từ trái sang phải")
-    gr.Markdown("- **Fade**: Hiệu ứng mờ dần rồi hiện dần")
-    gr.Markdown("- **Pulse**: Hiệu ứng thay đổi độ sáng theo nhịp")
 demo.launch()

 import gradio as gr
 import os
+import torch
 import numpy as np
+from PIL import Image
+from diffusers import DiffusionPipeline, DDIMScheduler
+from diffusers.utils import export_to_video
+# Khởi tạo mô hình
+def load_model():
+    pipe = DiffusionPipeline.from_pretrained(
+        "guoyww/animatediff-motion-adapter-v1-5",
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+    )
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+    pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
+    return pipe
+# Hàm xử lý chính để tạo video từ ảnh
+def animate_person(image, prompt, motion_bucket_id=127, fps=8):
     if image is None:
+        return None, "Vui lòng tải lên một hình ảnh."
     try:
+        # Xử lý và chuẩn bị hình ảnh
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # Resize hình ảnh để phù hợp với mô hình
+        w, h = image.size
+        if w > h:
+            new_w, new_h = 512, int(h * 512 / w)
         else:
+            new_w, new_h = int(w * 512 / h), 512
+        image = image.resize((new_w, new_h))
+        # Tạo mặt nạ để tập trung vào chủ thể (người)
+        # Mặt nạ đơn giản - trong thực tế có thể cần mô hình phân đoạn người phức tạp hơn
+        # Tải mô hình
+        pipe = load_model()
+        # Tạo video
+        if not prompt:
+            prompt = "A person moving naturally, photorealistic, high quality"
+        # Thêm hướng dẫn về chuyển động người để có kết quả tốt hơn
+        full_prompt = f"{prompt}, person in motion, smooth movement, natural pose, high quality, detailed"
+        # Sinh video
+        output = pipe(
+            prompt=full_prompt,
+            image=image,
+            negative_prompt="blurry, low quality, distorted, disfigured, bad anatomy",
+            num_frames=24,
+            guidance_scale=7.5,
+            num_inference_steps=50,
+            motion_bucket_id=motion_bucket_id
+        )
+        # Xuất video
+        video_path = "animated_person.mp4"
+        frames = output.frames[0]
+        export_to_video(frames, video_path, fps=fps)
+        return video_path, "Video được tạo thành công!"
     except Exception as e:
         return None, f"Lỗi: {str(e)}"
 # Tạo giao diện Gradio
+with gr.Blocks(title="Tạo video người chuyển động từ ảnh") as demo:
+    gr.Markdown("# Tạo video người chuyển động từ ảnh")
+    gr.Markdown("Tải lên ảnh người và xem họ chuyển động tự nhiên trong video")
     with gr.Row():
         with gr.Column():
+            image_input = gr.Image(type="pil", label="Tải lên ảnh người")
+            prompt_input = gr.Textbox(
+                label="Mô tả chuyển động",
+                placeholder="Mô tả cách người trong ảnh sẽ chuyển động...",
+                value="Person walking naturally, photorealistic"
+            )
+            motion_input = gr.Slider(
+                minimum=1, maximum=255, value=127, step=1,
+                label="Mức độ chuyển động (1-255)"
+            )
+            fps_input = gr.Slider(
+                minimum=6, maximum=30, value=8, step=1,
+                label="Số khung hình mỗi giây (FPS)"
             )
             submit_btn = gr.Button("Tạo video")
         with gr.Column():
             output_video = gr.Video(label="Video kết quả")
             output_message = gr.Textbox(label="Thông báo")
     submit_btn.click(
+        fn=animate_person,
+        inputs=[image_input, prompt_input, motion_input, fps_input],
         outputs=[output_video, output_message]
     )
+    gr.Markdown("### Lưu ý")
+    gr.Markdown("- Quá trình tạo video có thể mất vài phút")
+    gr.Markdown("- Kết quả tốt nhất với ảnh người rõ nét, chụp thẳng")
+    gr.Markdown("- Sử dụng prompt cụ thể để điều khiển kiểu chuyển động")
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,7 +1,6 @@
 gradio==4.0.2
 torch
 torchvision
-pillow
-numpy
-imageio==2.31.1
-imageio-ffmpeg

 gradio==4.0.2
 torch
 torchvision
+diffusers>=0.24.0
+transformers>=4.31.0
+accelerate