Spaces: Running on Zero
import gradio as gr
import torch
import spaces
from diffusers import LTXConditionPipeline
from diffusers.utils import export_to_video
from gtts import gTTS
from pydub import AudioSegment
import whisper
import ffmpeg
import os

# Load the distilled LTX-Video pipeline once at startup
pipe = LTXConditionPipeline.from_pretrained(
    "Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.float16
)
pipe.to("cuda")
@spaces.GPU
def generate_video(prompt):
    generator = torch.Generator("cuda").manual_seed(42)

    # First pass: generate the video as latents
    # (LTX works best with num_frames of the form 8*k + 1, hence 25 instead of 24)
    latents = pipe(
        prompt=prompt,
        width=512,
        height=512,
        num_frames=25,
        output_type="latent",
        generator=generator,
        num_inference_steps=7,
    ).frames
    # Second pass: denoise from those latents and decode to PIL frames
    frames = pipe(
        prompt=prompt,
        latents=latents,
        num_frames=25,
        output_type="pil",
        generator=generator,
        num_inference_steps=7,
    ).frames[0]

    # Save the frames as an MP4
    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=12)
    # Narration: synthesize speech from the prompt, then convert to WAV for Whisper
    tts = gTTS(text=prompt, lang="en")
    tts.save("voice.mp3")
    AudioSegment.from_mp3("voice.mp3").export("voice.wav", format="wav")

    # Subtitles: Whisper's transcribe() returns "text"/"segments" (no "srt" key),
    # so build the SRT file from the segments
    model = whisper.load_model("base")
    result = model.transcribe("voice.wav", language="en")

    def srt_time(t):
        ms = int(t * 1000)
        h, rem = divmod(ms, 3_600_000)
        m, rem = divmod(rem, 60_000)
        s, ms = divmod(rem, 1_000)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

    with open("subtitles.srt", "w") as f:
        for i, seg in enumerate(result["segments"], start=1):
            f.write(f"{i}\n{srt_time(seg['start'])} --> {srt_time(seg['end'])}\n{seg['text'].strip()}\n\n")
    # Mux in the narration audio and burn in the subtitles
    # (the subtitles filter re-encodes video, so stream copy is not possible)
    video_in = ffmpeg.input(video_path)
    audio_in = ffmpeg.input("voice.mp3")
    ffmpeg.output(
        video_in,
        audio_in,
        "final.mp4",
        vf="subtitles=subtitles.srt",
        vcodec="libx264",
        acodec="aac",
        shortest=None,
        loglevel="error",
    ).overwrite_output().run()
    return "final.mp4"
# Gradio UI
demo = gr.Interface(fn=generate_video, inputs="text", outputs=gr.Video())
demo.launch()
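
For completeness: a ZeroGPU Space running this app would also need its Python and system
dependencies declared next to app.py. The sketch below is only an assumption inferred from
the imports above (package names are not taken from the original Space); gradio itself is
provided by the Space's sdk setting, and version pins can be added as needed.

requirements.txt
    torch
    spaces
    diffusers
    transformers
    accelerate
    sentencepiece
    imageio-ffmpeg
    gTTS
    pydub
    openai-whisper
    ffmpeg-python

packages.txt (apt packages; pydub and ffmpeg-python both call the ffmpeg binary)
    ffmpeg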