# LTXpipeline / app.py
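# Dependencies (assumed pip names): torch, gradio, spaces, diffusers, gTTS,
# pydub, openai-whisper, ffmpeg-python, plus an ffmpeg binary on PATH, which
# both pydub and ffmpeg-python shell out to.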
import gradio as gr
import torch
import spaces
from diffusers import LTXConditionPipeline
from diffusers.utils import export_to_video
from gtts import gTTS
from pydub import AudioSegment
import whisper
import ffmpeg
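
# whisper's transcribe() returns timed "segments" rather than ready-made SRT
# text, so we format subtitles ourselves. A minimal sketch: _srt_timestamp and
# segments_to_srt are our own helpers, not part of the whisper API.
def _srt_timestamp(seconds: float) -> str:
    # SRT timestamps look like HH:MM:SS,mmm
    ms = int(round(seconds * 1000))
    h, ms = divmod(ms, 3_600_000)
    m, ms = divmod(ms, 60_000)
    s, ms = divmod(ms, 1_000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

def segments_to_srt(segments) -> str:
    # Each whisper segment carries "start", "end", and "text" keys.
    blocks = []
    for i, seg in enumerate(segments, start=1):
        blocks.append(
            f"{i}\n{_srt_timestamp(seg['start'])} --> "
            f"{_srt_timestamp(seg['end'])}\n{seg['text'].strip()}\n"
        )
    return "\n".join(blocks)
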
# Load the distilled LTX-Video pipeline once at startup (fp16 on GPU)
pipe = LTXConditionPipeline.from_pretrained(
"Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.float16
)
pipe.to("cuda")
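
# Optional memory savers (a sketch; both are standard diffusers toggles).
# Uncomment on smaller GPUs if 512x512 generation runs out of memory:
# pipe.vae.enable_tiling()
# pipe.enable_model_cpu_offload()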
@spaces.GPU(duration=120)
def generate_video(prompt):
    generator = torch.Generator("cuda").manual_seed(42)
    # Stage 1: generate the video in latent space. LTX-Video expects
    # num_frames of the form 8*k + 1, hence 25 rather than 24.
    latents = pipe(
        prompt=prompt,
        width=512,
        height=512,
        num_frames=25,
        output_type="latent",
        generator=generator,
        num_inference_steps=7,
    ).frames
    # Stage 2: run the pipeline again from those latents, decoding to PIL frames
    frames = pipe(
        prompt=prompt,
        latents=latents,
        num_frames=25,
        output_type="pil",
        generator=generator,
        num_inference_steps=7,
    ).frames[0]
    # Save the decoded frames as an MP4
    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=12)
    # Narrate the prompt with gTTS, then convert MP3 -> WAV for whisper
    tts = gTTS(text=prompt, lang="en")
    tts.save("voice.mp3")
    AudioSegment.from_mp3("voice.mp3").export("voice.wav", format="wav")
    # Transcribe the narration and write SRT subtitles. transcribe() returns
    # timed "segments" (there is no "srt" key), so format them ourselves.
    model = whisper.load_model("base")
    result = model.transcribe("voice.wav", language="en")
    with open("subtitles.srt", "w", encoding="utf-8") as f:
        f.write(segments_to_srt(result["segments"]))
    # Burn the subtitles into the video and mux in the narration. The
    # subtitles filter re-encodes the video, so stream copy (c="copy") can't
    # be used here, and audio comes in as a second input, not an output flag.
    video = ffmpeg.input(video_path).video.filter("subtitles", "subtitles.srt")
    audio = ffmpeg.input("voice.mp3").audio
    ffmpeg.output(
        video,
        audio,
        "final.mp4",
        shortest=None,  # -shortest: stop at the shorter of the two streams
        loglevel="error",
    ).run(overwrite_output=True)
    return "final.mp4"
# Gradio UI
demo = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Video(label="Generated video"),
)
demo.launch()