import torch
import torchaudio
import gradio as gr
import look2hear.models

# Select a device and load the pretrained TIGER-DnR checkpoint from the Hugging Face Hub.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = look2hear.models.TIGERDNR.from_pretrained("JusperLee/TIGER-DnR", cache_dir="cache")
model.to(device)
model.eval()


def separate_audio(audio_file):
    """Separate a mixed audio file into dialog, effects, and music stems."""
    audio, sr = torchaudio.load(audio_file)
    # Downmix to mono if the upload is multi-channel; the DnR separation task
    # assumes single-channel input (assumption based on the DnR dataset format).
    if audio.shape[0] > 1:
        audio = audio.mean(dim=0, keepdim=True)
    audio = audio.to(device)

    # Add a batch dimension and run inference without tracking gradients.
    with torch.no_grad():
        all_target_dialog, all_target_effect, all_target_music = model(audio[None])

    # Write each separated stem to disk so Gradio can serve it back as a file.
    dialog_path = "dialog_output.wav"
    effect_path = "effect_output.wav"
    music_path = "music_output.wav"
    torchaudio.save(dialog_path, all_target_dialog.cpu(), sr)
    torchaudio.save(effect_path, all_target_effect.cpu(), sr)
    torchaudio.save(music_path, all_target_music.cpu(), sr)
    return dialog_path, effect_path, music_path


# Gradio UI: one audio input, three separated-stem outputs.
demo = gr.Interface(
    fn=separate_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=[
        gr.Audio(label="Dialog", type="filepath"),
        gr.Audio(label="Effects", type="filepath"),
        gr.Audio(label="Music", type="filepath"),
    ],
    title="TIGER-DnR Audio Separator",
    description=(
        "Upload a mixed audio file to separate it into dialog, effects, and "
        "music using the TIGER-DnR model."
    ),
)

if __name__ == "__main__":
    demo.launch()
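
# Usage note (assumptions: this file is saved as app.py, and the look2hear
# package is installed, e.g. from its GitHub repository, alongside
# torch, torchaudio, and gradio):
#   python app.py
# Gradio prints a local URL; open it in a browser to upload a mixture and
# listen to or download the three separated stems.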