"""Gradio demo that synthesizes Acehnese speech with the MMS VITS model.

Importing this module loads the tokenizer and model for ``model_id`` and
builds the ``demo`` interface; the web UI is launched only when the file is
run as a script.
"""

import torch
import gradio as gr
from transformers import VitsModel, VitsTokenizer

# Load the TTS model and tokenizer for Acehnese
model_id = "facebook/mms-tts-ace"
tokenizer = VitsTokenizer.from_pretrained(model_id)
model = VitsModel.from_pretrained(model_id)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


# TTS function
def tts_aceh(text):
    """Synthesize speech for *text* and return it for a ``gr.Audio`` output.

    Args:
        text: The text entered by the user in the Gradio textbox.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``sample_rate`` is read
        from ``model.config.sampling_rate`` and ``waveform`` is the first
        item of the model's output waveform converted to a NumPy array.

    Raises:
        gr.Error: If *text* is ``None``, empty, or only whitespace.
    """
    # Reject blank input up front: there is nothing to synthesize, and a
    # clear message in the UI is more useful than a model failure or noise.
    if text is None or not text.strip():
        raise gr.Error("Please enter some Acehnese text.")

    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Inference only; gradient tracking is not needed.
    with torch.no_grad():
        output = model(**inputs)

    # Move the audio back to the CPU before converting it to NumPy.
    waveform = output.waveform[0].cpu().numpy()
    sample_rate = model.config.sampling_rate
    return (sample_rate, waveform)


# Gradio UI
demo = gr.Interface(
    fn=tts_aceh,
    inputs=gr.Textbox(label="Enter Acehnese text"),
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="Acehnese TTS (Text-to-Speech)",
    description=(
        "This is a text-to-speech tool for the Acehnese language using Meta's MMS model. "
        "To use: 1) Enter text in Acehnese, 2) Click Submit to hear it spoken aloud.\n\n"
        "Note: Reuse, redistribution, or derivative use is not allowed unless you ask for permission. "
        "Enjoy responsibly, and feel free to share feedback or support!"
    ),
)

if __name__ == "__main__":
    demo.launch()