Spaces:

leenag
/

Multilingual_TTS

Running

App Files Files Community

leenag committed on Apr 16

Commit

66d0bf1

verified ·

1 Parent(s): d0eaaa1

Create app.py

Browse files

Files changed (1) hide show

app.py +51 -0

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import gradio as gr
+import torch
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
+import soundfile as sf
+import uuid
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = "ai4bharat/indic-parler-tts"
+model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+desc_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
+def synthesize(language, text, gender, emotion, speed, pitch, quality):
+    desc = (
+        f"A native {language} {gender.lower()} speaker with a {emotion.lower()} and expressive tone, "
+        f"speaking at a {speed.lower()} rate with {pitch.lower()} pitch and {quality.lower()} voice quality."
+    )
+    desc_inputs = desc_tokenizer(desc, return_tensors="pt").to(device)
+    text_inputs = tokenizer(text, return_tensors="pt").to(device)
+    gen_audio = model.generate(
+        input_ids=desc_inputs.input_ids,
+        attention_mask=desc_inputs.attention_mask,
+        prompt_input_ids=text_inputs.input_ids,
+        prompt_attention_mask=torch.ones_like(text_inputs.input_ids).to(device)
+    )
+    audio_np = gen_audio.cpu().numpy().squeeze()
+    filename = f"{uuid.uuid4()}.wav"
+    sf.write(filename, audio_np, model.config.sampling_rate)
+    return filename
+iface = gr.Interface(
+    fn=synthesize,
+    inputs=[
+        gr.Dropdown(["Malayalam", "English", "Hindi", "Tamil"], label="Language"),
+        gr.Textbox(label="Text to Synthesize", lines=4),
+        gr.Radio(["Male", "Female"], label="Speaker Gender"),
+        gr.Dropdown(["Neutral", "Happy", "Sad", "Angry"], label="Emotion"),
+        gr.Dropdown(["Slow", "Moderate", "Fast"], label="Speaking Rate"),
+        gr.Dropdown(["Low", "Normal", "High"], label="Pitch"),
+        gr.Dropdown(["Basic", "Refined"], label="Voice Quality"),
+    ],
+    outputs=gr.Audio(type="filepath", label="Synthesized Audio"),
+    allow_flagging="never",
+    title="Multilingual TTS using Indic Parler-TTS",
+    description="Type text, choose a speaker style, and get synthesized speech for Malayalam, Hindi, Tamil, or English."
+)
+iface.launch()