# IndicBharat / app.py
# Paulie-Aditya's picture
# first iteration
# d0fe667
import torch
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from IndicTransToolkit import IndicProcessor
# Run on the GPU when torch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Load model and tokenizer
# The same checkpoint name is passed to both from_pretrained calls below.
model_name = "ai4bharat/indictrans2-indic-en-1B"
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# repository to run locally — confirm the source is trusted.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
model = model.to(DEVICE)

# Shared pre/post-processor used by translate().
ip = IndicProcessor(inference=True)
def translate(text, src_lang="hin_Deva", tgt_lang="eng_Latn"):
    """Translate a single piece of text with the module-level model.

    The text is pre-processed with ``ip``, tokenized, passed through
    ``model.generate`` with beam search, decoded, and post-processed.

    Args:
        text: The source text to translate.
        src_lang: Source language tag passed to ``ip.preprocess_batch``.
        tgt_lang: Target language tag passed to ``ip.preprocess_batch`` and
            ``ip.postprocess_batch``.

    Returns:
        The translated string, or ``""`` when ``text`` is ``None``, empty,
        or contains only whitespace.
    """
    # Nothing to translate: skip pre-processing and beam search entirely
    # instead of feeding an empty sentence to the model.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    # The processor and tokenizer both work on batches, so wrap the single
    # sentence in a one-element list.
    batch = ip.preprocess_batch([text], src_lang=src_lang, tgt_lang=tgt_lang)
    inputs = tokenizer(
        batch,
        padding="longest",
        return_tensors="pt",
        truncation=True,
    ).to(DEVICE)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        generated_tokens = model.generate(
            **inputs,
            use_cache=True,
            min_length=0,
            max_length=256,
            num_beams=5,
            num_return_sequences=1,
        )

    # NOTE(review): as_target_tokenizer is deprecated in recent transformers
    # releases — confirm whether this checkpoint's tokenizer still needs it
    # before removing.
    with tokenizer.as_target_tokenizer():
        decoded = tokenizer.batch_decode(
            generated_tokens.detach().cpu().tolist(),
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

    translations = ip.postprocess_batch(decoded, lang=tgt_lang)
    # One sentence in, one translation out.
    return translations[0]
# Gradio UI and API
# Sample inputs shown in the UI; each inner list holds the value for the
# single text input of one example.
_example_rows = [
    ["जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।"],
    ["हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।"],
]

# Only the text argument of translate() is exposed, so src_lang and tgt_lang
# always take their defaults when called through this interface.
demo = gr.Interface(
    fn=translate,
    inputs="text",
    outputs="text",
    title="IndicTrans2 Translator",
    description="Translate Indic languages to English using AI4Bharat's IndicTrans2 model",
    examples=_example_rows,
)

# Start the app as soon as this file is executed.
demo.launch()