Spaces:
Build error
Build error
File size: 1,907 Bytes
d0fe667 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import torch
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from IndicTransToolkit import IndicProcessor
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint identifier shared by the tokenizer and the model.
model_name = "ai4bharat/indictrans2-indic-en-1B"

# trust_remote_code=True lets transformers run the custom tokenizer/model
# code that ships with the checkpoint repository.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# Move the weights once at start-up so each request only moves its inputs.
model = model.to(DEVICE)

# Pre/post-processor used around tokenisation and decoding in translate().
ip = IndicProcessor(inference=True)
def translate(text, src_lang="hin_Deva", tgt_lang="eng_Latn"):
    """Translate a single piece of text with the module-level model.

    The text is wrapped in a one-element batch, preprocessed with ``ip``,
    tokenised, run through beam search on ``DEVICE``, decoded and finally
    postprocessed with ``ip``.

    Args:
        text: The sentence (or short passage) to translate. ``None``, an
            empty string, or a whitespace-only string is treated as
            "nothing to translate".
        src_lang: Source language tag handed to ``ip.preprocess_batch``.
        tgt_lang: Target language tag handed to ``ip.preprocess_batch`` and
            ``ip.postprocess_batch``.

    Returns:
        The translated string, or ``""`` when ``text`` is blank.
    """
    # Blank input carries nothing to translate: return early instead of
    # paying for a 5-beam generation pass on an empty sentence.
    if text is None or not text.strip():
        return ""

    input_sentences = [text]
    batch = ip.preprocess_batch(
        input_sentences,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
    )

    inputs = tokenizer(
        batch,
        padding="longest",
        return_tensors="pt",
        truncation=True,
    ).to(DEVICE)

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        generated_tokens = model.generate(
            **inputs,
            use_cache=True,
            min_length=0,
            max_length=256,
            num_beams=5,
            num_return_sequences=1,
        )

    # NOTE(review): as_target_tokenizer() is deprecated in recent
    # transformers releases; confirm whether this checkpoint's tokenizer
    # still needs it for target-side decoding before removing it.
    with tokenizer.as_target_tokenizer():
        decoded = tokenizer.batch_decode(
            generated_tokens.detach().cpu().tolist(),
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

    translations = ip.postprocess_batch(decoded, lang=tgt_lang)
    # The batch held exactly one sentence, so there is exactly one result.
    return translations[0]
# Gradio UI and API
# Only the text box is wired to translate(), so src_lang and tgt_lang keep
# the defaults declared in its signature.
demo = gr.Interface(
    title="IndicTrans2 Translator",
    description="Translate Indic languages to English using AI4Bharat's IndicTrans2 model",
    fn=translate,
    inputs="text",
    outputs="text",
    # Each inner list is one example row; one value per input component.
    examples=[
        ["जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।"],
        ["हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।"],
    ],
)

# Start the web server as soon as the module is executed.
demo.launch()
|