import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from peft import PeftModel # We'll load the base model and then the adapter

# --- Configuration ---
# IMPORTANT: Use the base model name for initial loading,
# as the uploaded model might be the merged one which we now know has issues
# when re-loaded without quantization settings.
# Or, if you want to load your *merged* model (eldntr/cendol-large-lora-indo2minang),
# you need to load it with the same BitsAndBytesConfig.
# Let's assume you push your 'final_merged_model' from the notebook to eldntr/cendol-large-lora-indo2minang
MODEL_ID = "eldntr/cendol-large-lora-indo2minang" 
# Ensure the base model used for fine-tuning is also available or mentioned
# It seems from your notebook you used "indonlp/cendol-mt5-large-inst" as the base
BASE_MODEL_ID = "indonlp/cendol-mt5-large-inst"


# Setup device (menggunakan GPU jika ada, jika tidak CPU)
# If running on a GPU, we will attempt 8-bit loading.
# If running on CPU (e.g., free Hugging Face Space), 8-bit loading won't apply directly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model Loading ---
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

    if device.type == "cuda":
        print(f"Attempting to load model {MODEL_ID} with 8-bit quantization on GPU...")
        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
        
        # Load the merged model with the same quantization config
        # that it was likely saved with if you pushed the `final_merged_model`
        model = AutoModelForSeq2SeqLM.from_pretrained(
            MODEL_ID,
            quantization_config=quantization_config,
            device_map="auto", # Use "auto" for proper multi-GPU/CPU offloading with 8-bit
            # No torch_dtype specified here if it's 8-bit. BitsAndBytes handles the dtypes.
            low_cpu_mem_usage=True # Still good practice
        )
        # You don't need prepare_model_for_kbit_training if it's already merged and you're not training.
        print(f"Model {MODEL_ID} berhasil dimuat dengan kuantisasi 8-bit ke {next(model.parameters()).device}")
    else:
        print(f"CUDA not available. Loading model {MODEL_ID} in full precision (float32) on CPU...")
        model = AutoModelForSeq2SeqLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float32, # Force float32 on CPU
            low_cpu_mem_usage=True # Still good practice
        ).to(device)
        print(f"Model {MODEL_ID} berhasil dimuat ke {device}")

except Exception as e:
    print(f"Fatal error loading model: {e}")
    error_msg = "Gagal memuat model. Ini mungkin karena model disimpan dengan konfigurasi khusus (misalnya 8-bit quantization) dan perlu dimuat dengan konfigurasi yang sama. Pastikan `bitsandbytes` terinstal dan coba lagi."
    if "Only Tensors of floating point and complex dtype can require gradients" in str(e):
         error_msg += " Kesalahan ini mengindikasikan masalah tipe data bobot yang kemungkinan besar disebabkan oleh ketidaksesuaian saat memuat model terkuantisasi."
    raise gr.Error(error_msg)

# --- Translation Function (rest of the code remains the same) ---
PROMPT_PREFIX = "Terjemahkan Indonesia ke Minangkabau: "
MAX_SOURCE_LENGTH = 128
MAX_TARGET_LENGTH = 128

def translate(text_input):
    """
    Fungsi utama untuk melakukan translasi.
    """
    if not text_input:
        return ""

    prompt = f"Terjemahkan Indonesia ke Minangkabau: {text_input}"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=MAX_SOURCE_LENGTH).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_TARGET_LENGTH,
            num_beams=5,
            early_stopping=True
        )

    translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translation

# --- Gradio Interface (rest of the code remains the same) ---
demo = gr.Interface(
    fn=translate,
    inputs=gr.Textbox(
        label="Teks Bahasa Indonesia",
        placeholder="Ketik kalimat di sini...",
        lines=4
    ),
    outputs=gr.Textbox(
        label="Terjemahan Minangkabau",
        interactive=False, # Output box tidak bisa diedit
        lines=4
    ),
    title="Penerjemah Indonesia ke Minangkabau",
    description="Aplikasi demo ini menggunakan model Cendol MT5 yang telah di-fine-tune pada dataset terjemahan Indonesia-Minangkabau.",
    article=f"Model yang digunakan: <a href='https://huggingface.co/{MODEL_ID}' target='_blank'>{MODEL_ID}</a>. Dibuat dengan Gradio dan dihosting di Hugging Face Spaces.",
    examples=[
        ["Saya ingin pergi ke pasar besok pagi."],
        ["Apa makanan favoritmu?"],
        ["Rumah makan itu sangat terkenal di kota ini."]
    ])

# --- Launch Application ---
if __name__ == "__main__":
    demo.launch()