import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
import logging
import time

# Logging setup: root logger at INFO, plus a module-scoped logger for this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The tokenizer and model are loaded once, at import time.
# Both are fetched from the same checkpoint identifier.
_MODEL_ID = "oliverguhr/fullstop-punctuation-multilang-large"

logger.info("Caricamento modello e tokenizer...")
start_time = time.time()

try:
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
    model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)
except Exception as e:
    # Log the cause, then abort the import with a chained error.
    logger.error(f"Errore nel caricamento del modello: {e}")
    raise RuntimeError("Impossibile caricare il modello") from e
else:
    logger.info(f"Modello caricato in {time.time() - start_time:.2f} secondi")

# Maps a predicted label id to the punctuation mark appended after a word.
# The position of each entry in the tuple is its label id.
PUNCTUATION_MAP = dict(
    enumerate(
        (
            "",   # 0: no punctuation
            ".",  # 1: full stop
            ",",  # 2: comma
            "?",  # 3: question mark
            "-",  # 4: dash
            ":",  # 5: colon
        )
    )
)

def restore_punctuation(text: str) -> str:
    """Restore punctuation in ``text`` using the token-classification model.

    The text is tokenized, one label is predicted per sub-token, sub-tokens are
    regrouped into words at the "▁" word-start marker, and each word is passed
    to ``process_word`` together with the labels of its sub-tokens. The
    resulting words are joined with single spaces.

    Args:
        text: Input text without punctuation.

    Returns:
        The rebuilt text with punctuation appended to words. Falsy or
        whitespace-only input is returned unchanged. If processing fails, a
        string starting with "Errore nell'elaborazione: " is returned instead
        of raising.

    Note:
        The tokenizer truncates the input to 512 tokens, so anything beyond
        that limit is absent from the result. A warning is logged when the
        limit is reached.
    """
    if not text or not text.strip():
        return text

    max_length = 512

    try:
        logger.info(f"Elaborazione testo: '{text}'")

        # Tokenize the text (truncated to the maximum sequence length).
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)

        # Truncation is otherwise silent: surface it in the logs.
        if inputs["input_ids"].shape[1] >= max_length:
            logger.warning(
                "Il testo raggiunge il limite di %d token: "
                "l'eventuale parte eccedente è stata ignorata",
                max_length,
            )

        # Run the prediction without tracking gradients.
        with torch.no_grad():
            outputs = model(**inputs)

        # Pick the highest-scoring label for every sub-token.
        predictions = torch.argmax(outputs.logits, dim=2)

        tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        labels = predictions[0].tolist()

        # Hoisted out of the loop: these tokens never belong to a word.
        special_tokens = {tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token}

        # Rebuild the text word by word.
        words = []
        current_word = ""
        word_labels = []

        for token, label in zip(tokens, labels):
            if token in special_tokens:
                continue

            if token.startswith("▁"):  # word-start marker
                # Flush the previous word, if it has any text.
                if current_word:
                    process_word(words, current_word, word_labels)

                # Always start the new word with a fresh label list, so the
                # label of a preceding bare "▁" token cannot leak into it.
                current_word = token[1:]
                word_labels = [label]
            else:
                # Continuation piece of the current word.
                current_word += token
                word_labels.append(label)

        # Flush the last word.
        if current_word:
            process_word(words, current_word, word_labels)

        result = " ".join(words)
        logger.info(f"Risultato: '{result}'")
        return result

    except Exception as e:
        # logger.exception keeps the traceback; the caller still gets a
        # displayable message rather than an exception.
        logger.exception(f"Errore durante la punteggiatura: {str(e)}")
        return f"Errore nell'elaborazione: {str(e)}"

def process_word(words: list, word: str, labels: list):
    """Append ``word`` to ``words``, followed by its punctuation mark if any.

    Labels equal to 0 are ignored. Among the remaining labels the most frequent
    one wins and is looked up in ``PUNCTUATION_MAP`` (unknown labels map to an
    empty string). Ties go to the first label reached when iterating the set of
    candidates.

    Args:
        words: Output list, mutated in place.
        word: The word text to append.
        labels: Labels predicted for the word's sub-tokens.
    """
    marked = [lab for lab in labels if lab != 0]

    # Guard clause: no sub-token asked for punctuation.
    if not marked:
        words.append(word)
        return

    # Explicit vote count; strict ">" keeps the first label with the top count.
    winner = None
    winner_votes = 0
    for candidate in set(marked):
        votes = marked.count(candidate)
        if votes > winner_votes:
            winner, winner_votes = candidate, votes

    words.append(word + PUNCTUATION_MAP.get(winner, ""))

# Build the interface using Gradio 5
def create_gradio5_interface():
    """Build the Gradio Blocks interface for the punctuation tool.

    The layout has an input textbox with submit/clear buttons, a read-only
    output textbox, a statistics panel, two rows of example buttons and a
    footer. Submitting (button or Enter) runs ``restore_punctuation`` on the
    input and refreshes the output and the statistics.

    Returns:
        The assembled ``gr.Blocks`` instance; it is not launched here.
    """

    # Predefined examples
    examples = [
        "ciao come stai",
        "buongiorno a tutti",
        "mi chiamo marco piacere di conoscerti",
        "que horas son en madrid",
        "bonjour comment allez vous aujourdhui",
        "hello how are you today",
        "what time is it now",
        "ich liebe diese musik sehr"
    ]

    # Shown in the statistics panel initially and after "clear".
    stats_placeholder = "<div class='example-box'>Inserisci del testo per vedere le statistiche</div>"

    # Main interface
    with gr.Blocks(
        title="🔤 Punteggiatura Automatica Multilingua",
        theme=gr.themes.Soft(),
        css="""
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
        }
        .example-box {
            background: #f8f9fa;
            border-radius: 10px;
            padding: 1rem;
            margin: 1rem 0;
        }
        """
    ) as demo:

        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🔤 Punteggiatura Automatica Multilingua</h1>
            <p>Aggiungi automaticamente la punteggiatura al tuo testo usando AI avanzata</p>
            <p><em>Supporta italiano, inglese, spagnolo, francese, tedesco e molte altre lingue</em></p>
        </div>
        """)

        # Input column (left) and output column (right)
        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="📝 Testo senza punteggiatura",
                    placeholder="Scrivi o incolla qui il tuo testo senza punteggiatura...",
                    lines=8,
                    max_lines=20,
                    show_copy_button=True
                )

                with gr.Row():
                    submit_btn = gr.Button(
                        "✨ Aggiungi Punteggiatura",
                        variant="primary",
                        size="lg"
                    )
                    clear_btn = gr.Button(
                        "🗑️ Pulisci",
                        variant="secondary"
                    )

            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="✅ Testo con punteggiatura",
                    lines=8,
                    max_lines=20,
                    show_copy_button=True,
                    interactive=False
                )

                # Statistics panel
                stats = gr.HTML(
                    value=stats_placeholder,
                    label="📊 Statistiche"
                )

        # Examples section
        gr.Markdown("## 🎯 Esempi da provare")

        # Each entry pairs a button with the example text it triggers.
        example_buttons = []

        # First row: the first four examples.
        with gr.Row():
            for example in examples[:4]:
                btn = gr.Button(f"💬 {example}", size="sm")
                example_buttons.append((btn, example))

        # Second row: the remaining examples.
        with gr.Row():
            for example in examples[4:]:
                btn = gr.Button(f"💬 {example}", size="sm")
                example_buttons.append((btn, example))

        def calculate_stats(original, processed):
            """Return the statistics panel HTML for an input/output pair."""
            if not original or not processed:
                return "<div class='example-box'>Nessuna statistica disponibile</div>"

            orig_words = len(original.split())
            proc_chars = len(processed)

            # The output is rebuilt with single spaces between words, so raw
            # lengths would count collapsed whitespace as "negative" additions.
            # Compare whitespace-normalized lengths and never report below zero.
            normalized_original = " ".join(original.split())
            normalized_processed = " ".join(processed.split())
            punct_added = max(0, len(normalized_processed) - len(normalized_original))

            return f"""
            <div class='example-box'>
                <strong>📊 Statistiche di elaborazione:</strong><br>
                • Parole elaborate: {orig_words}<br>
                • Caratteri aggiunti: {punct_added}<br>
                • Lunghezza finale: {proc_chars} caratteri
            </div>
            """

        def process_with_stats(text):
            """Punctuate ``text`` and return ``(result, stats_html)``."""
            result = restore_punctuation(text)
            stats_html = calculate_stats(text, result)
            return result, stats_html

        # Events
        submit_btn.click(
            fn=process_with_stats,
            inputs=input_text,
            outputs=[output_text, stats],
            api_name="predict"
        )

        input_text.submit(
            fn=process_with_stats,
            inputs=input_text,
            outputs=[output_text, stats]
        )

        # Clear resets both textboxes and restores the placeholder statistics.
        clear_btn.click(
            fn=lambda: ("", "", stats_placeholder),
            inputs=None,
            outputs=[input_text, output_text, stats]
        )

        # Example buttons: fill the input and process it in one step.
        # `x=example` binds the text at definition time (avoids late binding).
        for btn, example in example_buttons:
            btn.click(
                fn=lambda x=example: (x, *process_with_stats(x)),
                inputs=None,
                outputs=[input_text, output_text, stats]
            )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f8f9fa; border-radius: 10px;">
            <p><strong>🤖 Powered by:</strong> oliverguhr/fullstop-punctuation-multilang-large</p>
            <p><em>Modello di AI specializzato nel ripristino della punteggiatura multilingua</em></p>
        </div>
        """)

    return demo

# Build the interface at import time; `demo` is a module-level name.
demo = create_gradio5_interface()

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": True,
        "share": False,
        "debug": True,
    }
    demo.launch(**launch_options)