# app.py
import gradio as gr
from transformers import AutoTokenizer, AutoModel
import torch
import matplotlib.pyplot as plt

# Load a default model with attention outputs enabled
MODEL_NAME = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME, output_attentions=True)


def visualize_attention(text):
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Grab attentions from the output:
    # a tuple of num_layers tensors, each of shape (batch, num_heads, seq_len, seq_len)
    attentions = outputs.attentions
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    fig, ax = plt.subplots(figsize=(8, 6))

    # Visualize attention from the last layer, first head only
    attn_matrix = attentions[-1][0][0].detach().numpy()
    cax = ax.matshow(attn_matrix, cmap="viridis")
    fig.colorbar(cax)

    ax.set_xticks(range(len(tokens)))
    ax.set_yticks(range(len(tokens)))
    ax.set_xticklabels(tokens, rotation=90)
    ax.set_yticklabels(tokens)
    ax.set_title("Attention Map - Last Layer, Head 1")

    return fig


iface = gr.Interface(
    fn=visualize_attention,
    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
    outputs=gr.Plot(),
    title="🧠 Transformer Attention Visualizer",
    description="Visualizes the self-attention of the BERT model's last layer.",
)

iface.launch()