import logging

import gradio as gr
from pyrit.core.pyrit import PyRIT
from pyrit.core.config import LLMProvider
from pyrit.llm_providers.huggingface_local import HuggingFaceLocal

# Model under test: a local Hugging Face checkpoint wrapped for PyRIT.
# Point _TARGET_MODEL at another checkpoint id to test a different model.
# NOTE(review): confirm the HuggingFaceLocal / PyRIT constructor arguments
# against the installed PyRIT release.
_TARGET_MODEL = "HuggingFaceH4/zephyr-7b-beta"
_PROVIDER_MAX_TOKENS = 512  # forwarded as the provider's max_tokens

provider = HuggingFaceLocal(model_name=_TARGET_MODEL, max_tokens=_PROVIDER_MAX_TOKENS)
pyrit = PyRIT(provider=provider)

def attack_prompt(prompt):
    """Run a jailbreak-style PyRIT attack on *prompt* and format the outcome.

    This is the Gradio callback, so it never raises: every outcome is
    turned into a string that can be shown in the output textbox.

    Args:
        prompt: Text entered by the user. ``None``, empty, or
            whitespace-only input is rejected without calling ``pyrit.run``.

    Returns:
        A string starting with "✅ Successful Attack:" followed by the value
        returned by ``pyrit.run``, or a string starting with "❌ Error:"
        describing why no result was produced.
    """
    # Nothing to attack: skip the model call for blank input.
    if not prompt or not prompt.strip():
        return "❌ Error: prompt is empty"

    try:
        result = pyrit.run(prompt, attack="jailbreak", max_tokens=512)
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the full
        # traceback in the logs so it is not lost.
        logging.getLogger(__name__).exception("PyRIT attack failed")
        return f"❌ Error: {e}"

    # NOTE(review): this label is applied to any call that returns without
    # raising; whether the jailbreak actually worked is not checked here.
    return f"✅ Successful Attack:\n{result}"

# Build the single-input / single-output web UI around attack_prompt and
# start serving it.
prompt_box = gr.Textbox(
    label="Prompt to Attack",
    placeholder="Enter a benign-looking prompt...",
)
result_box = gr.Textbox(label="Attack Result")

demo = gr.Interface(
    fn=attack_prompt,
    inputs=prompt_box,
    outputs=result_box,
    title="🧪 PyRIT - Red Teaming Hugging Face LLMs",
    description="This tool uses PyRIT to test Hugging Face models for jailbreak-style adversarial prompts.",
)
demo.launch()