"""Gradio front end that sends a user prompt through PyRIT against a local model.

Running this module builds the PyRIT client at import time and then starts a
Gradio web UI with a single text input and a single text output.
"""

import gradio as gr

# NOTE(review): these module paths and the `PyRIT(...).run(...)` call are kept
# exactly as written; confirm they match the PyRIT release that is installed.
from pyrit.core.pyrit import PyRIT
from pyrit.core.config import LLMProvider
from pyrit.llm_providers.huggingface_local import HuggingFaceLocal

# Single source of truth for the generation budget; it is passed both to the
# provider and to every `pyrit.run` call, so the two can no longer drift apart.
MAX_TOKENS = 512

# Initialize PyRIT with a local HF model (you can change this)
provider = HuggingFaceLocal(
    model_name="HuggingFaceH4/zephyr-7b-beta",  # Change model here
    max_tokens=MAX_TOKENS,
)
pyrit = PyRIT(provider=provider)


def attack_prompt(prompt: str) -> str:
    """Run the "jailbreak" attack on *prompt* and return a display string.

    Args:
        prompt: Text entered in the UI's "Prompt to Attack" box.

    Returns:
        The value returned by ``pyrit.run`` prefixed with a success banner,
        or the exception message prefixed with an error banner if the call
        raised.
    """
    # This is the UI boundary: any failure is turned into text for the output
    # box instead of propagating into Gradio.
    try:
        result = pyrit.run(prompt, attack="jailbreak", max_tokens=MAX_TOKENS)
    except Exception as e:
        return f"❌ Error: {e}"
    # NOTE(review): the banner is shown whenever `run` returns without raising;
    # nothing here inspects `result` to check that the attack worked.
    return f"✅ Successful Attack:\n{result}"


demo = gr.Interface(
    fn=attack_prompt,
    inputs=gr.Textbox(
        label="Prompt to Attack",
        placeholder="Enter a benign-looking prompt...",
    ),
    outputs=gr.Textbox(label="Attack Result"),
    title="🧪 PyRIT - Red Teaming Hugging Face LLMs",
    description=(
        "This tool uses PyRIT to test Hugging Face models for "
        "jailbreak-style adversarial prompts."
    ),
)

# Launched unconditionally, as in the original script, so that running or
# importing this module still starts the UI.
demo.launch()