# Hugging Face Spaces app (status when captured: Running)
import os | |
import gradio as gr | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Model configuration: the checkpoint to load and the folder handed to the
# loader as its offload location.
MODEL_NAME = "UnfilteredAI/UNfilteredAI-1B"
OFFLOAD_DIR = "./offload"

# The offload folder must exist before the model is loaded.
os.makedirs(OFFLOAD_DIR, exist_ok=True)

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _model_dtype = torch.float16
else:
    _model_dtype = torch.float32

# Load the tokenizer and the causal LM from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    offload_folder=OFFLOAD_DIR,
    torch_dtype=_model_dtype,
)

# Inference only: switch the module to evaluation mode.
model.eval()
# Style preamble placed in front of every user prompt. The value begins and
# ends with a newline, one entry per line in between.
STYLE_PROMPT = (
    "\n"
    "You are a professional ghostwriter. Your writing is:\n"
    "- Thick with rich description\n"
    "- Heavy and raw in tone\n"
    "- Mature, triggering, and relatable\n"
    "- Explicit within acceptable content guidelines\n"
    "- Italicized for internal monologue or emotional emphasis\n"
    "- Structured and catchy\n"
)
def stream_generate(prompt, temperature, max_tokens):
    """Stream a completion for ``prompt``, yielding the text generated so far.

    The style preamble and the user prompt are joined into a single input
    text. The first forward pass consumes the whole prompt and fills the
    key/value cache; every later pass feeds only the newest token. After
    each new token the accumulated completion is yielded so a streaming
    consumer can refresh its display.

    Args:
        prompt: User-supplied text; surrounding whitespace is stripped.
        temperature: Sampling temperature applied to the logits before
            softmax. Values <= 0 fall back to greedy (argmax) decoding.
        max_tokens: Upper bound on the number of generated tokens. Cast to
            ``int`` because UI sliders may deliver floats.

    Yields:
        str: The completion so far, without the prompt and without special
        tokens, stripped of surrounding whitespace.
    """
    input_text = f"{STYLE_PROMPT}\n\nUser prompt:\n{prompt.strip()}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    prompt_length = input_ids.shape[-1]

    output_ids = input_ids
    # The first step must see the full prompt; with an empty cache, feeding
    # only the last token would discard all of the context.
    step_input = input_ids
    past_key_values = None
    temperature = float(temperature)
    eos_token_id = tokenizer.eos_token_id

    for _ in range(int(max_tokens)):
        with torch.no_grad():
            outputs = model(
                input_ids=step_input,
                past_key_values=past_key_values,
                use_cache=True,
            )
        past_key_values = outputs.past_key_values
        next_token_logits = outputs.logits[:, -1, :]

        if temperature > 0:
            # Sample so that temperature actually shapes the output; argmax
            # is invariant under division by a positive constant. Cast to
            # float32 so softmax/multinomial also work for half-precision
            # logits.
            probs = torch.softmax(next_token_logits.float() / temperature, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
        else:
            next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)
        next_token = next_token.to(output_ids.device)

        # Compare ids rather than decoded strings to detect end-of-sequence.
        if eos_token_id is not None and next_token.item() == eos_token_id:
            break

        output_ids = torch.cat([output_ids, next_token], dim=-1)
        step_input = next_token

        # Decode only the newly generated ids instead of string-replacing
        # the prompt out of the full decode, which breaks whenever the
        # tokenizer round trip does not reproduce the input text exactly.
        generated = tokenizer.decode(
            output_ids[0, prompt_length:], skip_special_tokens=True
        ).strip()
        yield generated

        # A token that decodes to a blank line also ends the generation.
        if tokenizer.decode(next_token[0]) == "\n\n":
            break
# Gradio interface: a prompt box, two sampling controls, a streaming output
# box and a button that wires them to stream_generate.
# NOTE(review): the row nesting below is reconstructed because the original
# indentation was lost; confirm which components belong in each row.
with gr.Blocks(title="🧠 HuggingChat Stream Writer") as demo:
    gr.Markdown("## ✍️ Real-Time HuggingChat-Style Generator")
    gr.Markdown("*Watch your story unfold word by word...*")

    with gr.Row():
        prompt = gr.Textbox(
            label="Prompt",
            lines=5,
            placeholder="Describe a rainy night and inner conflict...",
        )
        temperature = gr.Slider(0.5, 1.5, value=0.9, step=0.1, label="Temperature")
        max_tokens = gr.Slider(50, 800, value=300, step=10, label="Max Tokens")

    with gr.Row():
        output = gr.Textbox(label="Generated Output (streaming)", lines=15)

    # stream_generate is a generator, so each yielded string replaces the
    # contents of the output box.
    gr.Button("Generate").click(
        fn=stream_generate,
        inputs=[prompt, temperature, max_tokens],
        outputs=output,
    )

# Generator handlers need the request queue; enable it explicitly so that
# streaming does not depend on the installed Gradio version's default.
demo.queue()
demo.launch()