Ghostwriter / app.py
Tinkabellaa's picture
Update app.py
2ba3906 verified
import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# --- Model configuration -------------------------------------------------
# Hub identifier of the checkpoint and the local directory handed to the
# loader as its offload folder.
MODEL_NAME = "UnfilteredAI/UNfilteredAI-1B"
OFFLOAD_DIR = "./offload"

# The offload directory has to exist before the model loader is pointed at it.
os.makedirs(OFFLOAD_DIR, exist_ok=True)

# --- Tokenizer and model -------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision when CUDA is available, full precision otherwise.
# ``eval()`` returns the module itself, so chaining it keeps ``model`` bound
# to the loaded network while switching it to inference mode.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    offload_folder=OFFLOAD_DIR,
    device_map="auto",
).eval()
# Style preamble that stream_generate() places in front of the user's prompt.
# The empty first and last entries reproduce the leading and trailing newline
# of the prompt text.
STYLE_PROMPT = "\n".join(
    (
        "",
        "You are a professional ghostwriter. Your writing is:",
        "- Thick with rich description",
        "- Heavy and raw in tone",
        "- Mature, triggering, and relatable",
        "- Explicit within acceptable content guidelines",
        "- Italicized for internal monologue or emotional emphasis",
        "- Structured and catchy",
        "",
    )
)
def stream_generate(prompt, temperature, max_tokens):
    """Stream a completion for *prompt*, yielding the text generated so far.

    The user prompt is appended to ``STYLE_PROMPT``, tokenized, and extended
    one token at a time using the model's key/value cache. After every new
    token the accumulated completion (without the prompt) is yielded, so a
    consumer can display the text as it grows.

    Args:
        prompt: User-supplied text. ``None`` is treated as an empty prompt.
        temperature: Sampling temperature. Values above zero sample from the
            softmax of the temperature-scaled logits; zero or negative values
            fall back to greedy (argmax) decoding.
        max_tokens: Upper bound on the number of generated tokens. Coerced
            with ``int()`` so a float value such as ``300.0`` is accepted.

    Yields:
        str: The completion generated so far, with special tokens removed and
        surrounding whitespace stripped.
    """
    user_text = (prompt or "").strip()
    input_text = f"{STYLE_PROMPT}\n\nUser prompt:\n{user_text}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    prompt_len = input_ids.shape[-1]

    output_ids = input_ids
    # The first forward pass must see the whole prompt so the cache holds its
    # context; afterwards only the newest token is fed alongside the cache.
    step_input = input_ids
    past_key_values = None

    for _ in range(int(max_tokens)):
        with torch.no_grad():
            outputs = model(
                input_ids=step_input,
                past_key_values=past_key_values,
                use_cache=True,
            )
        past_key_values = outputs.past_key_values
        next_token_logits = outputs.logits[:, -1, :]

        if temperature > 0:
            # Scaling logits only matters when sampling: argmax is unchanged
            # by dividing by a positive constant. Softmax is done in float32
            # to stay numerically stable when the model runs in half precision.
            probs = torch.softmax(next_token_logits.float() / temperature, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
        else:
            next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

        next_token = next_token.to(output_ids.device)
        output_ids = torch.cat([output_ids, next_token], dim=-1)
        step_input = next_token

        # Decode only the newly generated ids instead of string-replacing the
        # prompt out of the full decode, which breaks whenever the decoded
        # prompt does not match ``input_text`` character for character.
        generated = tokenizer.decode(
            output_ids[0, prompt_len:], skip_special_tokens=True
        ).strip()
        yield generated

        # Stop on end-of-sequence (compared by id) or on a blank-line token.
        hit_eos = next_token[0, 0].item() == tokenizer.eos_token_id
        if hit_eos or tokenizer.decode(next_token[0]) == "\n\n":
            break
# Gradio interface: a prompt box plus two sliders feed stream_generate(),
# whose yielded text is shown in the output box.
# NOTE(review): indentation was lost in the reviewed copy of this file; the
# Row nesting below is a reconstruction - confirm against the intended layout.
with gr.Blocks(title="🧠 HuggingChat Stream Writer") as demo:
    gr.Markdown("## ✍️ Real-Time HuggingChat-Style Generator")
    gr.Markdown("*Watch your story unfold word by word...*")

    # Input controls.
    with gr.Row():
        prompt = gr.Textbox(
            label="Prompt",
            lines=5,
            placeholder="Describe a rainy night and inner conflict...",
        )
        temperature = gr.Slider(
            minimum=0.5,
            maximum=1.5,
            value=0.9,
            step=0.1,
            label="Temperature",
        )
        max_tokens = gr.Slider(
            minimum=50,
            maximum=800,
            value=300,
            step=10,
            label="Max Tokens",
        )

    # Output area updated on every yield of the generator.
    with gr.Row():
        output = gr.Textbox(label="Generated Output (streaming)", lines=15)

    gr.Button("Generate").click(
        fn=stream_generate,
        inputs=[prompt, temperature, max_tokens],
        outputs=output,
    )

demo.launch()