Spaces:

Tinkabellaa
/

Ghostwriter

Running

App Files Community

Ghostwriter / app.py

Tinkabellaa

Update app.py

2ba3906 verified 2 months ago

raw

history blame contribute delete

2.58 kB

	import os
	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Checkpoint to load, and the directory handed to the loader as its offload folder.
	MODEL_NAME = "UnfilteredAI/UNfilteredAI-1B"
	OFFLOAD_DIR = "./offload"

	# Create the offload directory up front; an already-existing directory is fine.
	os.makedirs(OFFLOAD_DIR, exist_ok=True)

	# Load the tokenizer, then the model: half precision when CUDA is available,
	# full precision otherwise. `eval()` returns the module itself, so it can be
	# chained directly onto the loader call.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_NAME,
	    torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
	    offload_folder=OFFLOAD_DIR,
	    device_map="auto",
	).eval()

	# Style instructions that stream_generate places in front of the user's
	# prompt before tokenizing. The text is sent to the model verbatim.
	STYLE_PROMPT = """
	You are a professional ghostwriter. Your writing is:
	- Thick with rich description
	- Heavy and raw in tone
	- Mature, triggering, and relatable
	- Explicit within acceptable content guidelines
	- Italicized for internal monologue or emotional emphasis
	- Structured and catchy
	"""

	def stream_generate(prompt, temperature, max_tokens):
	    """Stream a completion for *prompt*, yielding the text generated so far.

	    The model input is ``STYLE_PROMPT`` followed by the stripped user prompt.
	    Tokens are produced one at a time using the model's key/value cache, and
	    after every token the whole continuation decoded so far is yielded.

	    Args:
	        prompt: User text; surrounding whitespace is stripped.
	        temperature: Sampling temperature. Positive values sample from the
	            temperature-scaled distribution; values <= 0 fall back to greedy
	            (argmax) decoding.
	        max_tokens: Upper bound on the number of new tokens. Coerced to
	            ``int`` because UI widgets may deliver a float.

	    Yields:
	        str: The decoded continuation (prompt excluded, special tokens
	        skipped, outer whitespace stripped) after each generated token.
	    """
	    input_text = f"{STYLE_PROMPT}\n\nUser prompt:\n{prompt.strip()}"
	    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
	    prompt_len = input_ids.shape[-1]

	    temperature = float(temperature)
	    eos_token_id = tokenizer.eos_token_id

	    output_ids = input_ids
	    # The first forward pass must see the whole prompt so the cache is built
	    # from it; later passes only need the newest token plus the cache.
	    step_input = input_ids
	    past_key_values = None

	    for _ in range(int(max_tokens)):
	        with torch.no_grad():
	            outputs = model(
	                input_ids=step_input,
	                past_key_values=past_key_values,
	                use_cache=True,
	            )
	        past_key_values = outputs.past_key_values
	        next_token_logits = outputs.logits[:, -1, :]

	        if temperature > 0:
	            # argmax is unaffected by positive scaling, so the temperature
	            # only matters when the next token is actually sampled.
	            probs = torch.softmax(next_token_logits.float() / temperature, dim=-1)
	            next_token = torch.multinomial(probs, num_samples=1)
	        else:
	            next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

	        output_ids = torch.cat([output_ids, next_token], dim=-1)
	        step_input = next_token

	        # Decode only the newly generated ids instead of string-replacing the
	        # prompt, which breaks when decode(encode(text)) != text.
	        generated = tokenizer.decode(
	            output_ids[0, prompt_len:], skip_special_tokens=True
	        ).strip()
	        yield generated

	        # Stop on end-of-sequence (compared by id) or on a blank-line token.
	        if eos_token_id is not None and next_token.item() == eos_token_id:
	            break
	        if tokenizer.decode(next_token[0]) == "\n\n":
	            break

	# Gradio UI: a prompt box with two sliders in the first row, the output box
	# in the second row, and a button that runs stream_generate on the inputs.
	with gr.Blocks(title="🧠 HuggingChat Stream Writer") as demo:
	    gr.Markdown("## ✍️ Real-Time HuggingChat-Style Generator")
	    gr.Markdown("Watch your story unfold word by word...")

	    with gr.Row():
	        prompt = gr.Textbox(
	            label="Prompt",
	            lines=5,
	            placeholder="Describe a rainy night and inner conflict...",
	        )
	        temperature = gr.Slider(
	            minimum=0.5, maximum=1.5, value=0.9, step=0.1, label="Temperature"
	        )
	        max_tokens = gr.Slider(
	            minimum=50, maximum=800, value=300, step=10, label="Max Tokens"
	        )

	    with gr.Row():
	        output = gr.Textbox(label="Generated Output (streaming)", lines=15)

	    # The generator's successive yields each replace the output box content.
	    gr.Button("Generate").click(
	        fn=stream_generate,
	        inputs=[prompt, temperature, max_tokens],
	        outputs=output,
	    )

	demo.launch()