Update app.py
app.py CHANGED
@@ -1,14 +1,21 @@
+import os
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
+# Model configuration
 MODEL_NAME = "OpenAssistant/oasst-sft-1-pythia-12b"
+OFFLOAD_DIR = "./offload"
 
-#
+# Ensure offload directory exists
+os.makedirs(OFFLOAD_DIR, exist_ok=True)
+
+# Load tokenizer and model safely for large weights
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     device_map="auto",
+    offload_folder=OFFLOAD_DIR,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 )
 model.eval()
@@ -21,9 +28,6 @@ You are a professional ghostwriter. Your writing is:
 - Explicit within acceptable content guidelines
 - Italicized for internal monologue or emotional emphasis
 - Structured and catchy
-- Explicit and authentic
-- Sexual and mixture of sfw and nsfw
-- Realistic and slice of life
 """
 
 def stream_generate(prompt, temperature, max_tokens):
@@ -48,6 +52,7 @@ def stream_generate(prompt, temperature, max_tokens):
         if tokenizer.decode(next_token[0]) in [tokenizer.eos_token, "\n\n"]:
             break
 
+# Gradio interface
 with gr.Blocks(title="🧠 HuggingChat Stream Writer") as demo:
     gr.Markdown("## ✍️ Real-Time HuggingChat-Style Generator")
     gr.Markdown("*Watch your story unfold word by word...*")