Commit 67dff27
Parent(s): 1283f75
story agent

Files changed:
- Tools/audio_agent.py +131 -0
- Tools/build_world.py +91 -0
- Tools/extract_facts.py +96 -0
- Tools/extract_image.py +46 -0
- Tools/generate_choices.py +80 -0
- Tools/image_agent.py +86 -0
- Tools/imagedecider.py +77 -0
- Tools/story_generator.py +123 -0
- Tools/validate_consistency.py +50 -0
- app.py +333 -0
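
Note: the commit adds no requirements file. A hedged, unpinned dependency list inferred only from the imports in these files (not an actual file in the commit) would be roughly:

pip install gradio torch transformers diffusers soundfile nltk g2p-en smolagents huggingface_hub Pillow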
Tools/audio_agent.py
ADDED
@@ -0,0 +1,131 @@
# voice_narration_tool.py
# ------------- libraries to install ---------- #
# pip install g2p-en nltk smolagents
import torch
import soundfile as sf
import nltk
from transformers import FastSpeech2ConformerTokenizer, FastSpeech2ConformerWithHifiGan
from smolagents import tool
import os
import warnings
warnings.filterwarnings("ignore")
nltk.download('all')

# ──────────────────────────────
# Global FastSpeech2 model for voice narration
VOICE_MODEL = "espnet/fastspeech2_conformer_with_hifigan"
_tokenizer_voice = FastSpeech2ConformerTokenizer.from_pretrained("espnet/fastspeech2_conformer")
_model_voice = FastSpeech2ConformerWithHifiGan.from_pretrained(VOICE_MODEL)
_model_voice.eval()
# ──────────────────────────────

@tool
def generate_voice_narration(text: str, output_filename: str = "narration.wav") -> str:
    """
    Generate voice narration from text using the FastSpeech2 TTS model.

    Args:
        text (str): The text to convert to speech
        output_filename (str): Output audio filename (default: "narration.wav")

    Returns:
        str: Path to the generated audio file
    """
    try:
        # Clean and prepare text
        clean_text = text.strip()
        if not clean_text:
            return "Error: Empty text provided"

        # Tokenize the input text
        inputs = _tokenizer_voice(clean_text, return_tensors="pt")

        # Generate audio
        with torch.no_grad():
            # Move inputs to the same device as the model if needed
            if torch.cuda.is_available() and next(_model_voice.parameters()).is_cuda:
                inputs = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}

            # Generate speech
            output = _model_voice(**inputs)
            audio = output.waveform.squeeze().cpu().numpy()

        # Ensure output directory exists
        os.makedirs(os.path.dirname(output_filename) or ".", exist_ok=True)

        # Save audio file
        sample_rate = 22050  # FastSpeech2 HiFi-GAN default sample rate
        sf.write(output_filename, audio, samplerate=sample_rate)

        return f"Voice narration saved to: {output_filename}"

    except Exception as e:
        return f"Error generating voice narration: {str(e)}"

@tool
def generate_story_narration(story_text: str, chapter_name: str = "chapter") -> str:
    """
    Generate voice narration for story text, handling longer texts by splitting into sentences.

    Args:
        story_text (str): The story text to narrate
        chapter_name (str): Name prefix for the output file

    Returns:
        str: Status message with output file path
    """
    try:
        # Clean text
        clean_text = story_text.strip()
        if not clean_text:
            return "Error: Empty story text provided"

        # For longer texts we might split into sentences and concatenate,
        # but FastSpeech2 can handle reasonably long texts in one pass.

        output_filename = f"{chapter_name}_narration.wav"

        # Tokenize and generate
        inputs = _tokenizer_voice(clean_text, return_tensors="pt")

        with torch.no_grad():
            # Move to the appropriate device
            if torch.cuda.is_available() and next(_model_voice.parameters()).is_cuda:
                inputs = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}

            # Generate speech
            output = _model_voice(**inputs)
            audio = output.waveform.squeeze().cpu().numpy()

        # Save the narration
        sample_rate = 22050
        sf.write(output_filename, audio, samplerate=sample_rate)

        # Calculate duration for user feedback
        duration = len(audio) / sample_rate

        return f"Story narration completed! Saved to: {output_filename} (Duration: {duration:.2f} seconds)"

    except Exception as e:
        return f"Error generating story narration: {str(e)}"

if __name__ == "__main__":
    # --- Test Cases ---
    test_texts = [
        "The warrior stepped into the shadowy forest, his sword gleaming under the moonlight... there appeared a monster and he said WHAT ARE YOU DOING I WILL KILL YOU",
        "Captain Sarah Martinez floated weightlessly in the observation deck of the starship Enterprise.",
        "The old lighthouse keeper climbed the spiral stairs one last time, as the storm raged outside."
    ]

    print("Testing voice narration tool...")

    for i, text in enumerate(test_texts, 1):
        print(f"\nTest {i}: Generating narration for: '{text[:50]}...'")
        result = generate_voice_narration(text=text, output_filename=f"test_narration_{i}.wav")
        print(f"Result: {result}")

    # Test story narration
    story = " ".join(test_texts)
    print("\nTesting story narration with combined text...")
    story_result = generate_story_narration(story_text=story, chapter_name="test_story")
    print(f"Story Result: {story_result}")
Tools/build_world.py
ADDED
@@ -0,0 +1,91 @@
# Tools/build_world.py

from typing import Dict, Any
from smolagents import tool
import json
import torch
from llm_utils import tokenizer, model, generate_completion  # shared LLM utilities used below

@tool
def build_world(facts: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given a structured `facts` dictionary, returns a world-building dictionary with keys:
    - setting_description: a vivid 2-3 sentence paragraph describing the environment.
    - flora: a list of 3-5 plant species commonly found here.
    - fauna: a list of 3-5 animals or creatures one might encounter.
    - ambiance: a list of 3-5 sensory details (sounds, smells, tactile sensations).

    Args:
        facts (Dict[str, Any]): The structured facts extracted from the scene.

    Returns:
        Dict[str, Any]: A dictionary with exactly the four keys:
                        'setting_description', 'flora', 'fauna', and 'ambiance'.
    """
    # 1) Prepare the JSON-extraction prompt
    facts_json = json.dumps(facts, indent=2)
    prompt = f"""
You are a world-building assistant. Given these structured facts:

{facts_json}

Generate a JSON object with exactly these fields:
1) setting_description: a 2-3 sentence vivid paragraph describing the environment.
2) flora: a list of 3-5 plant species commonly found here.
3) fauna: a list of 3-5 animals or creatures one might encounter.
4) ambiance: a list of 3-5 sensory details (sounds, smells, tactile feelings).

Return ONLY valid JSON with those four keys.
"""

    # 2) Tokenize & move to device
    inputs = tokenizer.apply_chat_template(
        [
            {"role": "system", "content": "You convert structured facts into world-building JSON."},
            {"role": "user", "content": prompt}
        ],
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True  # keep return_dict=True to also get attention_mask
    )
    # Move each tensor to the model device
    for k, v in inputs.items():
        inputs[k] = v.to(model.device)

    # 3) Generate up to 256 new tokens
    with torch.no_grad():
        # Pass the input_ids tensor directly, plus the attention_mask if present
        outputs = model.generate(
            inputs["input_ids"],
            max_new_tokens=256,
            attention_mask=inputs.get("attention_mask")
        )

    # 4) Slice off the prompt tokens
    prompt_len = inputs["input_ids"].shape[-1]
    gen_ids = outputs[0][prompt_len:]

    # 5) Decode the JSON string
    raw = tokenizer.decode(gen_ids, skip_special_tokens=True)
    start = raw.find("{")
    candidate = raw[start:] if start >= 0 else raw

    # 6) Parse, with a defaults fallback
    defaults = {
        "setting_description": "",
        "flora": [],
        "fauna": [],
        "ambiance": []
    }
    try:
        world_dict = json.loads(candidate)
    except Exception:
        world_dict = defaults.copy()

    # 7) Ensure all keys are present
    for key, val in defaults.items():
        world_dict.setdefault(key, val)

    return world_dict
Tools/extract_facts.py
ADDED
@@ -0,0 +1,96 @@
# tools.py

from typing import Dict, Any
from smolagents import Tool
from transformers import AutoTokenizer, AutoModelForCausalLM
from llm_utils import tokenizer, model, generate_completion
import torch
import os
import json


class ExtractFactsTool(Tool):
    """
    Extracts structured facts from a scene using a local Transformers LLM.
    """

    name = "extract_facts"
    description = (
        "Given a narrative paragraph, extracts and returns JSON with keys: "
        "location, weather, time_of_day, main_character, npc_states, "
        "inventory_items, events."
    )

    inputs = {
        "scene_text": {
            "type": "string",
            "description": "The narrative paragraph from which to extract facts.",
            "required": True
        }
    }
    # Change output_type from "json" or "dict" to "object"
    output_type = "object"

    def forward(self, scene_text: str) -> Dict[str, Any]:
        # 1) Build the instruction + content prompt
        prompt = f"""
You are a fact-extraction assistant. Extract exactly the following keys and output valid JSON:
1) location: e.g. "rainy_forest" or null
2) weather: e.g. "rainy" or null
3) time_of_day: e.g. "evening" or null
4) main_character: protagonist name or null
5) npc_states: dict of other characters -> {{status, location}}, or {{}}
6) inventory_items: list of item names, or []
7) events: 1-2 sentence summary of what happened

Scene:
\"\"\"
{scene_text}
\"\"\"
"""

        # 2) Tokenize using the chat template.
        # The output of apply_chat_template with return_tensors="pt" is a single tensor.
        # It does not need to be converted to a dictionary for model.generate.
        inputs_tensor = tokenizer.apply_chat_template(
            [{"role": "system", "content": "You extract JSON facts."},
             {"role": "user", "content": prompt}],
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)

        # 3) Generate up to 256 new tokens
        with torch.no_grad():
            # Pass the tensor directly to generate
            outputs = model.generate(inputs_tensor, max_new_tokens=256)

        # 4) Slice off the prompt tokens
        input_len = inputs_tensor.size(-1)  # use inputs_tensor to get the original input length
        gen_ids = outputs[0][input_len:]

        # 5) Decode and strip out anything before the first '{'
        raw = tokenizer.decode(gen_ids, skip_special_tokens=True)
        json_start = raw.find("{")
        candidate = raw[json_start:] if json_start >= 0 else raw

        # 6) Parse JSON (fallback to defaults on error)
        defaults = {
            "location": None,
            "weather": None,
            "time_of_day": None,
            "main_character": None,
            "npc_states": {},
            "inventory_items": [],
            "events": ""
        }
        try:
            fact_dict = json.loads(candidate)
        except Exception:
            fact_dict = defaults.copy()

        # 7) Ensure all required keys exist
        for k, v in defaults.items():
            fact_dict.setdefault(k, v)

        return fact_dict
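
Several tools in this commit (extract_facts.py above, plus build_world.py, extract_image.py, generate_choices.py, and imagedecider.py) import `tokenizer`, `model`, and `generate_completion` from an `llm_utils` module that is not part of the commit. Below is a minimal sketch of what such a module might look like, assuming a small chat-tuned causal LM; the model name and the `generate_completion` signature are illustrative assumptions, not the missing file.

# llm_utils.py -- hypothetical sketch; this module is NOT part of the commit.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Assumption: any small chat-tuned causal LM with a chat template would work here.
LLM_NAME = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
model = AutoModelForCausalLM.from_pretrained(
    LLM_NAME,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
model.eval()

def generate_completion(inputs=None, **kwargs):
    """Thin wrapper over model.generate. Accepts either a BatchEncoding/dict passed
    positionally (as in extract_image.py) or tensors expanded as keyword arguments
    (as in imagedecider.py), plus ordinary generation kwargs."""
    gen_kwargs = {k: v for k, v in kwargs.items() if not isinstance(v, torch.Tensor)}
    tensors = {k: v.to(model.device) for k, v in kwargs.items() if isinstance(v, torch.Tensor)}
    if inputs is not None:
        tensors.update({k: v.to(model.device) for k, v in dict(inputs).items()})
    with torch.no_grad():
        return model.generate(**tensors, **gen_kwargs)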
Tools/extract_image.py
ADDED
@@ -0,0 +1,46 @@
# Tools/scene_extractor_tool.py

from typing import Dict
from smolagents import tool
from llm_utils import tokenizer, model, generate_completion
import torch

@tool
def extract_scene(context: str) -> str:
    """
    Identify the key visual scene in one vivid paragraph (at most 77 tokens).

    Args:
        context (str): The narrative text from which to extract the key visual moment.

    Returns:
        str: One vivid paragraph describing the key visual moment.
    """
    prompt = f"""
You are a visual scene extractor. Given the text below,
produce one vivid paragraph (max 77 tokens) describing the key visual moment. Return only that paragraph.

Text:
\"\"\"
{context}
\"\"\"

Visual description:"""

    inputs = tokenizer.apply_chat_template(
        [{"role": "system", "content": "Extract a single visual scene."},
         {"role": "user", "content": prompt}],
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)  # move tensors to the model device (the tokenizer itself has no device)

    with torch.no_grad():
        outputs = generate_completion(inputs,
                                      max_new_tokens=100,
                                      temperature=0.0,
                                      do_sample=False)

    plen = inputs["input_ids"].shape[-1]
    gen_ids = outputs[0][plen:]
    raw = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
    # enforce the (approximate) 77-token limit by word count
    words = raw.split()
    if len(words) > 77:
        raw = " ".join(words[:77])
    return raw
Tools/generate_choices.py
ADDED
@@ -0,0 +1,80 @@
# Tools/generate_choices.py

from typing import Dict, Any, List
from smolagents import tool
from llm_utils import tokenizer, model, generate_completion
import torch
import json

@tool
def generate_choices(scene_text: str, facts: Dict[str, Any]) -> List[str]:
    """
    Generate 2-4 next-step choices for the reader based on the scene and facts.

    Args:
        scene_text (str): The latest narrative paragraph.
        facts (Dict[str, Any]): Structured facts (location, weather, npc_states, etc.)

    Returns:
        List[str]: A list of between 2 and 4 short choice strings.
    """
    facts_json = json.dumps(facts, indent=2)
    prompt = f"""
You are an interactive-story choice generator. Given the scene and known facts below,
propose between 2 and 4 plausible next-step choices. Return *only* a JSON array of strings.

Scene:
\"\"\"
{scene_text}
\"\"\"

Facts:
{facts_json}

Requirements:
- 2 to 4 concise, actionable choices (max one sentence each).
- No extra commentary, just the JSON list.
"""

    # wrap in a chat template
    messages = [
        {"role": "system", "content": "You produce JSON arrays of story choices."},
        {"role": "user", "content": prompt}
    ]

    # tokenize & move to device
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)

    # generate (unpack the BatchEncoding so input_ids/attention_mask reach generate)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=128)

    # slice off prompt
    prompt_len = inputs["input_ids"].shape[-1]
    gen_ids = outputs[0][prompt_len:]

    # decode, find JSON
    raw = tokenizer.decode(gen_ids, skip_special_tokens=True)
    start = raw.find("[")
    candidate = raw[start:] if start >= 0 else raw

    # parse JSON, fallback
    try:
        choices = json.loads(candidate)
        if (
            isinstance(choices, list)
            and 2 <= len(choices) <= 4
            and all(isinstance(c, str) for c in choices)
        ):
            return choices
    except Exception:
        pass

    # fallback
    return ["Continue forward", "Turn back"]
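
The fact-extraction, world-building, and choice-generation tools above are meant to chain into one step of the story loop (app.py later in this commit wires mock stand-ins for them). Below is a hedged sketch of calling the real tools together, assuming an `llm_utils` module like the one sketched after extract_facts.py and that the repo root is on the import path; the scene text is made up for the example.

# Hypothetical one-step wiring of the real tools from this commit (run from the repo root).
from Tools.extract_facts import ExtractFactsTool
from Tools.build_world import build_world
from Tools.generate_choices import generate_choices

scene = "Rain hammered the forest canopy as Mira crept toward the ruined watchtower."

facts = ExtractFactsTool().forward(scene)                   # structured facts dict
world = build_world(facts=facts)                            # setting/flora/fauna/ambiance
choices = generate_choices(scene_text=scene, facts=facts)   # 2-4 next-step options

print(world["setting_description"])
print(choices)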
Tools/image_agent.py
ADDED
@@ -0,0 +1,86 @@
import os
import torch
from PIL import Image
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import warnings
from smolagents import tool

warnings.filterwarnings("ignore")

# Global pipeline variable for reuse
_pipeline = None

def get_pipeline():
    """Initialize and return the Stable Diffusion pipeline."""
    global _pipeline
    if _pipeline is None:
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            dtype = torch.float16 if torch.cuda.is_available() else torch.float32

            _pipeline = StableDiffusionPipeline.from_pretrained(
                "runwayml/stable-diffusion-v1-5",
                torch_dtype=dtype,
                safety_checker=None,
                requires_safety_checker=False
            ).to(device)

            if hasattr(_pipeline, 'enable_attention_slicing'):
                _pipeline.enable_attention_slicing()

        except Exception as e:
            print(f"Failed to load pipeline: {e}")
            _pipeline = "mock"

    return _pipeline

@tool
def generate_image(scene_prompt: str) -> Image.Image:
    """
    Generates a cartoon-style image from a scene prompt using Stable Diffusion v1.5.
    Falls back to a placeholder if loading fails.

    Args:
        scene_prompt (str): Description of the scene to generate

    Returns:
        PIL.Image.Image: Generated cartoon-style image
    """
    pipe = get_pipeline()

    # Fall back to a placeholder if pipeline loading failed
    if pipe == "mock":
        return Image.new('RGB', (512, 512), color='lightblue')

    # Enhance prompt for cartoon style
    prompt = f"cartoon style, {scene_prompt}, colorful, animated"

    # Generate image with a fixed seed for reproducibility
    gen = torch.Generator(device=pipe.device).manual_seed(42)

    result = pipe(
        prompt,
        guidance_scale=7.5,
        num_inference_steps=20,
        height=512,
        width=512,
        generator=gen
    )

    return result.images[0]

if __name__ == "__main__":
    # Test the function
    test_prompts = [
        "Cartoon cat wearing a wizard hat in a magical forest",
        "Cartoon robot dancing in a disco with neon lights",
        "Cartoon dragon flying over a rainbow castle"
    ]

    for i, prompt in enumerate(test_prompts, 1):
        print(f"Generating image {i}: '{prompt}'")
        img = generate_image(prompt)
        print(f"Result: Image size={img.size}, mode={img.mode}")

        # Optionally save the image
        # img.save(f"test_image_{i}.png")
Tools/imagedecider.py
ADDED
@@ -0,0 +1,77 @@
import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
from llm_utils import tokenizer, model, generate_completion
from smolagents import tool
import warnings
warnings.filterwarnings("ignore")

@tool
def check_significant_change(previous_context: str, current_context: str) -> int:
    """
    Compare previous and current context; return 1 if there is a major change (new scene/environment), else 0.

    Args:
        previous_context (str): The previous context text
        current_context (str): The current context text

    Returns:
        int: 1 if a major significant change is detected, 0 otherwise
    """
    prompt = f"""
Compare these two contexts and determine if there is a major significant change (like a new scene, environment, or dramatic shift in situation). Reply with only "change" for a major significant change, or "unchange" if the contexts are similar or show minor differences.

Previous: {previous_context}
Current: {current_context}

Answer (change or unchange):"""

    # wrap in a chat template
    messages = [
        {"role": "system", "content": "You detect significant changes between contexts. Reply only with 'change' or 'unchange'."},
        {"role": "user", "content": prompt}
    ]

    # tokenize & move to device
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)

    # generate
    with torch.no_grad():
        outputs = generate_completion(
            **inputs,
            max_new_tokens=10,
            temperature=0.0,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # slice off prompt
    prompt_len = inputs["input_ids"].shape[-1]
    gen_ids = outputs[0][prompt_len:]

    # decode response
    raw = tokenizer.decode(gen_ids, skip_special_tokens=True)
    response = raw.strip().lower()

    # check for change indicators
    if "change" in response and "unchange" not in response:
        return 1
    else:
        return 0

if __name__ == "__main__":
    # --- Test Cases ---
    tests = [
        ("John types at his desk in the morning light.", "John now types with a cup of coffee beside him."),
        ("Sarah walks through the quiet library browsing books.", "She stands on a cliff overlooking crashing waves."),
        ("Morning vendors set up at the market.", "The empty market is silent under the moonlight.")
    ]

    for i, (prev, curr) in enumerate(tests, 1):
        result = check_significant_change(previous_context=prev, current_context=curr)
        print(f"Test {i}: Prev='{prev}' | Curr='{curr}' -> Change Detected: {result}")
Tools/story_generator.py
ADDED
@@ -0,0 +1,123 @@
# story_generator.py

from typing import Optional
import os

from smolagents import Tool
from huggingface_hub import InferenceClient

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
torch.manual_seed(30)


HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN", "")
MODEL_NAME = "deepseek-ai/DeepSeek-V3-0324"

if not HF_TOKEN:
    raise RuntimeError("Please set HUGGINGFACE_API_TOKEN in your environment.")

class StoryGeneratorTool(Tool):
    name = "story_generator"
    description = "Generates the next scene of an interactive story."

    inputs = {
        "context": {
            "type": "string",
            "description": "Concatenated last N scenes + facts.",
            "required": True
        },
        "initial_prompt": {
            "type": "string",
            "description": "The very first user prompt to start the story.",
            "required": False,
            "nullable": True
        },
        "last_choice": {
            "type": "string",
            "description": "The user's choice from the previous step.",
            "required": False,
            "nullable": True
        },
    }

    output_type = "string"
    _client: Optional[InferenceClient] = None

    def _get_client(self) -> InferenceClient:
        if self._client is None:
            self._client = InferenceClient(token=HF_TOKEN)
        return self._client

    def forward(
        self,
        context: str,
        initial_prompt: Optional[str] = None,
        last_choice: Optional[str] = None,
    ) -> str:
        if initial_prompt and last_choice:
            raise ValueError("Provide exactly one of `initial_prompt` or `last_choice`.")

        # Build prompt
        if initial_prompt:
            system_content = (
                "You are a children's-book style storyteller. Generate a vivid opening scene."
            )
            user_content = f"User seed prompt:\n\"{initial_prompt}\"\n\nGenerate the opening scene."

        elif last_choice:
            system_content = (
                "You are a children's-book style storyteller. Continue from the last choice."
            )
            user_content = (
                f"Context:\n{context}\n\n"
                f"Last choice: \"{last_choice}\"\n\nGenerate the next scene."
            )

        else:
            system_content = (
                "You are a children's-book style storyteller. Continue based on context alone."
            )
            user_content = f"Context:\n{context}\n\nGenerate the next scene."

        messages = [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ]

        # 1) Load tokenizer/model and tokenize the messages
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16
        )

        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True  # return a dict so **inputs and inputs["input_ids"] both work
        ).to(model.device)

        # 2) Generate
        outputs = model.generate(**inputs, max_new_tokens=400)

        # 3) Extract only the generated portion (not the prompt)
        input_length = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][input_length:]

        # 4) Decode and return
        scene_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

        return scene_text

        # client = self._get_client()
        # # Non-streaming chat call
        # resp = client.chat_completion(
        #     model=MODEL_NAME,
        #     messages=messages,
        #     temperature=0.7,
        #     max_tokens=400,
        #     stream=False
        # )

        # return
ADDED
@@ -0,0 +1,50 @@
|
# tools.py

from typing import Dict, Any, List
from smolagents import tool, Tool, InferenceClientModel
from typing import Optional
import json
import os

# Make sure your HF token is set in the environment already:
# export HUGGINGFACE_API_TOKEN="Enter your hf token"
HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN", "")
if not HF_TOKEN:
    raise RuntimeError("Please set HUGGINGFACE_API_TOKEN in your environment.")

# === Choose your HF model name here ===
# For instance, "gpt2-hf-chat", or any chat-capable endpoint.
# If you are using a locally-deployed endpoint, point to its URL:
#   model_name = "https://api-inference.huggingface.co/models/your-username/your-chat-model"
# If you want to use an HF-hosted chat LLM (e.g. a fine-tuned Llama 2), use:
#   model_name = "meta-llama/Llama-2-7b-chat-hf"
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Lazily-created shared client
_hf_client: Optional[InferenceClientModel] = None

def _get_hf_client() -> InferenceClientModel:
    global _hf_client
    if _hf_client is None:
        _hf_client = InferenceClientModel(
            model_name=model_name,
            api_token=HF_TOKEN
        )
    return _hf_client


@tool
def validate_consistency(old_facts: Dict[str, Any], new_facts: Dict[str, Any]) -> bool:
    """
    Validate that the new_facts do not contradict the old_facts.
    For each core key ("location", "weather", "time_of_day"):
      - If old_facts[key] is not None and new_facts[key] is not None
        and they differ, return False (inconsistent).
    Otherwise, return True.

    Args:
        old_facts (Dict[str, Any]): Facts established in earlier scenes.
        new_facts (Dict[str, Any]): Facts extracted from the latest scene.

    Returns:
        bool: True if the facts are consistent, False otherwise.
    """
    core_keys = ["location", "weather", "time_of_day"]

    for key in core_keys:
        old_val = old_facts.get(key)
        new_val = new_facts.get(key)
        if old_val is not None and new_val is not None and old_val != new_val:
            return False

    return True
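
A quick illustration of the consistency rule above; the fact dictionaries are made up for the example, and `None` is treated as "unknown" rather than a contradiction.

# Hypothetical check: same location/time is consistent, a changed location is not.
old_facts = {"location": "rainy_forest", "weather": "rainy", "time_of_day": "evening"}
consistent = {"location": "rainy_forest", "weather": None, "time_of_day": "evening"}
contradictory = {"location": "desert_camp", "weather": "rainy", "time_of_day": "evening"}

print(validate_consistency(old_facts=old_facts, new_facts=consistent))     # True
print(validate_consistency(old_facts=old_facts, new_facts=contradictory))  # False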
app.py
ADDED
@@ -0,0 +1,333 @@
import gradio as gr
import os
import warnings
from typing import Optional, Dict, Any

warnings.filterwarnings("ignore")

# Output directories
AUDIO_DIR = "outputs/audio"
os.makedirs(AUDIO_DIR, exist_ok=True)

# Mock implementations for story generation components
class StoryState:
    def __init__(self):
        self.scene_history = []
        self.facts = []
        self.world_meta = {}

    def get_context_window(self, n=3):
        return self.scene_history[-n:] if len(self.scene_history) > n else self.scene_history

    def update_facts(self, new_facts):
        self.facts.extend(new_facts)

    def append_scene(self, scene):
        self.scene_history.append(scene)

    def update_world_meta(self, meta):
        self.world_meta.update(meta)

    def to_dict(self):
        return {
            "scenes": len(self.scene_history),
            "facts": len(self.facts)
        }

class StoryGeneratorTool:
    def forward(self, context, initial_prompt=None, last_choice=None):
        if initial_prompt:
            return f"Story begins: {initial_prompt}\n\nThe adventure unfolds as mysterious events shape your path. You find yourself in an unfamiliar place, with shadows dancing around you and the sound of distant whispers filling the air. The path ahead splits into multiple directions, each promising different adventures and challenges."
        elif last_choice:
            return f"Following your choice to '{last_choice}', the story continues...\n\nYour decision leads you deeper into the mystery. New challenges emerge as the plot thickens. The environment around you shifts and changes, revealing hidden secrets and unexpected allies. What seemed like a simple choice now opens up entirely new possibilities for your adventure."
        else:
            return "The story continues with unexpected twists and turns. Ancient mysteries begin to unravel as you delve deeper into this strange world. Each step forward reveals new questions that demand answers, and you realize that your journey is far from over."

class ExtractFactsTool:
    def forward(self, scene_text):
        # Extract meaningful facts from scene text
        facts = []
        if "mysterious" in scene_text.lower():
            facts.append("There are mysterious elements in this world")
        if "path" in scene_text.lower():
            facts.append("Multiple paths and choices are available")
        if "adventure" in scene_text.lower():
            facts.append("This is an adventure-type story")
        if "challenge" in scene_text.lower():
            facts.append("Challenges and obstacles exist")

        return facts if facts else [f"Story element: {scene_text[:30]}..."]

def build_world(facts):
    return {
        "world_complexity": len(facts),
        "narrative_depth": "high" if len(facts) > 5 else "medium",
        "story_themes": ["mystery", "adventure", "choice-driven"]
    }

def generate_choices(scene_text, facts):
    base_choices = [
        "Investigate the mysterious occurrence further",
        "Seek help from potential allies nearby",
        "Explore the unknown path cautiously",
        "Take a bold and direct approach"
    ]

    # Customize choices based on scene content
    if "mystery" in scene_text.lower():
        base_choices[0] = "Solve the mystery using your wits"
    if "danger" in scene_text.lower():
        base_choices[3] = "Face the danger head-on"
    if "friend" in scene_text.lower() or "ally" in scene_text.lower():
        base_choices[1] = "Rally your allies for support"

    return base_choices

def advance_story(
    state: StoryState,
    initial_prompt: Optional[str] = None,
    last_choice: Optional[str] = None
) -> Dict[str, Any]:
    """
    Runs one step of the story pipeline, focusing on the NLP stages.
    """
    # 1) Generate scene
    scene_tool = StoryGeneratorTool()
    scene_args = {
        "context": state.get_context_window(n=3),
        "initial_prompt": initial_prompt,
        "last_choice": last_choice,
    }
    scene_text = scene_tool.forward(**scene_args)

    # 2) Extract facts
    fact_tool = ExtractFactsTool()
    new_facts = fact_tool.forward(scene_text)

    # 3) Update state & build world metadata
    state.update_facts(new_facts)
    state.append_scene(scene_text)
    world_meta = build_world(state.facts)
    state.update_world_meta(world_meta)

    # 4) Generate next-step choices
    choices = generate_choices(scene_text, state.facts)

    # 5) Package and return
    return {
        "scene_text": scene_text,
        "choices": choices,
        "updated_state": state.to_dict(),
    }

# Global variables to store story state
story_state = StoryState()
current_story = ""
story_choices = []

def initialize_story(user_input):
    """Initialize the story with user input and generate the first response."""
    global current_story, story_choices, story_state

    if not user_input.strip():
        # Hide everything if empty
        return (
            gr.update(visible=False, value=""),   # chat_output
            gr.update(visible=False),             # choice1
            gr.update(visible=False),             # choice2
            gr.update(visible=False),             # choice3
            gr.update(visible=False),             # choice4
            gr.update(visible=False),             # prev_btn
            gr.update(visible=False),             # next_btn
            gr.update(visible=True, value=""),    # keep user_input visible
            gr.update(visible=True),              # keep submit_btn visible
            gr.update(visible=False)              # choices_header
        )

    # Reset story state
    story_state = StoryState()

    # Generate story using pipeline
    result = advance_story(story_state, initial_prompt=user_input)

    # Update global state
    current_story = result["scene_text"]
    story_choices = result["choices"]

    print(f"Generated story: {current_story}")
    print(f"Generated choices: {story_choices}")

    # Return updates
    return (
        gr.update(visible=True, value=current_story),     # chat_output
        gr.update(visible=True, value=story_choices[0]),  # choice1
        gr.update(visible=True, value=story_choices[1]),  # choice2
        gr.update(visible=True, value=story_choices[2]),  # choice3
        gr.update(visible=True, value=story_choices[3]),  # choice4
        gr.update(visible=True),    # prev_btn
        gr.update(visible=True),    # next_btn
        gr.update(visible=False),   # hide user_input
        gr.update(visible=False),   # hide submit_btn
        gr.update(visible=True)     # show choices_header
    )

def make_choice(choice_num):
    """Handle story choice selection and generate the next scene."""
    global current_story, story_choices, story_state

    if choice_num < len(story_choices):
        selected_choice = story_choices[choice_num]

        print(f"User selected choice {choice_num}: {selected_choice}")

        # Generate next story scene
        result = advance_story(story_state, last_choice=selected_choice)

        # Update global state
        current_story = result["scene_text"]
        story_choices = result["choices"]

        print(f"Generated new story: {current_story}")
        print(f"Generated new choices: {story_choices}")

        return (
            gr.update(value=current_story),     # chat_output
            gr.update(value=story_choices[0]),  # choice1
            gr.update(value=story_choices[1]),  # choice2
            gr.update(value=story_choices[2]),  # choice3
            gr.update(value=story_choices[3])   # choice4
        )
    return tuple([gr.update()] * 5)

def go_previous():
    """Navigate to the previous story segment."""
    global story_state, current_story
    if len(story_state.scene_history) > 1:
        # Get previous scene
        prev_scene = story_state.scene_history[-2]
        return gr.update(value=f"Previous Scene:\n\n{prev_scene}")
    return gr.update(value="No previous story segment available")

def go_next():
    """Navigate back to the current story segment."""
    global current_story
    return gr.update(value=current_story)

def restart_story():
    """Restart the story from the beginning."""
    global story_state, current_story, story_choices
    story_state = StoryState()
    current_story = ""
    story_choices = []

    return (
        gr.update(visible=False, value=""),  # chat_output
        gr.update(visible=False),            # choice1
        gr.update(visible=False),            # choice2
        gr.update(visible=False),            # choice3
        gr.update(visible=False),            # choice4
        gr.update(visible=False),            # prev_btn
        gr.update(visible=False),            # next_btn
        gr.update(visible=True, value=""),   # show user_input
        gr.update(visible=True),             # show submit_btn
        gr.update(visible=False)             # hide choices_header
    )

# Create the Gradio interface
with gr.Blocks(title="AI Story Generation Platform", theme=gr.themes.Soft(), css="""
    .enter-button { margin-top: 20px !important; }
    .story-output { font-family: 'Georgia', serif !important; line-height: 1.6 !important; }
    .choice-button { margin: 5px !important; padding: 10px !important; }
    .nav-button { margin: 10px 5px !important; }
    .restart-button { margin-top: 20px !important; background: #ff6b6b !important; }
""") as demo:

    gr.Markdown("# AI Interactive Story Generation Platform")
    gr.Markdown("Enter your story idea to begin an interactive text-based adventure!")

    # Input section
    user_input = gr.Textbox(
        placeholder="Enter your story beginning (e.g., 'I wake up in a mysterious forest...')",
        label="Story Input",
        lines=3
    )
    submit_btn = gr.Button("Start Adventure", variant="primary", elem_classes="enter-button")

    # Story output
    chat_output = gr.Textbox(
        label="Your Story",
        lines=8,
        interactive=False,
        visible=False,
        elem_classes="story-output"
    )

    # Choices section
    choices_header = gr.Markdown("### Choose Your Next Action", visible=False)

    with gr.Row():
        choice1_btn = gr.Button("Choice 1", visible=False, variant="secondary", elem_classes="choice-button")
        choice2_btn = gr.Button("Choice 2", visible=False, variant="secondary", elem_classes="choice-button")

    with gr.Row():
        choice3_btn = gr.Button("Choice 3", visible=False, variant="secondary", elem_classes="choice-button")
        choice4_btn = gr.Button("Choice 4", visible=False, variant="secondary", elem_classes="choice-button")

    # Navigation section
    with gr.Row():
        prev_btn = gr.Button("⬅️ Previous Scene", visible=False, variant="outline", elem_classes="nav-button")
        next_btn = gr.Button("Current Scene ➡️", visible=False, variant="outline", elem_classes="nav-button")
        restart_btn = gr.Button("Restart Story", visible=False, variant="stop", elem_classes="restart-button")

    # Story statistics
    story_stats = gr.Markdown("", visible=False)

    # Event handlers
    submit_btn.click(
        fn=initialize_story,
        inputs=[user_input],
        outputs=[
            chat_output, choice1_btn, choice2_btn, choice3_btn, choice4_btn,
            prev_btn, next_btn, user_input, submit_btn, choices_header
        ]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[restart_btn]
    )

    # Choice event handlers
    choice1_btn.click(
        fn=lambda: make_choice(0),
        outputs=[chat_output, choice1_btn, choice2_btn, choice3_btn, choice4_btn]
    )
    choice2_btn.click(
        fn=lambda: make_choice(1),
        outputs=[chat_output, choice1_btn, choice2_btn, choice3_btn, choice4_btn]
    )
    choice3_btn.click(
        fn=lambda: make_choice(2),
        outputs=[chat_output, choice1_btn, choice2_btn, choice3_btn, choice4_btn]
    )
    choice4_btn.click(
        fn=lambda: make_choice(3),
        outputs=[chat_output, choice1_btn, choice2_btn, choice3_btn, choice4_btn]
    )

    # Navigation event handlers
    prev_btn.click(fn=go_previous, outputs=[chat_output])
    next_btn.click(fn=go_next, outputs=[chat_output])

    # Restart handler
    restart_btn.click(
        fn=restart_story,
        outputs=[
            chat_output, choice1_btn, choice2_btn, choice3_btn, choice4_btn,
            prev_btn, next_btn, user_input, submit_btn, choices_header
        ]
    ).then(
        fn=lambda: gr.update(visible=False),
        outputs=[restart_btn]
    )

if __name__ == "__main__":
    demo.launch()