# Hugging Face Space Configuration - app.py
# Place this file at the root of your Hugging Face Space repository
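#
# A Space also needs a requirements.txt next to this file. A minimal sketch,
# with the package set inferred from the imports below (versions unpinned):
#   torch
#   transformers
#   accelerate   # required for device_map="auto"
#   gradio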
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import logging
import json
import re
import ast
# ✅ Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ✅ Model name - using Microsoft Phi-4 multimodal model
model_name = "microsoft/Phi-4-multimodal-instruct"
def load_model():
    logger.info(f"🔄 Loading model: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",  # Automatically map layers to available GPUs
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
        )
        logger.info("✅ Model loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        raise
# Load on startup
try:
    model, tokenizer = load_model()
except Exception as e:
    logger.error(f"❌ Error loading model: {e}")
    model, tokenizer = None, None
def is_function_call(single_message):
    """Parse a message consisting of a function-name line followed by a JSON
    argument object. Returns {"name": ..., "arguments": ...} if the message
    is a function call, otherwise False."""
    pattern = re.compile(r'([^\n`]*?)\n({.*?})(?=\w*\n|$)', re.DOTALL)
    matches = pattern.findall(single_message)
    if not matches:
        return False
    func_name, args_str = matches[0]
    func_name = func_name.strip()
    try:
        parsed_args = json.loads(args_str)
    except json.JSONDecodeError:
        try:
            # Fall back to Python-literal syntax (e.g. single-quoted keys)
            parsed_args = ast.literal_eval(args_str)
        except (ValueError, SyntaxError):
            return False
    return {"name": func_name, "arguments": parsed_args}
def realtime_aqi(city):
    """Mock air-quality lookup tool (returns canned demo data)."""
    if 'beijing' in city.lower() or '北京' in city:
        return json.dumps({'city': '北京', 'aqi': '10'}, ensure_ascii=False)
    elif 'shanghai' in city.lower() or '上海' in city:
        return json.dumps({'city': '上海', 'aqi': '72'}, ensure_ascii=False)
    else:
        return json.dumps({'city': city, 'aqi': 'unknown'}, ensure_ascii=False)
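# Example (canned demo data): realtime_aqi("Beijing") -> '{"city": "北京", "aqi": "10"}'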
def build_system_prompt(tools):
    """Construct the system prompt from the list of available tools."""
    if tools is None:
        tools = []
    value = "# Available Tools"
    contents = []
    for tool in tools:
        content = f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], ensure_ascii=False, indent=4)}"
        content += "\nWhen calling the above function, express the arguments in JSON format."
        contents.append(content)
    value += "".join(contents)
    return value
# Define available tools for function calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "realtime_aqi",
            "description": "Weather forecast. Get real-time air quality: current AQI, PM2.5, and PM10 readings.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "description": "City name"
                    }
                },
                "required": [
                    "city"
                ]
            }
        }
    }
]
system_prompt = build_system_prompt(tools)
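# For the single demo tool above, system_prompt renders roughly as:
#   # Available Tools
#
#   ## realtime_aqi
#
#   {"name": "realtime_aqi", "description": "...", "parameters": {...}}
#   When calling the above function, express the arguments in JSON format.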
def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    if model is None or tokenizer is None:
        return "❌ Model failed to load."
    if not prompt.strip():
        return "⚠️ Please enter a prompt."
    try:
        logger.info(f"📝 Prompt: {prompt[:80]}...")
        # Format the prompt with Phi-4's chat markers, prepending the
        # tool-describing system prompt so the model can emit function calls
        formatted_prompt = f"<|system|>\n{system_prompt}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n"
        # Tokenize
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
                use_cache=True
            )
        # Decode only the newly generated tokens
        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        # Check each segment of the output for function calls
        function_calls = []
        for m in decoded.split("<|assistant|>"):
            fc_decode = is_function_call(m.strip())
            if fc_decode:
                function_calls.append(fc_decode)
        # If there are function calls, execute them
        if function_calls:
            result = "Function calls detected:\n"
            for fc in function_calls:
                if fc["name"] == "realtime_aqi":
                    function_response = realtime_aqi(city=fc["arguments"]["city"])
                    result += f"Function: {fc['name']}\nArguments: {fc['arguments']}\nResponse: {function_response}\n\n"
            return result
        else:
            # Return the normal response
            return decoded.strip()
    except Exception as e:
        logger.error(f"❌ Error during response generation: {e}")
        return f"Generation error: {str(e)}"
# ✅ Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Prompt", placeholder="Ask anything...", lines=4),
        gr.Slider(64, 2048, value=512, step=64, label="Max Tokens"),
        gr.Slider(0.1, 1.2, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p"),
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or 中文 — Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft()
)
# ✅ Run the app
if __name__ == "__main__":
    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)