# Hugging Face Space Configuration - app.py
# This file should be placed in your Hugging Face Space repository
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import logging
import json
import re
import ast

# ✅ Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ✅ Model name - using Microsoft Phi-4 multimodal model
model_name = "microsoft/Phi-4-multimodal-instruct"

def load_model():
    logger.info(f"🔄 Loading model: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",  # Automatically map to available GPUs
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
        )
        logger.info("✅ Model loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        raise

# Load on startup
try:
    model, tokenizer = load_model()
except Exception as e:
    logger.error(f"❌ Error loading model: {e}")
    model, tokenizer = None, None

def is_function_call(single_message):
    """Check whether a model message is a function call.

    Returns a dict {"name": ..., "arguments": ...} if the message looks like a
    tool invocation (function name on one line, JSON arguments below it),
    otherwise False.
    """
    pattern = re.compile(r'([^\n`]*?)\n({.*?})(?=\w*\n|$)', re.DOTALL)
    matches = pattern.findall(single_message)
    if not matches:
        return False
    func_name, args_str = matches[0]
    func_name = func_name.strip()
    try:
        parsed_args = json.loads(args_str)
    except json.JSONDecodeError:
        try:
            # Fall back to Python-literal parsing (e.g. single-quoted dicts)
            parsed_args = ast.literal_eval(args_str)
        except (ValueError, SyntaxError):
            return False
    return {"name": func_name, "arguments": parsed_args}

def realtime_aqi(city):
    """Mock weather/air-quality tool returning canned AQI data for 北京 (Beijing) and 上海 (Shanghai)."""
    if '北京' in city.lower():
        return json.dumps({'city': '北京', 'aqi': '10', 'unit': 'celsius'}, ensure_ascii=False)
    elif '上海' in city.lower():
        return json.dumps({'city': '上海', 'aqi': '72', 'unit': 'fahrenheit'}, ensure_ascii=False)
    else:
        return json.dumps({'city': city, 'aqi': 'unknown'}, ensure_ascii=False)

def build_system_prompt(tools):
    """Construct the system prompt from the list of available tools."""
    if tools is None:
        tools = []
    value = "# 可用工具"  # "# Available tools"
    contents = []
    for tool in tools:
        content = f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], ensure_ascii=False, indent=4)}"
        # Chinese instruction: "When calling the above function, express the arguments in JSON format."
        content += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"
        contents.append(content)
    value += "".join(contents)
    return value
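
# For reference, a rough sketch of the prompt this produces for a single tool
# named "realtime_aqi" (the JSON body is abbreviated here; the real output is
# the full tool definition rendered by json.dumps(..., indent=4)):
#
#     # 可用工具
#
#     ## realtime_aqi
#
#     {
#         "name": "realtime_aqi",
#         "description": "...",
#         "parameters": {...}
#     }
#     在调用上述函数时,请使用 Json 格式表示调用的参数。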

# Define available tools for function calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "realtime_aqi",
            # Chinese: "Weather forecast. Get real-time air quality: current AQI, PM2.5 and PM10."
            "description": "天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "description": "城市名"  # "city name"
                    }
                },
                "required": [
                    "city"
                ]
            }
        }
    }
]

system_prompt = build_system_prompt(tools)

def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    if model is None or tokenizer is None:
        return "❌ Model failed to load."
    if not prompt.strip():
        return "⚠️ Please enter a prompt."
    try:
        logger.info(f"📝 Prompt: {prompt[:80]}...")
        # Format the prompt with Phi-4 chat markers, including the tool-listing
        # system prompt so the model can actually decide to emit a function call
        formatted_prompt = f"<|system|>\n{system_prompt}<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>\n"
        # Tokenize
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
                use_cache=True,
                min_length=20
            )
        # Decode only the newly generated tokens
        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        # Check for function calls in the generated text
        function_calls = []
        for m in decoded.split("<|assistant|>"):
            fc_decode = is_function_call(m.strip())
            if fc_decode:
                function_calls.append(fc_decode)
        # If there are function calls, execute them
        if function_calls:
            result = "Function calls detected:\n"
            for fc in function_calls:
                if fc["name"] == "realtime_aqi":
                    function_response = realtime_aqi(city=fc["arguments"]["city"])
                    result += f"Function: {fc['name']}\nArguments: {fc['arguments']}\nResponse: {function_response}\n\n"
            return result
        else:
            # Return the normal response
            return decoded.strip()
    except Exception as e:
        logger.error(f"❌ Error during response generation: {e}")
        return f"Generation error: {str(e)}"

# ✅ Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Prompt", placeholder="Ask anything...", lines=4),
        gr.Slider(64, 2048, value=512, step=64, label="Max Tokens"),
        gr.Slider(0.1, 1.2, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p"),
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or 中文 — Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft()
)

# ✅ Run the app
if __name__ == "__main__":
    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)