# Hugging Face Space Configuration - app.py
# This file should be placed in your Hugging Face space repository
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import logging
import json
import re
import ast
# ✅ Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ✅ Model name - using Microsoft Phi-4 multimodal model
model_name = "microsoft/Phi-4-multimodal-instruct"
def load_model():
    logger.info(f"🔄 Loading model: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",  # Automatically map to available GPUs
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
        )
        logger.info("✅ Model loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        raise

# Load on startup
try:
    model, tokenizer = load_model()
except Exception as e:
    logger.error(f"❌ Error loading model: {e}")
    model, tokenizer = None, None
def is_function_call(single_message):
    """Check whether a model message contains a function call.

    Returns {"name": ..., "arguments": ...} if one is found, otherwise False.
    """
    pattern = re.compile(r'([^\n`]*?)\n({.*?})(?=\w*\n|$)', re.DOTALL)
    matches = pattern.findall(single_message)
    if not matches:
        return False
    func_name, args_str = matches[0]
    func_name = func_name.strip()
    try:
        parsed_args = json.loads(args_str)
    except json.JSONDecodeError:
        try:
            parsed_args = ast.literal_eval(args_str)
        except (ValueError, SyntaxError):
            return False
    return {"name": func_name, "arguments": parsed_args}
def realtime_aqi(city):
    """Air quality query tool (returns mock data for demonstration)."""
    if '北京' in city.lower():  # Beijing
        return json.dumps({'city': '北京', 'aqi': '10'}, ensure_ascii=False)
    elif '上海' in city.lower():  # Shanghai
        return json.dumps({'city': '上海', 'aqi': '72'}, ensure_ascii=False)
    else:
        return json.dumps({'city': city, 'aqi': 'unknown'}, ensure_ascii=False)
def build_system_prompt(tools):
    """Construct the system prompt from the list of available tools."""
    if tools is None:
        tools = []
    value = "# 可用工具"  # "# Available Tools"
    contents = []
    for tool in tools:
        content = f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], ensure_ascii=False, indent=4)}"
        # "When calling the function above, express the call arguments in JSON format."
        content += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"
        contents.append(content)
    value += "".join(contents)
    return value
# Define available tools for function calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "realtime_aqi",
            # "Weather forecast. Get real-time air quality: current AQI, PM2.5 and PM10 readings."
            "description": "天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "城市名"  # "City name"
                    }
                },
                "required": ["city"]
            }
        }
    }
]
system_prompt = build_system_prompt(tools)
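
# With the tools list above, system_prompt looks roughly like this (illustrative;
# the Chinese prompt text is what the model sees, translations in brackets):
#
#   # 可用工具                      [# Available Tools]
#
#   ## realtime_aqi
#
#   { "name": "realtime_aqi", "description": "...", "parameters": { ... } }
#   在调用上述函数时,请使用 Json 格式表示调用的参数。
#   [When calling the function above, express the call arguments in JSON format.]
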
def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    if model is None or tokenizer is None:
        return "❌ Model failed to load."
    if not prompt.strip():
        return "⚠️ Please enter a prompt."
    try:
        logger.info(f"📝 Prompt: {prompt[:80]}...")

        # Format the prompt for Phi-4. The tool-aware system prompt is included so the
        # model can actually emit the function calls parsed below (this assumes the
        # <|system|>/<|user|>/<|assistant|>/<|end|> chat markers used by Phi-4 instruct models).
        formatted_prompt = (
            f"<|system|>\n{system_prompt}<|end|>\n"
            f"<|user|>\n{prompt}<|end|>\n"
            f"<|assistant|>\n"
        )

        # Tokenize and move tensors to the model's device
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
                use_cache=True,
                min_new_tokens=20
            )

        # Decode only the newly generated tokens
        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

        # Check whether the model emitted a function call
        function_calls = []
        fc_decode = is_function_call(decoded.strip())
        if fc_decode:
            function_calls.append(fc_decode)

        # If there are function calls, execute them
        if function_calls:
            result = "Function calls detected:\n"
            for fc in function_calls:
                if fc["name"] == "realtime_aqi":
                    function_response = realtime_aqi(city=fc["arguments"]["city"])
                    result += f"Function: {fc['name']}\nArguments: {fc['arguments']}\nResponse: {function_response}\n\n"
            return result

        # Otherwise return the plain model response
        return decoded.strip()
    except Exception as e:
        logger.error(f"❌ Error during response generation: {e}")
        return f"Generation error: {str(e)}"
# ✅ Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Prompt", placeholder="Ask anything...", lines=4),
        gr.Slider(64, 2048, value=512, step=64, label="Max New Tokens"),
        gr.Slider(0.1, 1.2, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p"),
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or Chinese. Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft()
)
# ✅ Run the app
if __name__ == "__main__":
    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)