# Hugging Face Space Configuration - app.py
# This file should be placed in your Hugging Face Space repository

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import logging
import json
import re
import ast

# ✅ Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ✅ Model name - using Microsoft Phi-4 multimodal model
model_name = "microsoft/Phi-4-multimodal-instruct"


def load_model():
    logger.info(f"🔄 Loading model: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",  # Automatically map to available GPUs
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
        )
        logger.info("✅ Model loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        raise


# Load on startup
try:
    model, tokenizer = load_model()
except Exception as e:
    logger.error(f"❌ Error loading model: {e}")
    model, tokenizer = None, None


def is_function_call(single_message):
    """Determine whether a generated model message is a function call."""
    pattern = re.compile(r'([^\n`]*?)\n({.*?})(?=\w*\n|$)', re.DOTALL)
    matches = pattern.findall(single_message)
    if not matches:
        return False
    func_name, args_str = matches[0]
    func_name = func_name.strip()
    try:
        parsed_args = json.loads(args_str)
    except json.JSONDecodeError:
        try:
            parsed_args = ast.literal_eval(args_str)
        except (ValueError, SyntaxError):
            return False
    return {"name": func_name, "arguments": parsed_args}


def realtime_aqi(city):
    """Air quality query tool (returns mock data)."""
    if '北京' in city.lower():  # Beijing
        return json.dumps({'city': '北京', 'aqi': '10', 'unit': 'celsius'}, ensure_ascii=False)
    elif '上海' in city.lower():  # Shanghai
        return json.dumps({'city': '上海', 'aqi': '72', 'unit': 'fahrenheit'}, ensure_ascii=False)
    else:
        return json.dumps({'city': city, 'aqi': 'unknown'}, ensure_ascii=False)


def build_system_prompt(tools):
    """Construct the system prompt from the list of available tools."""
    if tools is None:
        tools = []
    value = "# 可用工具"  # "Available tools" header
    contents = []
    for tool in tools:
        content = f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], ensure_ascii=False, indent=4)}"
        # "When calling the above function, express the arguments in JSON format."
        content += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"
        contents.append(content)
    value += "".join(contents)
    return value


# Define available tools for function calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "realtime_aqi",
            # "Weather forecast. Get real-time air quality: current AQI, PM2.5, PM10 information."
            "description": "天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "description": "城市名"  # City name
                    }
                },
                "required": ["city"]
            }
        }
    }
]

system_prompt = build_system_prompt(tools)


def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    if model is None or tokenizer is None:
        return "❌ Model failed to load."
    if not prompt.strip():
        return "⚠️ Please enter a prompt."
    try:
        logger.info(f"📝 Prompt: {prompt[:80]}...")

        # Format the prompt with the Phi-4 chat markers. The tool system prompt is
        # prepended so the model can see the available functions.
        formatted_prompt = (
            f"<|system|>\n{system_prompt}<|end|>\n"
            f"<|user|>\n{prompt}<|end|>\n"
            f"<|assistant|>\n"
        )

        # Tokenize
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
                use_cache=True,
                min_length=20,
                early_stopping=True
            )

        # Decode only the newly generated tokens
        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

        # Check the output for function calls
        function_calls = []
        for m in decoded.split("<|assistant|>"):
            fc_decode = is_function_call(m.strip())
            if fc_decode:
                function_calls.append(fc_decode)

        # If there are function calls, execute them
        if function_calls:
            result = "Function calls detected:\n"
            for fc in function_calls:
                if fc["name"] == "realtime_aqi":
                    function_response = realtime_aqi(city=fc["arguments"]["city"])
                    result += (
                        f"Function: {fc['name']}\n"
                        f"Arguments: {fc['arguments']}\n"
                        f"Response: {function_response}\n\n"
                    )
            return result
        else:
            # Return the normal response
            return decoded.strip()
    except Exception as e:
        logger.error(f"❌ Error during response generation: {e}")
        return f"Generation error: {str(e)}"


# ✅ Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Prompt", placeholder="Ask anything...", lines=4),
        gr.Slider(64, 2048, value=512, step=64, label="Max Tokens"),
        gr.Slider(0.1, 1.2, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p"),
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or 中文 — Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft()
)

# ✅ Run the app
if __name__ == "__main__":
    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
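
# --- Optional: calling the Space remotely (illustrative sketch, not part of the app) ---
# A minimal example of how a client could query this app once it runs as a public
# Space, using the gradio_client package. The Space id "your-username/your-space"
# is a placeholder, the positional arguments mirror the five Gradio inputs defined
# above, and "/predict" is the default api_name exposed by gr.Interface.
#
#     from gradio_client import Client
#
#     client = Client("your-username/your-space")  # placeholder Space id
#     answer = client.predict(
#         "What is the air quality in 北京 today?",  # prompt
#         512,    # Max Tokens
#         0.4,    # Temperature
#         0.9,    # Top-p
#         1.1,    # Repetition Penalty
#         api_name="/predict",
#     )
#     print(answer)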