# Hugging Face Space Configuration - app.py
# This file should be placed in your Hugging Face Space repository

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import logging
import json
import re
import ast

# ✅ Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ✅ Model name - using Microsoft Phi-4 multimodal model
model_name = "microsoft/Phi-4-multimodal-instruct"


def load_model():
    logger.info(f"🔄 Loading model: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",  # Automatically map to available GPUs
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
        )
        logger.info("✅ Model loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        raise


# Load on startup
try:
    model, tokenizer = load_model()
except Exception as e:
    logger.error(f"❌ Error loading model: {e}")
    model, tokenizer = None, None


def is_function_call(single_message):
    """Determine whether a generated model message is a function call."""
    pattern = re.compile(r'([^\n`]*?)\n({.*?})(?=\w*\n|$)', re.DOTALL)
    matches = pattern.findall(single_message)
    if not matches:
        return False
    func_name, args_str = matches[0]
    func_name = func_name.strip()
    try:
        parsed_args = json.loads(args_str)
    except json.JSONDecodeError:
        try:
            parsed_args = ast.literal_eval(args_str)
        except (ValueError, SyntaxError):
            return False
    return {"name": func_name, "arguments": parsed_args}


def realtime_aqi(city):
    """Air quality query tool (returns mock data)."""
    if '北京' in city.lower():  # Beijing
        return json.dumps({'city': '北京', 'aqi': '10', 'unit': 'celsius'}, ensure_ascii=False)
    elif '上海' in city.lower():  # Shanghai
        return json.dumps({'city': '上海', 'aqi': '72', 'unit': 'fahrenheit'}, ensure_ascii=False)
    else:
        return json.dumps({'city': city, 'aqi': 'unknown'}, ensure_ascii=False)


def build_system_prompt(tools):
    """Construct the system prompt from the list of available tools."""
    if tools is None:
        tools = []
    value = "# 可用工具"  # "Available tools" header
    contents = []
    for tool in tools:
        content = f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], ensure_ascii=False, indent=4)}"
        # "When calling the above function, express the arguments in JSON format."
        content += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"
        contents.append(content)
    value += "".join(contents)
    return value


# Define available tools for function calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "realtime_aqi",
            # "Weather forecast. Get real-time air quality: current AQI, PM2.5, PM10 information."
            "description": "天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "description": "城市名"  # City name
                    }
                },
                "required": ["city"]
            }
        }
    }
]

system_prompt = build_system_prompt(tools)


def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    if model is None or tokenizer is None:
        return "❌ Model failed to load."
    if not prompt.strip():
        return "⚠️ Please enter a prompt."
    try:
        logger.info(f"📝 Prompt: {prompt[:80]}...")

        # Format the prompt with the Phi-4 chat markers. The tool system prompt is
        # prepended so the model can see the available functions.
        formatted_prompt = (
            f"<|system|>\n{system_prompt}<|end|>\n"
            f"<|user|>\n{prompt}<|end|>\n"
            f"<|assistant|>\n"
        )

        # Tokenize
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
                use_cache=True,
                min_length=20,
                early_stopping=True
            )

        # Decode only the newly generated tokens
        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

        # Check the output for function calls
        function_calls = []
        for m in decoded.split("<|assistant|>"):
            fc_decode = is_function_call(m.strip())
            if fc_decode:
                function_calls.append(fc_decode)

        # If there are function calls, execute them
        if function_calls:
            result = "Function calls detected:\n"
            for fc in function_calls:
                if fc["name"] == "realtime_aqi":
                    function_response = realtime_aqi(city=fc["arguments"]["city"])
                    result += (
                        f"Function: {fc['name']}\n"
                        f"Arguments: {fc['arguments']}\n"
                        f"Response: {function_response}\n\n"
                    )
            return result
        else:
            # Return the normal response
            return decoded.strip()
    except Exception as e:
        logger.error(f"❌ Error during response generation: {e}")
        return f"Generation error: {str(e)}"


# ✅ Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Prompt", placeholder="Ask anything...", lines=4),
        gr.Slider(64, 2048, value=512, step=64, label="Max Tokens"),
        gr.Slider(0.1, 1.2, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p"),
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or 中文 — Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft()
)

# ✅ Run the app
if __name__ == "__main__":
    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
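
# --- Optional: calling the Space remotely (illustrative sketch, not part of the app) ---
# A minimal example of how a client could query this app once it runs as a public
# Space, using the gradio_client package. The Space id "your-username/your-space"
# is a placeholder, the positional arguments mirror the five Gradio inputs defined
# above, and "/predict" is the default api_name exposed by gr.Interface.
#
#     from gradio_client import Client
#
#     client = Client("your-username/your-space")  # placeholder Space id
#     answer = client.predict(
#         "What is the air quality in 北京 today?",  # prompt
#         512,    # Max Tokens
#         0.4,    # Temperature
#         0.9,    # Top-p
#         1.1,    # Repetition Penalty
#         api_name="/predict",
#     )
#     print(answer)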