# Hugging Face Space Configuration - app.py
# This file should be placed in your Hugging Face space repository
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import logging
import json
import re
import ast
# ✅ Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ✅ Model name - using Microsoft Phi-4 multimodal model
model_name = "microsoft/Phi-4-multimodal-instruct"
def load_model():
    logger.info(f"🔄 Loading model: {model_name}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            device_map="auto",  # Automatically map to available GPUs
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
        )
        logger.info("✅ Model loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        raise

# Load on startup
try:
    model, tokenizer = load_model()
except Exception as e:
    logger.error(f"❌ Error loading model: {e}")
    model, tokenizer = None, None
def is_function_call(single_message):
    """Check whether a model message contains a function call.

    Returns {"name": ..., "arguments": ...} if one is found, otherwise False.
    """
    pattern = re.compile(r'([^\n`]*?)\n({.*?})(?=\w*\n|$)', re.DOTALL)
    matches = pattern.findall(single_message)
    if not matches:
        return False
    func_name, args_str = matches[0]
    func_name = func_name.strip()
    try:
        parsed_args = json.loads(args_str)
    except json.JSONDecodeError:
        try:
            parsed_args = ast.literal_eval(args_str)
        except (ValueError, SyntaxError):
            return False
    return {"name": func_name, "arguments": parsed_args}
def realtime_aqi(city):
    """Air quality query tool (returns mock data for demonstration)."""
    if '北京' in city.lower():  # Beijing
        return json.dumps({'city': '北京', 'aqi': '10'}, ensure_ascii=False)
    elif '上海' in city.lower():  # Shanghai
        return json.dumps({'city': '上海', 'aqi': '72'}, ensure_ascii=False)
    else:
        return json.dumps({'city': city, 'aqi': 'unknown'}, ensure_ascii=False)
def build_system_prompt(tools):
    """Construct the system prompt from the list of available tools."""
    if tools is None:
        tools = []
    value = "# 可用工具"  # "# Available Tools"
    contents = []
    for tool in tools:
        content = f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], ensure_ascii=False, indent=4)}"
        # "When calling the function above, express the call arguments in JSON format."
        content += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"
        contents.append(content)
    value += "".join(contents)
    return value
# Define available tools for function calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "realtime_aqi",
            # "Weather forecast. Get real-time air quality: current AQI, PM2.5 and PM10 readings."
            "description": "天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "城市名"  # "City name"
                    }
                },
                "required": ["city"]
            }
        }
    }
]
system_prompt = build_system_prompt(tools)
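
# With the tools list above, system_prompt looks roughly like this (illustrative;
# the Chinese prompt text is what the model sees, translations in brackets):
#
#   # 可用工具                      [# Available Tools]
#
#   ## realtime_aqi
#
#   { "name": "realtime_aqi", "description": "...", "parameters": { ... } }
#   在调用上述函数时,请使用 Json 格式表示调用的参数。
#   [When calling the function above, express the call arguments in JSON format.]
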
def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    if model is None or tokenizer is None:
        return "❌ Model failed to load."
    if not prompt.strip():
        return "⚠️ Please enter a prompt."
    try:
        logger.info(f"📝 Prompt: {prompt[:80]}...")

        # Format the prompt for Phi-4. The tool-aware system prompt is included so the
        # model can actually emit the function calls parsed below (this assumes the
        # <|system|>/<|user|>/<|assistant|>/<|end|> chat markers used by Phi-4 instruct models).
        formatted_prompt = (
            f"<|system|>\n{system_prompt}<|end|>\n"
            f"<|user|>\n{prompt}<|end|>\n"
            f"<|assistant|>\n"
        )

        # Tokenize and move tensors to the model's device
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2,
                use_cache=True,
                min_new_tokens=20
            )

        # Decode only the newly generated tokens
        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

        # Check whether the model emitted a function call
        function_calls = []
        fc_decode = is_function_call(decoded.strip())
        if fc_decode:
            function_calls.append(fc_decode)

        # If there are function calls, execute them
        if function_calls:
            result = "Function calls detected:\n"
            for fc in function_calls:
                if fc["name"] == "realtime_aqi":
                    function_response = realtime_aqi(city=fc["arguments"]["city"])
                    result += f"Function: {fc['name']}\nArguments: {fc['arguments']}\nResponse: {function_response}\n\n"
            return result

        # Otherwise return the plain model response
        return decoded.strip()
    except Exception as e:
        logger.error(f"❌ Error during response generation: {e}")
        return f"Generation error: {str(e)}"
# ✅ Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Prompt", placeholder="Ask anything...", lines=4),
        gr.Slider(64, 2048, value=512, step=64, label="Max New Tokens"),
        gr.Slider(0.1, 1.2, value=0.4, step=0.1, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.9, step=0.05, label="Top-p"),
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or Chinese. Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft()
)
# ✅ Run the app
if __name__ == "__main__":
    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)