Spaces:

gradio
/

chat.gradio.app-HFIPs

Running

App Files Files Community

ysharma HF Staff commited on 8 days ago

Commit

5738139

verified ·

1 Parent(s): 7bab86d

Create mcp_client.py

Browse files

Files changed (1) hide show

mcp_client.py +759 -0

mcp_client.py ADDED Viewed

	@@ -0,0 +1,759 @@

+"""
+MCP Client implementation for Universal MCP Client - Fixed Version
+"""
+import asyncio
+import json
+import re
+import logging
+import traceback
+from typing import Dict, Optional, Tuple, List, Any
+from openai import OpenAI
+# Import the proper MCP client components
+from mcp import ClientSession
+from mcp.client.sse import sse_client
+from config import MCPServerConfig, AppConfig, HTTPX_AVAILABLE
+logger = logging.getLogger(__name__)
+class UniversalMCPClient:
+    """Universal MCP Client using HuggingFace Inference Providers instead of Anthropic"""
+    def __init__(self):
+        self.servers: Dict[str, MCPServerConfig] = {}
+        self.enabled_servers: Dict[str, bool] = {}  # Track enabled/disabled servers
+        self.hf_client = None
+        self.current_provider = None
+        self.current_model = None
+        self.server_tools = {}  # Cache for server tools
+        # Initialize HF Inference Client if token is available
+        if AppConfig.HF_TOKEN:
+            self.hf_client = OpenAI(
+                base_url="https://router.huggingface.co/v1",
+                api_key=AppConfig.HF_TOKEN
+            )
+            logger.info("✅ HuggingFace Inference client initialized")
+        else:
+            logger.warning("⚠️ HF_TOKEN not found")
+    def enable_server(self, server_name: str, enabled: bool = True):
+        """Enable or disable a server"""
+        if server_name in self.servers:
+            self.enabled_servers[server_name] = enabled
+            logger.info(f"🔧 Server {server_name} {'enabled' if enabled else 'disabled'}")
+    def get_enabled_servers(self) -> Dict[str, MCPServerConfig]:
+        """Get only enabled servers"""
+        return {name: config for name, config in self.servers.items()
+                if self.enabled_servers.get(name, True)}
+    def remove_all_servers(self):
+        """Remove all servers"""
+        count = len(self.servers)
+        self.servers.clear()
+        self.enabled_servers.clear()
+        self.server_tools.clear()
+        logger.info(f"🗑️ Removed all {count} servers")
+        return count
+    def set_model_and_provider(self, provider_id: str, model_id: str):
+        """Set the current provider and model"""
+        self.current_provider = provider_id
+        self.current_model = model_id
+        logger.info(f"🔧 Set provider: {provider_id}, model: {model_id}")
+    def get_model_endpoint(self) -> str:
+        """Get the current model endpoint for API calls"""
+        if not self.current_provider or not self.current_model:
+            raise ValueError("Provider and model must be set before making API calls")
+        return AppConfig.get_model_endpoint(self.current_model, self.current_provider)
+    async def add_server_async(self, config: MCPServerConfig) -> Tuple[bool, str]:
+        """Add an MCP server using pure MCP protocol"""
+        try:
+            logger.info(f"🔧 Adding MCP server: {config.name} at {config.url}")
+            # Clean and validate URL - handle various input formats
+            original_url = config.url.strip()
+            # Remove common MCP endpoint variations
+            base_url = original_url
+            for endpoint in ["/gradio_api/mcp/sse", "/gradio_api/mcp/", "/gradio_api/mcp"]:
+                if base_url.endswith(endpoint):
+                    base_url = base_url[:-len(endpoint)]
+                    break
+            # Remove trailing slashes
+            base_url = base_url.rstrip("/")
+            # Construct proper MCP URL
+            mcp_url = f"{base_url}/gradio_api/mcp/sse"
+            logger.info(f"🔧 Original URL: {original_url}")
+            logger.info(f"🔧 Base URL: {base_url}")
+            logger.info(f"🔧 MCP URL: {mcp_url}")
+            # Extract space ID if it's a HuggingFace space
+            if "hf.space" in base_url:
+                space_parts = base_url.split("/")
+                if len(space_parts) >= 1:
+                    space_id = space_parts[-1].replace('.hf.space', '').replace('https://', '').replace('http://', '')
+                    if '-' in space_id:
+                        # Format: username-spacename.hf.space
+                        config.space_id = space_id.replace('-', '/', 1)
+                    else:
+                        config.space_id = space_id
+                    logger.info(f"📍 Detected HF Space ID: {config.space_id}")
+            # Update config with proper MCP URL
+            config.url = mcp_url
+            # Test MCP connection and cache tools
+            success, message = await self._test_mcp_connection(config)
+            if success:
+                self.servers[config.name] = config
+                self.enabled_servers[config.name] = True  # Enable by default
+                logger.info(f"✅ MCP Server {config.name} added successfully")
+                return True, f"✅ Successfully added MCP server: {config.name}\n{message}"
+            else:
+                logger.error(f"❌ Failed to connect to MCP server {config.name}: {message}")
+                return False, f"❌ Failed to add server: {config.name}\n{message}"
+        except Exception as e:
+            error_msg = f"Failed to add server {config.name}: {str(e)}"
+            logger.error(error_msg)
+            logger.error(traceback.format_exc())
+            return False, f"❌ {error_msg}"
+    async def _test_mcp_connection(self, config: MCPServerConfig) -> Tuple[bool, str]:
+        """Test MCP server connection with detailed debugging and tool caching"""
+        try:
+            logger.info(f"🔍 Testing MCP connection to {config.url}")
+            async with sse_client(config.url, timeout=20.0) as (read_stream, write_stream):
+                async with ClientSession(read_stream, write_stream) as session:
+                    # Initialize MCP session
+                    logger.info("🔧 Initializing MCP session...")
+                    await session.initialize()
+                    # List available tools
+                    logger.info("📋 Listing available tools...")
+                    tools = await session.list_tools()
+                    # Cache tools for this server
+                    server_tools = {}
+                    tool_info = []
+                    for tool in tools.tools:
+                        server_tools[tool.name] = {
+                            'description': tool.description,
+                            'schema': tool.inputSchema if hasattr(tool, 'inputSchema') else None
+                        }
+                        tool_info.append(f"  - {tool.name}: {tool.description}")
+                        logger.info(f"  📍 Tool: {tool.name}")
+                        logger.info(f"    Description: {tool.description}")
+                        if hasattr(tool, 'inputSchema') and tool.inputSchema:
+                            logger.info(f"    Input Schema: {tool.inputSchema}")
+                    # Cache tools for this server
+                    self.server_tools[config.name] = server_tools
+                    if len(tools.tools) == 0:
+                        return False, "No tools found on MCP server"
+                    message = f"Connected successfully!\nFound {len(tools.tools)} tools:\n" + "\n".join(tool_info)
+                    return True, message
+        except asyncio.TimeoutError:
+            return False, "Connection timeout - server may be sleeping or unreachable"
+        except Exception as e:
+            logger.error(f"MCP connection failed: {e}")
+            logger.error(traceback.format_exc())
+            return False, f"Connection failed: {str(e)}"
+    async def call_mcp_tool_async(self, server_name: str, tool_name: str, arguments: dict) -> Tuple[bool, str]:
+        """Call a tool on a specific MCP server"""
+        logger.info(f"🔧 MCP Tool Call - Server: {server_name}, Tool: {tool_name}")
+        logger.info(f"🔧 Arguments: {arguments}")
+        if server_name not in self.servers:
+            error_msg = f"Server {server_name} not found. Available servers: {list(self.servers.keys())}"
+            logger.error(f"❌ {error_msg}")
+            return False, error_msg
+        config = self.servers[server_name]
+        logger.info(f"🔧 Using server config: {config.url}")
+        try:
+            logger.info(f"🔗 Connecting to MCP server at {config.url}")
+            async with sse_client(config.url, timeout=30.0) as (read_stream, write_stream):
+                async with ClientSession(read_stream, write_stream) as session:
+                    # Initialize MCP session
+                    logger.info("🔧 Initializing MCP session...")
+                    await session.initialize()
+                    # Call the tool
+                    logger.info(f"🔧 Calling tool {tool_name} with arguments: {arguments}")
+                    result = await session.call_tool(tool_name, arguments)
+                    # Extract result content
+                    if result.content:
+                        result_text = result.content[0].text if hasattr(result.content[0], 'text') else str(result.content[0])
+                        logger.info(f"✅ Tool call successful, result length: {len(result_text)}")
+                        logger.info(f"📋 Result preview: {result_text[:200]}...")
+                        return True, result_text
+                    else:
+                        error_msg = "No content returned from tool"
+                        logger.error(f"❌ {error_msg}")
+                        return False, error_msg
+        except asyncio.TimeoutError:
+            error_msg = f"Tool call timeout for {tool_name} on {server_name}"
+            logger.error(f"❌ {error_msg}")
+            return False, error_msg
+        except Exception as e:
+            error_msg = f"Tool call failed: {str(e)}"
+            logger.error(f"❌ MCP tool call failed: {e}")
+            logger.error(traceback.format_exc())
+            return False, error_msg
+    def generate_chat_completion(self, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
+        """Generate chat completion using HuggingFace Inference Providers"""
+        if not self.hf_client:
+            raise ValueError("HuggingFace client not initialized. Please set HF_TOKEN.")
+        if not self.current_provider or not self.current_model:
+            raise ValueError("Provider and model must be set before making API calls")
+        # Get the model endpoint
+        model_endpoint = self.get_model_endpoint()
+        # Set up default parameters for GPT OSS models with higher limits
+        params = {
+            "model": model_endpoint,
+            "messages": messages,
+            "max_tokens": kwargs.pop("max_tokens", 8192),  # Use pop to avoid conflicts
+            "temperature": kwargs.get("temperature", 0.3),
+            "stream": kwargs.get("stream", False)
+        }
+        # Add any remaining kwargs
+        params.update(kwargs)
+        # Add reasoning effort if specified (GPT OSS feature)
+        reasoning_effort = kwargs.pop("reasoning_effort", AppConfig.DEFAULT_REASONING_EFFORT)
+        if reasoning_effort:
+            # For GPT OSS models, we can set reasoning in system prompt
+            system_message = None
+            for msg in messages:
+                if msg.get("role") == "system":
+                    system_message = msg
+                    break
+            if system_message:
+                system_message["content"] += f"\n\nReasoning: {reasoning_effort}"
+            else:
+                messages.insert(0, {
+                    "role": "system",
+                    "content": f"You are a helpful AI assistant. Reasoning: {reasoning_effort}"
+                })
+        try:
+            logger.info(f"🤖 Calling {model_endpoint} via {self.current_provider}")
+            response = self.hf_client.chat.completions.create(**params)
+            return response
+        except Exception as e:
+            logger.error(f"HF Inference API call failed: {e}")
+            raise
+    def generate_chat_completion_with_mcp_tools(self, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
+        """Generate chat completion with MCP tool support"""
+        enabled_servers = self.get_enabled_servers()
+        if not enabled_servers:
+            # No enabled MCP servers available, use regular completion
+            logger.info("🤖 No enabled MCP servers available, using regular chat completion")
+            return self.generate_chat_completion(messages, **kwargs)
+        logger.info(f"🔧 Processing chat with {len(enabled_servers)} enabled MCP servers available")
+        # Add system message about available tools with exact tool names
+        tool_descriptions = []
+        server_names = []
+        exact_tool_mappings = []
+        for server_name, config in enabled_servers.items():
+            tool_descriptions.append(f"- **{server_name}**: {config.description}")
+            server_names.append(server_name)
+            # Add exact tool names if we have them cached
+            if server_name in self.server_tools:
+                for tool_name, tool_info in self.server_tools[server_name].items():
+                    exact_tool_mappings.append(f"  * Server '{server_name}' has tool '{tool_name}': {tool_info['description']}")
+        # Get the actual server name (not the space ID)
+        server_list = ", ".join([f'"{name}"' for name in server_names])
+        tools_system_msg = f"""
+You have access to the following MCP tools:
+{chr(10).join(tool_descriptions)}
+EXACT TOOL MAPPINGS:
+{chr(10).join(exact_tool_mappings) if exact_tool_mappings else "Loading tool mappings..."}
+IMPORTANT SERVER NAMES: {server_list}
+When you need to use a tool, respond with ONLY a JSON object in this EXACT format:
+{{"use_tool": true, "server": "exact_server_name", "tool": "exact_tool_name", "arguments": {{"param": "value"}}}}
+CRITICAL INSTRUCTIONS:
+- Use ONLY the exact server names from this list: {server_list}
+- Use the exact tool names as shown in the mappings above
+- Always include all required parameters in the arguments
+- Do not include any other text before or after the JSON
+- Make sure the JSON is complete and properly formatted
+If you don't need to use a tool, respond normally without any JSON.
+"""
+        # Add tools system message with increased context
+        enhanced_messages = messages.copy()
+        if enhanced_messages and enhanced_messages[0].get("role") == "system":
+            enhanced_messages[0]["content"] += "\n\n" + tools_system_msg
+        else:
+            enhanced_messages.insert(0, {"role": "system", "content": tools_system_msg})
+        # Get initial response with higher token limit
+        logger.info("🤖 Getting initial response from LLM...")
+        response = self.generate_chat_completion(enhanced_messages, **{"max_tokens": 8192})
+        response_text = response.choices[0].message.content
+        logger.info(f"🤖 LLM Response (length: {len(response_text)}): {response_text}")
+        # Check if the response indicates tool usage
+        if '"use_tool": true' in response_text:
+            logger.info("🔧 Tool usage detected, parsing JSON...")
+            # Extract and parse JSON more robustly
+            tool_request = self._extract_tool_json(response_text)
+            if not tool_request:
+                # Fallback: try to extract tool info manually
+                logger.info("🔧 JSON parsing failed, trying manual extraction...")
+                tool_request = self._manual_tool_extraction(response_text)
+            if tool_request:
+                server_name = tool_request.get("server")
+                tool_name = tool_request.get("tool")
+                arguments = tool_request.get("arguments", {})
+                # Replace any local file paths in arguments with uploaded URLs
+                if hasattr(self, 'chat_handler_file_mapping'):
+                    for arg_key, arg_value in arguments.items():
+                        if isinstance(arg_value, str) and arg_value.startswith('/tmp/gradio/'):
+                            # Check if we have an uploaded URL for this local path
+                            for local_path, uploaded_url in self.chat_handler_file_mapping.items():
+                                if local_path in arg_value or arg_value in local_path:
+                                    logger.info(f"🔄 Replacing local path {arg_value} with uploaded URL {uploaded_url}")
+                                    arguments[arg_key] = uploaded_url
+                                    break
+                logger.info(f"🔧 Tool request - Server: {server_name}, Tool: {tool_name}, Args: {arguments}")
+                if server_name not in self.servers:
+                    available_servers = list(self.servers.keys())
+                    logger.error(f"❌ Server '{server_name}' not found. Available servers: {available_servers}")
+                    # Try to find a matching server by space_id or similar name
+                    matching_server = None
+                    for srv_name, srv_config in self.servers.items():
+                        if (srv_config.space_id and server_name in srv_config.space_id) or server_name in srv_name:
+                            matching_server = srv_name
+                            logger.info(f"🔧 Found matching server: {matching_server}")
+                            break
+                    if matching_server and self.enabled_servers.get(matching_server, True):
+                        server_name = matching_server
+                        logger.info(f"🔧 Using corrected server name: {server_name}")
+                    else:
+                        # Return error response with server name correction
+                        error_msg = f"Server '{server_name}' not found or disabled. Available enabled servers: {[name for name, enabled in self.enabled_servers.items() if enabled]}"
+                        response._tool_execution = {
+                            "server": server_name,
+                            "tool": tool_name,
+                            "result": error_msg,
+                            "success": False
+                        }
+                        return response
+                elif not self.enabled_servers.get(server_name, True):
+                    logger.error(f"❌ Server '{server_name}' is disabled")
+                    response._tool_execution = {
+                        "server": server_name,
+                        "tool": tool_name,
+                        "result": f"Server '{server_name}' is currently disabled",
+                        "success": False
+                    }
+                    return response
+                # Validate tool name exists for this server
+                if server_name in self.server_tools and tool_name not in self.server_tools[server_name]:
+                    available_tools = list(self.server_tools[server_name].keys())
+                    logger.warning(f"⚠️ Tool '{tool_name}' not found for server '{server_name}'. Available tools: {available_tools}")
+                    # Try to find the correct tool name
+                    if available_tools:
+                        # Use the first available tool if there's only one
+                        if len(available_tools) == 1:
+                            tool_name = available_tools[0]
+                            logger.info(f"🔧 Using only available tool: {tool_name}")
+                        # Or try to find a similar tool name
+                        else:
+                            for available_tool in available_tools:
+                                if tool_name.lower() in available_tool.lower() or available_tool.lower() in tool_name.lower():
+                                    tool_name = available_tool
+                                    logger.info(f"🔧 Found similar tool name: {tool_name}")
+                                    break
+                # Call the MCP tool
+                def run_mcp_tool():
+                    loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(loop)
+                    try:
+                        return loop.run_until_complete(
+                            self.call_mcp_tool_async(server_name, tool_name, arguments)
+                        )
+                    finally:
+                        loop.close()
+                success, result = run_mcp_tool()
+                if success:
+                    logger.info(f"✅ Tool call successful, result length: {len(str(result))}")
+                    # Add tool result to conversation and get final response with better prompting
+                    enhanced_messages.append({"role": "assistant", "content": response_text})
+                    enhanced_messages.append({"role": "user", "content": f"Tool '{tool_name}' from server '{server_name}' completed successfully. Result: {result}\n\nPlease provide a helpful response based on this tool result. If the result contains media URLs, present them appropriately."})
+                    # Remove the tool instruction from the system message for the final response
+                    final_messages = enhanced_messages.copy()
+                    if final_messages[0].get("role") == "system":
+                        final_messages[0]["content"] = final_messages[0]["content"].split("You have access to the following MCP tools:")[0].strip()
+                    logger.info("🤖 Getting final response with tool result...")
+                    final_response = self.generate_chat_completion(final_messages, **{"max_tokens": 4096})
+                    # Store tool execution info for the chat handler
+                    final_response._tool_execution = {
+                        "server": server_name,
+                        "tool": tool_name,
+                        "result": result,
+                        "success": True
+                    }
+                    return final_response
+                else:
+                    logger.error(f"❌ Tool call failed: {result}")
+                    # Return original response with error info
+                    response._tool_execution = {
+                        "server": server_name,
+                        "tool": tool_name,
+                        "result": result,
+                        "success": False
+                    }
+                    return response
+            else:
+                logger.warning("⚠️ Failed to parse tool request JSON")
+        else:
+            logger.info("💬 No tool usage detected, returning normal response")
+        # Return original response if no tool usage or tool call failed
+        return response
+    def _extract_tool_json(self, text: str) -> Optional[Dict[str, Any]]:
+        """Extract JSON from LLM response more robustly"""
+        import json
+        import re
+        logger.info(f"🔍 Full LLM response text: {text}")
+        # Try multiple strategies to extract JSON
+        strategies = [
+            # Strategy 1: Find complete JSON between outer braces
+            lambda t: re.search(r'\{[^{}]*"use_tool"[^{}]*"arguments"[^{}]*\{[^{}]*\}[^{}]*\}', t),
+            # Strategy 2: Find JSON that starts with {"use_tool" and reconstruct if needed
+            lambda t: self._reconstruct_json_from_start(t),
+            # Strategy 3: Find any complete JSON object
+            lambda t: re.search(r'\{(?:[^{}]|\{[^{}]*\})*\}', t),
+        ]
+        for i, strategy in enumerate(strategies, 1):
+            try:
+                if i == 2:
+                    # Strategy 2 returns a string directly
+                    json_str = strategy(text)
+                    if not json_str:
+                        continue
+                else:
+                    match = strategy(text)
+                    if not match:
+                        continue
+                    json_str = match.group(0)
+                logger.info(f"🔍 JSON extraction strategy {i} found: {json_str}")
+                # Clean up the JSON string
+                json_str = json_str.strip()
+                # Try to parse
+                parsed = json.loads(json_str)
+                # Validate it's a tool request
+                if parsed.get("use_tool") is True:
+                    logger.info(f"✅ Valid tool request parsed: {parsed}")
+                    return parsed
+            except json.JSONDecodeError as e:
+                logger.warning(f"⚠️ JSON parse error with strategy {i}: {e}")
+                logger.warning(f"⚠️ Problematic JSON: {json_str if 'json_str' in locals() else 'N/A'}")
+                continue
+            except Exception as e:
+                logger.warning(f"⚠️ Strategy {i} failed: {e}")
+                continue
+        logger.error("❌ Failed to extract valid JSON from response")
+        return None
+    def _manual_tool_extraction(self, text: str) -> Optional[Dict[str, Any]]:
+        """Manually extract tool information as fallback"""
+        import re
+        logger.info("🔧 Attempting manual tool extraction...")
+        try:
+            # Extract server name
+            server_match = re.search(r'"server":\s*"([^"]+)"', text)
+            tool_match = re.search(r'"tool":\s*"([^"]+)"', text)
+            if not server_match or not tool_match:
+                logger.warning("⚠️ Could not find server or tool in manual extraction")
+                return None
+            server_name = server_match.group(1)
+            tool_name = tool_match.group(1)
+            # Try to extract arguments
+            args_match = re.search(r'"arguments":\s*\{([^}]+)\}', text)
+            arguments = {}
+            if args_match:
+                args_content = args_match.group(1)
+                # Simple extraction of key-value pairs
+                pairs = re.findall(r'"([^"]+)":\s*"([^"]+)"', args_content)
+                arguments = dict(pairs)
+            manual_request = {
+                "use_tool": True,
+                "server": server_name,
+                "tool": tool_name,
+                "arguments": arguments
+            }
+            logger.info(f"🔧 Manual extraction successful: {manual_request}")
+            return manual_request
+        except Exception as e:
+            logger.error(f"❌ Manual extraction failed: {e}")
+            return None
+    def _reconstruct_json_from_start(self, text: str) -> Optional[str]:
+        """Try to reconstruct JSON if it's truncated"""
+        import re
+        # Find start of JSON
+        match = re.search(r'\{"use_tool":\s*true[^}]*', text)
+        if not match:
+            return None
+        json_start = match.start()
+        json_part = text[json_start:]
+        logger.info(f"🔧 Reconstructing JSON from: {json_part[:200]}...")
+        # Try to find the end or reconstruct
+        brace_count = 0
+        end_pos = 0
+        in_string = False
+        escape_next = False
+        for i, char in enumerate(json_part):
+            if escape_next:
+                escape_next = False
+                continue
+            if char == '\\':
+                escape_next = True
+                continue
+            if char == '"' and not escape_next:
+                in_string = not in_string
+                continue
+            if not in_string:
+                if char == '{':
+                    brace_count += 1
+                elif char == '}':
+                    brace_count -= 1
+                    if brace_count == 0:
+                        end_pos = i + 1
+                        break
+        if end_pos > 0:
+            reconstructed = json_part[:end_pos]
+            logger.info(f"🔧 Reconstructed JSON: {reconstructed}")
+            return reconstructed
+        else:
+            # Try to add missing closing braces
+            missing_braces = json_part.count('{') - json_part.count('}')
+            if missing_braces > 0:
+                reconstructed = json_part + '}' * missing_braces
+                logger.info(f"🔧 Added {missing_braces} closing braces: {reconstructed}")
+                return reconstructed
+        return None
+    def _extract_media_from_mcp_response(self, result_text: str, config: MCPServerConfig) -> Optional[str]:
+        """Enhanced media extraction from MCP responses with better URL resolution"""
+        if not isinstance(result_text, str):
+            logger.info(f"🔍 Non-string result: {type(result_text)}")
+            return None
+        base_url = config.url.replace("/gradio_api/mcp/sse", "")
+        logger.info(f"🔍 Processing MCP result for media: {result_text[:300]}...")
+        logger.info(f"🔍 Base URL: {base_url}")
+        # 1. Try to parse as JSON (most Gradio MCP servers return structured data)
+        try:
+            if result_text.strip().startswith('[') or result_text.strip().startswith('{'):
+                logger.info("🔍 Attempting JSON parse...")
+                data = json.loads(result_text.strip())
+                logger.info(f"🔍 Parsed JSON structure: {data}")
+                # Handle array format: [{'image': {'url': '...'}}] or [{'url': '...'}]
+                if isinstance(data, list) and len(data) > 0:
+                    item = data[0]
+                    logger.info(f"🔍 First array item: {item}")
+                    if isinstance(item, dict):
+                        # Check for nested media structure
+                        for media_type in ['image', 'audio', 'video']:
+                            if media_type in item and isinstance(item[media_type], dict):
+                                media_data = item[media_type]
+                                if 'url' in media_data:
+                                    url = media_data['url'].strip('\'"')  # Clean quotes
+                                    logger.info(f"🎯 Found {media_type} URL: {url}")
+                                    return self._resolve_media_url(url, base_url)
+                        # Check for direct URL
+                        if 'url' in item:
+                            url = item['url'].strip('\'"')  # Clean quotes
+                            logger.info(f"🎯 Found direct URL: {url}")
+                            return self._resolve_media_url(url, base_url)
+                # Handle object format: {'image': {'url': '...'}} or {'url': '...'}
+                elif isinstance(data, dict):
+                    logger.info(f"🔍 Processing dict: {data}")
+                    # Check for nested media structure
+                    for media_type in ['image', 'audio', 'video']:
+                        if media_type in data and isinstance(data[media_type], dict):
+                            media_data = data[media_type]
+                            if 'url' in media_data:
+                                url = media_data['url'].strip('\'"')  # Clean quotes
+                                logger.info(f"🎯 Found {media_type} URL: {url}")
+                                return self._resolve_media_url(url, base_url)
+                    # Check for direct URL
+                    if 'url' in data:
+                        url = data['url'].strip('\'"')  # Clean quotes
+                        logger.info(f"🎯 Found direct URL: {url}")
+                        return self._resolve_media_url(url, base_url)
+        except json.JSONDecodeError:
+            logger.info("🔍 Not valid JSON, trying other formats...")
+        except Exception as e:
+            logger.warning(f"🔍 JSON parsing error: {e}")
+        # 2. Check for Gradio file URLs (common pattern) with better cleaning
+        gradio_file_patterns = [
+            r'https://[^/]+\.hf\.space/gradio_api/file=/[^/]+/[^/]+/[^"\s\',]+',
+            r'https://[^/]+\.hf\.space/file=[^"\s\',]+',
+            r'/gradio_api/file=/[^"\s\',]+'
+        ]
+        for pattern in gradio_file_patterns:
+            match = re.search(pattern, result_text)
+            if match:
+                url = match.group(0).rstrip('\'",:;')  # Remove trailing punctuation
+                logger.info(f"🎯 Found Gradio file URL: {url}")
+                if url.startswith('/'):
+                    url = f"{base_url}{url}"
+                return url
+        # 3. Check for simple HTTP URLs in the text
+        http_url_pattern = r'https?://[^\s"<>]+'
+        matches = re.findall(http_url_pattern, result_text)
+        for url in matches:
+            if AppConfig.is_media_file(url):
+                logger.info(f"🎯 Found HTTP media URL: {url}")
+                return url
+        # 4. Check for data URLs (base64 encoded media)
+        if result_text.startswith('data:'):
+            logger.info("🎯 Found data URL")
+            return result_text
+        # 5. For simple file paths, create proper Gradio URLs
+        if AppConfig.is_media_file(result_text):
+            # Extract just the filename if it's a path
+            if '/' in result_text:
+                filename = result_text.split('/')[-1]
+            else:
+                filename = result_text.strip()
+            # Create proper Gradio file URL
+            media_url = f"{base_url}/file={filename}"
+            logger.info(f"🎯 Created media URL from filename: {media_url}")
+            return media_url
+        logger.info("❌ No media detected in result")
+        return None
+    def _resolve_media_url(self, url: str, base_url: str) -> str:
+        """Resolve relative URLs to absolute URLs with better handling"""
+        if url.startswith('http') or url.startswith('data:'):
+            return url
+        elif url.startswith('/gradio_api/file='):
+            return f"{base_url}{url}"
+        elif url.startswith('/file='):
+            return f"{base_url}/gradio_api{url}"
+        elif url.startswith('file='):
+            return f"{base_url}/gradio_api/{url}"
+        elif url.startswith('/'):
+            return f"{base_url}/file={url}"
+        else:
+            return f"{base_url}/file={url}"
+    def get_server_status(self) -> Dict[str, str]:
+        """Get status of all configured servers"""
+        status = {}
+        for name in self.servers:
+            compatibility = self._check_file_upload_compatibility(self.servers[name])
+            status[name] = f"✅ Connected (MCP Protocol) - {compatibility}"
+        return status
+    def _check_file_upload_compatibility(self, config: MCPServerConfig) -> str:
+        """Check if a server likely supports file uploads"""
+        if "hf.space" in config.url:
+            return "🟡 Hugging Face Space (usually compatible)"
+        elif "gradio" in config.url.lower():
+            return "🟢 Gradio server (likely compatible)"
+        elif "localhost" in config.url or "127.0.0.1" in config.url:
+            return "🟢 Local server (file access available)"
+        else:
+            return "🔴 Remote server (may need public URLs)"