ysharma (HF Staff) committed
Commit 7bab86d · verified · 1 Parent(s): 1cdb77b

Create config.py

Files changed (1)
  1. config.py +362 -0
config.py ADDED
@@ -0,0 +1,362 @@
+ """
+ Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
+ """
+ import os
+ from dataclasses import dataclass
+ from typing import Optional, Dict, List
+ import logging
+
+ # Set up enhanced logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class MCPServerConfig:
+     """Configuration for an MCP server connection"""
+     name: str
+     url: str
+     description: str
+     space_id: Optional[str] = None
+
+ class AppConfig:
+     """Application configuration settings"""
+
+     # HuggingFace Configuration
+     HF_TOKEN = os.getenv("HF_TOKEN")
+
+     # OpenAI GPT OSS Models with enhanced configurations
+     AVAILABLE_MODELS = {
+         "openai/gpt-oss-120b": {
+             "name": "GPT OSS 120B",
+             "description": "117B parameters, 5.1B active - Production use with reasoning",
+             "size": "120B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "5.1B"
+         },
+         "openai/gpt-oss-20b": {
+             "name": "GPT OSS 20B",
+             "description": "21B parameters, 3.6B active - Lower latency with reasoning",
+             "size": "20B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "3.6B"
+         }
+     }
+
+     # Enhanced Inference Providers supporting GPT OSS models
+     INFERENCE_PROVIDERS = {
+         "cerebras": {
+             "name": "Cerebras",
+             "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "cerebras",
+             "speed": "Very Fast",
+             "recommended_for": ["production", "high-throughput"],
+             "max_context_support": 128000  # Full context support
+         },
+         "fireworks-ai": {
+             "name": "Fireworks AI",
+             "description": "Fast inference with excellent reliability",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "fireworks-ai",
+             "speed": "Fast",
+             "recommended_for": ["production", "general-use"],
+             "max_context_support": 128000  # Full context support
+         },
+         "together-ai": {
+             "name": "Together AI",
+             "description": "Collaborative AI inference with good performance",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "together-ai",
+             "speed": "Fast",
+             "recommended_for": ["development", "experimentation"],
+             "max_context_support": 128000  # Full context support
+         },
+         "replicate": {
+             "name": "Replicate",
+             "description": "Machine learning deployment platform",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "replicate",
+             "speed": "Medium",
+             "recommended_for": ["prototyping", "low-volume"],
+             "max_context_support": 128000  # Full context support
+         }
+     }
+
+     # Enhanced Model Configuration for GPT-OSS - Utilizing full context
+     MAX_TOKENS = 128000  # Full context length for GPT-OSS models
+
+     # Response token allocation - increased for longer responses
+     DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
+     MIN_RESPONSE_TOKENS = 4096  # Minimum response size
+
+     # Context management - optimized for full 128k usage
+     SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
+     MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled
+
+     # History management - much larger with 128k context
+     MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
+     DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance
+
+     # Reasoning configuration
+     DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high
+
+     # UI Configuration
+     GRADIO_THEME = "ocean"
+     DEBUG_MODE = True
+
+     # MCP Server recommendations
+     OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
+     WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this
+
+     # File Support
+     SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
+     SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
+     SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
+     SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
+
+     @classmethod
+     def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
+         """Get models available for a specific provider"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             return []
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         available_models = []
+
+         for model_id, model_info in cls.AVAILABLE_MODELS.items():
+             if model_info["size"] == "120B" and provider["supports_120b"]:
+                 available_models.append(model_id)
+             elif model_info["size"] == "20B" and provider["supports_20b"]:
+                 available_models.append(model_id)
+
+         return available_models
+
+     @classmethod
+     def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
+         """Get the full model endpoint for HF Inference Providers"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             raise ValueError(f"Unknown provider: {provider_id}")
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         return f"{model_id}:{provider['endpoint_suffix']}"
+
+     @classmethod
+     def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
+         """Get optimal context settings for a model/provider combination"""
+         model_info = cls.AVAILABLE_MODELS.get(model_id, {})
+         provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
+
+         # Get the minimum of model and provider context support
+         model_context = model_info.get("context_length", 128000)
+         provider_context = provider_info.get("max_context_support", 128000)
+         context_length = min(model_context, provider_context)
+
+         # Calculate reserves based on MCP server count
+         system_reserve = cls.SYSTEM_PROMPT_RESERVE
+         if mcp_servers_count > 0:
+             # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
+             system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
+
+         # Dynamic response token allocation based on available context
+         if context_length >= 100000:
+             max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
+         elif context_length >= 50000:
+             max_response_tokens = 12288
+         elif context_length >= 20000:
+             max_response_tokens = 8192
+         else:
+             max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096
+
+         # Calculate available context for history
+         available_context = context_length - system_reserve - max_response_tokens
+
+         # Calculate recommended history limit
+         # Assume average message is ~200 tokens
+         avg_message_tokens = 200
+         recommended_history = min(
+             cls.MAX_HISTORY_MESSAGES,
+             available_context // avg_message_tokens
+         )
+
+         return {
+             "max_context": context_length,
+             "available_context": available_context,
+             "max_response_tokens": max_response_tokens,
+             "system_reserve": system_reserve,
+             "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
+             "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
+         }
+
+     @classmethod
+     def get_all_media_extensions(cls):
+         """Get all supported media file extensions"""
+         return (cls.SUPPORTED_IMAGE_EXTENSIONS +
+                 cls.SUPPORTED_AUDIO_EXTENSIONS +
+                 cls.SUPPORTED_VIDEO_EXTENSIONS)
+
+     @classmethod
+     def is_image_file(cls, file_path: str) -> bool:
+         """Check if file is an image"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)
+
+     @classmethod
+     def is_audio_file(cls, file_path: str) -> bool:
+         """Check if file is an audio file"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)
+
+     @classmethod
+     def is_video_file(cls, file_path: str) -> bool:
+         """Check if file is a video file"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)
+
+     @classmethod
+     def is_media_file(cls, file_path: str) -> bool:
+         """Check if file is any supported media type"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
+
+     @classmethod
+     def get_provider_recommendation(cls, use_case: str) -> List[str]:
+         """Get recommended providers for specific use cases"""
+         recommendations = {
+             "production": ["cerebras", "fireworks-ai"],
+             "development": ["together-ai", "fireworks-ai"],
+             "experimentation": ["together-ai", "replicate"],
+             "high-throughput": ["cerebras"],
+             "cost-effective": ["together-ai", "replicate"],
+             "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
+         }
+         return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
+
+ # Check for dependencies
+ try:
+     import httpx
+     HTTPX_AVAILABLE = True
+ except ImportError:
+     HTTPX_AVAILABLE = False
+     logger.warning("httpx not available - file upload functionality limited")
+
+ try:
+     import huggingface_hub
+     HF_HUB_AVAILABLE = True
+ except ImportError:
+     HF_HUB_AVAILABLE = False
+     logger.warning("huggingface_hub not available - login functionality disabled")
+
+ # Enhanced CSS Configuration with better media display
+ CUSTOM_CSS = """
+ /* Hide Gradio footer */
+ footer {
+     display: none !important;
+ }
+ /* Make chatbot expand to fill available space */
+ .gradio-container {
+     height: 100vh !important;
+ }
+ /* Ensure proper flex layout */
+ .main-content {
+     display: flex;
+     flex-direction: column;
+     height: 100%;
+ }
+ /* Input area stays at bottom with minimal padding */
+ .input-area {
+     margin-top: auto;
+     padding-top: 0.25rem !important;
+     padding-bottom: 0 !important;
+     margin-bottom: 0 !important;
+ }
+ /* Reduce padding around chatbot */
+ .chatbot {
+     margin-bottom: 0 !important;
+     padding-bottom: 0 !important;
+ }
+ /* Provider and model selection styling */
+ .provider-model-selection {
+     padding: 10px;
+     border-radius: 8px;
+     margin-bottom: 10px;
+     border-left: 4px solid #007bff;
+ }
+ /* Login section styling */
+ .login-section {
+     padding: 10px;
+     border-radius: 8px;
+     margin-bottom: 10px;
+     border-left: 4px solid #4caf50;
+ }
+ /* Tool usage indicator */
+ .tool-usage {
+     background: #fff3cd;
+     border: 1px solid #ffeaa7;
+     border-radius: 4px;
+     padding: 8px;
+     margin: 4px 0;
+ }
+ /* Media display improvements */
+ .media-container {
+     max-width: 100%;
+     border-radius: 8px;
+     overflow: hidden;
+     box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+ }
+ /* Enhanced audio player styling */
+ audio {
+     width: 100%;
+     max-width: 500px;
+     height: 54px;
+     border-radius: 27px;
+     outline: none;
+     margin: 10px 0;
+ }
+ /* Enhanced video player styling */
+ video {
+     width: 100%;
+     max-width: 700px;
+     height: auto;
+     object-fit: contain;
+     border-radius: 8px;
+     margin: 10px 0;
+     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+ }
+ /* Server status indicators */
+ .server-status {
+     display: inline-block;
+     padding: 2px 8px;
+     border-radius: 12px;
+     font-size: 12px;
+     font-weight: bold;
+ }
+ .server-status.online {
+     background: #d4edda;
+     color: #155724;
+ }
+ .server-status.offline {
+     background: #f8d7da;
+     color: #721c24;
+ }
+ /* Message metadata styling */
+ .message-metadata {
+     font-size: 0.85em;
+     color: #666;
+     margin-top: 4px;
+     padding: 4px 8px;
+     background: #f0f0f0;
+     border-radius: 4px;
+ }
+ """
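Usage note: the sketch below is not part of this commit; it is a minimal illustration of how another module in the Space might consume config.py. The provider choice ("cerebras"), the example MCP server values, and the variable names are assumptions. It only calls functions defined above and works through the token-budget arithmetic they imply.

from config import AppConfig, MCPServerConfig

# Hypothetical MCP server entry (name/url invented for illustration)
docs_server = MCPServerConfig(
    name="docs",
    url="https://example.org/mcp",
    description="Example documentation MCP server",
)

# Pick a model that the chosen provider supports
models = AppConfig.get_available_models_for_provider("cerebras")
model_id = models[0]  # e.g. "openai/gpt-oss-120b"

# Provider-suffixed id used when routing through HF Inference Providers,
# e.g. "openai/gpt-oss-120b:cerebras"
endpoint = AppConfig.get_model_endpoint(model_id, "cerebras")

# With 2 MCP servers enabled:
#   system_reserve       = 3000 + 2000 + 2 * 300 = 5600
#   max_response_tokens  = 16384 (context_length >= 100k)
#   available_context    = 128000 - 5600 - 16384 = 106016
#   recommended_history  = min(100, 106016 // 200) = 100 messages
settings = AppConfig.get_optimal_context_settings(model_id, "cerebras", mcp_servers_count=2)
print(endpoint, settings["max_response_tokens"], settings["recommended_history_limit"])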