Create config.py
config.py
ADDED
@@ -0,0 +1,362 @@
"""
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
"""
import os
from dataclasses import dataclass
from typing import Optional, Dict, List
import logging

# Set up enhanced logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

@dataclass
class MCPServerConfig:
    """Configuration for an MCP server connection"""
    name: str
    url: str
    description: str
    space_id: Optional[str] = None

class AppConfig:
    """Application configuration settings"""

    # HuggingFace Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # OpenAI GPT OSS Models with enhanced configurations
    AVAILABLE_MODELS = {
        "openai/gpt-oss-120b": {
            "name": "GPT OSS 120B",
            "description": "117B parameters, 5.1B active - Production use with reasoning",
            "size": "120B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "5.1B"
        },
        "openai/gpt-oss-20b": {
            "name": "GPT OSS 20B",
            "description": "21B parameters, 3.6B active - Lower latency with reasoning",
            "size": "20B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "3.6B"
        }
    }

    # Enhanced Inference Providers supporting GPT OSS models
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Enhanced Model Configuration for GPT-OSS - utilizing the full context window
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI Configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP Server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6   # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show a warning above this count

    # File Support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Get models available for a specific provider"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            return []

        provider = cls.INFERENCE_PROVIDERS[provider_id]
        available_models = []

        for model_id, model_info in cls.AVAILABLE_MODELS.items():
            if model_info["size"] == "120B" and provider["supports_120b"]:
                available_models.append(model_id)
            elif model_info["size"] == "20B" and provider["supports_20b"]:
                available_models.append(model_id)

        return available_models

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Get the full model endpoint for HF Inference Providers"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")

        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

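    # Example (illustrative): get_model_endpoint("openai/gpt-oss-120b", "cerebras")
    # returns "openai/gpt-oss-120b:cerebras", the provider-qualified model string.
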
    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
        """Get optimal context settings for a model/provider combination"""
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # Get the minimum of model and provider context support
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Calculate available context for history
        available_context = context_length - system_reserve - max_response_tokens

        # Calculate recommended history limit, assuming an average message is ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }

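    # Worked example (illustrative): for "openai/gpt-oss-120b" on "cerebras" with mcp_servers_count=2,
    # system_reserve = 3000 + 2000 + 2 * 300 = 5600, max_response_tokens = 16384,
    # available_context = 128000 - 5600 - 16384 = 106016, recommended_history_limit = 100,
    # and context_utilization = "17.2% reserved".
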
    @classmethod
    def get_all_media_extensions(cls):
        """Get all supported media file extensions"""
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image"""
        if not file_path:
            return False
        return any(ext in file_path.lower() for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file"""
        if not file_path:
            return False
        return any(ext in file_path.lower() for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file"""
        if not file_path:
            return False
        return any(ext in file_path.lower() for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type"""
        if not file_path:
            return False
        return any(ext in file_path.lower() for ext in cls.get_all_media_extensions())

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases"""
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))

# Check for dependencies
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")

# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
    display: none !important;
}

/* Make chatbot expand to fill available space */
.gradio-container {
    height: 100vh !important;
}

/* Ensure proper flex layout */
.main-content {
    display: flex;
    flex-direction: column;
    height: 100%;
}

/* Input area stays at bottom with minimal padding */
.input-area {
    margin-top: auto;
    padding-top: 0.25rem !important;
    padding-bottom: 0 !important;
    margin-bottom: 0 !important;
}

/* Reduce padding around chatbot */
.chatbot {
    margin-bottom: 0 !important;
    padding-bottom: 0 !important;
}

/* Provider and model selection styling */
.provider-model-selection {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #007bff;
}

/* Login section styling */
.login-section {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #4caf50;
}

/* Tool usage indicator */
.tool-usage {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 8px;
    margin: 4px 0;
}

/* Media display improvements */
.media-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Enhanced audio player styling */
audio {
    width: 100%;
    max-width: 500px;
    height: 54px;
    border-radius: 27px;
    outline: none;
    margin: 10px 0;
}

/* Enhanced video player styling */
video {
    width: 100%;
    max-width: 700px;
    height: auto;
    object-fit: contain;
    border-radius: 8px;
    margin: 10px 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

/* Server status indicators */
.server-status {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: bold;
}

.server-status.online {
    background: #d4edda;
    color: #155724;
}

.server-status.offline {
    background: #f8d7da;
    color: #721c24;
}

/* Message metadata styling */
.message-metadata {
    font-size: 0.85em;
    color: #666;
    margin-top: 4px;
    padding: 4px 8px;
    background: #f0f0f0;
    border-radius: 4px;
}
"""
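
For reference, a minimal sketch of how these helpers might be consumed elsewhere in the Space (the import path and the surrounding wiring are assumptions, not part of this commit):

    from config import AppConfig

    # Hypothetical caller: pick a model/provider pair and derive the request budget,
    # assuming two MCP servers are connected.
    model_id = "openai/gpt-oss-120b"
    provider_id = "cerebras"

    endpoint = AppConfig.get_model_endpoint(model_id, provider_id)  # "openai/gpt-oss-120b:cerebras"
    settings = AppConfig.get_optimal_context_settings(model_id, provider_id, mcp_servers_count=2)

    print(endpoint)
    print(settings["max_response_tokens"], settings["recommended_history_limit"])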