ysharma (HF Staff) committed
Commit 7bab86d · verified · 1 Parent(s): 1cdb77b

Create config.py

Files changed (1)
  1. config.py +362 -0
config.py ADDED
@@ -0,0 +1,362 @@
+ """
+ Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
+ """
+ import os
+ from dataclasses import dataclass
+ from typing import Optional, Dict, List
+ import logging
+
+ # Set up enhanced logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class MCPServerConfig:
+     """Configuration for an MCP server connection"""
+     name: str
+     url: str
+     description: str
+     space_id: Optional[str] = None
+
+ class AppConfig:
+     """Application configuration settings"""
+
+     # HuggingFace Configuration
+     HF_TOKEN = os.getenv("HF_TOKEN")
+
+     # OpenAI GPT OSS Models with enhanced configurations
+     AVAILABLE_MODELS = {
+         "openai/gpt-oss-120b": {
+             "name": "GPT OSS 120B",
+             "description": "117B parameters, 5.1B active - Production use with reasoning",
+             "size": "120B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "5.1B"
+         },
+         "openai/gpt-oss-20b": {
+             "name": "GPT OSS 20B",
+             "description": "21B parameters, 3.6B active - Lower latency with reasoning",
+             "size": "20B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "3.6B"
+         }
+     }
+
+     # Enhanced Inference Providers supporting GPT OSS models
+     INFERENCE_PROVIDERS = {
+         "cerebras": {
+             "name": "Cerebras",
+             "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "cerebras",
+             "speed": "Very Fast",
+             "recommended_for": ["production", "high-throughput"],
+             "max_context_support": 128000  # Full context support
+         },
+         "fireworks-ai": {
+             "name": "Fireworks AI",
+             "description": "Fast inference with excellent reliability",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "fireworks-ai",
+             "speed": "Fast",
+             "recommended_for": ["production", "general-use"],
+             "max_context_support": 128000  # Full context support
+         },
+         "together-ai": {
+             "name": "Together AI",
+             "description": "Collaborative AI inference with good performance",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "together-ai",
+             "speed": "Fast",
+             "recommended_for": ["development", "experimentation"],
+             "max_context_support": 128000  # Full context support
+         },
+         "replicate": {
+             "name": "Replicate",
+             "description": "Machine learning deployment platform",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "replicate",
+             "speed": "Medium",
+             "recommended_for": ["prototyping", "low-volume"],
+             "max_context_support": 128000  # Full context support
+         }
+     }
+
+     # Enhanced Model Configuration for GPT-OSS - Utilizing full context
+     MAX_TOKENS = 128000  # Full context length for GPT-OSS models
+
+     # Response token allocation - increased for longer responses
+     DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
+     MIN_RESPONSE_TOKENS = 4096  # Minimum response size
+
+     # Context management - optimized for full 128k usage
+     SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
+     MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled
+
+     # History management - much larger with 128k context
+     MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
+     DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance
+
+     # Reasoning configuration
+     DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high
+
+     # UI Configuration
+     GRADIO_THEME = "ocean"
+     DEBUG_MODE = True
+
+     # MCP Server recommendations
+     OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
+     WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this
+
+     # File Support
+     SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
+     SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
+     SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
+     SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
+
+     @classmethod
+     def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
+         """Get models available for a specific provider"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             return []
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         available_models = []
+
+         for model_id, model_info in cls.AVAILABLE_MODELS.items():
+             if model_info["size"] == "120B" and provider["supports_120b"]:
+                 available_models.append(model_id)
+             elif model_info["size"] == "20B" and provider["supports_20b"]:
+                 available_models.append(model_id)
+
+         return available_models
+
+     @classmethod
+     def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
+         """Get the full model endpoint for HF Inference Providers"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             raise ValueError(f"Unknown provider: {provider_id}")
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         return f"{model_id}:{provider['endpoint_suffix']}"
+
+     @classmethod
+     def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
+         """Get optimal context settings for a model/provider combination"""
+         model_info = cls.AVAILABLE_MODELS.get(model_id, {})
+         provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
+
+         # Get the minimum of model and provider context support
+         model_context = model_info.get("context_length", 128000)
+         provider_context = provider_info.get("max_context_support", 128000)
+         context_length = min(model_context, provider_context)
+
+         # Calculate reserves based on MCP server count
+         system_reserve = cls.SYSTEM_PROMPT_RESERVE
+         if mcp_servers_count > 0:
+             # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
+             system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
+
+         # Dynamic response token allocation based on available context
+         if context_length >= 100000:
+             max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
+         elif context_length >= 50000:
+             max_response_tokens = 12288
+         elif context_length >= 20000:
+             max_response_tokens = 8192
+         else:
+             max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096
+
+         # Calculate available context for history
+         available_context = context_length - system_reserve - max_response_tokens
+
+         # Calculate recommended history limit
+         # Assume average message is ~200 tokens
+         avg_message_tokens = 200
+         recommended_history = min(
+             cls.MAX_HISTORY_MESSAGES,
+             available_context // avg_message_tokens
+         )
+
+         return {
+             "max_context": context_length,
+             "available_context": available_context,
+             "max_response_tokens": max_response_tokens,
+             "system_reserve": system_reserve,
+             "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
+             "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
+         }
+
+     @classmethod
+     def get_all_media_extensions(cls):
+         """Get all supported media file extensions"""
+         return (cls.SUPPORTED_IMAGE_EXTENSIONS +
+                 cls.SUPPORTED_AUDIO_EXTENSIONS +
+                 cls.SUPPORTED_VIDEO_EXTENSIONS)
+
+     @classmethod
+     def is_image_file(cls, file_path: str) -> bool:
+         """Check if file is an image"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)
+
+     @classmethod
+     def is_audio_file(cls, file_path: str) -> bool:
+         """Check if file is an audio file"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)
+
+     @classmethod
+     def is_video_file(cls, file_path: str) -> bool:
+         """Check if file is a video file"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)
+
+     @classmethod
+     def is_media_file(cls, file_path: str) -> bool:
+         """Check if file is any supported media type"""
+         if not file_path:
+             return False
+         return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
+
+     @classmethod
+     def get_provider_recommendation(cls, use_case: str) -> List[str]:
+         """Get recommended providers for specific use cases"""
+         recommendations = {
+             "production": ["cerebras", "fireworks-ai"],
+             "development": ["together-ai", "fireworks-ai"],
+             "experimentation": ["together-ai", "replicate"],
+             "high-throughput": ["cerebras"],
+             "cost-effective": ["together-ai", "replicate"],
+             "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
+         }
+         return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
+
+ # Check for dependencies
+ try:
+     import httpx
+     HTTPX_AVAILABLE = True
+ except ImportError:
+     HTTPX_AVAILABLE = False
+     logger.warning("httpx not available - file upload functionality limited")
+
+ try:
+     import huggingface_hub
+     HF_HUB_AVAILABLE = True
+ except ImportError:
+     HF_HUB_AVAILABLE = False
+     logger.warning("huggingface_hub not available - login functionality disabled")
+
+ # Enhanced CSS Configuration with better media display
+ CUSTOM_CSS = """
+ /* Hide Gradio footer */
+ footer {
+     display: none !important;
+ }
+ /* Make chatbot expand to fill available space */
+ .gradio-container {
+     height: 100vh !important;
+ }
+ /* Ensure proper flex layout */
+ .main-content {
+     display: flex;
+     flex-direction: column;
+     height: 100%;
+ }
+ /* Input area stays at bottom with minimal padding */
+ .input-area {
+     margin-top: auto;
+     padding-top: 0.25rem !important;
+     padding-bottom: 0 !important;
+     margin-bottom: 0 !important;
+ }
+ /* Reduce padding around chatbot */
+ .chatbot {
+     margin-bottom: 0 !important;
+     padding-bottom: 0 !important;
+ }
+ /* Provider and model selection styling */
+ .provider-model-selection {
+     padding: 10px;
+     border-radius: 8px;
+     margin-bottom: 10px;
+     border-left: 4px solid #007bff;
+ }
+ /* Login section styling */
+ .login-section {
+     padding: 10px;
+     border-radius: 8px;
+     margin-bottom: 10px;
+     border-left: 4px solid #4caf50;
+ }
+ /* Tool usage indicator */
+ .tool-usage {
+     background: #fff3cd;
+     border: 1px solid #ffeaa7;
+     border-radius: 4px;
+     padding: 8px;
+     margin: 4px 0;
+ }
+ /* Media display improvements */
+ .media-container {
+     max-width: 100%;
+     border-radius: 8px;
+     overflow: hidden;
+     box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+ }
+ /* Enhanced audio player styling */
+ audio {
+     width: 100%;
+     max-width: 500px;
+     height: 54px;
+     border-radius: 27px;
+     outline: none;
+     margin: 10px 0;
+ }
+ /* Enhanced video player styling */
+ video {
+     width: 100%;
+     max-width: 700px;
+     height: auto;
+     object-fit: contain;
+     border-radius: 8px;
+     margin: 10px 0;
+     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+ }
+ /* Server status indicators */
+ .server-status {
+     display: inline-block;
+     padding: 2px 8px;
+     border-radius: 12px;
+     font-size: 12px;
+     font-weight: bold;
+ }
+ .server-status.online {
+     background: #d4edda;
+     color: #155724;
+ }
+ .server-status.offline {
+     background: #f8d7da;
+     color: #721c24;
+ }
+ /* Message metadata styling */
+ .message-metadata {
+     font-size: 0.85em;
+     color: #666;
+     margin-top: 4px;
+     padding: 4px 8px;
+     background: #f0f0f0;
+     border-radius: 4px;
+ }
+ """
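Usage note: the sketch below is not part of this commit; it is a minimal illustration of how another module in the Space might consume config.py. The provider choice ("cerebras"), the example MCP server values, and the variable names are assumptions. It only calls functions defined above and works through the token-budget arithmetic they imply.

from config import AppConfig, MCPServerConfig

# Hypothetical MCP server entry (name/url invented for illustration)
docs_server = MCPServerConfig(
    name="docs",
    url="https://example.org/mcp",
    description="Example documentation MCP server",
)

# Pick a model that the chosen provider supports
models = AppConfig.get_available_models_for_provider("cerebras")
model_id = models[0]  # e.g. "openai/gpt-oss-120b"

# Provider-suffixed id used when routing through HF Inference Providers,
# e.g. "openai/gpt-oss-120b:cerebras"
endpoint = AppConfig.get_model_endpoint(model_id, "cerebras")

# With 2 MCP servers enabled:
#   system_reserve       = 3000 + 2000 + 2 * 300 = 5600
#   max_response_tokens  = 16384 (context_length >= 100k)
#   available_context    = 128000 - 5600 - 16384 = 106016
#   recommended_history  = min(100, 106016 // 200) = 100 messages
settings = AppConfig.get_optimal_context_settings(model_id, "cerebras", mcp_servers_count=2)
print(endpoint, settings["max_response_tokens"], settings["recommended_history_limit"])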