Spaces: dylanebert — Running

dylanebert committed · Commit 262aca8 · Parent(s): 057e151 · "simplify"

app.py CHANGED
@@ -1,18 +1,31 @@
 """
 Research Tracker MCP Server
 
-A
-
+A clean, simple MCP server that provides research inference utilities.
+Exposes functions to infer research metadata from paper URLs, repository links,
+or research names using the research-tracker-backend inference engine.
+
+Key Features:
+- Author inference from papers and repositories
+- Cross-platform resource discovery (papers, code, models, datasets)
+- Research metadata extraction (names, dates, licenses, organizations)
+- URL classification and relationship mapping
+- Comprehensive research ecosystem analysis
+
+All functions are optimized for MCP usage with clear type hints and docstrings.
 """
 
 import os
 import requests
 import gradio as gr
-from typing import List, Dict, Any
+from typing import List, Dict, Any
 import logging
 
 # Configure logging
-logging.basicConfig(
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
 
 # Configuration
@@ -24,102 +37,27 @@ if not HF_TOKEN:
     logger.warning("HF_TOKEN not found in environment variables")
 
 
-def validate_input(input_data: str, input_name: str = "input") -> str:
-    """
-    Validate and sanitize input data.
-
-    Args:
-        input_data: The input string to validate
-        input_name: Name of the input for error messages
-
-    Returns:
-        Cleaned input string
-
-    Raises:
-        ValueError: If input is invalid
-    """
-    if not input_data:
-        raise ValueError(f"{input_name} cannot be empty or None")
-
-    cleaned = input_data.strip()
-    if not cleaned:
-        raise ValueError(f"{input_name} cannot be empty after trimming")
-
-    # Basic URL validation if it looks like a URL
-    if cleaned.startswith(("http://", "https://")):
-        if len(cleaned) > 2000:
-            raise ValueError(f"{input_name} URL is too long (max 2000 characters)")
-        # Check for suspicious patterns
-        suspicious_patterns = ["javascript:", "data:", "file:", "ftp:"]
-        if any(pattern in cleaned.lower() for pattern in suspicious_patterns):
-            raise ValueError(f"{input_name} contains invalid URL scheme")
-
-    return cleaned
-
-
 def make_backend_request(endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Make a request to the research-tracker-backend with comprehensive error handling.
-
-    Args:
-        endpoint: The backend endpoint to call (e.g., 'infer-authors')
-        data: The data to send in the request body
-
-    Returns:
-        The response data from the backend
-
-    Raises:
-        Exception: If the request fails or returns an error
-    """
-    if not HF_TOKEN:
-        logger.warning("HF_TOKEN not available - backend requests may fail")
-
+    """Make a request to the research-tracker-backend."""
     url = f"{BACKEND_URL}/{endpoint}"
     headers = {
         "Content-Type": "application/json",
-        "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else ""
+        "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
+        "User-Agent": "Research-Tracker-MCP/1.0"
     }
 
     try:
-        logger.debug(f"Making request to {endpoint} with data: {data}")
         response = requests.post(url, json=data, headers=headers, timeout=REQUEST_TIMEOUT)
-
-        if response.status_code == 401:
-            raise Exception("Authentication failed - please check HF_TOKEN")
-        elif response.status_code == 403:
-            raise Exception("Access forbidden - insufficient permissions")
-        elif response.status_code == 404:
-            raise Exception(f"Backend endpoint {endpoint} not found")
-        elif response.status_code == 422:
-            raise Exception("Invalid request data format")
-        elif response.status_code >= 500:
-            raise Exception(f"Backend server error (status {response.status_code})")
-
         response.raise_for_status()
-
-
-        return result
-
-    except requests.exceptions.Timeout:
-        raise Exception(f"Backend request to {endpoint} timed out after {REQUEST_TIMEOUT}s")
-    except requests.exceptions.ConnectionError:
-        raise Exception(f"Failed to connect to backend - service may be unavailable")
+        return response.json()
+
     except requests.exceptions.RequestException as e:
+        logger.error(f"Backend request to {endpoint} failed: {e}")
         raise Exception(f"Backend request to {endpoint} failed: {str(e)}")
-    except ValueError as e:
-        raise Exception(f"Invalid JSON response from backend: {str(e)}")
 
 
 def create_row_data(input_data: str) -> Dict[str, Any]:
-    """
-    Create standardized row data structure for backend requests.
-
-    Args:
-        input_data: The input string to analyze
-
-    Returns:
-        Dictionary with appropriate field populated
-    """
+    """Create standardized row data structure for backend requests."""
     row_data = {
         "Name": None,
         "Authors": [],
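The rewritten `make_backend_request` collapses the per-status-code branches into `raise_for_status()` plus a single `RequestException` handler. A minimal sketch of why one branch suffices, with illustrative stand-ins for `BACKEND_URL` and `REQUEST_TIMEOUT` (their real values come from the configuration block not shown in this diff):

```python
import requests

# Illustrative stand-ins; the real values come from the module's configuration.
BACKEND_URL = "https://example-backend.hf.space"  # hypothetical
REQUEST_TIMEOUT = 30  # seconds, hypothetical

def make_backend_request_sketch(endpoint: str, data: dict) -> dict:
    """Sketch of the simplified request path: one POST, one error branch."""
    try:
        response = requests.post(
            f"{BACKEND_URL}/{endpoint}",
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=REQUEST_TIMEOUT,
        )
        # Any 4xx/5xx becomes an HTTPError, which subclasses RequestException,
        # as do Timeout and ConnectionError - so a single except clause covers
        # bad status codes, timeouts, and connection failures alike.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        raise Exception(f"Backend request to {endpoint} failed: {e}")
```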
@@ -163,35 +101,20 @@ def infer_authors(input_data: str) -> List[str]:
     author extraction from paper metadata and repository contributor information.
 
     Args:
-        input_data: A URL, paper title, or other research-related input.
-
-
+        input_data (str): A URL, paper title, or other research-related input.
+            Supports arXiv URLs, GitHub repositories, HuggingFace resources,
+            project pages, and natural language paper titles.
 
     Returns:
-        A list of author names as strings, or empty list if no authors found.
-
-
-    Examples:
-        >>> infer_authors("https://arxiv.org/abs/2010.11929")
-        ["Alexey Dosovitskiy", "Lucas Beyer", "Alexander Kolesnikov", ...]
-
-        >>> infer_authors("https://github.com/google-research/vision_transformer")
-        ["Alexey Dosovitskiy", "Lucas Beyer", ...]
-
-        >>> infer_authors("Vision Transformer")
-        ["Alexey Dosovitskiy", "Lucas Beyer", ...]
-
-    Raises:
-        No exceptions are raised - errors are logged and empty list returned.
+        List[str]: A list of author names as strings, or empty list if no authors found.
+            Authors are returned in the order they appear in the original source.
     """
+    if not input_data or not input_data.strip():
+        return []
+
     try:
-
-        cleaned_input = validate_input(input_data, "input_data")
-
-        # Create structured data for backend
+        cleaned_input = input_data.strip()
         row_data = create_row_data(cleaned_input)
-
-        # Call the backend
         result = make_backend_request("infer-authors", row_data)
 
         # Extract and validate authors from response
@@ -200,7 +123,6 @@ def infer_authors(input_data: str) -> List[str]:
             # Handle comma-separated string format
             authors = [author.strip() for author in authors.split(",") if author.strip()]
         elif not isinstance(authors, list):
-            logger.warning(f"Unexpected authors format: {type(authors)}")
             authors = []
 
         # Filter out empty or invalid author names
@@ -215,9 +137,6 @@ def infer_authors(input_data: str) -> List[str]:
         logger.info(f"Successfully inferred {len(valid_authors)} authors from input")
         return valid_authors
 
-    except ValueError as e:
-        logger.error(f"Input validation error: {e}")
-        return []
     except Exception as e:
         logger.error(f"Error inferring authors: {e}")
         return []
@@ -228,10 +147,10 @@ def infer_paper_url(input_data: str) -> str:
     Infer the paper URL from various research-related inputs.
 
     Args:
-        input_data: A URL, repository link, or other research-related input
+        input_data (str): A URL, repository link, or other research-related input
 
     Returns:
-        The paper URL (typically arXiv or Hugging Face papers), or empty string if not found
+        str: The paper URL (typically arXiv or Hugging Face papers), or empty string if not found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -251,10 +170,10 @@ def infer_code_repository(input_data: str) -> str:
     Infer the code repository URL from research-related inputs.
 
     Args:
-        input_data: A URL, paper link, or other research-related input
+        input_data (str): A URL, paper link, or other research-related input
 
     Returns:
-        The code repository URL (typically GitHub), or empty string if not found
+        str: The code repository URL (typically GitHub), or empty string if not found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -274,10 +193,10 @@ def infer_research_name(input_data: str) -> str:
     Infer the research paper or project name from various inputs.
 
     Args:
-        input_data: A URL, repository link, or other research-related input
+        input_data (str): A URL, repository link, or other research-related input
 
     Returns:
-        The research name/title, or empty string if not found
+        str: The research name/title, or empty string if not found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -292,7 +211,7 @@ def infer_research_name(input_data: str) -> str:
         return ""
 
 
-def classify_research_url(url: str) -> str:
+def classify_research_url(input_data: str) -> str:
     """
     Classify the type of research-related URL or input.
 
@@ -300,29 +219,16 @@ def classify_research_url(url: str) -> str:
     or input represents (paper, code, model, dataset, etc.).
 
     Args:
-
+        input_data (str): The URL or input to classify
 
     Returns:
-        The field type: "Paper", "Code", "Space", "Model", "Dataset", "Project", or "Unknown"
-
-    Examples:
-        >>> classify_research_url("https://arxiv.org/abs/2010.11929")
-        "Paper"
-
-        >>> classify_research_url("https://github.com/google-research/vision_transformer")
-        "Code"
-
-        >>> classify_research_url("https://huggingface.co/google/vit-base-patch16-224")
-        "Model"
+        str: The field type: "Paper", "Code", "Space", "Model", "Dataset", "Project", or "Unknown"
     """
-    if not url:
+    if not input_data or not input_data.strip():
         return "Unknown"
 
     try:
-
-        result = make_backend_request("infer-field", {"value": url})
-
-        # Extract field from response
+        result = make_backend_request("infer-field", {"value": input_data})
         field = result.get("field", "Unknown")
         return field if field else "Unknown"
 
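The commit also drops the doctest examples from this docstring. The removed examples documented the expected mapping, kept here as an illustrative sketch (results come from the live backend, so treat the comments as expected values rather than guaranteed output):

```python
# Illustrative expectations taken from the removed docstring examples;
# running these requires a reachable research-tracker-backend.
print(classify_research_url("https://arxiv.org/abs/2010.11929"))                       # "Paper"
print(classify_research_url("https://github.com/google-research/vision_transformer"))  # "Code"
print(classify_research_url("https://huggingface.co/google/vit-base-patch16-224"))     # "Model"
```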
@@ -335,11 +241,15 @@ def infer_organizations(input_data: str) -> List[str]:
     """
     Infer affiliated organizations from research paper or project information.
 
+    This function attempts to extract organization names from research metadata,
+    author affiliations, and repository information using NLP analysis to identify
+    institutional affiliations from paper authors and project contributors.
+
     Args:
-        input_data: A URL, paper title, or other research-related input
+        input_data (str): A URL, paper title, or other research-related input
 
     Returns:
-        A list of organization names, or empty list if no organizations found
+        List[str]: A list of organization names, or empty list if no organizations found
     """
     if not input_data or not input_data.strip():
         return []
@@ -365,11 +275,15 @@ def infer_publication_date(input_data: str) -> str:
     """
     Infer publication date from research paper or project information.
 
+    This function attempts to extract publication dates from paper metadata,
+    repository creation dates, or release information. Returns dates in
+    standardized format (YYYY-MM-DD) when possible.
+
     Args:
-        input_data: A URL, paper title, or other research-related input
+        input_data (str): A URL, paper title, or other research-related input
 
     Returns:
-        Publication date as string (YYYY-MM-DD format), or empty string if not found
+        str: Publication date as string (YYYY-MM-DD format), or empty string if not found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -388,11 +302,15 @@ def infer_model(input_data: str) -> str:
     """
     Infer associated HuggingFace model from research paper or project information.
 
+    This function attempts to find HuggingFace models associated with research papers,
+    GitHub repositories, or project pages. It searches for model references in papers,
+    README files, and related documentation.
+
     Args:
-        input_data: A URL, paper title, or other research-related input
+        input_data (str): A URL, paper title, or other research-related input
 
     Returns:
-        HuggingFace model URL, or empty string if no model found
+        str: HuggingFace model URL, or empty string if no model found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -411,11 +329,15 @@ def infer_dataset(input_data: str) -> str:
     """
     Infer associated HuggingFace dataset from research paper or project information.
 
+    This function attempts to find HuggingFace datasets used or created by research papers,
+    GitHub repositories, or projects. It analyzes paper content, repository documentation,
+    and project descriptions.
+
     Args:
-        input_data: A URL, paper title, or other research-related input
+        input_data (str): A URL, paper title, or other research-related input
 
     Returns:
-        HuggingFace dataset URL, or empty string if no dataset found
+        str: HuggingFace dataset URL, or empty string if no dataset found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -434,11 +356,15 @@ def infer_space(input_data: str) -> str:
     """
     Infer associated HuggingFace space from research paper or project information.
 
+    This function attempts to find HuggingFace spaces (demos/applications) associated
+    with research papers, models, or GitHub repositories. It looks for interactive
+    demos and applications built around research.
+
     Args:
-        input_data: A URL, paper title, or other research-related input
+        input_data (str): A URL, paper title, or other research-related input
 
     Returns:
-        HuggingFace space URL, or empty string if no space found
+        str: HuggingFace space URL, or empty string if no space found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -457,11 +383,15 @@ def infer_license(input_data: str) -> str:
     """
     Infer license information from research repository or project.
 
+    This function attempts to extract license information from GitHub repositories,
+    project documentation, or associated code. It checks license files, repository
+    metadata, and project descriptions.
+
    Args:
-        input_data: A URL, repository link, or other research-related input
+        input_data (str): A URL, repository link, or other research-related input
 
     Returns:
-        License name/type, or empty string if no license found
+        str: License name/type, or empty string if no license found
     """
     if not input_data or not input_data.strip():
         return ""
@@ -476,111 +406,6 @@ def infer_license(input_data: str) -> str:
     return ""
 
 
-def batch_infer_research(input_list: List[str], inference_type: str = "authors") -> List[Dict[str, Any]]:
-    """
-    Perform batch inference on multiple research items for scale analysis.
-
-    This function processes multiple research URLs or titles simultaneously,
-    applying the specified inference type to each item. Useful for analyzing
-    large research datasets, comparing multiple papers, or building research
-    knowledge graphs.
-
-    Args:
-        input_list: List of URLs, paper titles, or research-related inputs to process
-        inference_type: Type of inference to perform on each item.
-            Options: "authors", "paper", "code", "name", "organizations",
-            "date", "model", "dataset", "space", "license", "classify"
-
-    Returns:
-        List of dictionaries, each containing:
-        - "input": The original input string
-        - "result": The inference result (format depends on inference_type)
-        - "success": Boolean indicating if inference succeeded
-        - "error": Error message if inference failed
-
-    Examples:
-        >>> papers = [
-        ...     "https://arxiv.org/abs/2010.11929",
-        ...     "https://arxiv.org/abs/1706.03762",
-        ...     "https://github.com/openai/gpt-2"
-        ... ]
-        >>> results = batch_infer_research(papers, "authors")
-        >>> for result in results:
-        ...     print(f"{result['input']}: {len(result['result'])} authors")
-
-        >>> urls = ["https://huggingface.co/bert-base-uncased", "https://github.com/pytorch/pytorch"]
-        >>> classifications = batch_infer_research(urls, "classify")
-
-    Notes:
-        - Processing is done sequentially to avoid overwhelming the backend
-        - Failed inferences return empty results rather than raising exceptions
-        - Large batches may take significant time - consider chunking for very large datasets
-    """
-    if not input_list:
-        return []
-
-    # Map inference types to their corresponding functions
-    inference_functions = {
-        "authors": infer_authors,
-        "paper": infer_paper_url,
-        "code": infer_code_repository,
-        "name": infer_research_name,
-        "organizations": infer_organizations,
-        "date": infer_publication_date,
-        "model": infer_model,
-        "dataset": infer_dataset,
-        "space": infer_space,
-        "license": infer_license,
-        "classify": classify_research_url,
-    }
-
-    if inference_type not in inference_functions:
-        logger.error(f"Invalid inference type: {inference_type}")
-        return []
-
-    inference_func = inference_functions[inference_type]
-    results = []
-
-    logger.info(f"Starting batch inference of type '{inference_type}' on {len(input_list)} items")
-
-    for i, input_item in enumerate(input_list):
-        try:
-            if not input_item or not isinstance(input_item, str):
-                results.append({
-                    "input": str(input_item),
-                    "result": None,
-                    "success": False,
-                    "error": "Invalid input: must be non-empty string"
-                })
-                continue
-
-            # Perform inference
-            result = inference_func(input_item)
-
-            results.append({
-                "input": input_item,
-                "result": result,
-                "success": True,
-                "error": None
-            })
-
-            logger.debug(f"Batch item {i+1}/{len(input_list)} completed successfully")
-
-        except Exception as e:
-            logger.error(f"Batch inference failed for item {i+1}: {e}")
-            results.append({
-                "input": input_item,
-                "result": None,
-                "success": False,
-                "error": str(e)
-            })
-
-    successful_count = sum(1 for r in results if r["success"])
-    logger.info(f"Batch inference completed: {successful_count}/{len(input_list)} successful")
-
-    return results
-
-
 def find_research_relationships(input_data: str) -> Dict[str, Any]:
     """
     Find ALL related research resources across platforms for comprehensive analysis.
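With `batch_infer_research` deleted, batching moves to the caller. A client-side sketch of the same sequential pattern using the `infer_*` functions that remain in this file (`batch_infer` itself is hypothetical; the record shape mirrors the removed helper):

```python
from typing import Any, Callable, Dict, List

def batch_infer(inputs: List[str], infer_fn: Callable) -> List[Dict[str, Any]]:
    """Sequentially apply one of the module's infer_* functions to many inputs,
    collecting per-item result/error records like the removed server helper did."""
    results = []
    for item in inputs:
        try:
            results.append({"input": item, "result": infer_fn(item), "error": None})
        except Exception as e:  # the infer_* functions already swallow most errors
            results.append({"input": item, "result": None, "error": str(e)})
    return results

# Hypothetical usage with the module's own functions:
# batch_infer(["https://arxiv.org/abs/2010.11929"], infer_authors)
```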
@@ -591,10 +416,10 @@ def find_research_relationships(input_data: str) -> Dict[str, Any]:
     and understanding the complete ecosystem around a research topic.
 
     Args:
-        input_data: A URL, paper title, or other research-related input
+        input_data (str): A URL, paper title, or other research-related input
 
     Returns:
-        Dictionary containing all discovered related resources:
+        Dict[str, Any]: Dictionary containing all discovered related resources:
         {
             "paper": str | None,  # Associated research paper
             "code": str | None,  # Code repository URL
@@ -610,23 +435,12 @@ def find_research_relationships(input_data: str) -> Dict[str, Any]:
             "success_count": int,  # Number of successful inferences
             "total_inferences": int  # Total inferences attempted
         }
-
-    Examples:
-        >>> relationships = find_research_relationships("https://arxiv.org/abs/2010.11929")
-        >>> print(f"Found {relationships['success_count']} related resources")
-        >>> print(f"Authors: {relationships['authors']}")
-        >>> print(f"Code: {relationships['code']}")
-        >>> print(f"Model: {relationships['model']}")
-
-        >>> ecosystem = find_research_relationships("Vision Transformer")
-        >>> if ecosystem['paper']:
-        ...     print(f"Paper: {ecosystem['paper']}")
-        >>> if ecosystem['code']:
-        ...     print(f"Implementation: {ecosystem['code']}")
     """
+    if not input_data or not input_data.strip():
+        return {"error": "Input data cannot be empty", "success_count": 0, "total_inferences": 0}
+
     try:
-
-        cleaned_input = validate_input(input_data, "input_data")
+        cleaned_input = input_data.strip()
 
         # Initialize result structure
         relationships = {
@@ -683,306 +497,54 @@ def find_research_relationships(input_data: str) -> Dict[str, Any]:
         logger.info(f"Research relationship analysis completed: {relationships['success_count']}/{relationships['total_inferences']} successful")
         return relationships
 
-    except ValueError as e:
-        logger.error(f"Input validation error: {e}")
-        return {"error": str(e), "success_count": 0, "total_inferences": 0}
     except Exception as e:
         logger.error(f"Error finding research relationships: {e}")
         return {"error": str(e), "success_count": 0, "total_inferences": 0}
 
 
-
-
-
-
-    This
- ...
-            "format_valid": False,
-            "platform": "unknown",
-            "error": None
-        }
-
-        try:
-            # Basic format validation
-            if not isinstance(url, str) or not url.strip():
-                result["error"] = "Invalid URL format: empty or non-string"
-                results.append(result)
-                continue
-
-            cleaned_url = url.strip()
-
-            # URL format validation
-            if not cleaned_url.startswith(("http://", "https://")):
-                result["error"] = "Invalid URL format: must start with http:// or https://"
-                results.append(result)
-                continue
-
-            result["format_valid"] = True
-
-            # Platform detection
-            if "arxiv.org" in cleaned_url:
-                result["platform"] = "arxiv"
-            elif "github.com" in cleaned_url:
-                result["platform"] = "github"
-            elif "huggingface.co" in cleaned_url:
-                result["platform"] = "huggingface"
-            elif "github.io" in cleaned_url:
-                result["platform"] = "github_pages"
-
-            # Accessibility check
-            try:
-                response = requests.head(cleaned_url, timeout=10, allow_redirects=True)
-                result["status_code"] = response.status_code
-                result["accessible"] = 200 <= response.status_code < 400
-
-            except requests.exceptions.Timeout:
-                result["error"] = "Timeout: URL not accessible within 10 seconds"
-            except requests.exceptions.ConnectionError:
-                result["error"] = "Connection error: Unable to reach URL"
-            except requests.exceptions.RequestException as e:
-                result["error"] = f"Request failed: {str(e)}"
-
-        except Exception as e:
-            result["error"] = f"Validation error: {str(e)}"
-
-        results.append(result)
-
-    accessible_count = sum(1 for r in results if r["accessible"])
-    logger.info(f"URL validation completed: {accessible_count}/{len(urls)} accessible")
-
-    return results
-
-
-# Create Gradio interface
-def create_demo():
-    """Create the Gradio demo interface for testing."""
-
-    with gr.Blocks(title="Research Tracker MCP Server") as demo:
-        gr.Markdown("# Research Tracker MCP Server")
-        gr.Markdown("Test the comprehensive research inference utilities available through MCP. This server provides cross-platform research analysis, batch processing, and relationship discovery.")
-
-        # Core inference functions
-        with gr.TabItem("Core Inference"):
-            with gr.Tab("Authors"):
-                with gr.Row():
-                    author_input = gr.Textbox(
-                        label="Input (URL, paper title, etc.)",
-                        placeholder="https://arxiv.org/abs/2010.11929",
-                        lines=1
-                    )
-                author_output = gr.JSON(label="Authors")
-                author_btn = gr.Button("Infer Authors")
-                author_btn.click(infer_authors, inputs=author_input, outputs=author_output)
-
-            with gr.Tab("Paper"):
-                with gr.Row():
-                    paper_input = gr.Textbox(
-                        label="Input (GitHub repo, project name, etc.)",
-                        placeholder="https://github.com/google-research/vision_transformer",
-                        lines=1
-                    )
-                paper_output = gr.Textbox(label="Paper URL")
-                paper_btn = gr.Button("Infer Paper")
-                paper_btn.click(infer_paper_url, inputs=paper_input, outputs=paper_output)
-
-            with gr.Tab("Code"):
-                with gr.Row():
-                    code_input = gr.Textbox(
-                        label="Input (paper URL, project name, etc.)",
-                        placeholder="https://arxiv.org/abs/2010.11929",
-                        lines=1
-                    )
-                code_output = gr.Textbox(label="Code Repository URL")
-                code_btn = gr.Button("Infer Code")
-                code_btn.click(infer_code_repository, inputs=code_input, outputs=code_output)
-
-            with gr.Tab("Name"):
-                with gr.Row():
-                    name_input = gr.Textbox(
-                        label="Input (URL, repo, etc.)",
-                        placeholder="https://github.com/google-research/vision_transformer",
-                        lines=1
-                    )
-                name_output = gr.Textbox(label="Research Name/Title")
-                name_btn = gr.Button("Infer Name")
-                name_btn.click(infer_research_name, inputs=name_input, outputs=name_output)
-
-            with gr.Tab("Classify"):
-                with gr.Row():
-                    classify_input = gr.Textbox(
-                        label="URL to classify",
-                        placeholder="https://huggingface.co/google/vit-base-patch16-224",
-                        lines=1
-                    )
-                classify_output = gr.Textbox(label="URL Type")
-                classify_btn = gr.Button("Classify URL")
-                classify_btn.click(classify_research_url, inputs=classify_input, outputs=classify_output)
-
-        # Extended inference functions
-        with gr.TabItem("Extended Inference"):
-            with gr.Tab("Organizations"):
-                with gr.Row():
-                    orgs_input = gr.Textbox(
-                        label="Input (paper URL, repo, etc.)",
-                        placeholder="https://arxiv.org/abs/2010.11929",
-                        lines=1
-                    )
-                orgs_output = gr.JSON(label="Organizations")
-                orgs_btn = gr.Button("Infer Organizations")
-                orgs_btn.click(infer_organizations, inputs=orgs_input, outputs=orgs_output)
-
-            with gr.Tab("Publication Date"):
-                with gr.Row():
-                    date_input = gr.Textbox(
-                        label="Input (paper URL, repo, etc.)",
-                        placeholder="https://arxiv.org/abs/2010.11929",
-                        lines=1
-                    )
-                date_output = gr.Textbox(label="Publication Date")
-                date_btn = gr.Button("Infer Date")
-                date_btn.click(infer_publication_date, inputs=date_input, outputs=date_output)
-
-            with gr.Tab("Model"):
-                with gr.Row():
-                    model_input = gr.Textbox(
-                        label="Input (paper URL, project name, etc.)",
-                        placeholder="https://arxiv.org/abs/2010.11929",
-                        lines=1
-                    )
-                model_output = gr.Textbox(label="HuggingFace Model URL")
-                model_btn = gr.Button("Infer Model")
-                model_btn.click(infer_model, inputs=model_input, outputs=model_output)
-
-            with gr.Tab("Dataset"):
-                with gr.Row():
-                    dataset_input = gr.Textbox(
-                        label="Input (paper URL, project name, etc.)",
-                        placeholder="https://arxiv.org/abs/1706.03762",
-                        lines=1
-                    )
-                dataset_output = gr.Textbox(label="HuggingFace Dataset URL")
-                dataset_btn = gr.Button("Infer Dataset")
-                dataset_btn.click(infer_dataset, inputs=dataset_input, outputs=dataset_output)
-
-            with gr.Tab("Space"):
-                with gr.Row():
-                    space_input = gr.Textbox(
-                        label="Input (model URL, paper, etc.)",
-                        placeholder="https://huggingface.co/google/vit-base-patch16-224",
-                        lines=1
-                    )
-                space_output = gr.Textbox(label="HuggingFace Space URL")
-                space_btn = gr.Button("Infer Space")
-                space_btn.click(infer_space, inputs=space_input, outputs=space_output)
-
-            with gr.Tab("License"):
-                with gr.Row():
-                    license_input = gr.Textbox(
-                        label="Input (repository URL, project, etc.)",
-                        placeholder="https://github.com/google-research/vision_transformer",
-                        lines=1
-                    )
-                license_output = gr.Textbox(label="License Information")
-                license_btn = gr.Button("Infer License")
-                license_btn.click(infer_license, inputs=license_input, outputs=license_output)
-
-        # Research intelligence functions
-        with gr.TabItem("Research Intelligence"):
-            with gr.Tab("Research Relationships"):
-                gr.Markdown("Find ALL related resources for comprehensive research analysis")
-                with gr.Row():
-                    relationships_input = gr.Textbox(
-                        label="Input (URL, paper title, etc.)",
-                        placeholder="https://arxiv.org/abs/2010.11929",
-                        lines=1
-                    )
-                relationships_output = gr.JSON(label="Related Resources")
-                relationships_btn = gr.Button("Find Research Relationships")
-                relationships_btn.click(find_research_relationships, inputs=relationships_input, outputs=relationships_output)
-
-            with gr.Tab("Batch Processing"):
-                gr.Markdown("Process multiple research items simultaneously")
-                with gr.Row():
-                    with gr.Column():
-                        batch_input = gr.Textbox(
-                            label="Input URLs/Titles (one per line)",
-                            placeholder="https://arxiv.org/abs/2010.11929\nhttps://github.com/openai/gpt-2\nVision Transformer",
-                            lines=5
-                        )
-                        batch_type = gr.Dropdown(
-                            choices=["authors", "paper", "code", "name", "organizations", "date", "model", "dataset", "space", "license", "classify"],
-                            value="authors",
-                            label="Inference Type"
-                        )
-                    batch_output = gr.JSON(label="Batch Results")
-
-                def process_batch(input_text, inference_type):
-                    if not input_text.strip():
-                        return []
-                    input_list = [line.strip() for line in input_text.strip().split('\n') if line.strip()]
-                    return batch_infer_research(input_list, inference_type)
-
-                batch_btn = gr.Button("Process Batch")
-                batch_btn.click(process_batch, inputs=[batch_input, batch_type], outputs=batch_output)
-
-            with gr.Tab("URL Validation"):
-                gr.Markdown("Validate accessibility and format of research URLs")
-                with gr.Row():
-                    with gr.Column():
-                        url_input = gr.Textbox(
-                            label="URLs to validate (one per line)",
-                            placeholder="https://arxiv.org/abs/2010.11929\nhttps://github.com/google-research/vision_transformer\nhttps://huggingface.co/google/vit-base-patch16-224",
-                            lines=5
-                        )
-                    url_output = gr.JSON(label="Validation Results")
-
-                def validate_urls(input_text):
-                    if not input_text.strip():
-                        return []
-                    url_list = [line.strip() for line in input_text.strip().split('\n') if line.strip()]
-                    return validate_research_urls(url_list)
-
-                url_btn = gr.Button("Validate URLs")
-                url_btn.click(validate_urls, inputs=url_input, outputs=url_output)
-
-    return demo
+# Create minimal Gradio interface focused on MCP tool exposure
+with gr.Blocks(title="Research Tracker MCP Server") as demo:
+    gr.Markdown("# Research Tracker MCP Server")
+    gr.Markdown("""
+    This server provides MCP tools for research inference and metadata extraction.
+
+    **Available MCP Tools:**
+    - `infer_authors` - Extract author names from papers and repositories
+    - `infer_paper_url` - Find associated research paper URLs
+    - `infer_code_repository` - Discover code repository links
+    - `infer_research_name` - Extract research project names
+    - `classify_research_url` - Classify URL types (paper/code/model/etc.)
+    - `infer_organizations` - Identify affiliated organizations
+    - `infer_publication_date` - Extract publication dates
+    - `infer_model` - Find associated HuggingFace models
+    - `infer_dataset` - Find associated HuggingFace datasets
+    - `infer_space` - Find associated HuggingFace spaces
+    - `infer_license` - Extract license information
+    - `find_research_relationships` - Comprehensive research ecosystem analysis
+
+    **Input Support:**
+    - arXiv paper URLs (https://arxiv.org/abs/...)
+    - GitHub repository URLs (https://github.com/...)
+    - HuggingFace model/dataset/space URLs
+    - Research paper titles and project names
+    - Project page URLs
+    """)
+
+    # Expose all core functions as MCP tools
+    gr.api(infer_authors)
+    gr.api(infer_paper_url)
+    gr.api(infer_code_repository)
+    gr.api(infer_research_name)
+    gr.api(classify_research_url)
+    gr.api(infer_organizations)
+    gr.api(infer_publication_date)
+    gr.api(infer_model)
+    gr.api(infer_dataset)
+    gr.api(infer_space)
+    gr.api(infer_license)
+    gr.api(find_research_relationships)
 
 
 if __name__ == "__main__":
-
+    logger.info("Starting Research Tracker MCP Server")
     demo.launch(mcp_server=True, share=False)
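After this commit the Space has no interactive tabs, so the exposed functions are consumed programmatically. A hedged sketch using `gradio_client`; the Space id is a placeholder, and `/infer_authors` assumes `gr.api`'s default of naming the endpoint after the function (with `mcp_server=True`, recent Gradio releases also serve these tools over MCP under `/gradio_api/mcp/`):

```python
from gradio_client import Client

# Placeholder Space id; substitute the actual <owner>/<space> name.
client = Client("owner/space-name")

# gr.api(infer_authors) exposes the function as a named endpoint; with the
# default naming it should be callable as /infer_authors.
authors = client.predict(
    "https://arxiv.org/abs/2010.11929",  # input_data
    api_name="/infer_authors",
)
print(authors)  # expected: a list of author name strings
```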