dylanebert and Claude committed
Commit 057e151 · Parent: bfcd620

Simplify and optimize research-tracker-mcp functions


- Extract common URL classification logic into create_row_data utility
- Simplify all inference functions by removing redundant row_data creation
- Streamline docstrings and remove verbose examples
- Reduce code duplication across inference functions
- Maintain full functionality with cleaner, more maintainable code
- Functions now consistently use the create_row_data helper (see the sketch below)
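
A minimal, self-contained sketch of the pattern these bullets describe. This is not the exact app.py code: the stubbed make_backend_request, the early URL branches of create_row_data, and the return-on-error are illustrative stand-ins reconstructed from the removed inline dicts and the diff context below.

```python
# Sketch of the simplified pattern (stand-in backend; the real calls live in app.py).
import logging
from typing import Any, Dict

logger = logging.getLogger(__name__)


def create_row_data(input_data: str) -> Dict[str, Any]:
    """Classify the input into the appropriate row field based on URL patterns."""
    row_data: Dict[str, Any] = {
        "Name": None, "Authors": [], "Paper": None, "Code": None,
        "Project": None, "Space": None, "Model": None, "Dataset": None,
    }
    if input_data.startswith("http"):
        # Earlier branches reconstructed from the removed inline dicts (assumption).
        if "arxiv" in input_data or "huggingface.co/papers" in input_data:
            row_data["Paper"] = input_data
        elif "github.com" in input_data:
            row_data["Code"] = input_data
        elif "github.io" in input_data:
            row_data["Project"] = input_data
        elif "huggingface.co/spaces" in input_data:
            row_data["Space"] = input_data
        elif "huggingface.co/datasets" in input_data:
            row_data["Dataset"] = input_data
        elif "huggingface.co/" in input_data:
            row_data["Model"] = input_data
        else:
            row_data["Paper"] = input_data
    else:
        row_data["Name"] = input_data
    return row_data


def make_backend_request(endpoint: str, row_data: Dict[str, Any]) -> Dict[str, Any]:
    """Stand-in for the real backend call (assumption: returns a dict keyed by field)."""
    return {}


def infer_paper_url(input_data: str) -> str:
    """Each inference function is now the same three lines plus error handling."""
    if not input_data or not input_data.strip():
        return ""
    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-paper", row_data)
        return result.get("paper", "")
    except Exception as e:
        logger.error(f"Error inferring paper: {e}")
        return ""  # error fallback assumed; the diff truncates before this line
```

The same three-line body repeats across infer_code_repository, infer_research_name, infer_organizations, infer_publication_date, infer_model, infer_dataset, infer_space, and infer_license, differing only in the backend endpoint and the key read from the result.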

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1)
  1. app.py +17 -253
app.py CHANGED
@@ -114,9 +114,6 @@ def create_row_data(input_data: str) -> Dict[str, Any]:
     """
     Create standardized row data structure for backend requests.
 
-    This function analyzes the input and places it in the appropriate field
-    based on URL patterns and content analysis.
-
     Args:
         input_data: The input string to analyze
 
@@ -147,13 +144,10 @@ def create_row_data(input_data: str) -> Dict[str, Any]:
         elif "huggingface.co/datasets" in input_data:
             row_data["Dataset"] = input_data
         elif "huggingface.co/" in input_data:
-            # Likely a model URL (huggingface.co/org/model-name)
             row_data["Model"] = input_data
         else:
-            # Unknown URL type - try as paper
             row_data["Paper"] = input_data
     else:
-        # Non-URL input - likely a paper title or project name
         row_data["Name"] = input_data
 
     return row_data
@@ -233,44 +227,19 @@ def infer_paper_url(input_data: str) -> str:
     """
     Infer the paper URL from various research-related inputs.
 
-    This function attempts to find the associated research paper from
-    inputs like GitHub repositories, project pages, or partial URLs.
-
     Args:
         input_data: A URL, repository link, or other research-related input
 
     Returns:
         The paper URL (typically arXiv or Hugging Face papers), or empty string if not found
-
-    Examples:
-        >>> infer_paper_url("https://github.com/google-research/vision_transformer")
-        "https://arxiv.org/abs/2010.11929"
-
-        >>> infer_paper_url("Vision Transformer")
-        "https://arxiv.org/abs/2010.11929"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-paper", row_data)
-
-        # Extract paper URL from response
-        paper_url = result.get("paper", "")
-        return paper_url if paper_url else ""
+        return result.get("paper", "")
 
     except Exception as e:
         logger.error(f"Error inferring paper: {e}")
@@ -281,44 +250,19 @@ def infer_code_repository(input_data: str) -> str:
     """
     Infer the code repository URL from research-related inputs.
 
-    This function attempts to find the associated code repository from
-    inputs like paper URLs, project pages, or partial information.
-
     Args:
         input_data: A URL, paper link, or other research-related input
 
     Returns:
         The code repository URL (typically GitHub), or empty string if not found
-
-    Examples:
-        >>> infer_code_repository("https://arxiv.org/abs/2010.11929")
-        "https://github.com/google-research/vision_transformer"
-
-        >>> infer_code_repository("Vision Transformer")
-        "https://github.com/google-research/vision_transformer"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-code", row_data)
-
-        # Extract code URL from response
-        code_url = result.get("code", "")
-        return code_url if code_url else ""
+        return result.get("code", "")
 
     except Exception as e:
         logger.error(f"Error inferring code: {e}")
@@ -329,44 +273,19 @@ def infer_research_name(input_data: str) -> str:
     """
     Infer the research paper or project name from various inputs.
 
-    This function attempts to extract the formal name/title of a research
-    paper or project from URLs, repositories, or partial information.
-
     Args:
         input_data: A URL, repository link, or other research-related input
 
     Returns:
         The research name/title, or empty string if not found
-
-    Examples:
-        >>> infer_research_name("https://arxiv.org/abs/2010.11929")
-        "An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale"
-
-        >>> infer_research_name("https://github.com/google-research/vision_transformer")
-        "Vision Transformer"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-name", row_data)
-
-        # Extract name from response
-        name = result.get("name", "")
-        return name if name else ""
+        return result.get("name", "")
 
     except Exception as e:
         logger.error(f"Error inferring name: {e}")
@@ -416,46 +335,21 @@ def infer_organizations(input_data: str) -> List[str]:
     """
    Infer affiliated organizations from research paper or project information.
 
-    This function attempts to extract organization names from research metadata,
-    author affiliations, and repository information. It uses NLP analysis to
-    identify institutional affiliations from paper authors and project contributors.
-
     Args:
         input_data: A URL, paper title, or other research-related input
 
     Returns:
         A list of organization names, or empty list if no organizations found
-
-    Examples:
-        >>> infer_organizations("https://arxiv.org/abs/2010.11929")
-        ["Google Research", "University of Amsterdam", "ETH Zurich"]
-
-        >>> infer_organizations("https://github.com/openai/gpt-2")
-        ["OpenAI"]
     """
     if not input_data or not input_data.strip():
         return []
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-orgs", row_data)
 
-        # Extract organizations from response
         orgs = result.get("orgs", [])
         if isinstance(orgs, str):
-            # Handle comma-separated string format
             orgs = [org.strip() for org in orgs.split(",") if org.strip()]
         elif not isinstance(orgs, list):
             orgs = []
@@ -471,45 +365,19 @@ def infer_publication_date(input_data: str) -> str:
     """
     Infer publication date from research paper or project information.
 
-    This function attempts to extract publication dates from paper metadata,
-    repository creation dates, or release information. Returns dates in
-    standardized format (YYYY-MM-DD) when possible.
-
     Args:
         input_data: A URL, paper title, or other research-related input
 
     Returns:
         Publication date as string (YYYY-MM-DD format), or empty string if not found
-
-    Examples:
-        >>> infer_publication_date("https://arxiv.org/abs/2010.11929")
-        "2020-10-22"
-
-        >>> infer_publication_date("https://github.com/google-research/vision_transformer")
-        "2020-10-22"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-date", row_data)
-
-        # Extract date from response
-        date = result.get("date", "")
-        return date if date else ""
+        return result.get("date", "")
 
     except Exception as e:
         logger.error(f"Error inferring publication date: {e}")
@@ -520,45 +388,19 @@ def infer_model(input_data: str) -> str:
     """
     Infer associated HuggingFace model from research paper or project information.
 
-    This function attempts to find HuggingFace models associated with research
-    papers, GitHub repositories, or project pages. It searches for model
-    references in papers, README files, and related documentation.
-
     Args:
         input_data: A URL, paper title, or other research-related input
 
     Returns:
         HuggingFace model URL, or empty string if no model found
-
-    Examples:
-        >>> infer_model("https://arxiv.org/abs/2010.11929")
-        "https://huggingface.co/google/vit-base-patch16-224"
-
-        >>> infer_model("Vision Transformer")
-        "https://huggingface.co/google/vit-base-patch16-224"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-model", row_data)
-
-        # Extract model URL from response
-        model = result.get("model", "")
-        return model if model else ""
+        return result.get("model", "")
 
     except Exception as e:
         logger.error(f"Error inferring model: {e}")
@@ -569,45 +411,19 @@ def infer_dataset(input_data: str) -> str:
     """
     Infer associated HuggingFace dataset from research paper or project information.
 
-    This function attempts to find HuggingFace datasets used or created by
-    research papers, GitHub repositories, or projects. It analyzes paper
-    content, repository documentation, and project descriptions.
-
     Args:
         input_data: A URL, paper title, or other research-related input
 
     Returns:
         HuggingFace dataset URL, or empty string if no dataset found
-
-    Examples:
-        >>> infer_dataset("https://arxiv.org/abs/1706.03762")
-        "https://huggingface.co/datasets/wmt14"
-
-        >>> infer_dataset("https://github.com/huggingface/transformers")
-        "https://huggingface.co/datasets/glue"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-dataset", row_data)
-
-        # Extract dataset URL from response
-        dataset = result.get("dataset", "")
-        return dataset if dataset else ""
+        return result.get("dataset", "")
 
     except Exception as e:
         logger.error(f"Error inferring dataset: {e}")
@@ -618,45 +434,19 @@ def infer_space(input_data: str) -> str:
     """
     Infer associated HuggingFace space from research paper or project information.
 
-    This function attempts to find HuggingFace spaces (demos/applications)
-    associated with research papers, models, or GitHub repositories. It looks
-    for interactive demos and applications built around research.
-
     Args:
         input_data: A URL, paper title, or other research-related input
 
     Returns:
         HuggingFace space URL, or empty string if no space found
-
-    Examples:
-        >>> infer_space("https://huggingface.co/google/vit-base-patch16-224")
-        "https://huggingface.co/spaces/google/vit-demo"
-
-        >>> infer_space("https://arxiv.org/abs/2010.11929")
-        "https://huggingface.co/spaces/google/vision-transformer-demo"
     """
     if not input_data or not input_data.strip():
         return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
         result = make_backend_request("infer-space", row_data)
-
-        # Extract space URL from response
-        space = result.get("space", "")
-        return space if space else ""
+        return result.get("space", "")
 
     except Exception as e:
         logger.error(f"Error inferring space: {e}")
@@ -667,45 +457,19 @@ def infer_license(input_data: str) -> str:
     """
     Infer license information from research repository or project.
 
-    This function attempts to extract license information from GitHub
-    repositories, project documentation, or associated code. It checks
-    license files, repository metadata, and project descriptions.
-
     Args:
         input_data: A URL, repository link, or other research-related input
 
     Returns:
         License name/type, or empty string if no license found
-
-    Examples:
-        >>> infer_license("https://github.com/google-research/vision_transformer")
-        "Apache License 2.0"
-
-        >>> infer_license("https://github.com/openai/gpt-2")
-        "MIT License"
     """
     if not input_data or not input_data.strip():
        return ""
 
     try:
-        # Create row data structure
-        row_data = {
-            "Name": input_data if not input_data.startswith("http") else None,
-            "Authors": [],
-            "Paper": input_data if "arxiv" in input_data or "huggingface.co/papers" in input_data else None,
-            "Code": input_data if "github.com" in input_data else None,
-            "Project": input_data if "github.io" in input_data else None,
-            "Space": input_data if "huggingface.co/spaces" in input_data else None,
-            "Model": input_data if "huggingface.co/models" in input_data else None,
-            "Dataset": input_data if "huggingface.co/datasets" in input_data else None,
-        }
-
-        # Call the backend
+        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-license", row_data)
-
-        # Extract license from response
-        license_info = result.get("license", "")
-        return license_info if license_info else ""
+        return result.get("license", "")
 
     except Exception as e:
         logger.error(f"Error inferring license: {e}")
 