# app.py
import subprocess
import sys
import os
from pathlib import Path
import traceback
from datetime import datetime
from typing import Optional, Dict, Tuple, List
def setup_salt():
"""Clone and setup SALT library like in Colab."""
try:
import salt.dataset
print("βœ… SALT library already available")
return True
except ImportError:
pass
print("πŸ“₯ Setting up SALT library...")
try:
salt_dir = Path("salt")
if not salt_dir.exists():
print("πŸ”„ Cloning SALT repository...")
subprocess.check_call([
"git", "clone", "https://github.com/sunbirdai/salt.git"
])
else:
print("πŸ“ SALT repository already exists")
salt_requirements = salt_dir / "requirements.txt"
if salt_requirements.exists():
print("πŸ“¦ Installing SALT requirements...")
subprocess.check_call([
sys.executable, "-m", "pip", "install", "-q", "-r", str(salt_requirements)
])
salt_path = str(salt_dir.absolute())
if salt_path not in sys.path:
sys.path.insert(0, salt_path)
print(f"πŸ”— Added {salt_path} to Python path")
import salt.dataset
print("βœ… SALT library setup completed successfully")
return True
except Exception as e:
print(f"❌ Failed to setup SALT: {e}")
return False
# Setup SALT on startup
print("πŸš€ Starting SALT Translation Leaderboard...")
if not setup_salt():
print("❌ Cannot continue without SALT library")
sys.exit(1)
import gradio as gr
import pandas as pd
import json
# Import our modules
from src.test_set import (
get_public_test_set,
get_complete_test_set,
create_test_set_download
)
from src.validation import validate_submission
from src.evaluation import evaluate_predictions, generate_evaluation_report
from src.leaderboard import (
load_leaderboard,
add_model_to_leaderboard,
get_track_leaderboard,
prepare_leaderboard_display
)
from src.plotting import (
create_leaderboard_plot,
create_language_pair_heatmap,
create_performance_comparison_plot,
create_language_pair_comparison_plot
)
from src.utils import sanitize_model_name, get_all_language_pairs
from config import *
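# Star import supplies shared constants such as EVALUATION_TRACKS, MODEL_CATEGORIES, LANGUAGE_NAMES, GOOGLE_SUPPORTED_LANGUAGES and ALL_UG40_LANGUAGES.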
# Global variables for caching
current_leaderboard = None
public_test_set = None
complete_test_set = None
def initialize_data():
"""Initialize test sets and leaderboard data."""
global public_test_set, complete_test_set, current_leaderboard
try:
print("πŸ“₯ Loading test sets...")
public_test_set = get_public_test_set()
complete_test_set = get_complete_test_set()
print("πŸ† Loading leaderboard...")
current_leaderboard = load_leaderboard()
# Debug leaderboard content
print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
if not current_leaderboard.empty:
print(f"Leaderboard columns: {list(current_leaderboard.columns)}")
print(f"Sample row types: {current_leaderboard.dtypes.to_dict()}")
else:
print("Leaderboard is empty - will show empty interface")
print(f"βœ… Initialization complete!")
print(f" - Test set: {len(public_test_set):,} samples")
print(f" - Current models: {len(current_leaderboard)}")
return True
except Exception as e:
print(f"❌ Initialization failed: {e}")
import traceback
traceback.print_exc()
return False
def download_test_set() -> Tuple[str, str]:
"""Create downloadable test set and return file path and info."""
try:
global public_test_set
if public_test_set is None:
public_test_set = get_public_test_set()
download_path, stats = create_test_set_download()
info_msg = f"""
## 📥 SALT Test Set Downloaded Successfully!
### 📊 Dataset Statistics:
- **Total Samples**: {stats['total_samples']:,}
- **Languages**: {len(stats.get('languages', []))} ({', '.join(stats.get('languages', []))})
- **Google Comparable**: {stats.get('google_comparable_samples', 0):,} samples
- **Language Pairs**: {stats.get('language_pairs', 0)}
### 🏁 Track Breakdown:
"""
track_breakdown = stats.get('track_breakdown', {})
for track_name, track_info in track_breakdown.items():
info_msg += f"""
**{EVALUATION_TRACKS[track_name]['name']}**:
- Samples: {track_info.get('total_samples', 0):,}
- Language Pairs: {track_info.get('language_pairs', 0)}
"""
info_msg += f"""
### 📋 File Format:
- `sample_id`: Unique identifier for each sample
- `source_text`: Text to be translated
- `source_language`: Source language code
- `target_language`: Target language code
- `domain`: Content domain (if available)
- `google_comparable`: Whether this pair can be compared with Google Translate
### 🔬 Next Steps:
1. **Run your model** on the source texts to generate translations
2. **Create a predictions file** with columns: `sample_id`, `prediction` (a quick local check is sketched below)
3. **Submit** your predictions using the submission tab
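Before uploading, you can sanity-check your file locally. This is an illustrative snippet only; `predictions.csv` stands in for your own output file:
```python
import pandas as pd

preds = pd.read_csv("predictions.csv")  # your generated predictions file
assert list(preds.columns) == ["sample_id", "prediction"], "unexpected columns"
assert preds["sample_id"].is_unique, "duplicate sample_id values"
print(len(preds), "predictions ready for submission")
```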
"""
return download_path, info_msg
except Exception as e:
error_msg = f"❌ Error creating test set download: {str(e)}"
return None, error_msg
def validate_submission_file(file, model_name: str, author: str, description: str) -> Tuple[str, Optional[pd.DataFrame], str]:
"""Validate uploaded prediction file."""
try:
if file is None:
return "❌ Please upload a predictions file", None, "community"
if not model_name.strip():
return "❌ Please provide a model name", None, "community"
# Handle different file input types
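# Gradio may hand over raw bytes, a temp-file path (str), or a file-like object exposing .name.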
if isinstance(file, bytes):
file_content = file
elif isinstance(file, str):
if os.path.exists(file):
with open(file, "rb") as f:
file_content = f.read()
else:
file_content = file.encode("utf-8")
elif hasattr(file, "name") and os.path.exists(file.name):
with open(file.name, "rb") as f:
file_content = f.read()
else:
return "❌ Could not read uploaded file", None, "community"
filename = getattr(file, "name", None) or getattr(file, "filename", None) or "predictions.csv"
global complete_test_set
if complete_test_set is None:
complete_test_set = get_complete_test_set()
validation_result = validate_submission(
file_content, filename, complete_test_set, model_name, author, description
)
detected_category = validation_result.get("category", "community")
if validation_result.get("can_evaluate", False):
return validation_result["report"], validation_result["predictions"], detected_category
else:
return validation_result["report"], None, detected_category
except Exception as e:
return f"❌ Validation error: {e}\n\nTraceback:\n{traceback.format_exc()}", None, "community"
def evaluate_submission(
predictions_df: pd.DataFrame,
model_name: str,
author: str,
description: str,
detected_category: str,
) -> Tuple[str, pd.DataFrame, object, object]:
"""Evaluate validated predictions."""
try:
if predictions_df is None:
return "❌ No valid predictions to evaluate", None, None, None
global complete_test_set, current_leaderboard
if complete_test_set is None:
complete_test_set = get_complete_test_set()
print(f"πŸ”¬ Starting evaluation for {model_name}...")
evaluation_results = evaluate_predictions(predictions_df, complete_test_set, detected_category)
if evaluation_results.get('error'):
return f"❌ Evaluation error: {evaluation_results['error']}", None, None, None
print("πŸ† Adding to leaderboard...")
updated_leaderboard = add_model_to_leaderboard(
model_name=sanitize_model_name(model_name),
author=author or "Anonymous",
evaluation_results=evaluation_results,
model_category=detected_category,
description=description or ""
)
current_leaderboard = updated_leaderboard
report = generate_evaluation_report(evaluation_results, model_name)
# Create visualizations
summary_plot = create_performance_comparison_plot(updated_leaderboard, "google_comparable")
google_leaderboard = get_track_leaderboard(updated_leaderboard, "google_comparable")
display_leaderboard = prepare_leaderboard_display(google_leaderboard, "google_comparable")
success_msg = f"""
## 🎉 Evaluation Complete!
### 📊 Model Information:
- **Model**: {model_name}
- **Category**: {MODEL_CATEGORIES.get(detected_category, {}).get('name', detected_category)}
- **Author**: {author or 'Anonymous'}
{report}
"""
return success_msg, display_leaderboard, summary_plot, None
except Exception as e:
error_msg = f"❌ Evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
return error_msg, None, None, None
def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
"""Refresh leaderboard for a specific track with filters."""
try:
print(f"Refreshing {track} leaderboard...")
global current_leaderboard
if current_leaderboard is None:
print("Loading leaderboard...")
current_leaderboard = load_leaderboard()
print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
# Get track leaderboard with robust error handling
try:
print(f"Getting track leaderboard for {track}...")
track_leaderboard = get_track_leaderboard(current_leaderboard, track, category_filter=category_filter)
print(f"Track leaderboard has {len(track_leaderboard)} entries")
except Exception as e:
print(f"Error getting track leaderboard: {e}")
track_leaderboard = pd.DataFrame()
# Apply search filter
if search_query and not track_leaderboard.empty:
try:
print(f"Applying search filter: {search_query}")
query_lower = search_query.lower()
mask = (
track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
)
track_leaderboard = track_leaderboard[mask]
print(f"After search filter: {len(track_leaderboard)} entries")
except Exception as e:
print(f"Error applying search filter: {e}")
# Prepare display with error handling
try:
print("Preparing display...")
display_df = prepare_leaderboard_display(track_leaderboard, track)
print(f"Display prepared with {len(display_df)} rows")
except Exception as e:
print(f"Error preparing display: {e}")
display_df = pd.DataFrame()
# Create plots with error handling
try:
print("Creating ranking plot...")
ranking_plot = create_leaderboard_plot(track_leaderboard, track)
except Exception as e:
print(f"Error creating ranking plot: {e}")
ranking_plot = None
try:
print("Creating comparison plot...")
comparison_plot = create_performance_comparison_plot(track_leaderboard, track)
except Exception as e:
print(f"Error creating comparison plot: {e}")
comparison_plot = None
# Generate stats text with safe formatting
try:
print("Generating stats...")
track_config = EVALUATION_TRACKS[track]
best_model = "None"
best_score = 0.0
if not track_leaderboard.empty:
best_model = str(track_leaderboard.iloc[0]['model_name'])
quality_col = f'{track}_quality'
if quality_col in track_leaderboard.columns:
try:
score_val = track_leaderboard.iloc[0][quality_col]
best_score = float(score_val) if pd.notnull(score_val) else 0.0
except (ValueError, TypeError):
best_score = 0.0
stats_text = f"""
### 📊 {track_config['name']} Statistics
- **Total Models**: {len(track_leaderboard)}
- **Best Model**: {best_model}
- **Best Score**: {best_score:.4f}
### 🔬 Track Information:
{track_config.get('description', 'No description available')}
"""
print("Stats generated successfully")
except Exception as e:
print(f"Error generating stats: {e}")
stats_text = f"Error loading {track} statistics: {str(e)}"
print("Track refresh completed successfully")
return display_df, ranking_plot, comparison_plot, stats_text
except Exception as e:
error_msg = f"Error loading {track} leaderboard: {str(e)}"
print(f"MAIN ERROR: {error_msg}")
import traceback
traceback.print_exc()
return pd.DataFrame(), None, None, error_msg
def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
"""Get language pair comparison data and visualization."""
try:
global current_leaderboard
if current_leaderboard is None:
return pd.DataFrame(), None
track_leaderboard = get_track_leaderboard(current_leaderboard, track)
if track_leaderboard.empty:
return pd.DataFrame(), None
# Create language pair comparison table
pairs_data = []
track_languages = EVALUATION_TRACKS[track]["languages"]
for src in track_languages:
for tgt in track_languages:
if src == tgt:
continue
pair_key = f"{src}_to_{tgt}"
pair_display = f"{LANGUAGE_NAMES.get(src, src)} β†’ {LANGUAGE_NAMES.get(tgt, tgt)}"
for _, model in track_leaderboard.iterrows():
# Extract detailed results if available
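# Per-pair scores are stored as a JSON-encoded blob in the detailed_<track> column of the leaderboard.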
detailed_col = f'detailed_{track}'
if detailed_col in model and pd.notna(model[detailed_col]):
try:
detailed_results = json.loads(model[detailed_col])
pair_metrics = detailed_results.get('pair_metrics', {})
if pair_key in pair_metrics:
metrics = pair_metrics[pair_key]
pairs_data.append({
'Language Pair': pair_display,
'Model': model['model_name'],
'Category': model['model_category'],
'Quality Score': metrics.get('quality_score', {}).get('mean', 0),
'BLEU': metrics.get('bleu', {}).get('mean', 0),
'ChrF': metrics.get('chrf', {}).get('mean', 0),
'Samples': metrics.get('sample_count', 0)
})
except (json.JSONDecodeError, KeyError):
continue
pairs_df = pd.DataFrame(pairs_data)
if pairs_df.empty:
return pd.DataFrame(), None
# Create visualization
comparison_plot = create_language_pair_comparison_plot(pairs_df, track)
return pairs_df, comparison_plot
except Exception as e:
print(f"Error in language pair comparison: {e}")
return pd.DataFrame(), None
# Initialize data on startup
initialization_success = initialize_data()
# Create Gradio interface
with gr.Blocks(
title="πŸ† SALT Translation Leaderboard",
theme=gr.themes.Soft(),
css="""
.gradio-container {
max-width: 1600px !important;
margin: 0 auto;
}
/* Force readable text in all themes */
.markdown, .gr-markdown, .gr-html {
color: var(--body-text-color) !important;
background: var(--background-fill-primary) !important;
}
.markdown h1, .markdown h2, .markdown h3,
.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
color: var(--body-text-color) !important;
}
.markdown p, .markdown li, .markdown strong,
.gr-markdown p, .gr-markdown li, .gr-markdown strong {
color: var(--body-text-color) !important;
}
/* Table styling */
.dataframe, .gr-dataframe {
color: var(--body-text-color) !important;
background: var(--background-fill-primary) !important;
}
/* Button and input styling */
.gr-button, .gr-textbox, .gr-dropdown {
color: var(--body-text-color) !important;
}
/* Ensure plot backgrounds work in both themes */
.plot-container {
background: var(--background-fill-primary) !important;
}
"""
) as demo:
# Header
gr.HTML("""
<div style="text-align: center; margin-bottom: 2rem; padding: 2rem; background: linear-gradient(135deg, #1e3a8a 0%, #3730a3 50%, #1e40af 100%); color: white !important; border-radius: 10px;">
<h1 style="color: white !important;">🏆 SALT Translation Leaderboard</h1>
<p style="color: white !important;"><strong>Rigorous Evaluation of Translation Models on Ugandan Languages</strong></p>
<p style="color: white !important;">Three-tier evaluation β€’ Statistical confidence intervals β€’ Research-grade analysis</p>
</div>
""")
# Status indicator
if initialization_success:
status_msg = "βœ… System initialized successfully"
else:
status_msg = "❌ System initialization failed - some features may not work"
gr.Markdown(f"**System Status**: {status_msg}")
with gr.Tabs():
# Tab 1: Download Test Set
with gr.Tab("πŸ“₯ Download Test Set", id="download"):
gr.Markdown("""
## 📋 Get the SALT Test Set
Download our test set for translation model evaluation.
""")
download_btn = gr.Button("πŸ“₯ Download Test Set", variant="primary", size="lg")
with gr.Row():
with gr.Column():
download_file = gr.File(label="πŸ“‚ Test Set File", interactive=False)
with gr.Column():
download_info = gr.Markdown()
# Tab 2: Submit Predictions
with gr.Tab("πŸš€ Submit Predictions", id="submit"):
gr.Markdown("""
## 🎯 Submit Your Model's Predictions
Upload predictions for evaluation across all tracks.
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ“ Model Information")
model_name_input = gr.Textbox(
label="πŸ€– Model Name",
placeholder="e.g., MyTranslator-v2.0",
info="Unique name for your model"
)
author_input = gr.Textbox(
label="πŸ‘€ Author/Organization",
placeholder="Your name or organization",
value="Anonymous"
)
description_input = gr.Textbox(
label="πŸ“„ Model Description",
placeholder="Architecture, training data, special features...",
lines=4
)
predictions_file = gr.File(
label="πŸ“‚ Predictions File",
file_types=[".csv", ".tsv", ".json"]
)
validate_btn = gr.Button("βœ… Validate Submission", variant="secondary")
submit_btn = gr.Button("πŸš€ Submit for Evaluation", variant="primary", interactive=False)
with gr.Column(scale=1):
validation_output = gr.Markdown()
gr.Markdown("### πŸ† Evaluation Results")
evaluation_output = gr.Markdown()
with gr.Row():
with gr.Column():
submission_plot = gr.Plot(label="πŸ“ˆ Performance Analysis")
with gr.Column():
results_table = gr.Dataframe(label="πŸ“Š Updated Leaderboard", interactive=False)
# Tab 3: Google-Comparable Track
with gr.Tab("πŸ€– Google-Comparable Track", id="google_track"):
gr.Markdown(f"""
## {EVALUATION_TRACKS['google_comparable']['name']}
**{EVALUATION_TRACKS['google_comparable']['description']}**
This track evaluates models on language pairs supported by Google Translate,
enabling direct comparison with commercial baselines.
""")
with gr.Row():
with gr.Column(scale=2):
google_search = gr.Textbox(label="πŸ” Search Models", placeholder="Search by model name, author...")
with gr.Column(scale=1):
google_category = gr.Dropdown(
label="🏷️ Category Filter",
choices=["all"] + list(MODEL_CATEGORIES.keys()),
value="all"
)
with gr.Column(scale=1):
google_refresh = gr.Button("πŸ”„ Refresh", variant="secondary")
google_stats = gr.Markdown()
with gr.Row():
with gr.Column():
google_ranking_plot = gr.Plot(label="πŸ† Rankings")
with gr.Column():
google_comparison_plot = gr.Plot(label="πŸ“Š Performance Comparison")
google_leaderboard = gr.Dataframe(label="πŸ“ˆ Google-Comparable Leaderboard", interactive=False)
# Tab 4: UG40-Complete Track
with gr.Tab("🌍 UG40-Complete Track", id="ug40_track"):
gr.Markdown(f"""
## {EVALUATION_TRACKS['ug40_complete']['name']}
**{EVALUATION_TRACKS['ug40_complete']['description']}**
This track evaluates models on all UG40 language pairs,
providing comprehensive assessment of Ugandan language translation capabilities.
""")
with gr.Row():
with gr.Column(scale=2):
ug40_search = gr.Textbox(label="πŸ” Search Models", placeholder="Search by model name, author...")
with gr.Column(scale=1):
ug40_category = gr.Dropdown(
label="🏷️ Category Filter",
choices=["all"] + list(MODEL_CATEGORIES.keys()),
value="all"
)
with gr.Column(scale=1):
ug40_refresh = gr.Button("πŸ”„ Refresh", variant="secondary")
ug40_stats = gr.Markdown()
with gr.Row():
with gr.Column():
ug40_ranking_plot = gr.Plot(label="πŸ† Rankings")
with gr.Column():
ug40_comparison_plot = gr.Plot(label="πŸ“Š Performance Comparison")
ug40_leaderboard = gr.Dataframe(label="πŸ“ˆ UG40-Complete Leaderboard", interactive=False)
# Tab 5: Language Pair Analysis
with gr.Tab("πŸ“Š Language Pair Analysis", id="pairs_analysis"):
gr.Markdown("""
## 📊 Language Pair Performance Analysis
Compare model performance across individual language pairs with detailed breakdowns.
""")
with gr.Row():
with gr.Column(scale=1):
pairs_track_select = gr.Dropdown(
label="🏁 Select Track",
choices=list(EVALUATION_TRACKS.keys()),
value="google_comparable"
)
with gr.Column(scale=1):
pairs_refresh = gr.Button("πŸ”„ Analyze Language Pairs", variant="primary")
pairs_comparison_plot = gr.Plot(label="πŸ“Š Language Pair Comparison")
pairs_table = gr.Dataframe(label="πŸ“ˆ Language Pair Performance", interactive=False)
# Tab 6: Documentation
with gr.Tab("πŸ“š Documentation", id="docs"):
gr.Markdown(f"""
# 📖 SALT Translation Leaderboard Documentation
## 🎯 Overview
The SALT Translation Leaderboard provides rigorous evaluation of translation models
on Ugandan languages, with separate evaluation tracks for fair comparison.
## 🏁 Evaluation Tracks
**1. 🤖 Google-Comparable Track**
- **Languages**: {', '.join([LANGUAGE_NAMES[lang] for lang in GOOGLE_SUPPORTED_LANGUAGES])}
- **Purpose**: Fair comparison with commercial translation systems
- **Language Pairs**: {len([1 for src in GOOGLE_SUPPORTED_LANGUAGES for tgt in GOOGLE_SUPPORTED_LANGUAGES if src != tgt])}
**2. 🌍 UG40-Complete Track**
- **Languages**: All {len(ALL_UG40_LANGUAGES)} UG40 languages
- **Purpose**: Comprehensive Ugandan language capability assessment
- **Language Pairs**: {len([1 for src in ALL_UG40_LANGUAGES for tgt in ALL_UG40_LANGUAGES if src != tgt])}
## 📊 Evaluation Metrics
### Primary Metrics
- **Quality Score**: Composite metric (0-1) combining BLEU, ChrF, and error rates
- **BLEU**: Bilingual Evaluation Understudy (0-100)
- **ChrF**: Character-level F-score (0-1)
### Model Categories
Models are automatically categorized for fair comparison:
- **🏢 Commercial**: Production translation systems
- **🔬 Research**: Academic and research institution models
- **📊 Baseline**: Simple baseline and reference models
- **👥 Community**: User-submitted models
## 🔄 Submission Process
### Step 1: Download Test Set
1. Click "Download Test Set" in the first tab
2. Save the test set file
### Step 2: Generate Predictions
1. Load the test set in your evaluation pipeline
2. For each row, translate `source_text` from `source_language` to `target_language`
3. Save results as CSV with columns: `sample_id`, `prediction` (a minimal sketch follows below)
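For illustration only, a minimal pandas sketch of Step 2. It assumes a hypothetical `my_model.translate()` call and that the downloaded test set was saved as `salt_test_set.csv`:
```python
import pandas as pd

test_set = pd.read_csv("salt_test_set.csv")  # assumed local filename of the downloaded test set
predictions = test_set[["sample_id"]].copy()
predictions["prediction"] = [
    my_model.translate(text, source_language=src, target_language=tgt)  # hypothetical model call
    for text, src, tgt in zip(
        test_set["source_text"],
        test_set["source_language"],
        test_set["target_language"],
    )
]
predictions.to_csv("predictions.csv", index=False)
```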
### Step 3: Submit & Evaluate
1. Fill in model information
2. Upload your predictions file
3. Review validation report
4. Submit for evaluation
## 📋 File Formats
### Test Set Format
```csv
sample_id,source_text,source_language,target_language,domain,google_comparable
salt_000001,"Hello world",eng,lug,general,true
salt_000002,"How are you?",eng,ach,conversation,true
```
### Predictions Format
```csv
sample_id,prediction
salt_000001,"Amakuru ensi"
salt_000002,"Ibino nining?"
```
## 🤝 Contributing
This leaderboard is designed for the research community. When using results:
1. Consider the appropriate track for your comparison
2. Report confidence intervals when available
3. Acknowledge the model category in comparisons
---
*For questions, contact the team at research@sunbird.ai*
""")
# Event handlers
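# Session state: the validated predictions DataFrame and the auto-detected model category, carried from the validation step to the evaluation step.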
predictions_validated = gr.State(value=None)
detected_category_state = gr.State(value="community")
# Download test set
download_btn.click(
fn=download_test_set,
outputs=[download_file, download_info]
)
# Validate predictions
def handle_validation(file, model_name, author, description):
report, predictions, category = validate_submission_file(file, model_name, author, description)
can_evaluate = predictions is not None
if can_evaluate:
button_status = "\n\nβœ… **Ready to submit for evaluation!**"
else:
button_status = "\n\n❌ **Please fix issues above before evaluation**"
enhanced_report = report + button_status
return (
enhanced_report,
predictions,
category,
gr.update(interactive=can_evaluate)
)
validate_btn.click(
fn=handle_validation,
inputs=[predictions_file, model_name_input, author_input, description_input],
outputs=[validation_output, predictions_validated, detected_category_state, submit_btn]
)
# Submit for evaluation
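# evaluate_submission returns four values; the hidden Plot in the outputs list simply absorbs the unused fourth value.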
submit_btn.click(
fn=evaluate_submission,
inputs=[predictions_validated, model_name_input, author_input, description_input, detected_category_state],
outputs=[evaluation_output, results_table, submission_plot, gr.Plot(visible=False)]
)
# Track leaderboard refresh functions
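# The lambdas pin the track name; the search text and category filter are forwarded as *args.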
google_refresh.click(
fn=lambda *args: refresh_track_leaderboard("google_comparable", *args),
inputs=[google_search, google_category],
outputs=[google_leaderboard, google_ranking_plot, google_comparison_plot, google_stats]
)
ug40_refresh.click(
fn=lambda *args: refresh_track_leaderboard("ug40_complete", *args),
inputs=[ug40_search, ug40_category],
outputs=[ug40_leaderboard, ug40_ranking_plot, ug40_comparison_plot, ug40_stats]
)
# Language pair analysis
pairs_refresh.click(
fn=get_language_pair_comparison,
inputs=[pairs_track_select],
outputs=[pairs_table, pairs_comparison_plot]
)
# Load initial data for the default Google-Comparable tab
def load_initial_data():
try:
print("Loading initial data...")
global current_leaderboard
# Make sure we have a leaderboard
if current_leaderboard is None:
current_leaderboard = load_leaderboard()
print(f"Current leaderboard has {len(current_leaderboard)} entries")
# Try to load Google track data
try:
google_data = refresh_track_leaderboard("google_comparable", "", "all")
print("Successfully loaded Google track data")
return google_data
except Exception as e:
print(f"Error loading Google track: {e}")
# Return empty data if there's an error
empty_df = pd.DataFrame()
return (empty_df, None, None, "No data available")
except Exception as e:
print(f"Error in load_initial_data: {e}")
empty_df = pd.DataFrame()
return (empty_df, None, None, "Error loading data")
demo.load(
fn=load_initial_data,
outputs=[google_leaderboard, google_ranking_plot, google_comparison_plot, google_stats]
)
# Launch the application
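# 0.0.0.0:7860 matches the address/port expected by Hugging Face Spaces (and the Gradio default).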
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
show_error=True
)