# config.py
"""Configuration constants for the SALT translation leaderboard.

This module only defines module-level constants: dataset identifiers,
language lists, evaluation tracks, model categories, statistical / metric /
validation settings, and UI / chart styling. The only runtime lookup is
``HF_TOKEN``, which is read from the environment at import time.
"""

import os

# HuggingFace settings
# os.getenv returns None when the HF_TOKEN environment variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"

# Language settings - ALL UG40 LANGUAGES
# NOTE(review): "rny" is the code used here for Runyoro; ISO 639-3 lists
# Nyoro as "nyo" - confirm this matches the codes used by the datasets.
ALL_UG40_LANGUAGES = ["ach", "eng", "lgg", "lug", "nyn", "rny", "teo", "swa"]

# Display name for every code in ALL_UG40_LANGUAGES.
LANGUAGE_NAMES = {
    "ach": "Acholi",
    "eng": "English",
    "lgg": "Lugbara",
    "lug": "Luganda",
    "nyn": "Runyankole",
    "rny": "Runyoro",
    "teo": "Ateso",
    "swa": "Swahili",
}

# Google Translate supported subset (for fair comparison)
GOOGLE_SUPPORTED_LANGUAGES = ["lug", "ach", "swa", "eng"]

# Google Translate language mapping
# Keys are the codes listed in GOOGLE_SUPPORTED_LANGUAGES.
GOOGLE_LANG_MAP = {"lug": "lg", "ach": "ach", "swa": "sw", "eng": "en"}

# SCIENTIFIC EVALUATION TRACKS
# The same three track keys are reused by VALIDATION_CONFIG and UI_CONFIG.
EVALUATION_TRACKS = {
    "google_comparable": {
        "name": "Google-Comparable Track",
        "description": "Models evaluated only on language pairs supported by Google Translate",
        "languages": GOOGLE_SUPPORTED_LANGUAGES,
        "min_samples_per_pair": 50,
        "statistical_power": 0.8,
        "significance_level": 0.05,
    },
    "ug40_complete": {
        "name": "UG40-Complete Track",
        "description": "Models evaluated on all UG40 language pairs",
        "languages": ALL_UG40_LANGUAGES,
        "min_samples_per_pair": 30,
        "statistical_power": 0.8,
        "significance_level": 0.05,
    },
    "language_pair_matrix": {
        "name": "Language-Pair Matrix",
        "description": "Individual language pair analysis with statistical significance",
        "languages": ALL_UG40_LANGUAGES,
        "min_samples_per_pair": 20,
        "statistical_power": 0.7,
        "significance_level": 0.05,
    },
}

# MODEL CATEGORIES
# Each category's "color" is reused by CHART_CONFIG["category_colors"].
MODEL_CATEGORIES = {
    "commercial": {
        "name": "Commercial Systems",
        "description": "Production translation systems",
        "examples": ["google_translate", "azure_translator"],
        "color": "#1f77b4",
    },
    "research": {
        "name": "Research Models",
        "description": "Academic and research institution models",
        "examples": ["nllb", "m2m100"],
        "color": "#ff7f0e",
    },
    "baseline": {
        "name": "Baseline Models",
        "description": "Simple baseline and reference models",
        "examples": ["word_lookup", "frequency_baseline"],
        "color": "#2ca02c",
    },
    "community": {
        "name": "Community Submissions",
        "description": "User-submitted models and fine-tuned variants",
        "examples": ["user_submission"],
        "color": "#d62728",
    },
}

# STATISTICAL SETTINGS
STATISTICAL_CONFIG = {
    "confidence_level": 0.95,
    "bootstrap_samples": 1000,
    "min_samples_for_ci": 20,
    "effect_size_thresholds": {
        "small": 0.2,
        "medium": 0.5,
        "large": 0.8,
    },
    "multiple_testing_correction": "bonferroni",
    "outlier_detection": {
        "method": "iqr",
        "factor": 1.5,
    },
}

# METRICS CONFIGURATION - Enhanced for statistical analysis
METRICS_CONFIG = {
    "primary_metrics": ["bleu", "chrf", "quality_score"],
    "secondary_metrics": ["rouge1", "rouge2", "rougeL", "cer", "wer", "len_ratio"],
    "display_precision": 4,
    "quality_score_components": ["bleu", "chrf", "cer", "wer", "rouge1", "rougeL"],
    "error_metrics": ["cer", "wer"],  # Lower is better
    "score_metrics": ["bleu", "chrf", "quality_score", "rouge1", "rouge2", "rougeL"],
    "statistical_metrics": [
        "mean",
        "std",
        "median",
        "ci_lower",
        "ci_upper",
        "p_value",
        "effect_size",
    ],
}

# VALIDATION REQUIREMENTS
VALIDATION_CONFIG = {
    "min_samples_per_track": {
        "google_comparable": 200,
        "ug40_complete": 400,
        "language_pair_matrix": 50,
    },
    "max_missing_rate": 0.05,  # 5% missing predictions allowed
    "quality_thresholds": {
        "min_valid_predictions": 0.95,
        "max_duplicate_rate": 0.1,
        "min_avg_length": 3,
        "max_avg_length": 500,
    },
}

# UI CONFIGURATION
UI_CONFIG = {
    "title": "🏆 SALT Translation Leaderboard - Scientific Edition",
    "description": """
    Rigorous evaluation of translation models on Ugandan languages with statistical significance testing.
    Three evaluation tracks ensure fair comparison across different model capabilities and language support.
    """,
    "tracks": {
        "google_comparable": {
            "tab_name": "🤖 Google-Comparable Track",
            "icon": "🤖",
            "color": "#1f77b4",
        },
        "ug40_complete": {
            "tab_name": "🌍 UG40-Complete Track",
            "icon": "🌍",
            "color": "#ff7f0e",
        },
        "language_pair_matrix": {
            "tab_name": "📊 Language-Pair Matrix",
            "icon": "📊",
            "color": "#2ca02c",
        },
    },
}

# CHART CONFIGURATION - Research-grade styling
CHART_CONFIG = {
    "statistical_colorscale": "RdYlBu_r",
    # Built once at import time from MODEL_CATEGORIES (category key -> hex color).
    "category_colors": {cat: info["color"] for cat, info in MODEL_CATEGORIES.items()},
    "heatmap_config": {
        "colorscale": "Viridis",
        "show_values": True,
        "font_size": 10,
    },
    "confidence_interval_config": {
        "alpha": 0.3,
        "line_width": 2,
        "marker_size": 8,
    },
    "statistical_plot_config": {
        "height": 600,
        "width": 800,
        "margin": {"l": 100, "r": 50, "t": 50, "b": 100},
    },
}

# FILE FORMAT SPECIFICATIONS
PREDICTION_FORMAT = {
    "required_columns": ["sample_id", "prediction"],
    "optional_columns": ["model_name", "confidence", "category"],
    "file_types": [".csv", ".tsv", ".json"],
    "category_detection": {
        "google": ["google", "translate"],
        "nllb": ["nllb", "meta"],
        "m2m": ["m2m", "facebook"],
        "baseline": ["baseline", "simple", "lookup"],
    },
}

# EVALUATION SETTINGS
MAX_TEST_SAMPLES = 500  # Per language pair
MIN_SAMPLES_PER_PAIR = 10  # Minimum for basic statistics

SAMPLE_SIZE_RECOMMENDATIONS = {
    "basic_comparison": 50,
    "statistical_significance": 100,
    "publication_quality": 200,
}