# config.py
"""Static configuration for the SALT translation leaderboard.

Everything in this module is a plain constant (strings, numbers, lists and
dicts).  The only runtime lookup is ``HF_TOKEN``, which is read from the
process environment once, at import time.
"""

import os

# HuggingFace settings
# os.getenv returns None when HF_TOKEN is not set in the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
SALT_DATASET = "sunbird/salt"

# Language settings - ALL UG40 LANGUAGES
ALL_UG40_LANGUAGES = ["ach", "eng", "lgg", "lug", "nyn", "rny", "teo", "swa"]

# Display name for each language code listed in ALL_UG40_LANGUAGES.
LANGUAGE_NAMES = {
    "ach": "Acholi",
    "eng": "English",
    "lgg": "Lugbara",
    "lug": "Luganda",
    "nyn": "Runyankole",
    "rny": "Runyoro",
    "teo": "Ateso",
    "swa": "Swahili",
}

# Google Translate supported subset (for fair comparison)
GOOGLE_SUPPORTED_LANGUAGES = ["lug", "ach", "swa", "eng"]

# EVALUATION TRACKS
# NOTE: each track's "languages" entry is the very same list object as the
# module-level constant it names, so mutating one in place mutates the other.
EVALUATION_TRACKS = {
    "google_comparable": {
        "name": "Google-Comparable Track",
        "description": "Models evaluated on language pairs supported by Google Translate for commercial comparison",
        "languages": GOOGLE_SUPPORTED_LANGUAGES,
        "min_samples_per_pair": 50,
    },
    "ug40_complete": {
        "name": "UG40-Complete Track",
        "description": "Models evaluated on all UG40 language pairs for comprehensive assessment",
        "languages": ALL_UG40_LANGUAGES,
        "min_samples_per_pair": 30,
    },
}

# MODEL CATEGORIES
# Each category carries a display name, a description, example model
# identifiers, and a hex color (reused by CHART_CONFIG["category_colors"]).
MODEL_CATEGORIES = {
    "commercial": {
        "name": "Commercial Systems",
        "description": "Production translation systems",
        "examples": ["google_translate", "azure_translator"],
        "color": "#1f77b4",
    },
    "research": {
        "name": "Research Models",
        "description": "Academic and research institution models",
        "examples": ["nllb", "m2m100"],
        "color": "#ff7f0e",
    },
    "baseline": {
        "name": "Baseline Models",
        "description": "Simple baseline and reference models",
        "examples": ["word_lookup", "frequency_baseline"],
        "color": "#2ca02c",
    },
    "community": {
        "name": "Community Submissions",
        "description": "User-submitted models and fine-tuned variants",
        "examples": ["user_submission"],
        "color": "#d62728",
    },
}

# METRICS CONFIGURATION
# NOTE(review): the confidence/bootstrap keys look like settings for
# bootstrap confidence intervals -- confirm against the code that reads them.
METRICS_CONFIG = {
    "primary_metrics": ["bleu", "chrf", "quality_score"],
    "secondary_metrics": ["rouge1", "rougeL", "cer", "wer"],
    "display_precision": 4,
    "confidence_level": 0.95,
    "bootstrap_samples": 1000,
    "min_samples_for_ci": 20,
}

# VALIDATION REQUIREMENTS
VALIDATION_CONFIG = {
    # Keyed by the same track identifiers used in EVALUATION_TRACKS.
    "min_samples_per_track": {
        "google_comparable": 200,
        "ug40_complete": 400,
    },
    "max_missing_rate": 0.05,  # 5% missing predictions allowed
    "quality_thresholds": {
        "min_valid_predictions": 0.95,
        "max_duplicate_rate": 0.1,
        "min_avg_length": 3,
        "max_avg_length": 500,
    },
}

# FILE FORMAT SPECIFICATIONS
PREDICTION_FORMAT = {
    "required_columns": ["sample_id", "prediction"],
    "optional_columns": ["model_name", "confidence", "category"],
    "file_types": [".csv", ".tsv", ".json"],
    # Keyword lists per label; presumably matched against a model name to
    # guess its category -- verify against the caller.
    "category_detection": {
        "google": ["google", "translate"],
        "nllb": ["nllb", "meta"],
        "m2m": ["m2m", "facebook"],
        "baseline": ["baseline", "simple", "lookup"],
    },
}

# EVALUATION SETTINGS
MAX_TEST_SAMPLES = 500  # Per language pair
MIN_SAMPLES_PER_PAIR = 10  # Minimum for basic statistics

# CHART CONFIGURATION
CHART_CONFIG = {
    # Derived from MODEL_CATEGORIES so the colors are defined in one place.
    "category_colors": {cat: info["color"] for cat, info in MODEL_CATEGORIES.items()},
    "height": 600,
    "width": 800,
    "margin": {"l": 100, "r": 50, "t": 50, "b": 100},
}