# config.py
"""Configuration constants for the SALT translation leaderboard app.

Defect fixed: the original file had all newlines collapsed onto one line,
which placed every statement behind the leading ``#`` and turned the whole
module into a comment — none of these constants actually existed at runtime.
This restores the intended line structure; all values are unchanged.
"""

import os

# --- HuggingFace settings ---
# Token is read once at import time; None when the env var is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
# NOTE(review): lowercase "sunbird" here vs "Sunbird" above — confirm both
# repo ids point at the intended organization before relying on them.
SALT_DATASET = "sunbird/salt"

# --- Language settings - ALL UG40 LANGUAGES ---
ALL_UG40_LANGUAGES = [
    'ach', 'eng', 'lgg', 'lug', 'nyn', 'rny', 'teo', 'swa'
    # TODO: Complete this with actual SALT languages
]

# Language code -> human-readable display name.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili',
}

# Google Translate supported subset (for comparison).
GOOGLE_SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']

# Google Translate language mapping (SALT code -> Google Translate code).
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en',
}

# --- Evaluation settings ---
MAX_TEST_SAMPLES = 500      # Per language pair
MIN_SAMPLES_PER_PAIR = 10   # Minimum samples to be valid

# --- UI settings ---
TITLE = "🏆 SALT Translation Leaderboard"
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**

Upload your model's predictions on our standardized test set to see how it
performs across all UG40 language pairs. Compare against Google Translate
baseline and other submitted models.
"""

# --- File format specifications ---
PREDICTION_FORMAT = {
    'required_columns': ['sample_id', 'prediction'],
    'optional_columns': ['model_name', 'confidence'],
    'file_types': ['.csv', '.tsv', '.json'],
}

# --- Metrics configuration ---
METRICS_CONFIG = {
    'primary_metrics': ['bleu', 'chrf', 'quality_score'],
    'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
    'display_precision': 4,
}