# config.py
"""Configuration constants for the SALT translation leaderboard app.

Defect fixed: the original file had all newlines collapsed onto one line,
which placed every statement behind the leading ``#`` and turned the whole
module into a comment — none of these constants actually existed at runtime.
This restores the intended line structure; all values are unchanged.
"""

import os

# --- HuggingFace settings ---
# Token is read once at import time; None when the env var is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
# NOTE(review): lowercase "sunbird" here vs "Sunbird" above — confirm both
# repo ids point at the intended organization before relying on them.
SALT_DATASET = "sunbird/salt"

# --- Language settings - ALL UG40 LANGUAGES ---
ALL_UG40_LANGUAGES = [
    'ach', 'eng', 'lgg', 'lug', 'nyn', 'rny', 'teo', 'swa'
    # TODO: Complete this with actual SALT languages
]

# Language code -> human-readable display name.
LANGUAGE_NAMES = {
    'ach': 'Acholi',
    'eng': 'English',
    'lgg': 'Lugbara',
    'lug': 'Luganda',
    'nyn': 'Runyankole',
    'rny': 'Runyoro',
    'teo': 'Ateso',
    'swa': 'Swahili',
}

# Google Translate supported subset (for comparison).
GOOGLE_SUPPORTED_LANGUAGES = ['lug', 'ach', 'swa', 'eng']

# Google Translate language mapping (SALT code -> Google Translate code).
GOOGLE_LANG_MAP = {
    'lug': 'lg',
    'ach': 'ach',
    'swa': 'sw',
    'eng': 'en',
}

# --- Evaluation settings ---
MAX_TEST_SAMPLES = 500      # Per language pair
MIN_SAMPLES_PER_PAIR = 10   # Minimum samples to be valid

# --- UI settings ---
TITLE = "🏆 SALT Translation Leaderboard"
DESCRIPTION = """
**Scientific evaluation of translation models on Ugandan languages**

Upload your model's predictions on our standardized test set to see how it
performs across all UG40 language pairs. Compare against Google Translate
baseline and other submitted models.
"""

# --- File format specifications ---
PREDICTION_FORMAT = {
    'required_columns': ['sample_id', 'prediction'],
    'optional_columns': ['model_name', 'confidence'],
    'file_types': ['.csv', '.tsv', '.json'],
}

# --- Metrics configuration ---
METRICS_CONFIG = {
    'primary_metrics': ['bleu', 'chrf', 'quality_score'],
    'secondary_metrics': ['rouge1', 'rougeL', 'cer', 'wer'],
    'display_precision': 4,
}