File size: 3,535 Bytes
11a64ab
 
 
 
aa99a22
11a64ab
aa99a22
11a64ab
 
d5b83bc
 
11a64ab
 
d5b83bc
71c46bd
d5b83bc
 
 
 
 
 
11a64ab
 
d5b83bc
 
aa99a22
71c46bd
d5b83bc
 
 
71c46bd
d5b83bc
 
 
 
71c46bd
 
d5b83bc
 
 
 
aa99a22
d5b83bc
 
 
 
 
 
 
 
 
71c46bd
d5b83bc
 
 
 
 
 
 
 
 
 
 
 
71c46bd
d5b83bc
 
 
 
aa99a22
71c46bd
aa99a22
d5b83bc
71c46bd
d5b83bc
71c46bd
 
 
8727da4
 
d5b83bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71c46bd
 
 
 
 
 
 
11a64ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# config.py
import os

# HuggingFace settings
# The token is taken from the HF_TOKEN environment variable and is None when
# that variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Dataset identifiers used by the rest of the project.
SALT_DATASET = "sunbird/salt"
TEST_SET_DATASET = "Sunbird/salt-translation-test-set"
LEADERBOARD_DATASET = "Sunbird/salt-translation-leaderboard"

# Language settings - ALL UG40 LANGUAGES
# Language code -> display name. The insertion order of this mapping fixes
# the order of ALL_UG40_LANGUAGES.
LANGUAGE_NAMES = dict(
    ach="Acholi",
    eng="English",
    lgg="Lugbara",
    lug="Luganda",
    nyn="Runyankole",
    rny="Runyoro",
    teo="Ateso",
    swa="Swahili",
)

# The language codes on their own, as a list in the mapping's order.
ALL_UG40_LANGUAGES = list(LANGUAGE_NAMES)

# Subset of the languages that Google Translate supports (for fair comparison)
GOOGLE_SUPPORTED_LANGUAGES = [
    "lug",  # Luganda
    "ach",  # Acholi
    "swa",  # Swahili
    "eng",  # English
]

# EVALUATION TRACKS
# Track id -> display name, description, language list and the
# "min_samples_per_pair" value for that track. Each "languages" entry is the
# module-level list object itself, not a copy.
EVALUATION_TRACKS = dict(
    google_comparable=dict(
        name="Google-Comparable Track",
        description=(
            "Models evaluated on language pairs supported by Google Translate "
            "for commercial comparison"
        ),
        languages=GOOGLE_SUPPORTED_LANGUAGES,
        min_samples_per_pair=50,
    ),
    ug40_complete=dict(
        name="UG40-Complete Track",
        description=(
            "Models evaluated on all UG40 language pairs "
            "for comprehensive assessment"
        ),
        languages=ALL_UG40_LANGUAGES,
        min_samples_per_pair=30,
    ),
)

# MODEL CATEGORIES
# Category id -> display name, description, example identifiers and a hex
# colour string (CHART_CONFIG reads the "color" values).
MODEL_CATEGORIES = dict(
    commercial=dict(
        name="Commercial Systems",
        description="Production translation systems",
        examples=["google_translate", "azure_translator"],
        color="#1f77b4",
    ),
    research=dict(
        name="Research Models",
        description="Academic and research institution models",
        examples=["nllb", "m2m100"],
        color="#ff7f0e",
    ),
    baseline=dict(
        name="Baseline Models",
        description="Simple baseline and reference models",
        examples=["word_lookup", "frequency_baseline"],
        color="#2ca02c",
    ),
    community=dict(
        name="Community Submissions",
        description="User-submitted models and fine-tuned variants",
        examples=["user_submission"],
        color="#d62728",
    ),
)

# METRICS CONFIGURATION
# Metric names grouped as primary/secondary, followed by numeric settings.
# NOTE(review): the numeric values look like display rounding and bootstrap
# confidence-interval parameters - confirm against the code that reads them.
METRICS_CONFIG = dict(
    primary_metrics=["bleu", "chrf", "quality_score"],
    secondary_metrics=["rouge1", "rougeL", "cer", "wer"],
    display_precision=4,
    confidence_level=0.95,
    bootstrap_samples=1000,
    min_samples_for_ci=20,
)

# VALIDATION REQUIREMENTS
# Per-track minimum sample counts, the tolerated rate of missing predictions,
# and a group of quality thresholds.
VALIDATION_CONFIG = dict(
    min_samples_per_track=dict(google_comparable=200, ug40_complete=400),
    # 5% missing predictions allowed
    max_missing_rate=0.05,
    quality_thresholds=dict(
        min_valid_predictions=0.95,
        max_duplicate_rate=0.1,
        min_avg_length=3,
        max_avg_length=500,
    ),
)

# FILE FORMAT SPECIFICATIONS
# Column names and file extensions for prediction files, plus keyword lists
# stored per label under "category_detection".
PREDICTION_FORMAT = dict(
    required_columns=["sample_id", "prediction"],
    optional_columns=["model_name", "confidence", "category"],
    file_types=[".csv", ".tsv", ".json"],
    category_detection=dict(
        google=["google", "translate"],
        nllb=["nllb", "meta"],
        m2m=["m2m", "facebook"],
        baseline=["baseline", "simple", "lookup"],
    ),
)

# EVALUATION SETTINGS
# Minimum for basic statistics
MIN_SAMPLES_PER_PAIR = 10
# Per language pair
MAX_TEST_SAMPLES = 500

# CHART CONFIGURATION
# "category_colors" maps every MODEL_CATEGORIES key to that category's "color".
# NOTE(review): the margin keys l/r/t/b presumably stand for left/right/top/
# bottom - confirm against the plotting code that consumes this dict.
CHART_CONFIG = dict(
    category_colors={key: MODEL_CATEGORIES[key]["color"] for key in MODEL_CATEGORIES},
    height=600,
    width=800,
    margin=dict(l=100, r=50, t=50, b=100),
)