akera committed on
Commit
71c46bd
·
verified ·
1 Parent(s): 7c90731

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +19 -105
config.py CHANGED
@@ -12,7 +12,7 @@ ALL_UG40_LANGUAGES = ["ach", "eng", "lgg", "lug", "nyn", "rny", "teo", "swa"]
12
 
13
  LANGUAGE_NAMES = {
14
  "ach": "Acholi",
15
- "eng": "English",
16
  "lgg": "Lugbara",
17
  "lug": "Luganda",
18
  "nyn": "Runyankole",
@@ -24,34 +24,19 @@ LANGUAGE_NAMES = {
24
  # Google Translate supported subset (for fair comparison)
25
  GOOGLE_SUPPORTED_LANGUAGES = ["lug", "ach", "swa", "eng"]
26
 
27
- # Google Translate language mapping
28
- GOOGLE_LANG_MAP = {"lug": "lg", "ach": "ach", "swa": "sw", "eng": "en"}
29
-
30
- # SCIENTIFIC EVALUATION TRACKS
31
  EVALUATION_TRACKS = {
32
  "google_comparable": {
33
  "name": "Google-Comparable Track",
34
- "description": "Models evaluated only on language pairs supported by Google Translate",
35
  "languages": GOOGLE_SUPPORTED_LANGUAGES,
36
  "min_samples_per_pair": 50,
37
- "statistical_power": 0.8,
38
- "significance_level": 0.05,
39
  },
40
  "ug40_complete": {
41
- "name": "UG40-Complete Track",
42
- "description": "Models evaluated on all UG40 language pairs",
43
  "languages": ALL_UG40_LANGUAGES,
44
  "min_samples_per_pair": 30,
45
- "statistical_power": 0.8,
46
- "significance_level": 0.05,
47
- },
48
- "language_pair_matrix": {
49
- "name": "Language-Pair Matrix",
50
- "description": "Individual language pair analysis with statistical significance",
51
- "languages": ALL_UG40_LANGUAGES,
52
- "min_samples_per_pair": 20,
53
- "statistical_power": 0.7,
54
- "significance_level": 0.05,
55
  },
56
  }
57
 
@@ -64,7 +49,7 @@ MODEL_CATEGORIES = {
64
  "color": "#1f77b4",
65
  },
66
  "research": {
67
- "name": "Research Models",
68
  "description": "Academic and research institution models",
69
  "examples": ["nllb", "m2m100"],
70
  "color": "#ff7f0e",
@@ -77,46 +62,20 @@ MODEL_CATEGORIES = {
77
  },
78
  "community": {
79
  "name": "Community Submissions",
80
- "description": "User-submitted models and fine-tuned variants",
81
  "examples": ["user_submission"],
82
  "color": "#d62728",
83
  },
84
  }
85
 
86
- # STATISTICAL SETTINGS
87
- STATISTICAL_CONFIG = {
88
- "confidence_level": 0.95,
89
- "bootstrap_samples": 1000,
90
- "min_samples_for_ci": 20,
91
- "effect_size_thresholds": {
92
- "small": 0.2,
93
- "medium": 0.5,
94
- "large": 0.8,
95
- },
96
- "multiple_testing_correction": "bonferroni",
97
- "outlier_detection": {
98
- "method": "iqr",
99
- "factor": 1.5,
100
- },
101
- }
102
-
103
- # METRICS CONFIGURATION - Enhanced for statistical analysis
104
  METRICS_CONFIG = {
105
  "primary_metrics": ["bleu", "chrf", "quality_score"],
106
- "secondary_metrics": ["rouge1", "rouge2", "rougeL", "cer", "wer", "len_ratio"],
107
  "display_precision": 4,
108
- "quality_score_components": ["bleu", "chrf", "cer", "wer", "rouge1", "rougeL"],
109
- "error_metrics": ["cer", "wer"], # Lower is better
110
- "score_metrics": ["bleu", "chrf", "quality_score", "rouge1", "rouge2", "rougeL"],
111
- "statistical_metrics": [
112
- "mean",
113
- "std",
114
- "median",
115
- "ci_lower",
116
- "ci_upper",
117
- "p_value",
118
- "effect_size",
119
- ],
120
  }
121
 
122
  # VALIDATION REQUIREMENTS
@@ -124,7 +83,6 @@ VALIDATION_CONFIG = {
124
  "min_samples_per_track": {
125
  "google_comparable": 200,
126
  "ug40_complete": 400,
127
- "language_pair_matrix": 50,
128
  },
129
  "max_missing_rate": 0.05, # 5% missing predictions allowed
130
  "quality_thresholds": {
@@ -135,53 +93,6 @@ VALIDATION_CONFIG = {
135
  },
136
  }
137
 
138
- # UI CONFIGURATION
139
- UI_CONFIG = {
140
- "title": "🏆 SALT Translation Leaderboard - Scientific Edition",
141
- "description": """
142
- Rigorous evaluation of translation models on Ugandan languages with statistical significance testing.
143
- Three evaluation tracks ensure fair comparison across different model capabilities and language support.
144
- """,
145
- "tracks": {
146
- "google_comparable": {
147
- "tab_name": "🤖 Google-Comparable Track",
148
- "icon": "🤖",
149
- "color": "#1f77b4",
150
- },
151
- "ug40_complete": {
152
- "tab_name": "🌍 UG40-Complete Track",
153
- "icon": "🌍",
154
- "color": "#ff7f0e",
155
- },
156
- "language_pair_matrix": {
157
- "tab_name": "📊 Language-Pair Matrix",
158
- "icon": "📊",
159
- "color": "#2ca02c",
160
- },
161
- },
162
- }
163
-
164
- # CHART CONFIGURATION - Research-grade styling
165
- CHART_CONFIG = {
166
- "statistical_colorscale": "RdYlBu_r",
167
- "category_colors": {cat: info["color"] for cat, info in MODEL_CATEGORIES.items()},
168
- "heatmap_config": {
169
- "colorscale": "Viridis",
170
- "show_values": True,
171
- "font_size": 10,
172
- },
173
- "confidence_interval_config": {
174
- "alpha": 0.3,
175
- "line_width": 2,
176
- "marker_size": 8,
177
- },
178
- "statistical_plot_config": {
179
- "height": 600,
180
- "width": 800,
181
- "margin": {"l": 100, "r": 50, "t": 50, "b": 100},
182
- },
183
- }
184
-
185
  # FILE FORMAT SPECIFICATIONS
186
  PREDICTION_FORMAT = {
187
  "required_columns": ["sample_id", "prediction"],
@@ -198,8 +109,11 @@ PREDICTION_FORMAT = {
198
  # EVALUATION SETTINGS
199
  MAX_TEST_SAMPLES = 500 # Per language pair
200
  MIN_SAMPLES_PER_PAIR = 10 # Minimum for basic statistics
201
- SAMPLE_SIZE_RECOMMENDATIONS = {
202
- "basic_comparison": 50,
203
- "statistical_significance": 100,
204
- "publication_quality": 200,
 
 
 
205
  }
 
12
 
13
  LANGUAGE_NAMES = {
14
  "ach": "Acholi",
15
+ "eng": "English",
16
  "lgg": "Lugbara",
17
  "lug": "Luganda",
18
  "nyn": "Runyankole",
 
24
  # Google Translate supported subset (for fair comparison)
25
  GOOGLE_SUPPORTED_LANGUAGES = ["lug", "ach", "swa", "eng"]
26
 
27
+ # EVALUATION TRACKS
 
 
 
28
  EVALUATION_TRACKS = {
29
  "google_comparable": {
30
  "name": "Google-Comparable Track",
31
+ "description": "Models evaluated on language pairs supported by Google Translate for commercial comparison",
32
  "languages": GOOGLE_SUPPORTED_LANGUAGES,
33
  "min_samples_per_pair": 50,
 
 
34
  },
35
  "ug40_complete": {
36
+ "name": "UG40-Complete Track",
37
+ "description": "Models evaluated on all UG40 language pairs for comprehensive assessment",
38
  "languages": ALL_UG40_LANGUAGES,
39
  "min_samples_per_pair": 30,
 
 
 
 
 
 
 
 
 
 
40
  },
41
  }
42
 
 
49
  "color": "#1f77b4",
50
  },
51
  "research": {
52
+ "name": "Research Models",
53
  "description": "Academic and research institution models",
54
  "examples": ["nllb", "m2m100"],
55
  "color": "#ff7f0e",
 
62
  },
63
  "community": {
64
  "name": "Community Submissions",
65
+ "description": "User-submitted models and fine-tuned variants",
66
  "examples": ["user_submission"],
67
  "color": "#d62728",
68
  },
69
  }
70
 
71
+ # METRICS CONFIGURATION
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  METRICS_CONFIG = {
73
  "primary_metrics": ["bleu", "chrf", "quality_score"],
74
+ "secondary_metrics": ["rouge1", "rougeL", "cer", "wer"],
75
  "display_precision": 4,
76
+ "confidence_level": 0.95,
77
+ "bootstrap_samples": 1000,
78
+ "min_samples_for_ci": 20,
 
 
 
 
 
 
 
 
 
79
  }
80
 
81
  # VALIDATION REQUIREMENTS
 
83
  "min_samples_per_track": {
84
  "google_comparable": 200,
85
  "ug40_complete": 400,
 
86
  },
87
  "max_missing_rate": 0.05, # 5% missing predictions allowed
88
  "quality_thresholds": {
 
93
  },
94
  }
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  # FILE FORMAT SPECIFICATIONS
97
  PREDICTION_FORMAT = {
98
  "required_columns": ["sample_id", "prediction"],
 
109
  # EVALUATION SETTINGS
110
  MAX_TEST_SAMPLES = 500 # Per language pair
111
  MIN_SAMPLES_PER_PAIR = 10 # Minimum for basic statistics
112
+
113
+ # CHART CONFIGURATION
114
+ CHART_CONFIG = {
115
+ "category_colors": {cat: info["color"] for cat, info in MODEL_CATEGORIES.items()},
116
+ "height": 600,
117
+ "width": 800,
118
+ "margin": {"l": 100, "r": 50, "t": 50, "b": 100},
119
  }