batyrme committed on
Commit
c9192c4
·
1 Parent(s): 8380377

fixed html formatting

Browse files
Files changed (1) hide show
  1. app.py +42 -40
app.py CHANGED
@@ -36,7 +36,7 @@ class KazTEBLeaderboard:
36
  def __init__(self, data: List[Dict[str, Any]]):
37
  self.data = data
38
  self.tasks = self._extract_tasks()
39
-
40
  def _extract_tasks(self) -> Dict[str, List[str]]:
41
  tasks = {}
42
  if self.data:
@@ -46,20 +46,20 @@ class KazTEBLeaderboard:
46
  datasets = [k for k in sample_model[task_name].keys() if k != 'average_score']
47
  tasks[task_name] = datasets
48
  return tasks
49
-
50
  def _format_score(self, score: float) -> str:
51
  return f"{score:.4f}"
52
-
53
  def _create_model_link(self, name: str, url: str) -> str:
54
  return f'<a href="{url}" target="_blank" style="color: #1976d2; text-decoration: none;">{name}</a>'
55
-
56
  def get_task_dataframe(self, task_name: str) -> pd.DataFrame:
57
  rows = []
58
-
59
  for model in self.data:
60
  if task_name not in model:
61
  continue
62
-
63
  row = {
64
  'Model': self._create_model_link(model['name'], model['url']),
65
  'Average': self._format_score(model[task_name]['average_score']),
@@ -67,21 +67,21 @@ class KazTEBLeaderboard:
67
  'Parameters': model.get('num_parameters', 'N/A'),
68
  'Embedding Dimmension': model.get('emb_dim', 'N/A')
69
  }
70
-
71
  # Addition of dataset-specific scores
72
  for dataset in self.tasks[task_name]:
73
  if dataset in model[task_name]:
74
  row[dataset] = self._format_score(model[task_name][dataset])
75
-
76
  rows.append(row)
77
-
78
  df = pd.DataFrame(rows)
79
  df['_sort_key'] = df['Average'].astype(float)
80
  df = df.sort_values('_sort_key', ascending=False).drop('_sort_key', axis=1)
81
  df.insert(0, 'Rank', range(1, len(df) + 1))
82
-
83
  return df
84
-
85
  def create_interface(self):
86
 
87
  # we will force the light theme for now :)
@@ -98,7 +98,7 @@ class KazTEBLeaderboard:
98
 
99
  with gr.Blocks(js=js_func) as demo:
100
  # Header
101
- gr.Markdown(
102
  """
103
  <div style="text-align: center; margin-bottom: 20px;">
104
  <h1 style="font-size: 36px; margin-bottom: 10px;">KazTEB Leaderboard 🏆</h1>
@@ -106,9 +106,9 @@ class KazTEBLeaderboard:
106
  </div>
107
  """
108
  )
109
-
110
  # Subheader -- Project description
111
- gr.Markdown(
112
  """
113
  <div style="margin-bottom: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 8px; border-left: 4px solid #1976d2;">
114
  <p style="font-size: 16px; line-height: 1.6; margin: 0; color: #333;">
@@ -117,10 +117,10 @@ class KazTEBLeaderboard:
117
  </div>
118
  """
119
  )
120
-
121
  with gr.Tabs() as main_tabs:
122
  with gr.Tab("📊 Task Results"):
123
-
124
  with gr.Tabs() as task_tabs:
125
  with gr.Tab("Retrieval"):
126
  retrieval_df = self.get_task_dataframe('retrieval')
@@ -129,9 +129,10 @@ class KazTEBLeaderboard:
129
  headers=list(retrieval_df.columns),
130
  datatype=["number", "html", "str", "str", "str"] + ["str"] * (len(retrieval_df.columns) - 5),
131
  col_count=(len(retrieval_df.columns), "fixed"),
132
- interactive=False
 
133
  )
134
-
135
  with gr.Tab("Classification"):
136
  classification_df = self.get_task_dataframe('classification')
137
  gr.DataFrame(
@@ -139,9 +140,10 @@ class KazTEBLeaderboard:
139
  headers=list(classification_df.columns),
140
  datatype=["number", "html", "str", "str", "str"] + ["str"] * (len(classification_df.columns) - 5),
141
  col_count=(len(classification_df.columns), "fixed"),
142
- interactive=False
 
143
  )
144
-
145
  with gr.Tab("Bitext Mining"):
146
  bitext_df = self.get_task_dataframe('bitext_mining')
147
  gr.DataFrame(
@@ -149,19 +151,20 @@ class KazTEBLeaderboard:
149
  headers=list(bitext_df.columns),
150
  datatype=["number", "html", "str", "str", "str"] + ["str"] * (len(bitext_df.columns) - 5),
151
  col_count=(len(bitext_df.columns), "fixed"),
152
- interactive=False
 
153
  )
154
-
155
  with gr.Tab("📈 Metrics"):
156
  gr.Markdown("## Evaluation Metrics Overview")
157
  gr.Markdown("Although the evaluation generates multiple metric values for each task, we retain only a single metric for reference.")
158
-
159
  with gr.Row():
160
 
161
  with gr.Column():
162
  gr.Markdown(
163
  """### 🔍 Retrieval
164
-
165
  **Metric:** nDCG@10 (Normalized Discounted Cumulative Gain)
166
  - Measures ranking quality of retrieved documents
167
  - Considers both relevance and position
@@ -172,26 +175,26 @@ class KazTEBLeaderboard:
172
  - Human-annotated question-document pairs""",
173
  elem_classes=["retrieval-card"]
174
  )
175
-
176
  with gr.Column():
177
  gr.Markdown(
178
  """### 📝 Classification
179
-
180
  **Metric:** Accuracy
181
  - Percentage of correctly classified instances
182
  - Standard classification metric
183
  - **Range:** 0.0 - 1.0 (higher is better)
184
 
185
  **Datasets:**
186
- - [KazSandraPolarityClassification](https://huggingface.co/datasets/issai/kazsandra): Sentiment polarity
187
- - [KazSandraScoreClassification](https://huggingface.co/datasets/issai/kazsandra): Sentiment scoring""",
188
  elem_classes=["classification-card"]
189
  )
190
-
191
  with gr.Column():
192
  gr.Markdown(
193
  """### 🔗 Bitext Mining
194
-
195
  **Metric:** F1-Score
196
  - Harmonic mean of precision and recall
197
  - Balances correctness and completeness
@@ -202,10 +205,10 @@ class KazTEBLeaderboard:
202
  - Bidirectional evaluation""",
203
  elem_classes=["bitext-card"]
204
  )
205
-
206
  gr.Markdown("---")
207
  gr.Markdown("### 📊 Scoring & Ranking")
208
-
209
  with gr.Row():
210
  with gr.Column():
211
  gr.Markdown("**Task Averaging:** Equal weight per dataset within each task")
@@ -214,10 +217,9 @@ class KazTEBLeaderboard:
214
  with gr.Column():
215
  #gr.Markdown("**Future Plans:** Overall cross-task scoring implementation")
216
  pass
217
-
218
- # Todo section at the bottom
219
  gr.Markdown("---")
220
- gr.Markdown(
221
  """
222
  <div style="margin-top: 30px; padding: 20px; background-color: #f0f8ff; border-radius: 8px; border-left: 4px solid #4a90e2;">
223
  <h3 style="margin-top: 0; color: #2c3e50; display: flex; align-items: center;">
@@ -230,16 +232,16 @@ class KazTEBLeaderboard:
230
  </div>
231
  """
232
  )
233
-
234
  # Contact information
235
- gr.Markdown(
236
  """
237
  <div style="text-align: center; margin-top: 20px; padding: 15px; color: #666; font-size: 14px;">
238
  📧 Contact: <a href="mailto:arysbatyr@gmail.com" style="color: #1976d2; text-decoration: none;">arysbatyr@gmail.com</a>
239
  </div>
240
  """
241
  )
242
-
243
  return demo
244
 
245
 
@@ -252,9 +254,9 @@ def load_benchmark_data(filepath: str = None) -> List[Dict[str, Any]]:
252
 
253
  if __name__ == "__main__":
254
  data = load_benchmark_data("./results.json")
255
-
256
  leaderboard = KazTEBLeaderboard(data)
257
-
258
  demo = leaderboard.create_interface()
259
  demo.launch()
260
 
 
36
  def __init__(self, data: List[Dict[str, Any]]):
37
  self.data = data
38
  self.tasks = self._extract_tasks()
39
+
40
  def _extract_tasks(self) -> Dict[str, List[str]]:
41
  tasks = {}
42
  if self.data:
 
46
  datasets = [k for k in sample_model[task_name].keys() if k != 'average_score']
47
  tasks[task_name] = datasets
48
  return tasks
49
+
50
  def _format_score(self, score: float) -> str:
51
  return f"{score:.4f}"
52
+
53
  def _create_model_link(self, name: str, url: str) -> str:
54
  return f'<a href="{url}" target="_blank" style="color: #1976d2; text-decoration: none;">{name}</a>'
55
+
56
  def get_task_dataframe(self, task_name: str) -> pd.DataFrame:
57
  rows = []
58
+
59
  for model in self.data:
60
  if task_name not in model:
61
  continue
62
+
63
  row = {
64
  'Model': self._create_model_link(model['name'], model['url']),
65
  'Average': self._format_score(model[task_name]['average_score']),
 
67
  'Parameters': model.get('num_parameters', 'N/A'),
68
  'Embedding Dimmension': model.get('emb_dim', 'N/A')
69
  }
70
+
71
  # Addition of dataset-specific scores
72
  for dataset in self.tasks[task_name]:
73
  if dataset in model[task_name]:
74
  row[dataset] = self._format_score(model[task_name][dataset])
75
+
76
  rows.append(row)
77
+
78
  df = pd.DataFrame(rows)
79
  df['_sort_key'] = df['Average'].astype(float)
80
  df = df.sort_values('_sort_key', ascending=False).drop('_sort_key', axis=1)
81
  df.insert(0, 'Rank', range(1, len(df) + 1))
82
+
83
  return df
84
+
85
  def create_interface(self):
86
 
87
  # we will force the light theme for now :)
 
98
 
99
  with gr.Blocks(js=js_func) as demo:
100
  # Header
101
+ gr.HTML(
102
  """
103
  <div style="text-align: center; margin-bottom: 20px;">
104
  <h1 style="font-size: 36px; margin-bottom: 10px;">KazTEB Leaderboard 🏆</h1>
 
106
  </div>
107
  """
108
  )
109
+
110
  # Subheader -- Project description
111
+ gr.HTML(
112
  """
113
  <div style="margin-bottom: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 8px; border-left: 4px solid #1976d2;">
114
  <p style="font-size: 16px; line-height: 1.6; margin: 0; color: #333;">
 
117
  </div>
118
  """
119
  )
120
+
121
  with gr.Tabs() as main_tabs:
122
  with gr.Tab("📊 Task Results"):
123
+
124
  with gr.Tabs() as task_tabs:
125
  with gr.Tab("Retrieval"):
126
  retrieval_df = self.get_task_dataframe('retrieval')
 
129
  headers=list(retrieval_df.columns),
130
  datatype=["number", "html", "str", "str", "str"] + ["str"] * (len(retrieval_df.columns) - 5),
131
  col_count=(len(retrieval_df.columns), "fixed"),
132
+ interactive=False,
133
+ column_widths=[50, 400] + [200] * (len(retrieval_df.columns)-2)
134
  )
135
+
136
  with gr.Tab("Classification"):
137
  classification_df = self.get_task_dataframe('classification')
138
  gr.DataFrame(
 
140
  headers=list(classification_df.columns),
141
  datatype=["number", "html", "str", "str", "str"] + ["str"] * (len(classification_df.columns) - 5),
142
  col_count=(len(classification_df.columns), "fixed"),
143
+ interactive=False,
144
+ column_widths=[50, 400] + [200] * (len(classification_df.columns)-2)
145
  )
146
+
147
  with gr.Tab("Bitext Mining"):
148
  bitext_df = self.get_task_dataframe('bitext_mining')
149
  gr.DataFrame(
 
151
  headers=list(bitext_df.columns),
152
  datatype=["number", "html", "str", "str", "str"] + ["str"] * (len(bitext_df.columns) - 5),
153
  col_count=(len(bitext_df.columns), "fixed"),
154
+ interactive=False,
155
+ column_widths=[50, 400] + [200] * (len(bitext_df.columns)-2)
156
  )
157
+
158
  with gr.Tab("📈 Metrics"):
159
  gr.Markdown("## Evaluation Metrics Overview")
160
  gr.Markdown("Although the evaluation generates multiple metric values for each task, we retain only a single metric for reference.")
161
+
162
  with gr.Row():
163
 
164
  with gr.Column():
165
  gr.Markdown(
166
  """### 🔍 Retrieval
167
+
168
  **Metric:** nDCG@10 (Normalized Discounted Cumulative Gain)
169
  - Measures ranking quality of retrieved documents
170
  - Considers both relevance and position
 
175
  - Human-annotated question-document pairs""",
176
  elem_classes=["retrieval-card"]
177
  )
178
+
179
  with gr.Column():
180
  gr.Markdown(
181
  """### 📝 Classification
182
+
183
  **Metric:** Accuracy
184
  - Percentage of correctly classified instances
185
  - Standard classification metric
186
  - **Range:** 0.0 - 1.0 (higher is better)
187
 
188
  **Datasets:**
189
+ - **[KazSandraPolarityClassification](https://huggingface.co/datasets/issai/kazsandra):** Sentiment polarity
190
+ - **[KazSandraScoreClassification](https://huggingface.co/datasets/issai/kazsandra):** Sentiment scoring""",
191
  elem_classes=["classification-card"]
192
  )
193
+
194
  with gr.Column():
195
  gr.Markdown(
196
  """### 🔗 Bitext Mining
197
+
198
  **Metric:** F1-Score
199
  - Harmonic mean of precision and recall
200
  - Balances correctness and completeness
 
205
  - Bidirectional evaluation""",
206
  elem_classes=["bitext-card"]
207
  )
208
+
209
  gr.Markdown("---")
210
  gr.Markdown("### 📊 Scoring & Ranking")
211
+
212
  with gr.Row():
213
  with gr.Column():
214
  gr.Markdown("**Task Averaging:** Equal weight per dataset within each task")
 
217
  with gr.Column():
218
  #gr.Markdown("**Future Plans:** Overall cross-task scoring implementation")
219
  pass
220
+
 
221
  gr.Markdown("---")
222
+ gr.HTML(
223
  """
224
  <div style="margin-top: 30px; padding: 20px; background-color: #f0f8ff; border-radius: 8px; border-left: 4px solid #4a90e2;">
225
  <h3 style="margin-top: 0; color: #2c3e50; display: flex; align-items: center;">
 
232
  </div>
233
  """
234
  )
235
+
236
  # Contact information
237
+ gr.HTML(
238
  """
239
  <div style="text-align: center; margin-top: 20px; padding: 15px; color: #666; font-size: 14px;">
240
  📧 Contact: <a href="mailto:arysbatyr@gmail.com" style="color: #1976d2; text-decoration: none;">arysbatyr@gmail.com</a>
241
  </div>
242
  """
243
  )
244
+
245
  return demo
246
 
247
 
 
254
 
255
  if __name__ == "__main__":
256
  data = load_benchmark_data("./results.json")
257
+
258
  leaderboard = KazTEBLeaderboard(data)
259
+
260
  demo = leaderboard.create_interface()
261
  demo.launch()
262