ping98k committed on
Commit
ef7c02e
·
1 Parent(s): fe3f9fe

Update K-Means input range and enhance cluster name extraction logic for improved accuracy

Browse files
Files changed (2) hide show
  1. index.html +1 -1
  2. main.js +36 -25
index.html CHANGED
@@ -57,7 +57,7 @@
57
  document.getElementById("input").value = sentences.join("\n");
58
  </script>
59
  <label for="kmeans-k" style="margin-left:10px;">Clusters:</label>
60
- <input id="kmeans-k" type="number" min="2" max="20" value="3" style="width:60px;">
61
  <button id="kmeans-btn">K-Means Clustering</button>
62
  <button id="run">Similarity Heatmap</button>
63
  <div id="progress-bar">
 
57
  document.getElementById("input").value = sentences.join("\n");
58
  </script>
59
  <label for="kmeans-k" style="margin-left:10px;">Clusters:</label>
60
+ <input id="kmeans-k" type="number" min="2" max="100" value="7" style="width:60px;">
61
  <button id="kmeans-btn">K-Means Clustering</button>
62
  <button id="run">Similarity Heatmap</button>
63
  <div id="progress-bar">
main.js CHANGED
@@ -124,7 +124,7 @@ document.getElementById("kmeans-btn").onclick = async () => {
124
  { role: "system", content: "Given the following texts, provide a short 1-3 word summary in plaintext" },
125
  { role: "user", content: `${joined}` }
126
  ];
127
-
128
  const inputs = tokenizer.apply_chat_template(messages, {
129
  add_generation_prompt: true,
130
  return_dict: true,
@@ -165,30 +165,41 @@ document.getElementById("kmeans-btn").onclick = async () => {
165
  max_new_tokens: 1024,
166
  do_sample: true,
167
  temperature: 0.6,
168
- streamer,
169
  });
170
- let rawName = tokenizer.decode(outputTokens[0], { skip_special_tokens: false }).trim();
171
- // Extract cluster name after last '</think>' if present, and remove '<|im_end|>' if present
172
- let idx = rawName.lastIndexOf('</think>');
173
- let name = idx !== -1 ? rawName.slice(idx + 8).trim() : rawName;
174
- if (name.endsWith('<|im_end|>')) name = name.slice(0, -11).trim();
175
- clusterNames.push(name.length > 0 ? name : `Cluster ${c + 1}`);
176
- }
177
- // After all names are generated, update the trace names and render once
178
- for (let c = 0; c < k; ++c) {
179
- traces[c].name = clusterNames[c];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  }
181
- Plotly.react("plot-scatter", traces, {
182
- xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
183
- yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
184
- width: 1000,
185
- height: 500,
186
- margin: { t: 40, l: 40, r: 10, b: 40 },
187
- title: `K-Means Clustering (k=${k})`,
188
- legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" }
189
- });
190
- // Update textarea: group by cluster, separated by triple newlines
191
- document.getElementById("input").value = clustered.map(g => g.join("\n")).join("\n\n\n");
192
- // Re-run heatmap after updating textarea
193
- document.getElementById("run").onclick();
194
  };
 
124
  { role: "system", content: "Given the following texts, provide a short 1-3 word summary in plaintext" },
125
  { role: "user", content: `${joined}` }
126
  ];
127
+
128
  const inputs = tokenizer.apply_chat_template(messages, {
129
  add_generation_prompt: true,
130
  return_dict: true,
 
165
  max_new_tokens: 1024,
166
  do_sample: true,
167
  temperature: 0.6,
168
+ // streamer,
169
  });
170
+ let rawName = tokenizer
171
+ .decode(outputTokens[0], { skip_special_tokens: false })
172
+ .trim();
173
+
174
+ const THINK_TAG = '</think>';
175
+ const END_TAG = '<|im_end|>';
176
+
177
+ if (rawName.includes(THINK_TAG)) {
178
+ // take everything after the last </think>
179
+ rawName = rawName.substring(rawName.lastIndexOf(THINK_TAG) + THINK_TAG.length).trim();
180
+ }
181
+ if (rawName.includes(END_TAG)) {
182
+ // take everything before the first <|im_end|>
183
+ rawName = rawName.substring(0, rawName.indexOf(END_TAG)).trim();
184
+ }
185
+ // use a default if name is empty
186
+ clusterNames.push(rawName || `Cluster ${c + 1}`);
187
+ // After all names are generated, update the trace names and render once
188
+ for (let c = 0; c < k; ++c) {
189
+ traces[c].name = clusterNames[c];
190
+ }
191
+ Plotly.react("plot-scatter", traces, {
192
+ xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
193
+ yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
194
+ width: 1000,
195
+ height: 500,
196
+ margin: { t: 40, l: 40, r: 10, b: 40 },
197
+ title: `K-Means Clustering (k=${k})`,
198
+ legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" }
199
+ });
200
+ // Update textarea: group by cluster, separated by triple newlines
201
+ document.getElementById("input").value = clustered.map(g => g.join("\n")).join("\n\n\n");
202
+ // Re-run heatmap after updating textarea
203
+ document.getElementById("run").onclick();
204
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  };