Spaces:
Running
Running
ping98k
committed on
Commit
·
ef7c02e
1
Parent(s):
fe3f9fe
Update K-Means input range and enhance cluster name extraction logic for improved accuracy
Browse files
- index.html +1 -1
- main.js +36 -25
index.html
CHANGED
@@ -57,7 +57,7 @@
|
|
57 |
document.getElementById("input").value = sentences.join("\n");
|
58 |
</script>
|
59 |
<label for="kmeans-k" style="margin-left:10px;">Clusters:</label>
|
60 |
-
<input id="kmeans-k" type="number" min="2" max="
|
61 |
<button id="kmeans-btn">K-Means Clustering</button>
|
62 |
<button id="run">Similarity Heatmap</button>
|
63 |
<div id="progress-bar">
|
|
|
57 |
document.getElementById("input").value = sentences.join("\n");
|
58 |
</script>
|
59 |
<label for="kmeans-k" style="margin-left:10px;">Clusters:</label>
|
60 |
+
<input id="kmeans-k" type="number" min="2" max="100" value="7" style="width:60px;">
|
61 |
<button id="kmeans-btn">K-Means Clustering</button>
|
62 |
<button id="run">Similarity Heatmap</button>
|
63 |
<div id="progress-bar">
|
main.js
CHANGED
@@ -124,7 +124,7 @@ document.getElementById("kmeans-btn").onclick = async () => {
|
|
124 |
{ role: "system", content: "Given the following texts, provide a short 1-3 word summary in plaintext" },
|
125 |
{ role: "user", content: `${joined}` }
|
126 |
];
|
127 |
-
|
128 |
const inputs = tokenizer.apply_chat_template(messages, {
|
129 |
add_generation_prompt: true,
|
130 |
return_dict: true,
|
@@ -165,30 +165,41 @@ document.getElementById("kmeans-btn").onclick = async () => {
|
|
165 |
max_new_tokens: 1024,
|
166 |
do_sample: true,
|
167 |
temperature: 0.6,
|
168 |
-
streamer,
|
169 |
});
|
170 |
-
let rawName = tokenizer
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
}
|
181 |
-
Plotly.react("plot-scatter", traces, {
|
182 |
-
xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
|
183 |
-
yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
|
184 |
-
width: 1000,
|
185 |
-
height: 500,
|
186 |
-
margin: { t: 40, l: 40, r: 10, b: 40 },
|
187 |
-
title: `K-Means Clustering (k=${k})`,
|
188 |
-
legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" }
|
189 |
-
});
|
190 |
-
// Update textarea: group by cluster, separated by triple newlines
|
191 |
-
document.getElementById("input").value = clustered.map(g => g.join("\n")).join("\n\n\n");
|
192 |
-
// Re-run heatmap after updating textarea
|
193 |
-
document.getElementById("run").onclick();
|
194 |
};
|
|
|
124 |
{ role: "system", content: "Given the following texts, provide a short 1-3 word summary in plaintext" },
|
125 |
{ role: "user", content: `${joined}` }
|
126 |
];
|
127 |
+
|
128 |
const inputs = tokenizer.apply_chat_template(messages, {
|
129 |
add_generation_prompt: true,
|
130 |
return_dict: true,
|
|
|
165 |
max_new_tokens: 1024,
|
166 |
do_sample: true,
|
167 |
temperature: 0.6,
|
168 |
+
// streamer,
|
169 |
});
|
170 |
+
let rawName = tokenizer
|
171 |
+
.decode(outputTokens[0], { skip_special_tokens: false })
|
172 |
+
.trim();
|
173 |
+
|
174 |
+
const THINK_TAG = '</think>';
|
175 |
+
const END_TAG = '<|im_end|>';
|
176 |
+
|
177 |
+
if (rawName.includes(THINK_TAG)) {
|
178 |
+
// take everything after the last </think>
|
179 |
+
rawName = rawName.substring(rawName.lastIndexOf(THINK_TAG) + THINK_TAG.length).trim();
|
180 |
+
}
|
181 |
+
if (rawName.includes(END_TAG)) {
|
182 |
+
// take everything before the first <|im_end|>
|
183 |
+
rawName = rawName.substring(0, rawName.indexOf(END_TAG)).trim();
|
184 |
+
}
|
185 |
+
// use a default if name is empty
|
186 |
+
clusterNames.push(rawName || `Cluster ${c + 1}`);
|
187 |
+
// After all names are generated, update the trace names and render once
|
188 |
+
for (let c = 0; c < k; ++c) {
|
189 |
+
traces[c].name = clusterNames[c];
|
190 |
+
}
|
191 |
+
Plotly.react("plot-scatter", traces, {
|
192 |
+
xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
|
193 |
+
yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
|
194 |
+
width: 1000,
|
195 |
+
height: 500,
|
196 |
+
margin: { t: 40, l: 40, r: 10, b: 40 },
|
197 |
+
title: `K-Means Clustering (k=${k})`,
|
198 |
+
legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" }
|
199 |
+
});
|
200 |
+
// Update textarea: group by cluster, separated by triple newlines
|
201 |
+
document.getElementById("input").value = clustered.map(g => g.join("\n")).join("\n\n\n");
|
202 |
+
// Re-run heatmap after updating textarea
|
203 |
+
document.getElementById("run").onclick();
|
204 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
};
|