ping98k committed on
Commit
12c4198
·
1 Parent(s): 46bbd3d

Refactor K-Means clustering implementation; modularize embedding and clustering logic, enhance heatmap and scatter plot functions, and improve cluster naming process.

Browse files
Files changed (5) hide show
  1. cluster_naming.js +34 -0
  2. clustering.js +68 -0
  3. embedding.js +32 -0
  4. main.js +33 -164
  5. plotting.js +35 -0
cluster_naming.js ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AutoTokenizer, AutoModelForCausalLM } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.6.0';
2
+ import { prompt_cluster } from "./prompt_cluster.js";
3
+
4
// Checkpoint used for both the tokenizer and the causal LM that names clusters.
const NAMING_MODEL_ID = "onnx-community/Qwen3-0.6B-ONNX";

// The tokenizer and the model weights are independent downloads, so both
// loads are started before either one is awaited.
const [tokenizer, model] = await Promise.all([
  AutoTokenizer.from_pretrained(NAMING_MODEL_ID),
  AutoModelForCausalLM.from_pretrained(NAMING_MODEL_ID, { device: "webgpu", dtype: "q4f16" }),
]);
6
+
7
/**
 * Ask the naming model for a label describing one cluster of lines.
 *
 * The lines are joined with newlines and sent as the user turn of a chat whose
 * system message is `prompt_cluster`. The output is decoded with special
 * tokens kept, so the answer can be cut out of the text: everything up to the
 * last `</think>` is dropped, then everything from the next `<|im_end|>` on.
 *
 * @param {string[]} lines - Text lines that belong to the cluster.
 * @returns {Promise<string>} The trimmed name. Empty when `lines` is empty
 *   (the model is not run in that case) or when the model produced no text.
 */
export async function nameCluster(lines) {
  const THINK_TAG = "</think>";
  const END_TAG = "<|im_end|>";

  // An empty cluster has nothing to describe; skip the (expensive) generation
  // and hand back an empty name so the caller can substitute its own label.
  if (lines.length === 0) {
    return "";
  }

  const messages = [
    { role: "system", content: prompt_cluster },
    { role: "user", content: `Input:\n${lines.join("\n")}\nOutput:` },
  ];
  const inputs = tokenizer.apply_chat_template(messages, {
    add_generation_prompt: true,
    return_dict: true,
    enable_thinking: false,
  });
  const outputTokens = await model.generate({
    ...inputs,
    max_new_tokens: 1024,
    do_sample: true,
    temperature: 0.6,
  });

  // Special tokens are kept on purpose: they are the markers used below.
  const decoded = tokenizer.decode(outputTokens[0], { skip_special_tokens: false }).trim();

  const thinkAt = decoded.lastIndexOf(THINK_TAG);
  const answer = thinkAt === -1 ? decoded : decoded.slice(thinkAt + THINK_TAG.length).trim();

  const endAt = answer.indexOf(END_TAG);
  return endAt === -1 ? answer : answer.slice(0, endAt).trim();
}
clustering.js ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { UMAP } from "https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm";
2
+
3
/**
 * Lloyd's k-means over dense vectors, using squared Euclidean distance.
 *
 * Seeding picks distinct input points (as far as `n` allows) so two clusters
 * never start on the same point. A cluster that ends a pass with no members is
 * re-seeded at the point farthest from every populated centroid, and that
 * re-seed forces another pass so the new centroid can actually attract points.
 *
 * @param {ArrayLike<number>[]} embeddings - Input vectors, all of equal length.
 * @param {number} k - Number of clusters; must be a positive integer.
 * @param {number} [maxIter=100] - Upper bound on assignment/update passes.
 * @returns {{labels: number[], centroids: ArrayLike<number>[]}} `labels[i]` is
 *   the cluster index of `embeddings[i]`; `centroids` has `k` entries. Both
 *   are empty when `embeddings` is empty.
 * @throws {RangeError} If `k` is not a positive integer.
 */
export function kmeans(embeddings, k, maxIter = 100) {
  const n = embeddings.length;
  if (n === 0) {
    return { labels: [], centroids: [] };
  }
  if (!Number.isInteger(k) || k < 1) {
    throw new RangeError(`kmeans: k must be a positive integer, got ${k}`);
  }
  const dim = embeddings[0].length;

  const squaredDistance = (a, b) => {
    let total = 0;
    for (let d = 0; d < dim; ++d) {
      total += (a[d] - b[d]) ** 2;
    }
    return total;
  };

  // Partial Fisher-Yates shuffle: the first min(k, n) entries of `order` are
  // distinct point indices, so the initial centroids do not coincide by index.
  const order = Array.from({ length: n }, (_, i) => i);
  const distinctSeeds = Math.min(k, n);
  for (let i = 0; i < distinctSeeds; ++i) {
    const j = i + Math.floor(Math.random() * (n - i));
    [order[i], order[j]] = [order[j], order[i]];
  }
  let centroids = Array.from({ length: k }, (_, c) => {
    // With more clusters than points the surplus seeds must reuse a point.
    const idx = c < n ? order[c] : Math.floor(Math.random() * n);
    return embeddings[idx].slice();
  });

  const labels = new Array(n).fill(0);

  for (let iter = 0; iter < maxIter; ++iter) {
    let changed = false;

    // Assignment step: each point joins its nearest centroid (lowest index wins ties).
    for (let i = 0; i < n; ++i) {
      let best = 0;
      let bestDist = Infinity;
      for (let c = 0; c < k; ++c) {
        const dist = squaredDistance(embeddings[i], centroids[c]);
        if (dist < bestDist) {
          bestDist = dist;
          best = c;
        }
      }
      if (labels[i] !== best) {
        labels[i] = best;
        changed = true;
      }
    }

    // Update step: every populated centroid becomes the mean of its members.
    const counts = new Array(k).fill(0);
    centroids = Array.from({ length: k }, () => new Array(dim).fill(0));
    for (let i = 0; i < n; ++i) {
      const c = labels[i];
      counts[c]++;
      for (let d = 0; d < dim; ++d) {
        centroids[c][d] += embeddings[i][d];
      }
    }
    for (let c = 0; c < k; ++c) {
      if (counts[c] > 0) {
        for (let d = 0; d < dim; ++d) {
          centroids[c][d] /= counts[c];
        }
      }
    }

    // Re-seed empty clusters only after ALL means are final, and measure
    // distances against populated (or already re-seeded) centroids only, so
    // the placeholder zero vectors of empty clusters never take part.
    const active = counts.map((count) => count > 0);
    for (let c = 0; c < k; ++c) {
      if (active[c]) continue;
      let farthestIdx = 0;
      let farthestDist = -1;
      for (let i = 0; i < n; ++i) {
        let nearest = Infinity;
        for (let j = 0; j < k; ++j) {
          if (!active[j]) continue;
          const dist = squaredDistance(embeddings[i], centroids[j]);
          if (dist < nearest) nearest = dist;
        }
        if (nearest > farthestDist) {
          farthestDist = nearest;
          farthestIdx = i;
        }
      }
      centroids[c] = embeddings[farthestIdx].slice();
      active[c] = true;
      // The labels do not reflect the new centroid yet: run another pass.
      changed = true;
    }

    if (!changed) break;
  }

  return { labels, centroids };
}
64
+
65
/**
 * Project embeddings to 2-D with UMAP (`minDist` 0.1).
 *
 * @param {number[][]} embeddings - One vector per point.
 * @param {number} [nNeighbors=15] - Requested neighborhood size. It is capped
 *   at `embeddings.length - 1` (never below 1).
 * @returns {number[][]} Whatever `UMAP#fit` returns for the input; expected to
 *   be one `[x, y]` pair per point.
 */
export function runUMAP(embeddings, nNeighbors = 15) {
  // NOTE(review): umap-js is understood to reject a fit when the point count
  // does not exceed nNeighbors, so small inputs need a smaller neighborhood.
  const maxNeighbors = Math.max(1, embeddings.length - 1);
  const umap = new UMAP({
    nComponents: 2,
    nNeighbors: Math.min(nNeighbors, maxNeighbors),
    minDist: 0.1,
  });
  return umap.fit(embeddings);
}
embedding.js ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.6.0';
2
+
3
// Feature-extraction pipeline used by the embedding helpers in this module.
const EMBEDDING_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX";
const EMBEDDING_OPTIONS = { device: "webgpu", dtype: "q4f16" };
const embed = await pipeline("feature-extraction", EMBEDDING_MODEL_ID, EMBEDDING_OPTIONS);
8
+
9
/**
 * Compute one averaged embedding per text group.
 *
 * Each group is split into lines; lines are trimmed, and blank lines and lines
 * starting with `##` are dropped. The remaining lines are embedded with an
 * `Instruct: <task>\nQuery:<line>` prompt, and the per-line vectors are
 * averaged component-wise.
 *
 * Groups that contain no content lines are skipped, so the result can be
 * shorter than `groups`.
 *
 * @param {string[]} groups - Raw text of each group.
 * @param {string} task - Instruction text placed in every prompt.
 * @returns {Promise<Float32Array[]>} One mean vector per non-empty group, in
 *   input order.
 */
export async function getGroupEmbeddings(groups, task) {
  const groupEmbeddings = [];
  for (const group of groups) {
    const lines = group
      .split(/\n/)
      .map((line) => line.trim())
      .filter((line) => line && !line.startsWith('##'));

    // A blank or header-only group has nothing to embed; averaging zero
    // vectors is undefined, so leave it out instead of failing.
    if (lines.length === 0) continue;

    const prompts = lines.map((s) => `Instruct: ${task}\nQuery:${s}`);
    const out = await embed(prompts, { pooling: "mean", normalize: true });
    const embeddings = typeof out.tolist === 'function' ? out.tolist() : out.data;

    const dim = embeddings[0].length;
    const avg = new Float32Array(dim);
    for (const vector of embeddings) {
      for (let i = 0; i < dim; i++) avg[i] += vector[i];
    }
    for (let i = 0; i < dim; i++) avg[i] /= embeddings.length;
    groupEmbeddings.push(avg);
  }
  return groupEmbeddings;
}
27
+
28
/**
 * Embed each line individually with an `Instruct: <task>\nQuery:<line>` prompt.
 *
 * @param {string[]} lines - Lines to embed.
 * @param {string} task - Instruction text placed in every prompt.
 * @returns {Promise<number[][]>} One vector per line, in input order; an empty
 *   array when `lines` is empty (the pipeline is not invoked).
 */
export async function getLineEmbeddings(lines, task) {
  // Do not run the pipeline on an empty batch; callers treat "no embeddings"
  // as a normal outcome.
  if (lines.length === 0) {
    return [];
  }
  const prompts = lines.map((s) => `Instruct: ${task}\nQuery:${s}`);
  const out = await embed(prompts, { pooling: "mean", normalize: true });
  return typeof out.tolist === 'function' ? out.tolist() : out.data;
}
main.js CHANGED
@@ -1,44 +1,27 @@
1
- import { pipeline, TextStreamer, AutoTokenizer, AutoModelForCausalLM } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.6.0';
2
- import { UMAP } from "https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm";
3
- import { prompt_cluster } from "./prompt_cluster.js"
4
- const embed = await pipeline(
5
- "feature-extraction",
6
- "onnx-community/Qwen3-Embedding-0.6B-ONNX",
7
- { device: "webgpu", dtype: "q4f16" },
8
- );
9
- const tokenizer = await AutoTokenizer.from_pretrained("onnx-community/Qwen3-0.6B-ONNX");
10
- const model = await AutoModelForCausalLM.from_pretrained("onnx-community/Qwen3-0.6B-ONNX", { device: "webgpu", dtype: "q4f16" });
11
 
12
  const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";
13
 
 
 
 
 
14
  document.getElementById("run").onclick = async () => {
15
  const text = document.getElementById("input").value;
16
-
17
  const groups = text.split(/\n{3,}/);
18
-
19
  // Extract cluster names from lines starting with ##
20
  const clusterNames = text.split(/\n/)
21
  .map(x => x.trim())
22
  .filter(x => x && x.startsWith('##'))
23
  .map(x => x.replace(/^##\s*/, ''));
24
-
25
-
26
- const groupEmbeddings = [];
27
- for (const g of groups) {
28
- // Remove lines starting with ##
29
- const lines = g.split(/\n/)
30
- .map(x => x.trim())
31
- .filter(x => x && !x.startsWith('##'));
32
- const prompts = lines.map(s => `Instruct: ${task}\nQuery:${s}`);
33
- const out = await embed(prompts, { pooling: "mean", normalize: true });
34
- const embeddings = typeof out.tolist === 'function' ? out.tolist() : out.data;
35
- const dim = embeddings[0].length;
36
- const avg = new Float32Array(dim);
37
- for (const e of embeddings) { for (let i = 0; i < dim; i++) avg[i] += e[i]; }
38
- for (let i = 0; i < dim; i++) avg[i] /= embeddings.length;
39
- groupEmbeddings.push(avg);
40
- }
41
  const n = groupEmbeddings.length;
 
42
  const sim = [];
43
  for (let i = 0; i < n; i++) {
44
  const row = [];
@@ -53,104 +36,39 @@ document.getElementById("run").onclick = async () => {
53
  }
54
  sim.push(row);
55
  }
56
- // If clusterNames exist and match group count, use as axis labels
57
  let xLabels = clusterNames && clusterNames.length === n ? clusterNames : Array.from({ length: n }, (_, i) => `Group ${i + 1}`);
58
- const data = [{ z: sim, type: "heatmap", colorscale: "Viridis", zmin: 0.7, zmax: 1, x: xLabels, y: xLabels }];
59
- Plotly.newPlot("plot-heatmap", data, {
60
- xaxis: { title: "Group", scaleanchor: "y", scaleratio: 1 },
61
- yaxis: { title: "Group", scaleanchor: "x", scaleratio: 1 },
62
- width: 500,
63
- height: 500,
64
- margin: { t: 40, l: 200, r: 10, b: 200 },
65
- title: "Group Similarity Heatmap"
66
- });
67
  };
68
 
 
 
69
  document.getElementById("kmeans-btn").onclick = async () => {
70
  const progressBar = document.getElementById("progress-bar");
71
  const progressBarInner = document.getElementById("progress-bar-inner");
72
  progressBar.style.display = "block";
73
- progressBarInner.style.width = "0%"; // Set to 0% at the start
74
 
75
  const text = document.getElementById("input").value;
 
76
  const lines = text.split(/\n/).map(x => x.trim()).filter(x => x && !x.startsWith("##"));
77
- const prompts = lines.map(s => `Instruct: ${task}\nQuery:${s}`);
78
- const out = await embed(prompts, { pooling: "mean", normalize: true });
79
- const embeddings = typeof out.tolist === "function" ? out.tolist() : out.data;
80
-
81
  const n = embeddings.length;
82
  if (n < 2) return;
83
-
84
  const requestedK = parseInt(document.getElementById("kmeans-k").value) || 3;
85
  const k = Math.max(2, Math.min(requestedK, n));
86
- const dim = embeddings[0].length;
87
-
88
- let centroids = Array.from({ length: k }, () => embeddings[Math.floor(Math.random() * n)].slice());
89
- let labels = new Array(n).fill(0);
90
-
91
- const reseed = () => {
92
- let bestIdx = 0, bestDist = -1;
93
- for (let i = 0; i < n; ++i) {
94
- let minDist = Infinity;
95
- for (let c = 0; c < k; ++c) {
96
- let dist = 0;
97
- for (let d = 0; d < dim; ++d)
98
- dist += (embeddings[i][d] - centroids[c][d]) ** 2;
99
- if (dist < minDist) minDist = dist;
100
- }
101
- if (minDist > bestDist) {
102
- bestDist = minDist;
103
- bestIdx = i;
104
- }
105
- }
106
- return embeddings[bestIdx].slice();
107
- };
108
-
109
- for (let iter = 0; iter < 100; ++iter) {
110
- let changed = false;
111
- for (let i = 0; i < n; ++i) {
112
- let best = 0, bestDist = Infinity;
113
- for (let c = 0; c < k; ++c) {
114
- let dist = 0;
115
- for (let d = 0; d < dim; ++d)
116
- dist += (embeddings[i][d] - centroids[c][d]) ** 2;
117
- if (dist < bestDist) {
118
- bestDist = dist;
119
- best = c;
120
- }
121
- }
122
- if (labels[i] !== best) {
123
- labels[i] = best;
124
- changed = true;
125
- }
126
- }
127
-
128
- centroids = Array.from({ length: k }, () => new Array(dim).fill(0));
129
- const counts = new Array(k).fill(0);
130
- for (let i = 0; i < n; ++i) {
131
- counts[labels[i]]++;
132
- for (let d = 0; d < dim; ++d)
133
- centroids[labels[i]][d] += embeddings[i][d];
134
- }
135
- for (let c = 0; c < k; ++c) {
136
- if (counts[c] === 0) {
137
- centroids[c] = reseed();
138
- } else {
139
- for (let d = 0; d < dim; ++d)
140
- centroids[c][d] /= counts[c];
141
- }
142
- }
143
- if (!changed) break;
144
- }
145
-
146
  const nNeighbors = Math.max(1, Math.min(lines.length - 1, 15));
147
  const umap = new UMAP({ nComponents: 2, nNeighbors, minDist: 0.1 });
148
  const proj = umap.fit(embeddings);
149
-
150
  const clustered = Array.from({ length: k }, () => []);
151
  for (let i = 0; i < n; ++i)
152
  clustered[labels[i]].push(lines[i]);
153
-
154
  const colors = ["red", "blue", "green", "orange", "purple", "cyan", "magenta", "yellow", "brown", "black", "lime", "navy", "teal", "olive", "maroon", "pink", "gray", "gold", "aqua", "indigo"];
155
  const placeholderNames = Array.from({ length: k }, (_, c) => `Cluster ${c + 1}`);
156
  const traces = Array.from({ length: k }, (_, c) => ({
@@ -164,70 +82,21 @@ document.getElementById("kmeans-btn").onclick = async () => {
164
  traces[labels[i]].y.push(proj[i][1]);
165
  traces[labels[i]].text.push(lines[i]);
166
  }
167
- Plotly.newPlot("plot-scatter", traces, {
168
- xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
169
- yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
170
- width: 1000,
171
- height: 500,
172
- margin: { t: 40, l: 40, r: 10, b: 40 },
173
- title: `K-Means Clustering (k=${k})`,
174
- legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" }
175
- });
176
-
177
  const clusterNames = [];
178
  for (let c = 0; c < k; ++c) {
179
  progressBarInner.style.width = `${Math.round(((c + 1) / k) * 100)}%`;
180
-
181
- const joined = clustered[c].join("\n");
182
- const messages = [
183
- { role: "system", content: prompt_cluster },
184
- { role: "user", content: `Input:\n${joined}\nOutput:` }
185
- ];
186
-
187
- const inputs = tokenizer.apply_chat_template(messages, {
188
- add_generation_prompt: true,
189
- return_dict: true,
190
- enable_thinking: false,
191
- });
192
-
193
- const outputTokens = await model.generate({
194
- ...inputs,
195
- max_new_tokens: 1024,
196
- do_sample: true,
197
- temperature: 0.6
198
- });
199
-
200
- let rawName = tokenizer.decode(outputTokens[0], { skip_special_tokens: false }).trim();
201
-
202
- const THINK_TAG = "</think>";
203
- const END_TAG = "<|im_end|>";
204
-
205
- if (rawName.includes(THINK_TAG)) {
206
- rawName = rawName.substring(rawName.lastIndexOf(THINK_TAG) + THINK_TAG.length).trim();
207
- }
208
- if (rawName.includes(END_TAG)) {
209
- rawName = rawName.substring(0, rawName.indexOf(END_TAG)).trim();
210
- }
211
-
212
- clusterNames.push(rawName || `Cluster ${c + 1}`);
213
  traces[c].name = clusterNames[c];
214
-
215
- Plotly.react("plot-scatter", traces, {
216
- xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
217
- yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
218
- width: 1000,
219
- height: 500,
220
- margin: { t: 40, l: 40, r: 10, b: 40 },
221
- title: `K-Means Clustering (k=${k})`,
222
- legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" }
223
- });
224
-
225
  document.getElementById("input").value = clustered.map((g, i) =>
226
  `## ${clusterNames[i]}\n${g.join("\n")}`
227
  ).join("\n\n\n");
228
-
229
  document.getElementById("run").onclick();
230
  }
231
-
232
- progressBarInner.style.width = "100%"; // Set to 100% after all clusters are named
233
  };
 
1
+ import { getGroupEmbeddings, getLineEmbeddings } from './embedding.js';
2
+ import { kmeans } from './clustering.js';
3
+ import { plotHeatmap, plotScatter, updateScatter } from './plotting.js';
4
+ import { nameCluster } from './cluster_naming.js';
5
+ import { prompt_cluster } from './prompt_cluster.js';
 
 
 
 
 
6
 
7
  const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";
8
 
9
+ // Heatmap event
10
+ // Handles group similarity heatmap
11
+ // Uses group-level embeddings
12
+
13
  document.getElementById("run").onclick = async () => {
14
  const text = document.getElementById("input").value;
 
15
  const groups = text.split(/\n{3,}/);
 
16
  // Extract cluster names from lines starting with ##
17
  const clusterNames = text.split(/\n/)
18
  .map(x => x.trim())
19
  .filter(x => x && x.startsWith('##'))
20
  .map(x => x.replace(/^##\s*/, ''));
21
+ // Get group embeddings (removes ## lines internally)
22
+ const groupEmbeddings = await getGroupEmbeddings(groups, task);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  const n = groupEmbeddings.length;
24
+ // Cosine similarity matrix
25
  const sim = [];
26
  for (let i = 0; i < n; i++) {
27
  const row = [];
 
36
  }
37
  sim.push(row);
38
  }
39
+ // Use cluster names as axis labels if available
40
  let xLabels = clusterNames && clusterNames.length === n ? clusterNames : Array.from({ length: n }, (_, i) => `Group ${i + 1}`);
41
+ plotHeatmap(sim, xLabels, xLabels);
 
 
 
 
 
 
 
 
42
  };
43
 
44
+ // K-Means + UMAP + Cluster Naming event
45
+
46
  document.getElementById("kmeans-btn").onclick = async () => {
47
  const progressBar = document.getElementById("progress-bar");
48
  const progressBarInner = document.getElementById("progress-bar-inner");
49
  progressBar.style.display = "block";
50
+ progressBarInner.style.width = "0%";
51
 
52
  const text = document.getElementById("input").value;
53
+ // Remove ## lines for embedding
54
  const lines = text.split(/\n/).map(x => x.trim()).filter(x => x && !x.startsWith("##"));
55
+ const embeddings = await getLineEmbeddings(lines, task);
 
 
 
56
  const n = embeddings.length;
57
  if (n < 2) return;
 
58
  const requestedK = parseInt(document.getElementById("kmeans-k").value) || 3;
59
  const k = Math.max(2, Math.min(requestedK, n));
60
+ // K-Means clustering
61
+ const { labels } = kmeans(embeddings, k);
62
+ // UMAP projection
63
+ const { UMAP } = await import('https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  const nNeighbors = Math.max(1, Math.min(lines.length - 1, 15));
65
  const umap = new UMAP({ nComponents: 2, nNeighbors, minDist: 0.1 });
66
  const proj = umap.fit(embeddings);
67
+ // Group lines by cluster
68
  const clustered = Array.from({ length: k }, () => []);
69
  for (let i = 0; i < n; ++i)
70
  clustered[labels[i]].push(lines[i]);
71
+ // Prepare scatter plot traces
72
  const colors = ["red", "blue", "green", "orange", "purple", "cyan", "magenta", "yellow", "brown", "black", "lime", "navy", "teal", "olive", "maroon", "pink", "gray", "gold", "aqua", "indigo"];
73
  const placeholderNames = Array.from({ length: k }, (_, c) => `Cluster ${c + 1}`);
74
  const traces = Array.from({ length: k }, (_, c) => ({
 
82
  traces[labels[i]].y.push(proj[i][1]);
83
  traces[labels[i]].text.push(lines[i]);
84
  }
85
+ plotScatter(traces, k);
86
+ // Cluster naming
 
 
 
 
 
 
 
 
87
  const clusterNames = [];
88
  for (let c = 0; c < k; ++c) {
89
  progressBarInner.style.width = `${Math.round(((c + 1) / k) * 100)}%`;
90
+ const name = await nameCluster(clustered[c]);
91
+ clusterNames.push(name || `Cluster ${c + 1}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  traces[c].name = clusterNames[c];
93
+ updateScatter(traces, k);
94
+ // Update textarea with cluster names as markdown headers
 
 
 
 
 
 
 
 
 
95
  document.getElementById("input").value = clustered.map((g, i) =>
96
  `## ${clusterNames[i]}\n${g.join("\n")}`
97
  ).join("\n\n\n");
98
+ // Update heatmap with new cluster names
99
  document.getElementById("run").onclick();
100
  }
101
+ progressBarInner.style.width = "100%";
 
102
  };
plotting.js ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Draw the group-similarity heatmap into the `plot-heatmap` element.
 *
 * @param {number[][]} sim - Similarity matrix passed to Plotly as `z`.
 * @param {string[]} xLabels - Labels for the x axis.
 * @param {string[]} [yLabels=xLabels] - Labels for the y axis.
 * @param {{zmin?: number, zmax?: number}} [range] - Color-scale bounds.
 *   Defaults to 0.7–1; pass a wider range when the similarities of interest
 *   fall below 0.7.
 * @returns {void}
 */
export function plotHeatmap(sim, xLabels, yLabels = xLabels, { zmin = 0.7, zmax = 1 } = {}) {
  const heatmapTrace = {
    z: sim,
    type: "heatmap",
    colorscale: "Viridis",
    zmin,
    zmax,
    x: xLabels,
    y: yLabels,
  };
  const layout = {
    // Anchoring each axis to the other keeps the cells square.
    xaxis: { title: "Group", scaleanchor: "y", scaleratio: 1 },
    yaxis: { title: "Group", scaleanchor: "x", scaleratio: 1 },
    width: 500,
    height: 500,
    margin: { t: 40, l: 200, r: 10, b: 200 },
    title: "Group Similarity Heatmap",
  };
  Plotly.newPlot("plot-heatmap", [heatmapTrace], layout);
}
12
+
13
/**
 * Build the layout shared by the initial scatter plot and its later updates,
 * so the two can never drift apart.
 *
 * @param {number} k - Cluster count shown in the title.
 * @returns {object} A fresh Plotly layout object.
 */
function scatterLayout(k) {
  return {
    xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
    yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
    width: 1000,
    height: 500,
    margin: { t: 40, l: 40, r: 10, b: 40 },
    title: `K-Means Clustering (k=${k})`,
    legend: { x: 1.05, y: 0.5, orientation: "v", xanchor: "left", yanchor: "middle" },
  };
}

/**
 * Create the cluster scatter plot in the `plot-scatter` element.
 *
 * @param {object[]} traces - Plotly traces, one per cluster.
 * @param {number} k - Cluster count shown in the title.
 * @returns {void}
 */
export function plotScatter(traces, k) {
  Plotly.newPlot("plot-scatter", traces, scatterLayout(k));
}

/**
 * Refresh the existing scatter plot in `plot-scatter` via `Plotly.react`,
 * e.g. after trace names have changed.
 *
 * @param {object[]} traces - Plotly traces, one per cluster.
 * @param {number} k - Cluster count shown in the title.
 * @returns {void}
 */
export function updateScatter(traces, k) {
  Plotly.react("plot-scatter", traces, scatterLayout(k));
}