Spaces:
Running
Running
Add export functionality and polish user interface
Browse files- README.md +35 -12
- src/App.css +87 -0
- src/App.jsx +111 -0
README.md
CHANGED
@@ -1,19 +1,42 @@
|
|
1 |
-
# clip-tagger
|
2 |
|
3 |
-
Custom audio tagging in the browser using CLAP (Contrastive Language-Audio Pre-training)
|
4 |
|
5 |
-
|
6 |
|
7 |
-
|
8 |
-
- Local CLAP model for automatic tag generation
|
9 |
-
- User-correctable tags with personalized learning
|
10 |
-
- Lightweight classifier that adapts to your domain
|
11 |
-
- Runs entirely in the browser with JavaScript/WASM
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
|
17 |
-
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🎵 clip-tagger
|
2 |
|
3 |
+
> Custom audio tagging in the browser using CLAP (Contrastive Language-Audio Pre-training)
|
4 |
|
5 |
+
Instantly tag any audio with AI that learns from your corrections. Upload files or record directly in your browser - everything runs locally, no servers needed.
|
6 |
|
7 |
+
## ✨ Features
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
- **🎤 Audio Input**: Upload files or record directly from your microphone
|
10 |
+
- **🧠 Smart Tagging**: CLAP model identifies speech, music, ambient sounds, and more
|
11 |
+
- **📝 Personalized Learning**: Correct tags and add custom ones - the model adapts to your domain
|
12 |
+
- **💾 Persistent Memory**: Your corrections are saved and improve future predictions
|
13 |
+
- **📁 Export Ready**: Export tagged data and trained models for sharing
|
14 |
+
- **🔒 Privacy First**: Everything runs in your browser - no data leaves your device
|
15 |
|
16 |
+
## 🚀 How It Works
|
17 |
|
18 |
+
1. **Drop an audio file** or click record
|
19 |
+
2. **Review AI-generated tags** with confidence scores
|
20 |
+
3. **Correct tags** with ✓/✗ buttons or add custom tags
|
21 |
+
4. **Watch the model learn** from your feedback in real-time
|
22 |
+
5. **Export results** or share your trained model
|
23 |
|
24 |
+
## 🔧 Technical Details
|
25 |
+
|
26 |
+
- **Model**: [Xenova/clap-htsat-unfused](https://huggingface.co/Xenova/clap-htsat-unfused) (~45MB)
|
27 |
+
- **Framework**: [Transformers.js](https://github.com/xenova/transformers.js) + React
|
28 |
+
- **Storage**: IndexedDB for user feedback and model weights
|
29 |
+
- **Deployment**: Ready for Hugging Face Spaces
|
30 |
+
|
31 |
+
## 🎯 Use Cases
|
32 |
+
|
33 |
+
- Voice memo organization
|
34 |
+
- Music library tagging
|
35 |
+
- Audio content moderation
|
36 |
+
- Podcast categorization
|
37 |
+
- Sound effect libraries
|
38 |
+
- Research datasets
|
39 |
+
|
40 |
+
---
|
41 |
+
|
42 |
+
*Powered by Transformers.js • Runs entirely in your browser*
|
src/App.css
CHANGED
@@ -261,6 +261,93 @@ header p {
|
|
261 |
border-color: #646cff;
|
262 |
}
|
263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
@media (max-width: 768px) {
|
265 |
.app {
|
266 |
padding: 1rem;
|
|
|
261 |
border-color: #646cff;
|
262 |
}
|
263 |
|
264 |
+
/* Panel that holds the export / data-management controls. */
.export-section {
  margin: 3rem 0 2rem 0;
  padding: 2rem;
  border: 1px solid #eee;
  border-radius: 12px;
  background: #fafafa;
}

.export-section h3 {
  margin-bottom: 1rem;
  color: #333;
}

/* Button row; wraps onto multiple lines when space runs out. */
.export-controls {
  display: flex;
  gap: 1rem;
  margin-bottom: 1rem;
  flex-wrap: wrap;
}

/* Geometry and typography shared by the export and clear buttons. */
.export-btn,
.clear-btn {
  color: white;
  border: none;
  padding: 0.75rem 1.5rem;
  border-radius: 8px;
  cursor: pointer;
  font-size: 1rem;
  transition: background 0.3s ease;
}

/* Export actions: blue, darker on hover. */
.export-btn {
  background: #3498db;
}

.export-btn:hover {
  background: #2980b9;
}

/* Destructive action: red, darker on hover. */
.clear-btn {
  background: #e74c3c;
}

.clear-btn:hover {
  background: #c0392b;
}

/* Summary box for the model statistics. */
.model-stats {
  margin-top: 1rem;
  padding: 1rem;
  background: white;
  border-radius: 8px;
  border: 1px solid #ddd;
}

.model-stats p {
  margin: 0.25rem 0;
  color: #666;
  font-size: 0.9rem;
}

/* Page footer with attribution links. */
footer {
  margin-top: 3rem;
  padding: 2rem 0 1rem 0;
  border-top: 1px solid #eee;
  text-align: center;
}

footer p {
  margin: 0;
  color: #888;
  font-size: 0.9rem;
  line-height: 1.5;
}

footer a {
  color: #646cff;
  text-decoration: none;
}

footer a:hover {
  text-decoration: underline;
}
|
350 |
+
|
351 |
@media (max-width: 768px) {
|
352 |
.app {
|
353 |
padding: 1rem;
|
src/App.jsx
CHANGED
@@ -243,6 +243,81 @@ function App() {
|
|
243 |
}
|
244 |
}
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
return (
|
247 |
<div className="app">
|
248 |
<header>
|
@@ -366,7 +441,43 @@ function App() {
|
|
366 |
)}
|
367 |
</div>
|
368 |
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
</main>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
</div>
|
371 |
)
|
372 |
}
|
|
|
243 |
}
|
244 |
}
|
245 |
|
246 |
+
/**
 * Export the personalised model state as a downloadable JSON file.
 *
 * The payload holds the classifier statistics (the key is dropped by
 * JSON.stringify when `localClassifierRef.current` is unset), at most the
 * first 50 stored feedback entries, the custom tags, an ISO-8601 export
 * timestamp and a format version string.
 *
 * Any failure is logged to the console and surfaced through `setError`;
 * nothing is rethrown.
 *
 * @returns {Promise<void>}
 */
const exportModel = async () => {
  // Upper bound on exported feedback entries, to limit the file size.
  const MAX_EXPORTED_FEEDBACK = 50

  try {
    const modelStats = localClassifierRef.current?.getModelStats()
    const feedbackData = await feedbackStoreRef.current.getAudioFeedback()
    const customTagsData = await feedbackStoreRef.current.getCustomTags()

    const exportData = {
      modelStats,
      feedbackData: feedbackData.slice(0, MAX_EXPORTED_FEEDBACK),
      customTags: customTagsData,
      exportDate: new Date().toISOString(),
      version: '1.0'
    }

    const blob = new Blob([JSON.stringify(exportData, null, 2)], {
      type: 'application/json'
    })
    const url = URL.createObjectURL(blob)
    const a = document.createElement('a')
    try {
      a.href = url
      a.download = `clip-tagger-model-${Date.now()}.json`
      document.body.appendChild(a)
      a.click()
    } finally {
      // Always detach the temporary link and release the object URL, even
      // when attaching or clicking the link throws. `remove()` is a no-op
      // for a node that never made it into the document.
      a.remove()
      URL.revokeObjectURL(url)
    }
  } catch (error) {
    console.error('Error exporting model:', error)
    setError('Failed to export model')
  }
}
|
276 |
+
|
277 |
+
/**
 * Export the tags of the current audio clip as a downloadable JSON file.
 *
 * Does nothing when there are no tags. The payload records the audio file
 * name (or 'recorded-audio' when no file name is available), the audio
 * hash, an ISO-8601 timestamp and, per tag, its label, confidence, source
 * (defaulting to 'clap') and user feedback.
 *
 * Any failure is logged to the console and surfaced through `setError`,
 * matching the behaviour of `exportModel`; nothing is rethrown.
 */
const exportTags = () => {
  if (tags.length === 0) return

  try {
    const tagData = {
      audioFile: audioFile?.name || 'recorded-audio',
      audioHash,
      timestamp: new Date().toISOString(),
      tags: tags.map(tag => ({
        label: tag.label,
        confidence: tag.confidence,
        source: tag.source || 'clap',
        userFeedback: tag.userFeedback
      }))
    }

    const blob = new Blob([JSON.stringify(tagData, null, 2)], {
      type: 'application/json'
    })
    const url = URL.createObjectURL(blob)
    const a = document.createElement('a')
    try {
      a.href = url
      a.download = `tags-${audioFile?.name || 'audio'}-${Date.now()}.json`
      document.body.appendChild(a)
      a.click()
    } finally {
      // Always detach the temporary link and release the object URL, even
      // when attaching or clicking the link throws. `remove()` is a no-op
      // for a node that never made it into the document.
      a.remove()
      URL.revokeObjectURL(url)
    }
  } catch (error) {
    console.error('Error exporting tags:', error)
    setError('Failed to export tags')
  }
}
|
304 |
+
|
305 |
+
/**
 * Wipe all stored training data after the user confirms.
 *
 * On confirmation the feedback store is cleared, the local classifier (if
 * present) is asked to clear its model, and the custom-tag, tag, audio-file
 * and error state are reset. A failure is logged and reported through
 * `setError`. Declining the prompt leaves everything untouched.
 *
 * @returns {Promise<void>}
 */
const clearAllData = async () => {
  const userConfirmed = confirm('Are you sure you want to clear all training data? This cannot be undone.')
  if (!userConfirmed) {
    return
  }

  try {
    await feedbackStoreRef.current.clearAllData()
    localClassifierRef.current?.clearModel()

    // Reset UI state only after the persistent data is gone.
    setCustomTags([])
    setTags([])
    setAudioFile(null)
    setError(null)
  } catch (clearError) {
    console.error('Error clearing data:', clearError)
    setError('Failed to clear data')
  }
}
|
320 |
+
|
321 |
return (
|
322 |
<div className="app">
|
323 |
<header>
|
|
|
441 |
)}
|
442 |
</div>
|
443 |
)}
|
444 |
+
|
445 |
+
{(tags.length > 0 || customTags.length > 0) && (
|
446 |
+
<div className="export-section">
|
447 |
+
<h3>Export & Management</h3>
|
448 |
+
<div className="export-controls">
|
449 |
+
{tags.length > 0 && (
|
450 |
+
<button onClick={exportTags} className="export-btn">
|
451 |
+
π Export Current Tags
|
452 |
+
</button>
|
453 |
+
)}
|
454 |
+
{localClassifierRef.current?.getModelStats().trainedTags > 0 && (
|
455 |
+
<button onClick={exportModel} className="export-btn">
|
456 |
+
π§ Export Trained Model
|
457 |
+
</button>
|
458 |
+
)}
|
459 |
+
<button onClick={clearAllData} className="clear-btn">
|
460 |
+
ποΈ Clear All Data
|
461 |
+
</button>
|
462 |
+
</div>
|
463 |
+
|
464 |
+
{localClassifierRef.current && (
|
465 |
+
<div className="model-stats">
|
466 |
+
<p>Trained tags: {localClassifierRef.current.getModelStats().trainedTags}</p>
|
467 |
+
<p>Custom tags: {customTags.length}</p>
|
468 |
+
</div>
|
469 |
+
)}
|
470 |
+
</div>
|
471 |
+
)}
|
472 |
</main>
|
473 |
+
|
474 |
+
<footer>
|
475 |
+
<p>
|
476 |
+
Powered by <a href="https://github.com/xenova/transformers.js" target="_blank" rel="noopener">Transformers.js</a>
|
477 |
+
{' '} β’ CLAP model: <a href="https://huggingface.co/Xenova/clap-htsat-unfused" target="_blank" rel="noopener">Xenova/clap-htsat-unfused</a>
|
478 |
+
{' '} β’ Everything runs locally in your browser
|
479 |
+
</p>
|
480 |
+
</footer>
|
481 |
</div>
|
482 |
)
|
483 |
}
|