sohei1l committed
Commit ec858b7 Β· Parent: e9a623f

Deploy latest build

assets/index-CHouKU67.js ADDED
(The diff for this file is too large to render.)
 
assets/vite-DcBtz0py-DcBtz0py.svg ADDED
index.html CHANGED
@@ -2,10 +2,10 @@
 <html lang="en">
   <head>
     <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py.svg" />
+    <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py-DcBtz0py.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>🎡 clip-tagger</title>
-    <script type="module" crossorigin src="./assets/index-5J96wndd.js"></script>
+    <script type="module" crossorigin src="./assets/index-CHouKU67.js"></script>
     <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
   </head>
   <body>
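Both asset references change because Vite content-hashes its build outputs; every deploy therefore rewrites index.html to point at the freshly hashed bundle and favicon, which also busts browser caches.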
package.json CHANGED
@@ -7,7 +7,12 @@
     "dev": "vite",
     "build": "vite build",
     "lint": "eslint .",
-    "preview": "vite preview"
+    "preview": "vite preview",
+    "deploy": "npm run build && cp -r dist/* . && git add . && git commit -m 'Deploy latest build' && git push origin master && git push hf master:main",
+    "deploy-github": "git add . && git commit -m 'Update source code' && git push origin master",
+    "deploy-hf": "npm run build && cp -r dist/* . && git add . && git commit -m 'Deploy to HF Spaces' && git push hf master:main",
+    "clean": "rm -rf dist node_modules",
+    "fresh": "npm run clean && npm install && npm run build"
   },
   "dependencies": {
     "@xenova/transformers": "^2.17.2",
src/App.jsx CHANGED
@@ -211,10 +211,31 @@ function App() {
   }
 
   const handleAddCustomTag = async () => {
-    if (!newTag.trim()) return
+    const trimmedTag = newTag.trim().toLowerCase()
+
+    // Validation
+    if (!trimmedTag) {
+      setError('Please enter a tag name')
+      return
+    }
+
+    if (trimmedTag.length < 2) {
+      setError('Tag must be at least 2 characters long')
+      return
+    }
+
+    // Check if tag already exists
+    const existingTag = tags.find(tag => tag.label.toLowerCase() === trimmedTag)
+    if (existingTag) {
+      setError(`Tag "${trimmedTag}" already exists`)
+      return
+    }
+
+    // Clear any previous errors
+    setError(null)
 
     const customTag = {
-      label: newTag.trim(),
+      label: trimmedTag,
       confidence: 1.0,
       userFeedback: 'custom',
       isCustom: true,
@@ -224,23 +245,29 @@ function App() {
     setTags(prev => [...prev, customTag])
 
     try {
-      await feedbackStoreRef.current.saveCustomTag(newTag.trim())
-      await feedbackStoreRef.current.saveTagFeedback(newTag.trim(), 'custom', audioHash)
+      if (feedbackStoreRef.current) {
+        await feedbackStoreRef.current.saveCustomTag(trimmedTag)
+        if (audioHash) {
+          await feedbackStoreRef.current.saveTagFeedback(trimmedTag, 'custom', audioHash)
+        }
+      }
 
       // Train local classifier on custom tag
       if (localClassifierRef.current && audioFeatures) {
        const simpleFeatures = localClassifierRef.current.extractSimpleFeatures(audioFeatures)
        localClassifierRef.current.trainOnFeedback(
          simpleFeatures,
-         newTag.trim(),
+         trimmedTag,
          'custom'
        )
        localClassifierRef.current.saveModel()
      }
 
      loadCustomTags()
+     console.log(`βœ… Added custom tag: "${trimmedTag}"`)
    } catch (error) {
      console.error('Error saving custom tag:', error)
+     setError('Failed to save custom tag')
    }
 
    setNewTag('')
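The checks added to handleAddCustomTag (trim, lowercase, minimum length, case-insensitive duplicate detection) are pure string and array logic, so they are easy to test in isolation. A sketch of the same rules as a standalone helper; validateCustomTag is hypothetical and not part of this commit:

// Hypothetical pure helper mirroring the validation in handleAddCustomTag.
// Returns { ok: true, tag } or { ok: false, error }.
function validateCustomTag(input, existingTags) {
  const trimmedTag = input.trim().toLowerCase();
  if (!trimmedTag) {
    return { ok: false, error: 'Please enter a tag name' };
  }
  if (trimmedTag.length < 2) {
    return { ok: false, error: 'Tag must be at least 2 characters long' };
  }
  if (existingTags.some(tag => tag.label.toLowerCase() === trimmedTag)) {
    return { ok: false, error: `Tag "${trimmedTag}" already exists` };
  }
  return { ok: true, tag: trimmedTag };
}

// Duplicate detection is case-insensitive and ignores surrounding whitespace:
console.log(validateCustomTag('  Guitar ', [{ label: 'guitar' }]));
// -> { ok: false, error: 'Tag "guitar" already exists' }
console.log(validateCustomTag('piano', [{ label: 'guitar' }]));
// -> { ok: true, tag: 'piano' }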
src/clapProcessor.js CHANGED
@@ -2,7 +2,8 @@ import { pipeline } from '@xenova/transformers';
 
 class CLAPProcessor {
   constructor() {
-    this.pipeline = null;
+    this.classifier = null;
+    this.isInitialized = false;
     this.defaultLabels = [
       'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
       'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
@@ -18,75 +19,136 @@ class CLAPProcessor {
   }
 
   async initialize() {
-    if (this.pipeline) return;
+    if (this.isInitialized) return;
 
     try {
-      console.log('Loading CLAP model...');
-      // Use the pipeline API which is more stable
-      this.pipeline = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
-      console.log('CLAP model loaded successfully');
+      console.log('πŸ”„ Loading CLAP model (this may take a moment)...');
+
+      // Create a zero-shot audio classification pipeline
+      this.classifier = await pipeline(
+        'zero-shot-audio-classification',
+        'Xenova/clap-htsat-unfused',
+        {
+          // Optional: specify device and other configs
+          device: 'webgpu', // fallback to cpu if webgpu not available
+        }
+      );
+
+      this.isInitialized = true;
+      console.log('βœ… CLAP model loaded successfully!');
     } catch (error) {
-      console.error('Failed to load CLAP model:', error);
-      throw error;
+      console.error('❌ Failed to load CLAP model:', error);
+      throw new Error(`Failed to initialize CLAP model: ${error.message}`);
     }
   }
 
   async processAudio(audioBuffer) {
-    if (!this.pipeline) {
+    console.log('🎡 Starting audio processing...');
+
+    if (!this.isInitialized) {
       await this.initialize();
     }
 
     try {
-      // Convert audio to the format expected by the model
-      const audio = await this.preprocessAudio(audioBuffer);
-
-      console.log('Processing audio with CLAP...');
-
-      // Use the pipeline for zero-shot classification
-      const results = await this.pipeline(audio, this.defaultLabels);
-
-      console.log('CLAP results:', results);
-
-      // Transform results to our format
-      const tags = results.slice(0, 5).map(result => ({
-        label: result.label,
-        confidence: result.score
-      }));
-
-      return tags;
+      // Convert AudioBuffer to the format expected by the model
+      const audioData = this.extractAudioData(audioBuffer);
+
+      console.log('πŸ” Classifying audio with', this.defaultLabels.length, 'possible labels...');
+
+      // Run zero-shot classification
+      const results = await this.classifier(audioData, this.defaultLabels);
+
+      console.log('🎯 Raw CLAP results:', results);
+
+      // Process and return top results
+      const processedTags = this.processResults(results);
+      console.log('πŸ“ Processed tags:', processedTags);
+
+      return processedTags;
+
     } catch (error) {
-      console.error('Error processing audio:', error);
-      throw error;
+      console.error('❌ Error during audio processing:', error);
+      throw new Error(`Audio processing failed: ${error.message}`);
     }
   }
 
-  async preprocessAudio(audioBuffer) {
-    // Convert to mono and get raw audio data
-    let audioData;
-    if (audioBuffer.numberOfChannels > 1) {
-      // Convert stereo to mono by averaging channels
+  extractAudioData(audioBuffer) {
+    console.log('πŸ”§ Converting audio buffer:', {
+      duration: audioBuffer.duration,
+      sampleRate: audioBuffer.sampleRate,
+      channels: audioBuffer.numberOfChannels
+    });
+
+    // Get audio data - convert to mono if needed
+    let audioArray;
+    if (audioBuffer.numberOfChannels === 1) {
+      audioArray = audioBuffer.getChannelData(0);
+    } else {
+      // Average multiple channels to mono
       const channel1 = audioBuffer.getChannelData(0);
       const channel2 = audioBuffer.getChannelData(1);
-      audioData = new Float32Array(channel1.length);
+      audioArray = new Float32Array(channel1.length);
       for (let i = 0; i < channel1.length; i++) {
-        audioData[i] = (channel1[i] + channel2[i]) / 2;
+        audioArray[i] = (channel1[i] + channel2[i]) / 2;
       }
-    } else {
-      audioData = audioBuffer.getChannelData(0);
     }
-
-    // Return the audio data with sample rate info
+
+    // Return in the format expected by transformers.js
    return {
-      data: audioData,
+      raw: audioArray,
      sampling_rate: audioBuffer.sampleRate
    };
  }
 
+  processResults(results) {
+    // Ensure we have results and they're in the expected format
+    if (!results || !Array.isArray(results)) {
+      console.warn('⚠️ Unexpected results format:', results);
+      return this.getFallbackTags();
+    }
+
+    // Sort by confidence and take top 5
+    const sortedResults = results
+      .sort((a, b) => b.score - a.score)
+      .slice(0, 5);
+
+    // Convert to our tag format
+    const tags = sortedResults.map(result => ({
+      label: result.label,
+      confidence: Math.max(0, Math.min(1, result.score)) // Clamp between 0 and 1
+    }));
+
+    // Ensure we have at least some tags
+    if (tags.length === 0) {
+      return this.getFallbackTags();
+    }
+
+    return tags;
+  }
+
+  getFallbackTags() {
+    return [
+      { label: 'audio', confidence: 0.9 },
+      { label: 'sound', confidence: 0.8 },
+      { label: 'recording', confidence: 0.7 }
+    ];
+  }
+
   // Convert file to AudioBuffer
   async fileToAudioBuffer(file) {
-    const arrayBuffer = await file.arrayBuffer();
-    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
-    return await audioContext.decodeAudioData(arrayBuffer);
+    console.log('πŸ“ Processing file:', file.name, 'Size:', Math.round(file.size / 1024), 'KB');
+
+    try {
+      const arrayBuffer = await file.arrayBuffer();
+      const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+      const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
+
+      console.log('βœ… Audio file decoded successfully');
+      return audioBuffer;
+    } catch (error) {
+      console.error('❌ Failed to decode audio file:', error);
+      throw new Error(`Failed to decode audio file: ${error.message}`);
+    }
   }
 }
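Taken together, the refactored class is driven in two calls: decode a File into an AudioBuffer, then classify it. A usage sketch, under the assumption that CLAPProcessor is the default export of src/clapProcessor.js (the export statement falls outside the hunks shown) and that it runs in a browser, since fileToAudioBuffer relies on window.AudioContext:

// Hypothetical wiring for a file input; the export form is assumed.
import CLAPProcessor from './clapProcessor.js';

const processor = new CLAPProcessor();

async function tagFile(file) {
  // Decode the uploaded file in the browser (uses window.AudioContext).
  const audioBuffer = await processor.fileToAudioBuffer(file);
  // Lazily loads the CLAP model on first call, classifies against
  // defaultLabels, and returns the top-5 { label, confidence } tags.
  const tags = await processor.processAudio(audioBuffer);
  for (const { label, confidence } of tags) {
    console.log(`${label}: ${(confidence * 100).toFixed(1)}%`);
  }
}

document.querySelector('input[type="file"]')
  ?.addEventListener('change', (e) => tagFile(e.target.files[0]));

Note that keeping isInitialized separate from the classifier reference lets initialize() be awaited repeatedly without reloading the model, and the getFallbackTags() path means processAudio degrades to generic tags instead of returning an empty list.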