sohei1l commited on
Commit
ee11310
Β·
1 Parent(s): e6ec02e

Deploy latest build

Browse files
assets/index-BYAX_2b6.js ADDED
The diff for this file is too large to render. See raw diff
 
assets/vite-DcBtz0py-DcBtz0py-DcBtz0py.svg ADDED
index.html CHANGED
@@ -2,10 +2,10 @@
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8" />
5
- <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py-DcBtz0py.svg" />
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <title>🎡 clip-tagger</title>
8
- <script type="module" crossorigin src="./assets/index-CHouKU67.js"></script>
9
  <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
10
  </head>
11
  <body>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py-DcBtz0py-DcBtz0py.svg" />
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <title>🎡 clip-tagger</title>
8
+ <script type="module" crossorigin src="./assets/index-BYAX_2b6.js"></script>
9
  <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
10
  </head>
11
  <body>
package-lock.json CHANGED
@@ -1944,9 +1944,9 @@
1944
  }
1945
  },
1946
  "node_modules/electron-to-chromium": {
1947
- "version": "1.5.159",
1948
- "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.159.tgz",
1949
- "integrity": "sha512-CEvHptWAMV5p6GJ0Lq8aheyvVbfzVrv5mmidu1D3pidoVNkB3tTBsTMVtPJ+rzRK5oV229mCLz9Zj/hNvU8GBA==",
1950
  "dev": true,
1951
  "license": "ISC"
1952
  },
 
1944
  }
1945
  },
1946
  "node_modules/electron-to-chromium": {
1947
+ "version": "1.5.160",
1948
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.160.tgz",
1949
+ "integrity": "sha512-8yQk54/CoCQT8GX3zuxqPBwMAQuIr6dWI/qO8Aah/JAZwB5XmCbEElsqb1n4pzc2vpkTdfc/kbyNPJOjswfbgg==",
1950
  "dev": true,
1951
  "license": "ISC"
1952
  },
src/clapProcessor.js CHANGED
@@ -3,8 +3,8 @@ import { pipeline } from '@xenova/transformers';
3
  class CLAPProcessor {
4
  constructor() {
5
  this.classifier = null;
6
- this.isInitialized = false;
7
- this.defaultLabels = [
8
  'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
9
  'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
10
  'pop music', 'jazz', 'electronic music', 'ambient', 'nature sounds',
@@ -19,135 +19,119 @@ class CLAPProcessor {
19
  }
20
 
21
  async initialize() {
22
- if (this.isInitialized) return;
23
 
24
  try {
25
- console.log('πŸ”„ Loading CLAP model (this may take a moment)...');
26
 
27
- // Create a zero-shot audio classification pipeline
28
  this.classifier = await pipeline(
29
  'zero-shot-audio-classification',
30
- 'Xenova/clap-htsat-unfused',
31
- {
32
- // Optional: specify device and other configs
33
- device: 'webgpu', // fallback to cpu if webgpu not available
34
- }
35
  );
36
 
37
- this.isInitialized = true;
38
- console.log('βœ… CLAP model loaded successfully!');
39
  } catch (error) {
40
- console.error('❌ Failed to load CLAP model:', error);
41
- throw new Error(`Failed to initialize CLAP model: ${error.message}`);
42
  }
43
  }
44
 
45
  async processAudio(audioBuffer) {
46
- console.log('🎡 Starting audio processing...');
47
 
48
- if (!this.isInitialized) {
49
  await this.initialize();
50
  }
51
 
52
  try {
53
- // Convert AudioBuffer to the format expected by the model
54
- const audioData = this.extractAudioData(audioBuffer);
55
 
56
- console.log('πŸ” Classifying audio with', this.defaultLabels.length, 'possible labels...');
57
 
58
- // Run zero-shot classification
59
- const results = await this.classifier(audioData, this.defaultLabels);
60
 
61
- console.log('🎯 Raw CLAP results:', results);
62
 
63
- // Process and return top results
64
- const processedTags = this.processResults(results);
65
- console.log('πŸ“ Processed tags:', processedTags);
66
 
67
- return processedTags;
 
68
 
69
  } catch (error) {
70
- console.error('❌ Error during audio processing:', error);
71
- throw new Error(`Audio processing failed: ${error.message}`);
 
 
 
 
 
 
72
  }
73
  }
74
 
75
- extractAudioData(audioBuffer) {
76
  console.log('πŸ”§ Converting audio buffer:', {
77
- duration: audioBuffer.duration,
78
  sampleRate: audioBuffer.sampleRate,
79
  channels: audioBuffer.numberOfChannels
80
  });
81
 
82
- // Get audio data - convert to mono if needed
83
- let audioArray;
84
  if (audioBuffer.numberOfChannels === 1) {
85
- audioArray = audioBuffer.getChannelData(0);
86
  } else {
87
- // Average multiple channels to mono
88
- const channel1 = audioBuffer.getChannelData(0);
89
- const channel2 = audioBuffer.getChannelData(1);
90
- audioArray = new Float32Array(channel1.length);
91
- for (let i = 0; i < channel1.length; i++) {
92
- audioArray[i] = (channel1[i] + channel2[i]) / 2;
93
  }
94
  }
95
 
96
- // Return in the format expected by transformers.js
97
  return {
98
- raw: audioArray,
99
  sampling_rate: audioBuffer.sampleRate
100
  };
101
  }
102
 
103
- processResults(results) {
104
- // Ensure we have results and they're in the expected format
105
- if (!results || !Array.isArray(results)) {
106
  console.warn('⚠️ Unexpected results format:', results);
107
- return this.getFallbackTags();
 
 
 
108
  }
109
 
110
- // Sort by confidence and take top 5
111
- const sortedResults = results
112
  .sort((a, b) => b.score - a.score)
113
- .slice(0, 5);
114
-
115
- // Convert to our tag format
116
- const tags = sortedResults.map(result => ({
117
- label: result.label,
118
- confidence: Math.max(0, Math.min(1, result.score)) // Clamp between 0 and 1
119
- }));
120
-
121
- // Ensure we have at least some tags
122
- if (tags.length === 0) {
123
- return this.getFallbackTags();
124
- }
125
-
126
- return tags;
127
- }
128
-
129
- getFallbackTags() {
130
- return [
131
- { label: 'audio', confidence: 0.9 },
132
- { label: 'sound', confidence: 0.8 },
133
- { label: 'recording', confidence: 0.7 }
134
- ];
135
  }
136
 
137
- // Convert file to AudioBuffer
138
  async fileToAudioBuffer(file) {
139
- console.log('πŸ“ Processing file:', file.name, 'Size:', Math.round(file.size / 1024), 'KB');
140
 
141
  try {
142
  const arrayBuffer = await file.arrayBuffer();
143
  const audioContext = new (window.AudioContext || window.webkitAudioContext)();
144
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
145
 
146
- console.log('βœ… Audio file decoded successfully');
147
  return audioBuffer;
148
  } catch (error) {
149
- console.error('❌ Failed to decode audio file:', error);
150
- throw new Error(`Failed to decode audio file: ${error.message}`);
151
  }
152
  }
153
  }
 
3
  class CLAPProcessor {
4
  constructor() {
5
  this.classifier = null;
6
+ this.isLoaded = false;
7
+ this.candidateLabels = [
8
  'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
9
  'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
10
  'pop music', 'jazz', 'electronic music', 'ambient', 'nature sounds',
 
19
  }
20
 
21
  async initialize() {
22
+ if (this.isLoaded) return;
23
 
24
  try {
25
+ console.log('πŸ”„ Loading CLAP pipeline...');
26
 
 
27
  this.classifier = await pipeline(
28
  'zero-shot-audio-classification',
29
+ 'Xenova/clap-htsat-unfused'
 
 
 
 
30
  );
31
 
32
+ this.isLoaded = true;
33
+ console.log('βœ… CLAP pipeline ready!');
34
  } catch (error) {
35
+ console.error('❌ CLAP initialization failed:', error);
36
+ throw new Error(`CLAP loading failed: ${error.message}`);
37
  }
38
  }
39
 
40
  async processAudio(audioBuffer) {
41
+ console.log('🎡 Processing audio...');
42
 
43
+ if (!this.isLoaded) {
44
  await this.initialize();
45
  }
46
 
47
  try {
48
+ // Convert AudioBuffer to raw audio data
49
+ const audioData = this.convertAudioBuffer(audioBuffer);
50
 
51
+ console.log('πŸ” Running classification...');
52
 
53
+ // Run the classification
54
+ const results = await this.classifier(audioData, this.candidateLabels);
55
 
56
+ console.log('🎯 Classification results:', results);
57
 
58
+ // Format results
59
+ const formattedTags = this.formatResults(results);
 
60
 
61
+ console.log('πŸ“ Final tags:', formattedTags);
62
+ return formattedTags;
63
 
64
  } catch (error) {
65
+ console.error('❌ Audio processing error:', error);
66
+
67
+ // Return fallback tags with error info
68
+ return [
69
+ { label: 'audio', confidence: 0.9 },
70
+ { label: 'sound', confidence: 0.8 },
71
+ { label: 'unknown', confidence: 0.5 }
72
+ ];
73
  }
74
  }
75
 
76
+ convertAudioBuffer(audioBuffer) {
77
  console.log('πŸ”§ Converting audio buffer:', {
78
+ duration: audioBuffer.duration.toFixed(2) + 's',
79
  sampleRate: audioBuffer.sampleRate,
80
  channels: audioBuffer.numberOfChannels
81
  });
82
 
83
+ // Extract audio data
84
+ let rawAudio;
85
  if (audioBuffer.numberOfChannels === 1) {
86
+ rawAudio = audioBuffer.getChannelData(0);
87
  } else {
88
+ // Convert stereo to mono by averaging
89
+ const left = audioBuffer.getChannelData(0);
90
+ const right = audioBuffer.getChannelData(1);
91
+ rawAudio = new Float32Array(left.length);
92
+ for (let i = 0; i < left.length; i++) {
93
+ rawAudio[i] = (left[i] + right[i]) / 2;
94
  }
95
  }
96
 
 
97
  return {
98
+ raw: rawAudio,
99
  sampling_rate: audioBuffer.sampleRate
100
  };
101
  }
102
 
103
+ formatResults(results) {
104
+ if (!Array.isArray(results)) {
 
105
  console.warn('⚠️ Unexpected results format:', results);
106
+ return [
107
+ { label: 'audio', confidence: 0.9 },
108
+ { label: 'sound', confidence: 0.8 }
109
+ ];
110
  }
111
 
112
+ // Sort by score and take top 5
113
+ return results
114
  .sort((a, b) => b.score - a.score)
115
+ .slice(0, 5)
116
+ .map(result => ({
117
+ label: result.label,
118
+ confidence: Math.max(0, Math.min(1, result.score))
119
+ }));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
 
 
122
  async fileToAudioBuffer(file) {
123
+ console.log('πŸ“ Decoding file:', file.name, `(${Math.round(file.size / 1024)}KB)`);
124
 
125
  try {
126
  const arrayBuffer = await file.arrayBuffer();
127
  const audioContext = new (window.AudioContext || window.webkitAudioContext)();
128
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
129
 
130
+ console.log('βœ… File decoded successfully');
131
  return audioBuffer;
132
  } catch (error) {
133
+ console.error('❌ File decoding failed:', error);
134
+ throw new Error(`Audio decoding failed: ${error.message}`);
135
  }
136
  }
137
  }