Spaces:

sohei1l
/

clip-tagger

Running

App Files Community

sohei1l committed on May 28

Commit

ee11310

1 Parent(s): e6ec02e

Deploy latest build

Browse files

Files changed (5) hide show

assets/index-BYAX_2b6.js +0 -0
assets/vite-DcBtz0py-DcBtz0py-DcBtz0py.svg +1 -0
index.html +2 -2
package-lock.json +3 -3
src/clapProcessor.js +58 -74

assets/index-BYAX_2b6.js ADDED Viewed

The diff for this file is too large to render. See raw diff

assets/vite-DcBtz0py-DcBtz0py-DcBtz0py.svg ADDED Viewed

index.html CHANGED Viewed

@@ -2,10 +2,10 @@
 <html lang="en">
   <head>
     <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py-DcBtz0py.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>🎵 clip-tagger</title>
-    <script type="module" crossorigin src="./assets/index-CHouKU67.js"></script>
     <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
   </head>
   <body>

 <html lang="en">
   <head>
     <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py-DcBtz0py-DcBtz0py.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>🎵 clip-tagger</title>
+    <script type="module" crossorigin src="./assets/index-BYAX_2b6.js"></script>
     <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
   </head>
   <body>

package-lock.json CHANGED Viewed

@@ -1944,9 +1944,9 @@
       }
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.5.159",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.159.tgz",
-      "integrity": "sha512-CEvHptWAMV5p6GJ0Lq8aheyvVbfzVrv5mmidu1D3pidoVNkB3tTBsTMVtPJ+rzRK5oV229mCLz9Zj/hNvU8GBA==",
       "dev": true,
       "license": "ISC"
     },

       }
     },
     "node_modules/electron-to-chromium": {
+      "version": "1.5.160",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.160.tgz",
+      "integrity": "sha512-8yQk54/CoCQT8GX3zuxqPBwMAQuIr6dWI/qO8Aah/JAZwB5XmCbEElsqb1n4pzc2vpkTdfc/kbyNPJOjswfbgg==",
       "dev": true,
       "license": "ISC"
     },

src/clapProcessor.js CHANGED Viewed

@@ -3,8 +3,8 @@ import { pipeline } from '@xenova/transformers';
 class CLAPProcessor {
   constructor() {
     this.classifier = null;
-    this.isInitialized = false;
-    this.defaultLabels = [
       'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
       'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
       'pop music', 'jazz', 'electronic music', 'ambient', 'nature sounds',
@@ -19,135 +19,119 @@ class CLAPProcessor {
   }
   async initialize() {
-    if (this.isInitialized) return;
     try {
-      console.log('🔄 Loading CLAP model (this may take a moment)...');
-      // Create a zero-shot audio classification pipeline
       this.classifier = await pipeline(
         'zero-shot-audio-classification',
-        'Xenova/clap-htsat-unfused',
-        {
-          // Optional: specify device and other configs
-          device: 'webgpu', // fallback to cpu if webgpu not available
-        }
       );
-      this.isInitialized = true;
-      console.log('✅ CLAP model loaded successfully!');
     } catch (error) {
-      console.error('❌ Failed to load CLAP model:', error);
-      throw new Error(`Failed to initialize CLAP model: ${error.message}`);
     }
   }
   async processAudio(audioBuffer) {
-    console.log('🎵 Starting audio processing...');
-    if (!this.isInitialized) {
       await this.initialize();
     }
     try {
-      // Convert AudioBuffer to the format expected by the model
-      const audioData = this.extractAudioData(audioBuffer);
-      console.log('🔍 Classifying audio with', this.defaultLabels.length, 'possible labels...');
-      // Run zero-shot classification
-      const results = await this.classifier(audioData, this.defaultLabels);
-      console.log('🎯 Raw CLAP results:', results);
-      // Process and return top results
-      const processedTags = this.processResults(results);
-      console.log('📝 Processed tags:', processedTags);
-      return processedTags;
     } catch (error) {
-      console.error('❌ Error during audio processing:', error);
-      throw new Error(`Audio processing failed: ${error.message}`);
     }
   }
-  extractAudioData(audioBuffer) {
     console.log('🔧 Converting audio buffer:', {
-      duration: audioBuffer.duration,
       sampleRate: audioBuffer.sampleRate,
       channels: audioBuffer.numberOfChannels
     });
-    // Get audio data - convert to mono if needed
-    let audioArray;
     if (audioBuffer.numberOfChannels === 1) {
-      audioArray = audioBuffer.getChannelData(0);
     } else {
-      // Average multiple channels to mono
-      const channel1 = audioBuffer.getChannelData(0);
-      const channel2 = audioBuffer.getChannelData(1);
-      audioArray = new Float32Array(channel1.length);
-      for (let i = 0; i < channel1.length; i++) {
-        audioArray[i] = (channel1[i] + channel2[i]) / 2;
       }
     }
-    // Return in the format expected by transformers.js
     return {
-      raw: audioArray,
       sampling_rate: audioBuffer.sampleRate
     };
   }
-  processResults(results) {
-    // Ensure we have results and they're in the expected format
-    if (!results || !Array.isArray(results)) {
       console.warn('⚠️ Unexpected results format:', results);
-      return this.getFallbackTags();
     }
-    // Sort by confidence and take top 5
-    const sortedResults = results
       .sort((a, b) => b.score - a.score)
-      .slice(0, 5);
-    // Convert to our tag format
-    const tags = sortedResults.map(result => ({
-      label: result.label,
-      confidence: Math.max(0, Math.min(1, result.score)) // Clamp between 0 and 1
-    }));
-    // Ensure we have at least some tags
-    if (tags.length === 0) {
-      return this.getFallbackTags();
-    }
-    return tags;
-  }
-  getFallbackTags() {
-    return [
-      { label: 'audio', confidence: 0.9 },
-      { label: 'sound', confidence: 0.8 },
-      { label: 'recording', confidence: 0.7 }
-    ];
   }
-  // Convert file to AudioBuffer
   async fileToAudioBuffer(file) {
-    console.log('📁 Processing file:', file.name, 'Size:', Math.round(file.size / 1024), 'KB');
     try {
       const arrayBuffer = await file.arrayBuffer();
       const audioContext = new (window.AudioContext || window.webkitAudioContext)();
       const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
-      console.log('✅ Audio file decoded successfully');
       return audioBuffer;
     } catch (error) {
-      console.error('❌ Failed to decode audio file:', error);
-      throw new Error(`Failed to decode audio file: ${error.message}`);
     }
   }
 }

 class CLAPProcessor {
   constructor() {
     this.classifier = null;
+    this.isLoaded = false;
+    this.candidateLabels = [
       'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
       'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
       'pop music', 'jazz', 'electronic music', 'ambient', 'nature sounds',
   }
   async initialize() {
+    if (this.isLoaded) return;
     try {
+      console.log('🔄 Loading CLAP pipeline...');
       this.classifier = await pipeline(
         'zero-shot-audio-classification',
+        'Xenova/clap-htsat-unfused'
       );
+      this.isLoaded = true;
+      console.log('✅ CLAP pipeline ready!');
     } catch (error) {
+      console.error('❌ CLAP initialization failed:', error);
+      throw new Error(`CLAP loading failed: ${error.message}`);
     }
   }
   async processAudio(audioBuffer) {
+    console.log('🎵 Processing audio...');
+    if (!this.isLoaded) {
       await this.initialize();
     }
     try {
+      // Convert AudioBuffer to raw audio data
+      const audioData = this.convertAudioBuffer(audioBuffer);
+      console.log('🔍 Running classification...');
+      // Run the classification
+      const results = await this.classifier(audioData, this.candidateLabels);
+      console.log('🎯 Classification results:', results);
+      // Format results
+      const formattedTags = this.formatResults(results);
+      console.log('📝 Final tags:', formattedTags);
+      return formattedTags;
     } catch (error) {
+      console.error('❌ Audio processing error:', error);
+      // Return fallback tags with error info
+      return [
+        { label: 'audio', confidence: 0.9 },
+        { label: 'sound', confidence: 0.8 },
+        { label: 'unknown', confidence: 0.5 }
+      ];
     }
   }
+  convertAudioBuffer(audioBuffer) {
     console.log('🔧 Converting audio buffer:', {
+      duration: audioBuffer.duration.toFixed(2) + 's',
       sampleRate: audioBuffer.sampleRate,
       channels: audioBuffer.numberOfChannels
     });
+    // Extract audio data
+    let rawAudio;
     if (audioBuffer.numberOfChannels === 1) {
+      rawAudio = audioBuffer.getChannelData(0);
     } else {
+      // Convert stereo to mono by averaging
+      const left = audioBuffer.getChannelData(0);
+      const right = audioBuffer.getChannelData(1);
+      rawAudio = new Float32Array(left.length);
+      for (let i = 0; i < left.length; i++) {
+        rawAudio[i] = (left[i] + right[i]) / 2;
       }
     }
     return {
+      raw: rawAudio,
       sampling_rate: audioBuffer.sampleRate
     };
   }
+  formatResults(results) {
+    if (!Array.isArray(results)) {
       console.warn('⚠️ Unexpected results format:', results);
+      return [
+        { label: 'audio', confidence: 0.9 },
+        { label: 'sound', confidence: 0.8 }
+      ];
     }
+    // Sort by score and take top 5
+    return results
       .sort((a, b) => b.score - a.score)
+      .slice(0, 5)
+      .map(result => ({
+        label: result.label,
+        confidence: Math.max(0, Math.min(1, result.score))
+      }));
   }
   async fileToAudioBuffer(file) {
+    console.log('📁 Decoding file:', file.name, `(${Math.round(file.size / 1024)}KB)`);
     try {
       const arrayBuffer = await file.arrayBuffer();
       const audioContext = new (window.AudioContext || window.webkitAudioContext)();
       const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
+      console.log('✅ File decoded successfully');
       return audioBuffer;
     } catch (error) {
+      console.error('❌ File decoding failed:', error);
+      throw new Error(`Audio decoding failed: ${error.message}`);
     }
   }
 }