Deploy latest build
- assets/index-CHouKU67.js +0 -0
- assets/vite-DcBtz0py-DcBtz0py.svg +1 -0
- index.html +2 -2
- package.json +6 -1
- src/App.jsx +32 -5
- src/clapProcessor.js +100 -38
assets/index-CHouKU67.js
ADDED
The diff for this file is too large to render. See raw diff

assets/vite-DcBtz0py-DcBtz0py.svg
ADDED
index.html
CHANGED
@@ -2,10 +2,10 @@
 <html lang="en">
   <head>
     <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py.svg" />
+    <link rel="icon" type="image/svg+xml" href="./assets/vite-DcBtz0py-DcBtz0py.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>🎵 clip-tagger</title>
-    <script type="module" crossorigin src="./assets/index-…
+    <script type="module" crossorigin src="./assets/index-CHouKU67.js"></script>
     <link rel="stylesheet" crossorigin href="./assets/index-F_aFpJd-.css">
   </head>
   <body>
package.json
CHANGED
@@ -7,7 +7,12 @@
     "dev": "vite",
     "build": "vite build",
     "lint": "eslint .",
-    "preview": "vite preview"
+    "preview": "vite preview",
+    "deploy": "npm run build && cp -r dist/* . && git add . && git commit -m 'Deploy latest build' && git push origin master && git push hf master:main",
+    "deploy-github": "git add . && git commit -m 'Update source code' && git push origin master",
+    "deploy-hf": "npm run build && cp -r dist/* . && git add . && git commit -m 'Deploy to HF Spaces' && git push hf master:main",
+    "clean": "rm -rf dist node_modules",
+    "fresh": "npm run clean && npm install && npm run build"
   },
   "dependencies": {
     "@xenova/transformers": "^2.17.2",
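A note on the new scripts, as a hedged reading rather than anything stated in the diff: deploy builds with Vite, copies dist/* into the repository root so the committed index.html and assets/ track the latest build, then pushes master to the GitHub remote (origin) and master:main to a Hugging Face remote (hf); both remotes are assumed to be configured already. deploy-github and deploy-hf split those two targets, and clean/fresh give a from-scratch rebuild. This copy-into-root flow would also explain the doubled hash in vite-DcBtz0py-DcBtz0py.svg above: an icon whose filename already carried a Vite content hash appears to have been fed through a later build and hashed a second time.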
src/App.jsx
CHANGED
@@ -211,10 +211,31 @@ function App() {
   }

   const handleAddCustomTag = async () => {
-    …
+    const trimmedTag = newTag.trim().toLowerCase()
+
+    // Validation
+    if (!trimmedTag) {
+      setError('Please enter a tag name')
+      return
+    }
+
+    if (trimmedTag.length < 2) {
+      setError('Tag must be at least 2 characters long')
+      return
+    }
+
+    // Check if tag already exists
+    const existingTag = tags.find(tag => tag.label.toLowerCase() === trimmedTag)
+    if (existingTag) {
+      setError(`Tag "${trimmedTag}" already exists`)
+      return
+    }
+
+    // Clear any previous errors
+    setError(null)

     const customTag = {
-      label: …
+      label: trimmedTag,
       confidence: 1.0,
       userFeedback: 'custom',
       isCustom: true,
@@ -224,23 +245,29 @@ function App() {
     setTags(prev => [...prev, customTag])

     try {
-      …
-      …
+      if (feedbackStoreRef.current) {
+        await feedbackStoreRef.current.saveCustomTag(trimmedTag)
+        if (audioHash) {
+          await feedbackStoreRef.current.saveTagFeedback(trimmedTag, 'custom', audioHash)
+        }
+      }

       // Train local classifier on custom tag
       if (localClassifierRef.current && audioFeatures) {
         const simpleFeatures = localClassifierRef.current.extractSimpleFeatures(audioFeatures)
         localClassifierRef.current.trainOnFeedback(
           simpleFeatures,
-          …
+          trimmedTag,
           'custom'
         )
         localClassifierRef.current.saveModel()
       }

       loadCustomTags()
+      console.log(`✅ Added custom tag: "${trimmedTag}"`)
     } catch (error) {
       console.error('Error saving custom tag:', error)
+      setError('Failed to save custom tag')
     }

     setNewTag('')
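As a hedged illustration (not part of this commit), the inline checks above can be read as one pure validation step; the helper name validateCustomTag and its return shape are hypothetical, while the rules and error messages mirror the diff:

// Hypothetical helper, not in the diff: the same validation rules as
// handleAddCustomTag, factored into a pure, unit-testable function.
function validateCustomTag(rawInput, tags) {
  const label = rawInput.trim().toLowerCase()

  if (!label) return { ok: false, error: 'Please enter a tag name' }
  if (label.length < 2) return { ok: false, error: 'Tag must be at least 2 characters long' }
  if (tags.some(t => t.label.toLowerCase() === label)) {
    return { ok: false, error: `Tag "${label}" already exists` }
  }

  return { ok: true, label }
}

Calling this at the top of handleAddCustomTag would leave the handler with only state updates and persistence.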
src/clapProcessor.js
CHANGED
@@ -2,7 +2,8 @@ import { pipeline } from '@xenova/transformers';

 class CLAPProcessor {
   constructor() {
-    this.…
+    this.classifier = null;
+    this.isInitialized = false;
     this.defaultLabels = [
       'speech', 'music', 'singing', 'guitar', 'piano', 'drums', 'violin',
       'trumpet', 'saxophone', 'flute', 'classical music', 'rock music',
@@ -18,75 +19,136 @@ class CLAPProcessor {
   }

   async initialize() {
-    if (this.…
+    if (this.isInitialized) return;

     try {
-      console.log('Loading CLAP model...');
-      …
-      …
-      …
+      console.log('🚀 Loading CLAP model (this may take a moment)...');
+
+      // Create a zero-shot audio classification pipeline
+      this.classifier = await pipeline(
+        'zero-shot-audio-classification',
+        'Xenova/clap-htsat-unfused',
+        {
+          // Optional: specify device and other configs
+          device: 'webgpu', // fallback to cpu if webgpu not available
+        }
+      );
+
+      this.isInitialized = true;
+      console.log('✅ CLAP model loaded successfully!');
     } catch (error) {
-      console.error('Failed to load CLAP model:', error);
-      throw error;
+      console.error('❌ Failed to load CLAP model:', error);
+      throw new Error(`Failed to initialize CLAP model: ${error.message}`);
     }
   }

   async processAudio(audioBuffer) {
-    …
+    console.log('🎵 Starting audio processing...');
+
+    if (!this.isInitialized) {
       await this.initialize();
     }

     try {
-      // Convert…
-      const…
-      …
+      // Convert AudioBuffer to the format expected by the model
+      const audioData = this.extractAudioData(audioBuffer);
+
+      console.log('🔍 Classifying audio with', this.defaultLabels.length, 'possible labels...');

-      …
-      const results = await this.pipeline(audio, this.defaultLabels);
+      // Run zero-shot classification
+      const results = await this.classifier(audioData, this.defaultLabels);

-      …
+      console.log('🎯 Raw CLAP results:', results);

-      …
-      const tags = results.slice(0, 5).map(result => ({
-        label: result.label,
-        confidence: result.score
-      }));
+      // Process and return top results
+      const processedTags = this.processResults(results);
+      console.log('🏷️ Processed tags:', processedTags);

-      return tags;
+      return processedTags;
+
     } catch (error) {
-      console.error('Error…
-      throw error;
+      console.error('❌ Error during audio processing:', error);
+      throw new Error(`Audio processing failed: ${error.message}`);
     }
   }

-  …
-  …
-  …
-  …
-  …
+  extractAudioData(audioBuffer) {
+    console.log('🔧 Converting audio buffer:', {
+      duration: audioBuffer.duration,
+      sampleRate: audioBuffer.sampleRate,
+      channels: audioBuffer.numberOfChannels
+    });
+
+    // Get audio data - convert to mono if needed
+    let audioArray;
+    if (audioBuffer.numberOfChannels === 1) {
+      audioArray = audioBuffer.getChannelData(0);
+    } else {
+      // Average multiple channels to mono
       const channel1 = audioBuffer.getChannelData(0);
       const channel2 = audioBuffer.getChannelData(1);
-      …
+      audioArray = new Float32Array(channel1.length);
       for (let i = 0; i < channel1.length; i++) {
-        …
+        audioArray[i] = (channel1[i] + channel2[i]) / 2;
       }
-    } else {
-      audioData = audioBuffer.getChannelData(0);
     }
-    …
-    // Return the…
+
+    // Return in the format expected by transformers.js
     return {
-      …
+      raw: audioArray,
       sampling_rate: audioBuffer.sampleRate
     };
   }

+  processResults(results) {
+    // Ensure we have results and they're in the expected format
+    if (!results || !Array.isArray(results)) {
+      console.warn('⚠️ Unexpected results format:', results);
+      return this.getFallbackTags();
+    }
+
+    // Sort by confidence and take top 5
+    const sortedResults = results
+      .sort((a, b) => b.score - a.score)
+      .slice(0, 5);
+
+    // Convert to our tag format
+    const tags = sortedResults.map(result => ({
+      label: result.label,
+      confidence: Math.max(0, Math.min(1, result.score)) // Clamp between 0 and 1
+    }));
+
+    // Ensure we have at least some tags
+    if (tags.length === 0) {
+      return this.getFallbackTags();
+    }
+
+    return tags;
+  }
+
+  getFallbackTags() {
+    return [
+      { label: 'audio', confidence: 0.9 },
+      { label: 'sound', confidence: 0.8 },
+      { label: 'recording', confidence: 0.7 }
+    ];
+  }
+
   // Convert file to AudioBuffer
   async fileToAudioBuffer(file) {
-    …
-    …
-    …
+    console.log('📁 Processing file:', file.name, 'Size:', Math.round(file.size / 1024), 'KB');
+
+    try {
+      const arrayBuffer = await file.arrayBuffer();
+      const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+      const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
+
+      console.log('✅ Audio file decoded successfully');
+      return audioBuffer;
+    } catch (error) {
+      console.error('❌ Failed to decode audio file:', error);
+      throw new Error(`Failed to decode audio file: ${error.message}`);
+    }
   }
 }
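Taken together, the rewritten class decodes a file, lazy-loads the model on first use, classifies against the default labels, and post-processes to the top five tags. A minimal usage sketch, assuming the module exports the class (the export statement sits outside this hunk):

// Sketch under assumptions: the default export and the calling code
// are illustrative, not taken from the diff.
import CLAPProcessor from './clapProcessor';

const processor = new CLAPProcessor();

async function tagFile(file) {
  const audioBuffer = await processor.fileToAudioBuffer(file); // Web Audio decode
  const tags = await processor.processAudio(audioBuffer);      // loads CLAP on first call
  return tags; // up to 5 of { label, confidence }, scores clamped to [0, 1]
}

Two caveats worth flagging. First, @xenova/transformers 2.x does not act on a device option in pipeline() (WebGPU device selection arrived later, in transformers.js v3), so the device: 'webgpu' line and its fallback comment are likely inert here and inference will run on WASM/CPU. Second, extractAudioData passes the AudioBuffer's native sample rate straight through, while the CLAP feature extractor expects 48 kHz input; if that matters in practice, a resampling step can sit in front of classification. A sketch using OfflineAudioContext (hypothetical, not in the commit):

// Hypothetical pre-processing step: resample decoded audio to 48 kHz,
// the rate the CLAP feature extractor expects, before classification.
async function resampleTo48k(audioBuffer) {
  const targetRate = 48000;
  if (audioBuffer.sampleRate === targetRate) return audioBuffer;

  const ctx = new OfflineAudioContext(1, Math.ceil(audioBuffer.duration * targetRate), targetRate);
  const src = ctx.createBufferSource();
  src.buffer = audioBuffer;    // source keeps its original rate; the context resamples
  src.connect(ctx.destination);
  src.start();
  return ctx.startRendering(); // resolves to a mono 48 kHz AudioBuffer
}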