Dionyssos committed on
Commit
6f577a3
·
1 Parent(s): 07ebc68

Limit inference input to the first 4 s of audio to reduce CPU time

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +5 -5
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Wav2Vec2 / Wav2small
3
  emoji: 🎵
4
  colorFrom: blue
5
  colorTo: pink
 
1
  ---
2
+ title: Wav2small 2.0
3
  emoji: 🎵
4
  colorFrom: blue
5
  colorTo: pink
app.py CHANGED
@@ -369,7 +369,7 @@ ax.spines['left'].set_visible(False)
369
 
370
  def process_audio(audio_filepath):
371
  if audio_filepath is None:
372
-
373
  return fig_error, fig_error
374
 
375
  waveform, sample_rate = librosa.load(audio_filepath, sr=None)
@@ -380,20 +380,20 @@ def process_audio(audio_filepath):
380
  else:
381
  resampled_waveform_np = waveform[None, :]
382
 
383
- x = torch.from_numpy(resampled_waveform_np).to(torch.float)
384
 
385
  with torch.no_grad():
386
 
387
  logits_dawn = dawn(x).cpu().numpy()[0, :]
388
 
389
- logits_wavlm = base(x).cpu().numpy()[0, :]
390
 
391
  # 17K params
392
  logits_wav2small = wav2small(x).cpu().numpy()[0, :]
393
 
394
 
395
  # --- Plot 1: Wav2Vec2 vs Wav2Small Teacher Outputs ---
396
-
397
  fig, ax = plt.subplots(figsize=(10, 6))
398
 
399
  left_bars_data = logits_dawn.clip(0, 1)
@@ -460,7 +460,7 @@ def process_audio(audio_filepath):
460
 
461
  fig_2, ax_2 = plt.subplots(figsize=(10, 6))
462
 
463
-
464
  left_bars_data = logits_wavlm.clip(0, 1)
465
  right_bars_data = (.5 * logits_dawn + .5 * logits_wavlm).clip(0, 1)
466
 
 
369
 
370
  def process_audio(audio_filepath):
371
  if audio_filepath is None:
372
+
373
  return fig_error, fig_error
374
 
375
  waveform, sample_rate = librosa.load(audio_filepath, sr=None)
 
380
  else:
381
  resampled_waveform_np = waveform[None, :]
382
 
383
+ x = torch.from_numpy(resampled_waveform_np[:, :64000]).to(torch.float) # only 4s for speed
384
 
385
  with torch.no_grad():
386
 
387
  logits_dawn = dawn(x).cpu().numpy()[0, :]
388
 
389
+ logits_wavlm = base(x).cpu().numpy()[0, :]
390
 
391
  # 17K params
392
  logits_wav2small = wav2small(x).cpu().numpy()[0, :]
393
 
394
 
395
  # --- Plot 1: Wav2Vec2 vs Wav2Small Teacher Outputs ---
396
+
397
  fig, ax = plt.subplots(figsize=(10, 6))
398
 
399
  left_bars_data = logits_dawn.clip(0, 1)
 
460
 
461
  fig_2, ax_2 = plt.subplots(figsize=(10, 6))
462
 
463
+
464
  left_bars_data = logits_wavlm.clip(0, 1)
465
  right_bars_data = (.5 * logits_dawn + .5 * logits_wavlm).clip(0, 1)
466