Spaces:
Sleeping
Sleeping
limit 4s CPU
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🎵
|
4 |
colorFrom: blue
|
5 |
colorTo: pink
|
|
|
1 |
---
|
2 |
+
title: Wav2small 2.0
|
3 |
emoji: 🎵
|
4 |
colorFrom: blue
|
5 |
colorTo: pink
|
app.py
CHANGED
@@ -369,7 +369,7 @@ ax.spines['left'].set_visible(False)
|
|
369 |
|
370 |
def process_audio(audio_filepath):
|
371 |
if audio_filepath is None:
|
372 |
-
|
373 |
return fig_error, fig_error
|
374 |
|
375 |
waveform, sample_rate = librosa.load(audio_filepath, sr=None)
|
@@ -380,20 +380,20 @@ def process_audio(audio_filepath):
|
|
380 |
else:
|
381 |
resampled_waveform_np = waveform[None, :]
|
382 |
|
383 |
-
x = torch.from_numpy(resampled_waveform_np).to(torch.float)
|
384 |
|
385 |
with torch.no_grad():
|
386 |
|
387 |
logits_dawn = dawn(x).cpu().numpy()[0, :]
|
388 |
|
389 |
-
logits_wavlm = base(x).cpu().numpy()[0, :]
|
390 |
|
391 |
# 17K params
|
392 |
logits_wav2small = wav2small(x).cpu().numpy()[0, :]
|
393 |
|
394 |
|
395 |
# --- Plot 1: Wav2Vec2 vs Wav2Small Teacher Outputs ---
|
396 |
-
|
397 |
fig, ax = plt.subplots(figsize=(10, 6))
|
398 |
|
399 |
left_bars_data = logits_dawn.clip(0, 1)
|
@@ -460,7 +460,7 @@ def process_audio(audio_filepath):
|
|
460 |
|
461 |
fig_2, ax_2 = plt.subplots(figsize=(10, 6))
|
462 |
|
463 |
-
|
464 |
left_bars_data = logits_wavlm.clip(0, 1)
|
465 |
right_bars_data = (.5 * logits_dawn + .5 * logits_wavlm).clip(0, 1)
|
466 |
|
|
|
369 |
|
370 |
def process_audio(audio_filepath):
|
371 |
if audio_filepath is None:
|
372 |
+
|
373 |
return fig_error, fig_error
|
374 |
|
375 |
waveform, sample_rate = librosa.load(audio_filepath, sr=None)
|
|
|
380 |
else:
|
381 |
resampled_waveform_np = waveform[None, :]
|
382 |
|
383 |
+
x = torch.from_numpy(resampled_waveform_np[:, :64000]).to(torch.float) # only 4s for speed
|
384 |
|
385 |
with torch.no_grad():
|
386 |
|
387 |
logits_dawn = dawn(x).cpu().numpy()[0, :]
|
388 |
|
389 |
+
logits_wavlm = base(x).cpu().numpy()[0, :]
|
390 |
|
391 |
# 17K params
|
392 |
logits_wav2small = wav2small(x).cpu().numpy()[0, :]
|
393 |
|
394 |
|
395 |
# --- Plot 1: Wav2Vec2 vs Wav2Small Teacher Outputs ---
|
396 |
+
|
397 |
fig, ax = plt.subplots(figsize=(10, 6))
|
398 |
|
399 |
left_bars_data = logits_dawn.clip(0, 1)
|
|
|
460 |
|
461 |
fig_2, ax_2 = plt.subplots(figsize=(10, 6))
|
462 |
|
463 |
+
|
464 |
left_bars_data = logits_wavlm.clip(0, 1)
|
465 |
right_bars_data = (.5 * logits_dawn + .5 * logits_wavlm).clip(0, 1)
|
466 |
|