Spaces:
Running
Running
Error forced on audio normalization failure
Browse files- app/synth.py +22 -117
app/synth.py
CHANGED
@@ -175,25 +175,24 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
175 |
print('Done with', model)
|
176 |
|
177 |
# Resample to 24kHz
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
pass
|
197 |
# if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
|
198 |
result_storage[model] = result
|
199 |
|
@@ -281,10 +280,12 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
281 |
):
|
282 |
# run Zero-GPU spaces one at a time
|
283 |
predict_and_update_result(text, mdl1k, results, request)
|
284 |
-
|
|
|
285 |
|
286 |
predict_and_update_result(text, mdl2k, results, request)
|
287 |
-
|
|
|
288 |
else:
|
289 |
# use multithreading
|
290 |
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results, request))
|
@@ -323,104 +324,8 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
323 |
|
324 |
# Battle Mode
|
325 |
|
326 |
-
def synthandreturn_battle(text, mdl1, mdl2, autoplay):
|
327 |
-
if mdl1 == mdl2:
|
328 |
-
raise gr.Error('You can\'t pick two of the same models.')
|
329 |
-
text = text.strip()
|
330 |
-
if len(text) > MAX_SAMPLE_TXT_LENGTH:
|
331 |
-
raise gr.Error(f'You exceeded the limit of {MAX_SAMPLE_TXT_LENGTH} characters')
|
332 |
-
if len(text) < MIN_SAMPLE_TXT_LENGTH:
|
333 |
-
raise gr.Error(f'Please input a text longer than {MIN_SAMPLE_TXT_LENGTH} characters')
|
334 |
-
if (
|
335 |
-
# test toxicity if not prepared text
|
336 |
-
text not in sents
|
337 |
-
and check_toxicity(text)
|
338 |
-
):
|
339 |
-
print(f'Detected toxic content! "{text}"')
|
340 |
-
raise gr.Error('Your text failed the toxicity test')
|
341 |
-
if not text:
|
342 |
-
raise gr.Error(f'You did not enter any text')
|
343 |
-
# Check language
|
344 |
-
try:
|
345 |
-
if not detect(text) == "en":
|
346 |
-
gr.Warning('Warning: The input text may not be in English')
|
347 |
-
except:
|
348 |
-
pass
|
349 |
-
# Get two random models
|
350 |
-
log_text(text)
|
351 |
-
print("[debug] Using", mdl1, mdl2)
|
352 |
-
def predict_and_update_result(text, model, result_storage):
|
353 |
-
try:
|
354 |
-
if model in AVAILABLE_MODELS:
|
355 |
-
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
356 |
-
else:
|
357 |
-
result = router.predict(text, model.lower(), api_name="/synthesize")
|
358 |
-
except:
|
359 |
-
raise gr.Error('Unable to call API, please try again :)')
|
360 |
-
print('Done with', model)
|
361 |
-
# try:
|
362 |
-
# doresample(result)
|
363 |
-
# except:
|
364 |
-
# pass
|
365 |
-
try:
|
366 |
-
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
367 |
-
audio = AudioSegment.from_file(result)
|
368 |
-
current_sr = audio.frame_rate
|
369 |
-
if current_sr > 24000:
|
370 |
-
audio = audio.set_frame_rate(24000)
|
371 |
-
try:
|
372 |
-
print('Trying to normalize audio')
|
373 |
-
audio = match_target_amplitude(audio, -20)
|
374 |
-
except:
|
375 |
-
print('[WARN] Unable to normalize audio')
|
376 |
-
audio.export(f.name, format="wav")
|
377 |
-
os.unlink(result)
|
378 |
-
result = f.name
|
379 |
-
except:
|
380 |
-
pass
|
381 |
-
# if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
|
382 |
-
print(model)
|
383 |
-
print(f"Running model {model}")
|
384 |
-
result_storage[model] = result
|
385 |
-
# try:
|
386 |
-
# doloudnorm(result)
|
387 |
-
# except:
|
388 |
-
# pass
|
389 |
-
mdl1k = mdl1
|
390 |
-
mdl2k = mdl2
|
391 |
-
print(mdl1k, mdl2k)
|
392 |
-
# if mdl1 in AVAILABLE_MODELS.keys(): mdl1k=AVAILABLE_MODELS[mdl1]
|
393 |
-
# if mdl2 in AVAILABLE_MODELS.keys(): mdl2k=AVAILABLE_MODELS[mdl2]
|
394 |
-
results = {}
|
395 |
-
print(f"Sending models {mdl1k} and {mdl2k} to API")
|
396 |
-
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results))
|
397 |
-
thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2k, results))
|
398 |
-
|
399 |
-
thread1.start()
|
400 |
-
thread2.start()
|
401 |
-
thread1.join()
|
402 |
-
thread2.join()
|
403 |
-
|
404 |
-
print(f"Retrieving models {mdl1k} and {mdl2k} from API")
|
405 |
-
return (
|
406 |
-
text,
|
407 |
-
"Synthesize 🐢",
|
408 |
-
gr.update(visible=True), # r2
|
409 |
-
mdl1, # model1
|
410 |
-
mdl2, # model2
|
411 |
-
gr.update(visible=True, value=results[mdl1k], autoplay=autoplay), # aud1
|
412 |
-
gr.update(visible=True, value=results[mdl2k], autoplay=False), # aud2
|
413 |
-
gr.update(visible=True, interactive=False), #abetter
|
414 |
-
gr.update(visible=True, interactive=False), #bbetter
|
415 |
-
gr.update(visible=False), #prevmodel1
|
416 |
-
gr.update(visible=False), #prevmodel2
|
417 |
-
gr.update(visible=False), #nxt round btn
|
418 |
-
)
|
419 |
-
|
420 |
def randomsent():
|
421 |
return '⚡', random.choice(sents), '🎲'
|
422 |
-
def randomsent_battle():
|
423 |
-
return tuple(randomsent()) + tuple(random_m())
|
424 |
def clear_stuff():
|
425 |
return [
|
426 |
gr.update(visible=True, value="", elem_classes=[]),
|
|
|
175 |
print('Done with', model)
|
176 |
|
177 |
# Resample to 24kHz
|
178 |
+
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
179 |
+
audio = AudioSegment.from_file(result)
|
180 |
+
current_sr = audio.frame_rate
|
181 |
+
if current_sr > 24000:
|
182 |
+
print(f"{model}: Resampling")
|
183 |
+
audio = audio.set_frame_rate(24000)
|
184 |
+
try:
|
185 |
+
print(f"{model}: Trying to normalize audio")
|
186 |
+
audio = match_target_amplitude(audio, -20)
|
187 |
+
except:
|
188 |
+
print(f"{model}: [WARN] Unable to normalize audio")
|
189 |
+
raise gr.Error('Unable to normalize audio for output of space')
|
190 |
+
|
191 |
+
audio.export(f.name, format="wav")
|
192 |
+
os.unlink(result)
|
193 |
+
result = f.name
|
194 |
+
gr.Info('Audio from a TTS model received')
|
195 |
+
|
|
|
196 |
# if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
|
197 |
result_storage[model] = result
|
198 |
|
|
|
280 |
):
|
281 |
# run Zero-GPU spaces one at a time
|
282 |
predict_and_update_result(text, mdl1k, results, request)
|
283 |
+
if results[mdl1k] != None:
|
284 |
+
cache_sample(results[mdl1k], text, mdl1k)
|
285 |
|
286 |
predict_and_update_result(text, mdl2k, results, request)
|
287 |
+
if results[mdl2k] != None:
|
288 |
+
cache_sample(results[mdl2k], text, mdl2k)
|
289 |
else:
|
290 |
# use multithreading
|
291 |
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results, request))
|
|
|
324 |
|
325 |
# Battle Mode
|
326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
def randomsent():
|
328 |
return '⚡', random.choice(sents), '🎲'
|
|
|
|
|
329 |
def clear_stuff():
|
330 |
return [
|
331 |
gr.update(visible=True, value="", elem_classes=[]),
|