Pendrokar committed on
Commit 61a82ba · 1 Parent(s): d8a1450

Error forced on audio normalization failure

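The change hinges on Gradio's `gr.Error`: raising it inside an event handler aborts the event and surfaces the message to the user, whereas the previous bare `except` only printed a warning to the Space's log and let an unnormalized sample through. A minimal sketch of that pattern, with the handler and the `normalize_audio` helper invented purely for illustration:

    import gradio as gr

    def handler(text):
        try:
            audio_path = normalize_audio(text)  # hypothetical step that can fail
        except Exception:
            # Raising gr.Error shows the message in the UI and stops the event,
            # instead of silently logging and returning a bad sample.
            raise gr.Error("Unable to normalize audio")
        return audio_path
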
Files changed (1)
  1. app/synth.py +22 -117
app/synth.py CHANGED
@@ -175,25 +175,24 @@ def synthandreturn(text, autoplay, request: gr.Request):
         print('Done with', model)
 
         # Resample to 24kHz
-        try:
-            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
-                audio = AudioSegment.from_file(result)
-                current_sr = audio.frame_rate
-                if current_sr > 24000:
-                    print(f"{model}: Resampling")
-                    audio = audio.set_frame_rate(24000)
-                try:
-                    print(f"{model}: Trying to normalize audio")
-                    audio = match_target_amplitude(audio, -20)
-                except:
-                    print(f"{model}: [WARN] Unable to normalize audio")
-                audio.export(f.name, format="wav")
-                os.unlink(result)
-                result = f.name
-                gr.Info('Audio from a TTS model received')
-        except:
-            print(f"{model}: [WARN] Unable to resample audio")
-            pass
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
+            audio = AudioSegment.from_file(result)
+            current_sr = audio.frame_rate
+            if current_sr > 24000:
+                print(f"{model}: Resampling")
+                audio = audio.set_frame_rate(24000)
+            try:
+                print(f"{model}: Trying to normalize audio")
+                audio = match_target_amplitude(audio, -20)
+            except:
+                print(f"{model}: [WARN] Unable to normalize audio")
+                raise gr.Error('Unable to normalize audio for output of space')
+
+            audio.export(f.name, format="wav")
+            os.unlink(result)
+            result = f.name
+            gr.Info('Audio from a TTS model received')
+
         # if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
         result_storage[model] = result
 
@@ -281,10 +280,12 @@ def synthandreturn(text, autoplay, request: gr.Request):
     ):
         # run Zero-GPU spaces one at a time
        predict_and_update_result(text, mdl1k, results, request)
-        cache_sample(results[mdl1k], text, mdl1k)
+        if results[mdl1k] != None:
+            cache_sample(results[mdl1k], text, mdl1k)
 
         predict_and_update_result(text, mdl2k, results, request)
-        cache_sample(results[mdl2k], text, mdl2k)
+        if results[mdl2k] != None:
+            cache_sample(results[mdl2k], text, mdl2k)
     else:
         # use multithreading
         thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results, request))
@@ -323,104 +324,8 @@ def synthandreturn(text, autoplay, request: gr.Request):
 
 # Battle Mode
 
-def synthandreturn_battle(text, mdl1, mdl2, autoplay):
-    if mdl1 == mdl2:
-        raise gr.Error('You can\'t pick two of the same models.')
-    text = text.strip()
-    if len(text) > MAX_SAMPLE_TXT_LENGTH:
-        raise gr.Error(f'You exceeded the limit of {MAX_SAMPLE_TXT_LENGTH} characters')
-    if len(text) < MIN_SAMPLE_TXT_LENGTH:
-        raise gr.Error(f'Please input a text longer than {MIN_SAMPLE_TXT_LENGTH} characters')
-    if (
-        # test toxicity if not prepared text
-        text not in sents
-        and check_toxicity(text)
-    ):
-        print(f'Detected toxic content! "{text}"')
-        raise gr.Error('Your text failed the toxicity test')
-    if not text:
-        raise gr.Error(f'You did not enter any text')
-    # Check language
-    try:
-        if not detect(text) == "en":
-            gr.Warning('Warning: The input text may not be in English')
-    except:
-        pass
-    # Get two random models
-    log_text(text)
-    print("[debug] Using", mdl1, mdl2)
-    def predict_and_update_result(text, model, result_storage):
-        try:
-            if model in AVAILABLE_MODELS:
-                result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
-            else:
-                result = router.predict(text, model.lower(), api_name="/synthesize")
-        except:
-            raise gr.Error('Unable to call API, please try again :)')
-        print('Done with', model)
-        # try:
-        #     doresample(result)
-        # except:
-        #     pass
-        try:
-            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
-                audio = AudioSegment.from_file(result)
-                current_sr = audio.frame_rate
-                if current_sr > 24000:
-                    audio = audio.set_frame_rate(24000)
-                try:
-                    print('Trying to normalize audio')
-                    audio = match_target_amplitude(audio, -20)
-                except:
-                    print('[WARN] Unable to normalize audio')
-                audio.export(f.name, format="wav")
-                os.unlink(result)
-                result = f.name
-        except:
-            pass
-        # if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
-        print(model)
-        print(f"Running model {model}")
-        result_storage[model] = result
-        # try:
-        #     doloudnorm(result)
-        # except:
-        #     pass
-    mdl1k = mdl1
-    mdl2k = mdl2
-    print(mdl1k, mdl2k)
-    # if mdl1 in AVAILABLE_MODELS.keys(): mdl1k=AVAILABLE_MODELS[mdl1]
-    # if mdl2 in AVAILABLE_MODELS.keys(): mdl2k=AVAILABLE_MODELS[mdl2]
-    results = {}
-    print(f"Sending models {mdl1k} and {mdl2k} to API")
-    thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results))
-    thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2k, results))
-
-    thread1.start()
-    thread2.start()
-    thread1.join()
-    thread2.join()
-
-    print(f"Retrieving models {mdl1k} and {mdl2k} from API")
-    return (
-        text,
-        "Synthesize 🐢",
-        gr.update(visible=True), # r2
-        mdl1, # model1
-        mdl2, # model2
-        gr.update(visible=True, value=results[mdl1k], autoplay=autoplay), # aud1
-        gr.update(visible=True, value=results[mdl2k], autoplay=False), # aud2
-        gr.update(visible=True, interactive=False), #abetter
-        gr.update(visible=True, interactive=False), #bbetter
-        gr.update(visible=False), #prevmodel1
-        gr.update(visible=False), #prevmodel2
-        gr.update(visible=False), #nxt round btn
-    )
-
 def randomsent():
     return '⚡', random.choice(sents), '🎲'
-def randomsent_battle():
-    return tuple(randomsent()) + tuple(random_m())
 def clear_stuff():
     return [
         gr.update(visible=True, value="", elem_classes=[]),
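
For context, `match_target_amplitude` is called by the changed code but not defined in this diff; it is presumably the usual pydub gain-matching recipe along these lines (a sketch under that assumption, not the repository's actual implementation):

    from pydub import AudioSegment

    def match_target_amplitude(sound: AudioSegment, target_dBFS: float) -> AudioSegment:
        # Apply a uniform gain so the clip's average level reaches target_dBFS.
        # sound.dBFS is -inf for pure silence, which makes the gain infinite --
        # one plausible way the normalization step can fail and now raise gr.Error.
        change_in_dBFS = target_dBFS - sound.dBFS
        return sound.apply_gain(change_in_dBFS)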