deep-div committed on
Commit
2217858
·
verified ·
1 Parent(s): a3eb7db

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +1055 -1071
model.py CHANGED
@@ -1,1071 +1,1055 @@
1
- # Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
2
- #
3
- # See LICENSE for clarification regarding multiple authors
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
-
17
- import os
18
- from functools import lru_cache
19
- from pathlib import Path
20
-
21
- import sherpa_onnx
22
- from huggingface_hub import hf_hub_download
23
-
24
-
25
def get_file(
    repo_id: str,
    filename: str,
    subfolder: str = ".",
) -> str:
    """Download one file from a Hugging Face Hub repository.

    Args:
      repo_id: Hub repository to download from.
      filename: Name of the file inside the repository.
      subfolder: Folder inside the repository that holds the file.

    Returns:
      The value returned by ``hf_hub_download`` for this request.
    """
    return hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
36
-
37
-
38
@lru_cache(maxsize=10)
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for the VITS VCTK model.

    Args:
      repo_id: "csukuangfj/vits-vctk", optionally followed by a
        "|<speaker info>" suffix as used in the model tables.
      speed: The model's length_scale is set to ``1.0 / speed``, so this
        must be non-zero.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    # The english_models table registers this loader under the key
    # "csukuangfj/vits-vctk|109 speakers" and get_pretrained_model() forwards
    # that key untouched. Strip the suffix before validating, as the sibling
    # loaders do; otherwise the assertion below can never pass for that key.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-vctk", repo_id

    # Downloads happen in this order: model, lexicon, tokens.
    model, lexicon, tokens = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-vctk.onnx", "lexicon.txt", "tokens.txt")
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
78
-
79
-
80
@lru_cache(maxsize=10)
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for the VITS LJSpeech model.

    Args:
      repo_id: "csukuangfj/vits-ljs", optionally followed by a
        "|<speaker info>" suffix as used in the model tables.
      speed: The model's length_scale is set to ``1.0 / speed``, so this
        must be non-zero.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    # The english_models table registers this loader under the key
    # "csukuangfj/vits-ljs|1 speaker" and get_pretrained_model() forwards
    # that key untouched. Strip the suffix before validating, as the sibling
    # loaders do; otherwise the assertion below can never pass for that key.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-ljs", repo_id

    # Downloads happen in this order: model, lexicon, tokens.
    model, lexicon, tokens = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-ljs.onnx", "lexicon.txt", "tokens.txt")
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
120
-
121
-
122
@lru_cache(maxsize=10)
def _get_kokoro(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for one of the supported Kokoro models.

    Args:
      repo_id: One of the supported Kokoro repositories, optionally followed
        by a "|<speaker info>" suffix, which is discarded.
      speed: The model's length_scale is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    espeak_data_dir = "/tmp/espeak-ng-data"
    repo_id = repo_id.split("|")[0]

    multi_lang_repos = (
        "csukuangfj/kokoro-multi-lang-v1_0",
        "csukuangfj/kokoro-multi-lang-v1_1",
    )
    assert repo_id in ("csukuangfj/kokoro-en-v0_19",) + multi_lang_repos, repo_id

    def fetch(name: str) -> str:
        # Every Kokoro asset lives at the top level of the model repository.
        return get_file(repo_id=repo_id, filename=name, subfolder=".")

    model_path = fetch("model.onnx")
    tokens_path = fetch("tokens.txt")
    voices_path = fetch("voices.bin")

    # Only the multi-language models use lexicons, rule FSTs and a dict dir.
    lexicon = ""
    rule_fsts = ""
    dict_dir = ""
    if repo_id in multi_lang_repos:
        lexicon_en = fetch("lexicon-us-en.txt")
        lexicon_zh = fetch("lexicon-zh.txt")
        lexicon = ",".join([lexicon_en, lexicon_zh])

        date_fst = fetch("date-zh.fst")
        number_fst = fetch("number-zh.fst")
        phone_fst = fetch("phone-zh.fst")
        # The rule order (date, phone, number) differs from the download order.
        rule_fsts = ",".join([date_fst, phone_fst, number_fst])
        dict_dir = "/tmp/dict"

    kokoro_config = sherpa_onnx.OfflineTtsKokoroModelConfig(
        model=model_path,
        voices=voices_path,
        tokens=tokens_path,
        data_dir=espeak_data_dir,
        length_scale=1.0 / speed,
        lexicon=lexicon,
        dict_dir=dict_dir,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        kokoro=kokoro_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
        rule_fsts=rule_fsts,
    )
    return sherpa_onnx.OfflineTts(tts_config)
210
-
211
-
212
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for a piper, mimic3, coqui or mms VITS repo.

    Args:
      repo_id: Repository id, optionally followed by a "|<speaker info>"
        suffix, which is discarded.
      speed: The model's length_scale is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.

    Raises:
      ValueError: If the repository id matches none of the supported families.
    """
    repo_id = repo_id.split("|")[0]

    # The ONNX file is "model.onnx" for coqui/mms repos; for piper and mimic3
    # repos it is the repository name with its family prefix removed.
    if "coqui" in repo_id or "vits-mms" in repo_id:
        stem = "model"
    elif "piper" in repo_id:
        stem = repo_id.split("/")[1][len("vits-piper-") :]
    elif "mimic3" in repo_id:
        stem = repo_id.split("/")[1][len("vits-mimic3-") :]
    else:
        raise ValueError(f"Unsupported {repo_id}")

    # These two families are configured with an empty data_dir.
    without_data_dir = "vits-coqui-uk-mai" in repo_id or "vits-mms" in repo_id
    data_dir = "" if without_data_dir else "/tmp/espeak-ng-data"

    model_path = get_file(repo_id=repo_id, filename=f"{stem}.onnx", subfolder=".")
    tokens_path = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon="",
        data_dir=data_dir,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
262
-
263
-
264
@lru_cache(maxsize=10)
def _get_vits_mms(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Load a "vits-mms" model by delegating to _get_vits_piper.

    _get_vits_piper already has a "vits-mms" branch (model file "model.onnx",
    empty data_dir), so this wrapper only forwards its arguments.
    """
    return _get_vits_piper(repo_id, speed)
267
-
268
-
269
@lru_cache(maxsize=10)
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for the VITS aishell3 model.

    Args:
      repo_id: "csukuangfj/vits-zh-aishell3", optionally followed by a
        "|<speaker info>" suffix, which is discarded.
      speed: The model's length_scale is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-zh-aishell3", repo_id

    def fetch(name: str) -> str:
        return get_file(repo_id=repo_id, filename=name, subfolder=".")

    model_path = fetch("vits-aishell3.onnx")
    lexicon_path = fetch("lexicon.txt")
    tokens_path = fetch("tokens.txt")

    # Rule FSTs are passed to the config as one comma-separated string.
    fst_names = ("phone.fst", "date.fst", "number.fst", "new_heteronym.fst")
    rule_fsts = ",".join([fetch(name) for name in fst_names])
    rule_fars = fetch("rule.far")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars=rule_fars,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
330
-
331
-
332
@lru_cache(maxsize=10)
def _get_matcha_hf_espeak(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for a Matcha model that uses espeak-ng data.

    Args:
      repo_id: One of the supported Matcha fa_en repositories, optionally
        followed by a "|<speaker info>" suffix, which is discarded.
      speed: The model's length_scale is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    repo_id = repo_id.split("|")[0]
    supported_repos = (
        "csukuangfj/matcha-tts-fa_en-khadijah",
        "csukuangfj/matcha-tts-fa_en-musa",
    )
    assert repo_id in supported_repos, repo_id

    acoustic_model_path = get_file(
        repo_id=repo_id, filename="model.onnx", subfolder="."
    )
    # The vocoder comes from a separate repository, not the model's own.
    vocoder_path = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )
    tokens_path = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model_path,
        vocoder=vocoder_path,
        tokens=tokens_path,
        lexicon="",
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
379
-
380
-
381
@lru_cache(maxsize=10)
def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for the Matcha zh-baker model.

    If /tmp/dict does not exist, the cppjieba dictionary archive is downloaded
    and unpacked under /tmp before any model file is fetched.

    Args:
      repo_id: "csukuangfj/matcha-icefall-zh-baker", optionally followed by a
        "|<speaker info>" suffix, which is discarded.
      speed: The model's length_scale is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id in ("csukuangfj/matcha-icefall-zh-baker",), repo_id

    if repo_id == "csukuangfj/matcha-icefall-zh-baker":
        acoustic_name = "model-steps-3.onnx"

    jieba_dict_dir = Path("/tmp/dict")
    if not jieba_dict_dir.is_dir():
        os.system(
            "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
        )
        # Listing the directory shows in the log whether extraction worked.
        os.system("ls -lh /tmp/dict")

    def fetch(name: str) -> str:
        return get_file(repo_id=repo_id, filename=name, subfolder=".")

    acoustic_model_path = fetch(acoustic_name)
    # The vocoder comes from a separate repository, not the model's own.
    vocoder_path = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )
    lexicon_path = fetch("lexicon.txt")
    tokens_path = fetch("tokens.txt")

    # Rule FSTs are passed to the config as one comma-separated string.
    rule_fsts = ",".join(
        [fetch(name) for name in ("phone.fst", "date.fst", "number.fst")]
    )

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model_path,
        vocoder=vocoder_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        dict_dir="/tmp/dict",
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
457
-
458
-
459
@lru_cache(maxsize=10)
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for a Chinese/Cantonese VITS repository.

    If /tmp/dict does not exist, the cppjieba dictionary archive is downloaded
    and unpacked under /tmp before any model file is fetched.

    Args:
      repo_id: Repository id, optionally followed by a "|<speaker info>"
        suffix, which is discarded.
      speed: The model's length_scale is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the downloaded files.
    """
    repo_id = repo_id.split("|")[0]
    is_cantonese = "vits-cantonese-hf-xiaomaiiwn" in repo_id

    # Work out the ONNX file stem. zh-ll and melo-tts ship "model.onnx",
    # fanchen/cantonese repos reuse the repository name, and every other repo
    # uses the text after the last "-" in its id.
    if "sherpa-onnx-vits-zh-ll" in repo_id or repo_id == "csukuangfj/vits-melo-tts-zh_en":
        stem = "model"
    elif "fanchen" in repo_id or is_cantonese:
        stem = repo_id.split("/")[-1]
    else:
        stem = repo_id.split("-")[-1]

    if not Path("/tmp/dict").is_dir():
        os.system(
            "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
        )
        # Listing the directory shows in the log whether extraction worked.
        os.system("ls -lh /tmp/dict")

    def fetch(name: str) -> str:
        return get_file(repo_id=repo_id, filename=name, subfolder=".")

    model_path = fetch(f"{stem}.onnx")
    lexicon_path = fetch("lexicon.txt")
    tokens_path = fetch("tokens.txt")

    if is_cantonese:
        # The Cantonese repo provides a single rule FST and uses no dict dir.
        rule_fsts = fetch("rule.fst")
        vits_dict_dir = ""
    else:
        rule_fsts = ",".join(
            [fetch(name) for name in ("phone.fst", "date.fst", "number.fst")]
        )
        vits_dict_dir = "/tmp/dict"

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        dict_dir=vits_dict_dir,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
547
-
548
-
549
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Find the loader registered for ``repo_id`` and build its TTS engine.

    The per-language model tables are searched in a fixed order. The first
    table containing ``repo_id`` supplies the loader, which is called with the
    key exactly as given (including any "|<speaker info>" suffix).

    Args:
      repo_id: A key of one of the model tables.
      speed: Forwarded unchanged to the selected loader.

    Returns:
      The ``sherpa_onnx.OfflineTts`` produced by the selected loader.

    Raises:
      ValueError: If no table contains ``repo_id``.
    """
    # The search order matches the original if/elif chain.
    lookup_order = (
        chinese_models,
        chinese_english_models,
        persian_english_models,
        cantonese_models,
        english_models,
        german_models,
        spanish_models,
        french_models,
        ukrainian_models,
        russian_models,
        arabic_models,
        catalan_models,
        czech_models,
        danish_models,
        greek_models,
        finnish_models,
        hungarian_models,
        icelandic_models,
        italian_models,
        georgian_models,
        kazakh_models,
        luxembourgish_models,
        nepali_models,
        dutch_models,
        norwegian_models,
        polish_models,
        portuguese_models,
        romanian_models,
        slovak_models,
        serbian_models,
        swedish_models,
        swahili_models,
        turkish_models,
        vietnamese_models,
        bulgarian_models,
        estonian_models,
        irish_models,
        croatian_models,
        lithuanian_models,
        latvian_models,
        maltese_models,
        slovenian_models,
        bengali_models,
        min_nan_models,
        thai_models,
        persian_models,
        korean_models,
        afrikaans_models,
        gujarati_models,
        tswana_models,
        welsh_models,
    )
    for table in lookup_order:
        if repo_id in table:
            return table[repo_id](repo_id, speed)

    raise ValueError(f"Unsupported repo_id: {repo_id}")
655
-
656
-
657
# Cantonese models: model key -> loader, looked up by get_pretrained_model().
cantonese_models = {
    "csukuangfj/vits-cantonese-hf-xiaomaiiwn": _get_vits_hf,
}
660
-
661
# Chinese+English models: model key -> loader, looked up by
# get_pretrained_model(). The text after "|" in a key is discarded by the
# loaders, which split the key on "|".
chinese_english_models = {
    "csukuangfj/kokoro-multi-lang-v1_1|103 speakers": _get_kokoro,
    "csukuangfj/kokoro-multi-lang-v1_0|53 speakers": _get_kokoro,
    "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf,  # 1
}
666
-
667
# Persian+English models: model key -> loader, looked up by
# get_pretrained_model().
persian_english_models = {
    "csukuangfj/matcha-tts-fa_en-khadijah|1 speaker": _get_matcha_hf_espeak,  # 1
    "csukuangfj/matcha-tts-fa_en-musa|1 speaker": _get_matcha_hf_espeak,  # 1
    "csukuangfj/vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium|1": _get_vits_piper,  # 1
}
672
-
673
# Chinese models: model key -> loader, looked up by get_pretrained_model().
# The trailing comments repeat the speaker count given in each key.
chinese_models = {
    "csukuangfj/matcha-icefall-zh-baker|1 speaker": _get_matcha_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-wnj|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-C|187 speakers": _get_vits_hf,  # 187
    "csukuangfj/sherpa-onnx-vits-zh-ll|5 speakers": _get_vits_hf,  # 5
    "csukuangfj/vits-zh-hf-keqing|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-theresa|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-eula|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-echo|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-bronya|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-doom|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-zenyatta|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-abyssinvoker|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe_new|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-unity|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-aishell3|174 speakers": _get_vits_zh_aishell3,
    "csukuangfj/vits-piper-zh_CN-huayan-medium|1 speaker": _get_vits_piper,
    # "csukuangfj/vits-piper-zh_CN-huayan-x_low": _get_vits_piper,
}
693
-
694
# English models: model key -> loader, looked up by get_pretrained_model().
# The text after "|" in a key is not part of the repository id.
english_models = {
    "csukuangfj/kokoro-en-v0_19|11 speakers": _get_kokoro,
    "csukuangfj/vits-piper-en_US-glados-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-glados|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-southern_english_male-medium|8 speakers": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-southern_english_female-medium|6 speakers": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-bryce-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-john-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-norman-medium|1 speaker": _get_vits_piper,
    # coqui-ai
    "csukuangfj/vits-coqui-en-ljspeech|1 speaker": _get_vits_piper,
    "csukuangfj/vits-coqui-en-ljspeech-neon|1 speaker": _get_vits_piper,
    "csukuangfj/vits-coqui-en-vctk|109 speakers": _get_vits_piper,
    # piper, US (the first entry below is an en_GB model)
    "csukuangfj/vits-piper-en_GB-sweetbbak-amy|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-amy-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-amy-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-arctic-medium|18 speakers": _get_vits_piper,  # 18 speakers
    "csukuangfj/vits-piper-en_US-danny-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-hfc_male-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-hfc_female-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-joe-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-kathleen-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-kusal-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-l2arctic-medium|24 speakers": _get_vits_piper,  # 24 speakers
    "csukuangfj/vits-piper-en_US-lessac-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-lessac-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-lessac-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-libritts-high|904 speakers": _get_vits_piper,  # 904 speakers
    "csukuangfj/vits-piper-en_US-libritts_r-medium|904 speakers": _get_vits_piper,  # 904 speakers
    "csukuangfj/vits-piper-en_US-ljspeech-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ljspeech-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-medium|1 speaker": _get_vits_piper,
    # piper, GB
    "csukuangfj/vits-piper-en_GB-alan-low|1 speaker": _get_vits_piper,
    # NOTE(review): the next two keys name the same repository, once with and
    # once without the "|1 speaker" suffix -- confirm whether both are wanted.
    "csukuangfj/vits-piper-en_GB-alan-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-alan-medium": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-cori-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-cori-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-jenny_dioco-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-northern_english_male-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-semaine-medium|4 speakers": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-southern_english_female-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-vctk-medium|109 speakers": _get_vits_piper,
    #
    "csukuangfj/vits-vctk|109 speakers": _get_vits_vctk,  # 109 speakers
    "csukuangfj/vits-ljs|1 speaker": _get_vits_ljs,
}
744
-
745
# German models: model key -> loader, looked up by get_pretrained_model().
german_models = {
    "csukuangfj/vits-piper-de_DE-glados-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-glados-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-glados-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-coqui-de-css10|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-eva_k-x_low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-karlsson-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-kerstin-low|1 speaker": _get_vits_piper,
    # "csukuangfj/vits-piper-de_DE-mls-medium": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-pavoque-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-ramona-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten_emotional-medium|8 speakers": _get_vits_piper,  # 8 speakers
}
761
-
762
# Spanish models: model key -> loader, looked up by get_pretrained_model().
spanish_models = {
    # "csukuangfj/vits-coqui-es-css10": _get_vits_piper,
    "csukuangfj/vits-piper-es-glados-medium": _get_vits_piper,
    "csukuangfj/vits-piper-es_ES-carlfm-x_low": _get_vits_piper,
    "csukuangfj/vits-piper-es_ES-davefx-medium": _get_vits_piper,
    # "csukuangfj/vits-piper-es_ES-mls_10246-low": _get_vits_piper,
    # "csukuangfj/vits-piper-es_ES-mls_9972-low": _get_vits_piper,
    "csukuangfj/vits-piper-es_ES-sharvard-medium": _get_vits_piper,  # 2 speakers
    "csukuangfj/vits-piper-es_MX-ald-medium": _get_vits_piper,
    "csukuangfj/vits-piper-es_MX-claude-high": _get_vits_piper,
    "csukuangfj/vits-mimic3-es_ES-m-ailabs_low": _get_vits_piper,
}
774
-
775
# French models: model key -> loader, looked up by get_pretrained_model().
french_models = {
    "csukuangfj/vits-coqui-fr-css10": _get_vits_piper,
    # "csukuangfj/vits-piper-fr_FR-gilles-low": _get_vits_piper,
    # "csukuangfj/vits-piper-fr_FR-mls_1840-low": _get_vits_piper,
    # "csukuangfj/vits-piper-fr_FR-mls-medium": _get_vits_piper, # 2 speakers, 0-female, 1-male
    "csukuangfj/vits-piper-fr_FR-upmc-medium": _get_vits_piper,  # 2 speakers, 0-female, 1-male
    # NOTE(review): the key below says "1 speaker" although its old trailing
    # comment claimed "2 speakers, 0-female, 1-male" -- confirm which is right.
    "csukuangfj/vits-piper-fr_FR-tom-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-siwis-low": _get_vits_piper,  # female
    "csukuangfj/vits-piper-fr_FR-siwis-medium": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-tjiho-model1": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-tjiho-model2": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-tjiho-model3": _get_vits_piper,
}
788
-
789
# Ukrainian models: model key -> loader, looked up by get_pretrained_model().
ukrainian_models = {
    "csukuangfj/vits-piper-uk_UA-lada-x_low": _get_vits_piper,
    "csukuangfj/vits-coqui-uk-mai": _get_vits_piper,
    # "csukuangfj/vits-piper-uk_UA-ukrainian_tts-medium": _get_vits_piper, # does not work somehow
}
794
-
795
# Russian models: model key -> loader, looked up by get_pretrained_model().
russian_models = {
    "csukuangfj/vits-piper-ru_RU-denis-medium": _get_vits_piper,
    "csukuangfj/vits-piper-ru_RU-dmitri-medium": _get_vits_piper,
    "csukuangfj/vits-piper-ru_RU-irina-medium": _get_vits_piper,
    "csukuangfj/vits-piper-ru_RU-ruslan-medium": _get_vits_piper,
}
801
-
802
# Arabic models: model key -> loader, looked up by get_pretrained_model().
arabic_models = {
    "csukuangfj/vits-piper-ar_JO-kareem-low": _get_vits_piper,
    "csukuangfj/vits-piper-ar_JO-kareem-medium": _get_vits_piper,
}
806
-
807
# Catalan models: model key -> loader, looked up by get_pretrained_model().
catalan_models = {
    "csukuangfj/vits-piper-ca_ES-upc_ona-x_low": _get_vits_piper,
    "csukuangfj/vits-piper-ca_ES-upc_ona-medium": _get_vits_piper,
    "csukuangfj/vits-piper-ca_ES-upc_pau-x_low": _get_vits_piper,
}
812
-
813
# Czech models: model key -> loader, looked up by get_pretrained_model().
czech_models = {
    "csukuangfj/vits-piper-cs_CZ-jirka-low": _get_vits_piper,
    "csukuangfj/vits-piper-cs_CZ-jirka-medium": _get_vits_piper,
    "csukuangfj/vits-coqui-cs-cv": _get_vits_piper,
}
818
-
819
# Danish models: model key -> loader, looked up by get_pretrained_model().
danish_models = {
    "csukuangfj/vits-coqui-da-cv": _get_vits_piper,
    "csukuangfj/vits-piper-da_DK-talesyntese-medium": _get_vits_piper,
}
823
-
824
# Greek models: model key -> loader, looked up by get_pretrained_model().
greek_models = {
    "csukuangfj/vits-piper-el_GR-rapunzelina-low": _get_vits_piper,
    # "csukuangfj/vits-mimic3-el_GR-rapunzelina_low": _get_vits_piper,
}
828
-
829
# Finnish models: model key -> loader, looked up by get_pretrained_model().
finnish_models = {
    "csukuangfj/vits-coqui-fi-css10": _get_vits_piper,
    "csukuangfj/vits-piper-fi_FI-harri-low": _get_vits_piper,
    "csukuangfj/vits-piper-fi_FI-harri-medium": _get_vits_piper,
    "csukuangfj/vits-mimic3-fi_FI-harri-tapani-ylilammi_low": _get_vits_piper,
}
835
-
836
# Hungarian models: model key -> loader, looked up by get_pretrained_model().
hungarian_models = {
    # "csukuangfj/vits-coqui-hu-css10": _get_vits_piper,
    "csukuangfj/vits-piper-hu_HU-anna-medium": _get_vits_piper,
    "csukuangfj/vits-piper-hu_HU-berta-medium": _get_vits_piper,
    "csukuangfj/vits-piper-hu_HU-imre-medium": _get_vits_piper,
    "csukuangfj/vits-mimic3-hu_HU-diana-majlinger_low": _get_vits_piper,
}
843
-
844
# Icelandic models: model key -> loader, looked up by get_pretrained_model().
icelandic_models = {
    "csukuangfj/vits-piper-is_IS-bui-medium": _get_vits_piper,
    "csukuangfj/vits-piper-is_IS-salka-medium": _get_vits_piper,
    "csukuangfj/vits-piper-is_IS-steinn-medium": _get_vits_piper,
    "csukuangfj/vits-piper-is_IS-ugla-medium": _get_vits_piper,
}
850
-
851
# Italian models: model key -> loader, looked up by get_pretrained_model().
italian_models = {
    "csukuangfj/vits-piper-it_IT-riccardo-x_low": _get_vits_piper,
    "csukuangfj/vits-piper-it_IT-paola-medium": _get_vits_piper,
}
855
-
856
# Georgian models: model key -> loader, looked up by get_pretrained_model().
georgian_models = {
    "csukuangfj/vits-piper-ka_GE-natia-medium": _get_vits_piper,
}
859
-
860
# Kazakh models: model key -> loader, looked up by get_pretrained_model().
kazakh_models = {
    "csukuangfj/vits-piper-kk_KZ-iseke-x_low": _get_vits_piper,
    "csukuangfj/vits-piper-kk_KZ-issai-high": _get_vits_piper,
    "csukuangfj/vits-piper-kk_KZ-raya-x_low": _get_vits_piper,
}
865
-
866
# Luxembourgish models: model key -> loader, looked up by
# get_pretrained_model().
luxembourgish_models = {
    "csukuangfj/vits-piper-lb_LU-marylux-medium": _get_vits_piper,
}
869
-
870
# Nepali models: model key -> loader, looked up by get_pretrained_model().
nepali_models = {
    "csukuangfj/vits-piper-ne_NP-google-medium": _get_vits_piper,
    "csukuangfj/vits-piper-ne_NP-google-x_low": _get_vits_piper,
    "csukuangfj/vits-mimic3-ne_NP-ne-google_low": _get_vits_piper,
}
875
-
876
# Dutch models: model key -> loader, looked up by get_pretrained_model().
dutch_models = {
    "csukuangfj/vits-coqui-nl-css10": _get_vits_piper,
    "csukuangfj/vits-piper-nl_BE-nathalie-medium": _get_vits_piper,
    "csukuangfj/vits-piper-nl_BE-nathalie-x_low": _get_vits_piper,
    "csukuangfj/vits-piper-nl_BE-rdh-medium": _get_vits_piper,
    "csukuangfj/vits-piper-nl_BE-rdh-x_low": _get_vits_piper,
    # "csukuangfj/vits-piper-nl_NL-mls-medium": _get_vits_piper,
    # "csukuangfj/vits-piper-nl_NL-mls_5809-low": _get_vits_piper,
    # "csukuangfj/vits-piper-nl_NL-mls_7432-low": _get_vits_piper,
}
886
-
887
# Norwegian models: model key -> loader, looked up by get_pretrained_model().
norwegian_models = {
    "csukuangfj/vits-piper-no_NO-talesyntese-medium": _get_vits_piper,
}
890
-
891
# Polish models: model key -> loader, looked up by get_pretrained_model().
polish_models = {
    "csukuangfj/vits-coqui-pl-mai_female": _get_vits_piper,
    "csukuangfj/vits-piper-pl_PL-darkman-medium": _get_vits_piper,
    "csukuangfj/vits-piper-pl_PL-gosia-medium": _get_vits_piper,
    "csukuangfj/vits-piper-pl_PL-mc_speech-medium": _get_vits_piper,
    # "csukuangfj/vits-piper-pl_PL-mls_6892-low": _get_vits_piper,
    "csukuangfj/vits-mimic3-pl_PL-m-ailabs_low": _get_vits_piper,
}
899
-
900
# Portuguese models: model key -> loader, looked up by get_pretrained_model().
portuguese_models = {
    "csukuangfj/vits-coqui-pt-cv": _get_vits_piper,
    "csukuangfj/vits-piper-pt_BR-edresson-low": _get_vits_piper,
    "csukuangfj/vits-piper-pt_BR-faber-medium": _get_vits_piper,
    "csukuangfj/vits-piper-pt_PT-tugao-medium": _get_vits_piper,
}
906
-
907
# Romanian models: model key -> loader, looked up by get_pretrained_model().
romanian_models = {
    "csukuangfj/vits-coqui-ro-cv": _get_vits_piper,
    "csukuangfj/vits-piper-ro_RO-mihai-medium": _get_vits_piper,
}
911
-
912
-
913
- slovak_models = {
914
- "csukuangfj/vits-coqui-sk-cv": _get_vits_piper,
915
- "csukuangfj/vits-piper-sk_SK-lili-medium": _get_vits_piper,
916
- }
917
-
918
- serbian_models = {
919
- "csukuangfj/vits-piper-sr_RS-serbski_institut-medium": _get_vits_piper,
920
- }
921
-
922
- swedish_models = {
923
- "csukuangfj/vits-coqui-sv-cv": _get_vits_piper,
924
- "csukuangfj/vits-piper-sv_SE-nst-medium": _get_vits_piper,
925
- }
926
-
927
- swahili_models = {
928
- "csukuangfj/vits-piper-sw_CD-lanfrica-medium": _get_vits_piper,
929
- }
930
-
931
- turkish_models = {
932
- "csukuangfj/vits-piper-tr_TR-dfki-medium": _get_vits_piper,
933
- "csukuangfj/vits-piper-tr_TR-fahrettin-medium": _get_vits_piper,
934
- "csukuangfj/vits-piper-tr_TR-fettah-medium|1 speaker": _get_vits_piper,
935
- }
936
-
937
- vietnamese_models = {
938
- "csukuangfj/vits-piper-vi_VN-25hours_single-low": _get_vits_piper,
939
- "csukuangfj/vits-piper-vi_VN-vais1000-medium": _get_vits_piper,
940
- "csukuangfj/vits-piper-vi_VN-vivos-x_low": _get_vits_piper,
941
- "csukuangfj/vits-mimic3-vi_VN-vais1000_low": _get_vits_piper,
942
- }
943
-
944
- bulgarian_models = {
945
- "csukuangfj/vits-coqui-bg-cv": _get_vits_piper,
946
- }
947
-
948
- estonian_models = {
949
- "csukuangfj/vits-coqui-et-cv": _get_vits_piper,
950
- }
951
-
952
- irish_models = {
953
- "csukuangfj/vits-coqui-ga-cv": _get_vits_piper,
954
- }
955
-
956
- croatian_models = {
957
- "csukuangfj/vits-coqui-hr-cv": _get_vits_piper,
958
- }
959
-
960
- lithuanian_models = {
961
- "csukuangfj/vits-coqui-lt-cv": _get_vits_piper,
962
- }
963
-
964
- latvian_models = {
965
- "csukuangfj/vits-piper-lv_LV-aivars-medium": _get_vits_piper,
966
- "csukuangfj/vits-coqui-lv-cv": _get_vits_piper,
967
- }
968
-
969
- maltese_models = {
970
- "csukuangfj/vits-coqui-mt-cv": _get_vits_piper,
971
- }
972
-
973
- slovenian_models = {
974
- "csukuangfj/vits-piper-sl_SI-artur-medium": _get_vits_piper,
975
- "csukuangfj/vits-coqui-sl-cv": _get_vits_piper,
976
- }
977
-
978
- # Bangla
979
- bengali_models = {
980
- "csukuangfj/vits-coqui-bn-custom_female": _get_vits_piper,
981
- "csukuangfj/vits-mimic3-bn-multi_low": _get_vits_piper,
982
- }
983
-
984
- min_nan_models = {
985
- "csukuangfj/vits-mms-nan": _get_vits_mms,
986
- }
987
-
988
- thai_models = {
989
- "csukuangfj/vits-mms-tha": _get_vits_mms,
990
- }
991
-
992
- persian_models = {
993
- "csukuangfj/vits-piper-fa_IR-amir-medium": _get_vits_piper,
994
- "csukuangfj/vits-piper-fa_IR-gyro-medium": _get_vits_piper,
995
- "csukuangfj/vits-mimic3-fa-haaniye_low": _get_vits_piper,
996
- }
997
-
998
- korean_models = {
999
- "csukuangfj/vits-mimic3-ko_KO-kss_low": _get_vits_piper,
1000
- }
1001
-
1002
-
1003
- afrikaans_models = {
1004
- "csukuangfj/vits-mimic3-af_ZA-google-nwu_low": _get_vits_piper,
1005
- }
1006
-
1007
- gujarati_models = {
1008
- "csukuangfj/vits-mimic3-gu_IN-cmu-indic_low": _get_vits_piper,
1009
- }
1010
-
1011
- tswana_models = {
1012
- "csukuangfj/vits-mimic3-tn_ZA-google-nwu_low": _get_vits_piper,
1013
- }
1014
-
1015
- welsh_models = {
1016
- "csukuangfj/vits-piper-cy_GB-gwryw_gogleddol-medium|1 speaker": _get_vits_piper,
1017
- }
1018
-
1019
- language_to_models = {
1020
- "English": list(english_models.keys()),
1021
- "Chinese (Mandarin, 普通话)": list(chinese_models.keys()),
1022
- "Chinese+English": list(chinese_english_models.keys()),
1023
- "Persian+English": list(persian_english_models.keys()),
1024
- "Cantonese (粤语)": list(cantonese_models.keys()),
1025
- "Min-nan (闽南话)": list(min_nan_models.keys()),
1026
- "Arabic": list(arabic_models.keys()),
1027
- "Afrikaans": list(afrikaans_models.keys()),
1028
- "Bengali": list(bengali_models.keys()),
1029
- "Bulgarian": list(bulgarian_models.keys()),
1030
- "Catalan": list(catalan_models.keys()),
1031
- "Croatian": list(croatian_models.keys()),
1032
- "Czech": list(czech_models.keys()),
1033
- "Danish": list(danish_models.keys()),
1034
- "Dutch": list(dutch_models.keys()),
1035
- "Estonian": list(estonian_models.keys()),
1036
- "Finnish": list(finnish_models.keys()),
1037
- "French": list(french_models.keys()),
1038
- "Georgian": list(georgian_models.keys()),
1039
- "German": list(german_models.keys()),
1040
- "Greek": list(greek_models.keys()),
1041
- "Gujarati": list(gujarati_models.keys()),
1042
- "Hungarian": list(hungarian_models.keys()),
1043
- "Icelandic": list(icelandic_models.keys()),
1044
- "Irish": list(irish_models.keys()),
1045
- "Italian": list(italian_models.keys()),
1046
- "Kazakh": list(kazakh_models.keys()),
1047
- "Korean": list(korean_models.keys()),
1048
- "Latvian": list(latvian_models.keys()),
1049
- "Lithuanian": list(lithuanian_models.keys()),
1050
- "Luxembourgish": list(luxembourgish_models.keys()),
1051
- "Maltese": list(maltese_models.keys()),
1052
- "Nepali": list(nepali_models.keys()),
1053
- "Norwegian": list(norwegian_models.keys()),
1054
- "Persian": list(persian_models.keys()),
1055
- "Polish": list(polish_models.keys()),
1056
- "Portuguese": list(portuguese_models.keys()),
1057
- "Romanian": list(romanian_models.keys()),
1058
- "Russian": list(russian_models.keys()),
1059
- "Serbian": list(serbian_models.keys()),
1060
- "Slovak": list(slovak_models.keys()),
1061
- "Slovenian": list(slovenian_models.keys()),
1062
- "Spanish": list(spanish_models.keys()),
1063
- "Swahili": list(swahili_models.keys()),
1064
- "Swedish": list(swedish_models.keys()),
1065
- "Thai": list(thai_models.keys()),
1066
- "Tswana": list(tswana_models.keys()),
1067
- "Turkish": list(turkish_models.keys()),
1068
- "Ukrainian": list(ukrainian_models.keys()),
1069
- "Vietnamese": list(vietnamese_models.keys()),
1070
- "Welsh": list(welsh_models.keys()),
1071
- }
 
1
+ import os
2
+ from functools import lru_cache
3
+ from pathlib import Path
4
+
5
+ import sherpa_onnx
6
+ from huggingface_hub import hf_hub_download
7
+
8
+
9
def get_file(
    repo_id: str,
    filename: str,
    subfolder: str = ".",
) -> str:
    """Fetch a single file from a Hugging Face Hub repository.

    Args:
      repo_id: Hub repository to download from, e.g. ``csukuangfj/vits-ljs``.
      filename: Name of the file inside the repository.
      subfolder: Folder inside the repository that contains the file.

    Returns:
      The value returned by ``hf_hub_download`` for the requested file.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
20
+
21
+
22
@lru_cache(maxsize=10)
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the VCTK VITS text-to-speech engine.

    Args:
      repo_id: ``csukuangfj/vits-vctk``, optionally followed by a
        ``|<speaker info>`` display suffix (the form used as the key in
        ``english_models``).
      speed: Speech rate. The model's ``length_scale`` is ``1.0 / speed``,
        so it must be non-zero.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    # The registry passes its key verbatim ("csukuangfj/vits-vctk|109 speakers").
    # Drop the display-only suffix so that the check below and the Hub
    # downloads see the real repository id, as the other loaders do.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-vctk", repo_id

    model = get_file(
        repo_id=repo_id,
        filename="vits-vctk.onnx",
        subfolder=".",
    )

    lexicon = get_file(
        repo_id=repo_id,
        filename="lexicon.txt",
        subfolder=".",
    )

    tokens = get_file(
        repo_id=repo_id,
        filename="tokens.txt",
        subfolder=".",
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            # Left at its defaults; only the VITS part is populated.
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
62
+
63
+
64
@lru_cache(maxsize=10)
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the LJSpeech VITS text-to-speech engine.

    Args:
      repo_id: ``csukuangfj/vits-ljs``, optionally followed by a
        ``|<speaker info>`` display suffix (the form used as the key in
        ``english_models``).
      speed: Speech rate. The model's ``length_scale`` is ``1.0 / speed``,
        so it must be non-zero.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    # The registry passes its key verbatim ("csukuangfj/vits-ljs|1 speaker").
    # Drop the display-only suffix so that the check below and the Hub
    # downloads see the real repository id, as the other loaders do.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-ljs", repo_id

    model = get_file(
        repo_id=repo_id,
        filename="vits-ljs.onnx",
        subfolder=".",
    )

    lexicon = get_file(
        repo_id=repo_id,
        filename="lexicon.txt",
        subfolder=".",
    )

    tokens = get_file(
        repo_id=repo_id,
        filename="tokens.txt",
        subfolder=".",
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            # Left at its defaults; only the VITS part is populated.
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
104
+
105
+
106
@lru_cache(maxsize=10)
def _get_kokoro(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a Kokoro text-to-speech engine.

    Args:
      repo_id: One of the supported Kokoro repositories, optionally followed
        by a ``|<speaker info>`` display suffix, which is discarded.
      speed: Speech rate; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id in (
        "csukuangfj/kokoro-en-v0_19",
        "csukuangfj/kokoro-multi-lang-v1_0",
        "csukuangfj/kokoro-multi-lang-v1_1",
    ), repo_id

    def fetch(filename: str) -> str:
        # Every asset lives at the top level of the model repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    model = fetch("model.onnx")
    tokens = fetch("tokens.txt")
    voices = fetch("voices.bin")

    # The English-only model needs none of the extras below.
    lexicon = ""
    rule_fsts = ""
    dict_dir = ""

    is_multi_lang = repo_id in (
        "csukuangfj/kokoro-multi-lang-v1_0",
        "csukuangfj/kokoro-multi-lang-v1_1",
    )
    if is_multi_lang:
        lexicon_en = fetch("lexicon-us-en.txt")
        lexicon_zh = fetch("lexicon-zh.txt")
        lexicon = f"{lexicon_en},{lexicon_zh}"

        date_zh = fetch("date-zh.fst")
        number_zh = fetch("number-zh.fst")
        phone_zh = fetch("phone-zh.fst")
        # Passed as: date, phone, number.
        rule_fsts = f"{date_zh},{phone_zh},{number_zh}"

        # NOTE(review): this function never creates /tmp/dict; presumably it
        # is populated by another loader or at startup -- confirm.
        dict_dir = "/tmp/dict"

    # NOTE(review): /tmp/espeak-ng-data is not created here either;
    # presumably provisioned elsewhere -- confirm.
    kokoro_config = sherpa_onnx.OfflineTtsKokoroModelConfig(
        model=model,
        voices=voices,
        tokens=tokens,
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
        lexicon=lexicon,
        dict_dir=dict_dir,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        kokoro=kokoro_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
        rule_fsts=rule_fsts,
    )

    return sherpa_onnx.OfflineTts(tts_config)
194
+
195
+
196
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a VITS engine for piper, coqui, mimic3 or MMS model repositories.

    The ONNX file name is derived from the repository id:

    * ids containing ``coqui`` or ``vits-mms`` use ``model.onnx``;
    * ids containing ``piper`` / ``mimic3`` use the repository name with the
      first ``len("vits-piper-")`` / ``len("vits-mimic3-")`` characters removed.

    Args:
      repo_id: Repository id, optionally followed by a ``|<speaker info>``
        display suffix, which is discarded.
      speed: Speech rate; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.

    Raises:
      ValueError: If the repository id matches none of the known families.
    """
    repo_id = repo_id.split("|")[0]

    if "coqui" in repo_id or "vits-mms" in repo_id:
        stem = "model"
    else:
        # (marker searched for in the id, prefix whose length is cut off)
        families = (
            ("piper", "vits-piper-"),
            ("mimic3", "vits-mimic3-"),
        )
        for marker, prefix in families:
            if marker in repo_id:
                stem = repo_id.split("/")[1][len(prefix):]
                break
        else:
            raise ValueError(f"Unsupported {repo_id}")

    # These two families are given an empty data dir instead of the
    # espeak-ng directory.
    no_data_dir = "vits-coqui-uk-mai" in repo_id or "vits-mms" in repo_id
    data_dir = "" if no_data_dir else "/tmp/espeak-ng-data"

    model = get_file(repo_id=repo_id, filename=f"{stem}.onnx", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon="",
        data_dir=data_dir,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )

    return sherpa_onnx.OfflineTts(tts_config)
246
+
247
+
248
@lru_cache(maxsize=10)
def _get_vits_mms(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build an MMS VITS engine by delegating to ``_get_vits_piper``.

    Args:
      repo_id: Repository id forwarded unchanged to ``_get_vits_piper``.
      speed: Speech rate forwarded unchanged to ``_get_vits_piper``.

    Returns:
      The engine produced by ``_get_vits_piper``.
    """
    engine = _get_vits_piper(repo_id, speed)
    return engine
251
+
252
+
253
@lru_cache(maxsize=10)
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the aishell3 Chinese VITS engine.

    Args:
      repo_id: ``csukuangfj/vits-zh-aishell3``, optionally followed by a
        ``|<speaker info>`` display suffix, which is discarded.
      speed: Speech rate; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-zh-aishell3", repo_id

    def fetch(filename: str) -> str:
        # Every asset lives at the top level of the model repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    model = fetch("vits-aishell3.onnx")
    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    # Downloaded in this order and handed over as one comma-separated string.
    fst_paths = []
    for fst_name in ("phone.fst", "date.fst", "number.fst", "new_heteronym.fst"):
        fst_paths.append(fetch(fst_name))
    rule_fsts = ",".join(fst_paths)

    rule_fars = fetch("rule.far")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars=rule_fars,
        max_num_sentences=1,
    )

    return sherpa_onnx.OfflineTts(tts_config)
314
+
315
+
316
@lru_cache(maxsize=10)
def _get_matcha_hf_espeak(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a Matcha-TTS engine that is configured with an espeak-ng data dir.

    Args:
      repo_id: One of the supported ``matcha-tts-fa_en-*`` repositories,
        optionally followed by a ``|<speaker info>`` display suffix, which
        is discarded.
      speed: Speech rate; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    repo_id = repo_id.split("|")[0]
    supported = (
        "csukuangfj/matcha-tts-fa_en-khadijah",
        "csukuangfj/matcha-tts-fa_en-musa",
    )
    assert repo_id in supported, repo_id

    acoustic_model = get_file(repo_id=repo_id, filename="model.onnx", subfolder=".")

    # The vocoder comes from a separate repository, not from ``repo_id``.
    vocoder = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )

    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model,
        vocoder=vocoder,
        tokens=tokens,
        lexicon="",
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        # Left at its defaults; only the Matcha part is populated.
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )

    return sherpa_onnx.OfflineTts(tts_config)
363
+
364
+
365
@lru_cache(maxsize=10)
def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the Matcha-TTS engine for ``csukuangfj/matcha-icefall-zh-baker``.

    If ``/tmp/dict`` is not a directory, a shell command is run that
    downloads ``dict.tar.bz2`` into ``/tmp`` and unpacks it there.

    Args:
      repo_id: ``csukuangfj/matcha-icefall-zh-baker``, optionally followed by
        a ``|<speaker info>`` display suffix, which is discarded.
      speed: Speech rate; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id in ("csukuangfj/matcha-icefall-zh-baker",), repo_id

    if repo_id == "csukuangfj/matcha-icefall-zh-baker":
        acoustic_model_name = "model-steps-3.onnx"

    dict_dir = "/tmp/dict"
    if not Path(dict_dir).is_dir():
        # Exit statuses of the shell commands are not inspected.
        os.system(
            "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
        )
        os.system("ls -lh /tmp/dict")

    def fetch(filename: str) -> str:
        # Every model asset lives at the top level of the model repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    acoustic_model = fetch(acoustic_model_name)

    # The vocoder comes from a separate repository, not from ``repo_id``.
    vocoder = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )

    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    fst_paths = []
    for fst_name in ("phone.fst", "date.fst", "number.fst"):
        fst_paths.append(fetch(fst_name))

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model,
        vocoder=vocoder,
        lexicon=lexicon,
        tokens=tokens,
        dict_dir=dict_dir,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        # Left at its defaults; only the Matcha part is populated.
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=",".join(fst_paths),
        rule_fars="",
        max_num_sentences=1,
    )

    return sherpa_onnx.OfflineTts(tts_config)
441
+
442
+
443
@lru_cache(maxsize=10)
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a VITS engine for the Chinese / Cantonese / MeloTTS repositories.

    The ONNX file stem is derived from the repository id:

    * ids containing ``sherpa-onnx-vits-zh-ll``, and the id
      ``csukuangfj/vits-melo-tts-zh_en``, use ``model``;
    * ids containing ``fanchen`` or ``vits-cantonese-hf-xiaomaiiwn`` use the
      part after the last ``/``;
    * everything else uses the part after the last ``-``.

    If ``/tmp/dict`` is not a directory, a shell command is run that
    downloads ``dict.tar.bz2`` into ``/tmp`` and unpacks it there.

    Args:
      repo_id: Repository id, optionally followed by a ``|<speaker info>``
        display suffix, which is discarded.
      speed: Speech rate; ``length_scale`` is set to ``1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured for CPU inference.
    """
    repo_id = repo_id.split("|")[0]
    is_cantonese = "vits-cantonese-hf-xiaomaiiwn" in repo_id

    # The zh-ll check comes first because, in the original ordering, it
    # overrides whatever the other rules selected.
    if "sherpa-onnx-vits-zh-ll" in repo_id:
        stem = "model"
    elif "fanchen" in repo_id or is_cantonese:
        stem = repo_id.split("/")[-1]
    elif repo_id == "csukuangfj/vits-melo-tts-zh_en":
        stem = "model"
    else:
        stem = repo_id.split("-")[-1]

    # Done for every repository, including the one that ends up with an
    # empty dict dir below. Exit statuses are not inspected.
    if not Path("/tmp/dict").is_dir():
        os.system(
            "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
        )
        os.system("ls -lh /tmp/dict")

    def fetch(filename: str) -> str:
        # Every asset lives at the top level of the model repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    model = fetch(f"{stem}.onnx")
    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    if is_cantonese:
        # The Cantonese repository provides a single rule.fst and is given
        # no dict dir.
        rule_fsts = fetch("rule.fst")
        vits_dict_dir = ""
    else:
        fst_paths = []
        for fst_name in ("phone.fst", "date.fst", "number.fst"):
            fst_paths.append(fetch(fst_name))
        rule_fsts = ",".join(fst_paths)
        vits_dict_dir = "/tmp/dict"

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        dict_dir=vits_dict_dir,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )

    return sherpa_onnx.OfflineTts(tts_config)
531
+
532
+
533
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Return the TTS engine registered for ``repo_id``.

    The per-language registries are searched in a fixed order; the first one
    that contains ``repo_id`` as a key supplies the loader, which is called
    as ``loader(repo_id, speed)``.

    Args:
      repo_id: A key of one of the ``*_models`` registries, passed to the
        loader unchanged.
      speed: Speech rate forwarded to the loader.

    Returns:
      The engine built by the matching loader.

    Raises:
      ValueError: If no registry contains ``repo_id``.
    """
    # Built at call time because the registries are defined further down
    # the module. The order is the lookup priority.
    registries = (
        chinese_models,
        chinese_english_models,
        persian_english_models,
        cantonese_models,
        english_models,
        german_models,
        spanish_models,
        french_models,
        ukrainian_models,
        russian_models,
        arabic_models,
        catalan_models,
        czech_models,
        danish_models,
        greek_models,
        finnish_models,
        hungarian_models,
        icelandic_models,
        italian_models,
        georgian_models,
        kazakh_models,
        luxembourgish_models,
        nepali_models,
        dutch_models,
        norwegian_models,
        polish_models,
        portuguese_models,
        romanian_models,
        slovak_models,
        serbian_models,
        swedish_models,
        swahili_models,
        turkish_models,
        vietnamese_models,
        bulgarian_models,
        estonian_models,
        irish_models,
        croatian_models,
        lithuanian_models,
        latvian_models,
        maltese_models,
        slovenian_models,
        bengali_models,
        min_nan_models,
        thai_models,
        persian_models,
        korean_models,
        afrikaans_models,
        gujarati_models,
        tswana_models,
        welsh_models,
    )

    for registry in registries:
        if repo_id in registry:
            loader = registry[repo_id]
            return loader(repo_id, speed)

    raise ValueError(f"Unsupported repo_id: {repo_id}")
639
+
640
+
641
# Cantonese voices: maps a selectable model id to the loader that builds it.
# get_pretrained_model() calls the loader as loader(key, speed).
cantonese_models = {
    "csukuangfj/vits-cantonese-hf-xiaomaiiwn": _get_vits_hf,
}
644
+
645
# Bilingual Chinese+English voices. Keys have the form
# "<repo_id>|<speaker info>"; the loaders keep only the part before "|".
chinese_english_models = {
    "csukuangfj/kokoro-multi-lang-v1_1|103 speakers": _get_kokoro,
    "csukuangfj/kokoro-multi-lang-v1_0|53 speakers": _get_kokoro,
    # NOTE(review): the suffix here is just "1" rather than "1 speaker".
    "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf,  # 1
}
650
+
651
# Bilingual Persian+English voices. Keys have the form
# "<repo_id>|<speaker info>"; the loaders keep only the part before "|".
persian_english_models = {
    "csukuangfj/matcha-tts-fa_en-khadijah|1 speaker": _get_matcha_hf_espeak,  # 1
    "csukuangfj/matcha-tts-fa_en-musa|1 speaker": _get_matcha_hf_espeak,  # 1
    # NOTE(review): the suffix here is just "1" rather than "1 speaker".
    "csukuangfj/vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium|1": _get_vits_piper,  # 1
}
656
+
657
# Mandarin Chinese voices. Keys have the form "<repo_id>|<speaker info>";
# the loaders keep only the part before "|". The trailing comment on each
# entry repeats the speaker count.
chinese_models = {
    "csukuangfj/matcha-icefall-zh-baker|1 speaker": _get_matcha_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-wnj|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-C|187 speakers": _get_vits_hf,  # 187
    "csukuangfj/sherpa-onnx-vits-zh-ll|5 speakers": _get_vits_hf,  # 5
    "csukuangfj/vits-zh-hf-keqing|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-theresa|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-eula|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-echo|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-bronya|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-doom|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-zenyatta|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-abyssinvoker|804 speakers": _get_vits_hf,  # 804
    "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe_new|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-hf-fanchen-unity|1 speaker": _get_vits_hf,  # 1
    "csukuangfj/vits-zh-aishell3|174 speakers": _get_vits_zh_aishell3,
    "csukuangfj/vits-piper-zh_CN-huayan-medium|1 speaker": _get_vits_piper,
    # Disabled entry:
    # "csukuangfj/vits-piper-zh_CN-huayan-x_low": _get_vits_piper,
}
677
+
678
# English voices. Keys have the form "<repo_id>|<speaker info>" and are passed
# verbatim to the loader by get_pretrained_model().
english_models = {
    "csukuangfj/kokoro-en-v0_19|11 speakers": _get_kokoro,
    "csukuangfj/vits-piper-en_US-glados-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-glados|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-southern_english_male-medium|8 speakers": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-southern_english_female-medium|6 speakers": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-bryce-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-john-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-norman-medium|1 speaker": _get_vits_piper,
    # coqui-ai
    "csukuangfj/vits-coqui-en-ljspeech|1 speaker": _get_vits_piper,
    "csukuangfj/vits-coqui-en-ljspeech-neon|1 speaker": _get_vits_piper,
    "csukuangfj/vits-coqui-en-vctk|109 speakers": _get_vits_piper,
    # piper, US (the first entry below is an en_GB voice)
    "csukuangfj/vits-piper-en_GB-sweetbbak-amy|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-amy-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-amy-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-arctic-medium|18 speakers": _get_vits_piper,  # 18 speakers
    "csukuangfj/vits-piper-en_US-danny-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-hfc_male-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-hfc_female-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-joe-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-kathleen-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-kusal-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-l2arctic-medium|24 speakers": _get_vits_piper,  # 24 speakers
    "csukuangfj/vits-piper-en_US-lessac-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-lessac-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-lessac-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-libritts-high|904 speakers": _get_vits_piper,  # 904 speakers
    "csukuangfj/vits-piper-en_US-libritts_r-medium|904 speakers": _get_vits_piper,  # 904 speakers
    "csukuangfj/vits-piper-en_US-ljspeech-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ljspeech-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-medium|1 speaker": _get_vits_piper,
    # piper, GB
    "csukuangfj/vits-piper-en_GB-alan-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-alan-medium|1 speaker": _get_vits_piper,
    # NOTE(review): same repository as the entry above but without the
    # "|1 speaker" suffix, so it is listed twice -- confirm this is intended.
    "csukuangfj/vits-piper-en_GB-alan-medium": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-cori-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-cori-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-jenny_dioco-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-northern_english_male-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-semaine-medium|4 speakers": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-southern_english_female-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-vctk-medium|109 speakers": _get_vits_piper,
    # Models with dedicated loaders
    "csukuangfj/vits-vctk|109 speakers": _get_vits_vctk,  # 109 speakers
    "csukuangfj/vits-ljs|1 speaker": _get_vits_ljs,
}
728
+
729
# German voices. Keys have the form "<repo_id>|<speaker info>"; the loader
# keeps only the part before "|".
german_models = {
    "csukuangfj/vits-piper-de_DE-glados-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-glados-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-glados-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-coqui-de-css10|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-eva_k-x_low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-karlsson-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-kerstin-low|1 speaker": _get_vits_piper,
    # Disabled entry:
    # "csukuangfj/vits-piper-de_DE-mls-medium": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-pavoque-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-ramona-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten-low|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-de_DE-thorsten_emotional-medium|8 speakers": _get_vits_piper,  # 8 speakers
}
745
+
746
# Spanish voices: maps a model repository id to the loader that builds it.
# Entries that are commented out are disabled.
spanish_models = {
    # "csukuangfj/vits-coqui-es-css10": _get_vits_piper,
    "csukuangfj/vits-piper-es-glados-medium": _get_vits_piper,
    "csukuangfj/vits-piper-es_ES-carlfm-x_low": _get_vits_piper,
    "csukuangfj/vits-piper-es_ES-davefx-medium": _get_vits_piper,
    # "csukuangfj/vits-piper-es_ES-mls_10246-low": _get_vits_piper,
    # "csukuangfj/vits-piper-es_ES-mls_9972-low": _get_vits_piper,
    "csukuangfj/vits-piper-es_ES-sharvard-medium": _get_vits_piper,  # 2 speakers
    "csukuangfj/vits-piper-es_MX-ald-medium": _get_vits_piper,
    "csukuangfj/vits-piper-es_MX-claude-high": _get_vits_piper,
    "csukuangfj/vits-mimic3-es_ES-m-ailabs_low": _get_vits_piper,
}
758
+
759
# French voices: maps a model id to the loader that builds it.
# Entries that are commented out are disabled.
french_models = {
    "csukuangfj/vits-coqui-fr-css10": _get_vits_piper,
    # "csukuangfj/vits-piper-fr_FR-gilles-low": _get_vits_piper,
    # "csukuangfj/vits-piper-fr_FR-mls_1840-low": _get_vits_piper,
    # "csukuangfj/vits-piper-fr_FR-mls-medium": _get_vits_piper,  # 2 speakers, 0-female, 1-male
    "csukuangfj/vits-piper-fr_FR-upmc-medium": _get_vits_piper,  # 2 speakers, 0-female, 1-male
    # NOTE(review): this comment previously read "2 speakers, 0-female,
    # 1-male", which contradicts the "|1 speaker" suffix in the key -- confirm.
    "csukuangfj/vits-piper-fr_FR-tom-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-siwis-low": _get_vits_piper,  # female
    "csukuangfj/vits-piper-fr_FR-siwis-medium": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-tjiho-model1": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-tjiho-model2": _get_vits_piper,
    "csukuangfj/vits-piper-fr_FR-tjiho-model3": _get_vits_piper,
}
772
+
773
# Ukrainian voices: maps a model repository id to the loader that builds it.
ukrainian_models = {
    "csukuangfj/vits-piper-uk_UA-lada-x_low": _get_vits_piper,
    "csukuangfj/vits-coqui-uk-mai": _get_vits_piper,
    # Disabled entry:
    # "csukuangfj/vits-piper-uk_UA-ukrainian_tts-medium": _get_vits_piper, # does not work somehow
}
778
+
779
# Russian model ids; every id is mapped to `_get_vits_piper`.
russian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ru_RU-denis-medium",
        "csukuangfj/vits-piper-ru_RU-dmitri-medium",
        "csukuangfj/vits-piper-ru_RU-irina-medium",
        "csukuangfj/vits-piper-ru_RU-ruslan-medium",
    ],
    _get_vits_piper,
)
785
+
786
# Arabic model ids; every id is mapped to `_get_vits_piper`.
arabic_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ar_JO-kareem-low",
        "csukuangfj/vits-piper-ar_JO-kareem-medium",
    ],
    _get_vits_piper,
)
790
+
791
# Catalan model ids; every id is mapped to `_get_vits_piper`.
catalan_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ca_ES-upc_ona-x_low",
        "csukuangfj/vits-piper-ca_ES-upc_ona-medium",
        "csukuangfj/vits-piper-ca_ES-upc_pau-x_low",
    ],
    _get_vits_piper,
)
796
+
797
# Czech model ids; every id is mapped to `_get_vits_piper`.
czech_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-cs_CZ-jirka-low",
        "csukuangfj/vits-piper-cs_CZ-jirka-medium",
        "csukuangfj/vits-coqui-cs-cv",
    ],
    _get_vits_piper,
)
802
+
803
# Danish model ids; every id is mapped to `_get_vits_piper`.
danish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-da-cv",
        "csukuangfj/vits-piper-da_DK-talesyntese-medium",
    ],
    _get_vits_piper,
)
807
+
808
# Greek model ids; every id is mapped to `_get_vits_piper`.
greek_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-el_GR-rapunzelina-low",
        # not registered: "csukuangfj/vits-mimic3-el_GR-rapunzelina_low"
    ],
    _get_vits_piper,
)
812
+
813
# Finnish model ids; every id is mapped to `_get_vits_piper`.
finnish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-fi-css10",
        "csukuangfj/vits-piper-fi_FI-harri-low",
        "csukuangfj/vits-piper-fi_FI-harri-medium",
        "csukuangfj/vits-mimic3-fi_FI-harri-tapani-ylilammi_low",
    ],
    _get_vits_piper,
)
819
+
820
# Hungarian model ids; every id is mapped to `_get_vits_piper`.
hungarian_models = dict.fromkeys(
    [
        # not registered: "csukuangfj/vits-coqui-hu-css10"
        "csukuangfj/vits-piper-hu_HU-anna-medium",
        "csukuangfj/vits-piper-hu_HU-berta-medium",
        "csukuangfj/vits-piper-hu_HU-imre-medium",
        "csukuangfj/vits-mimic3-hu_HU-diana-majlinger_low",
    ],
    _get_vits_piper,
)
827
+
828
# Icelandic model ids; every id is mapped to `_get_vits_piper`.
icelandic_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-is_IS-bui-medium",
        "csukuangfj/vits-piper-is_IS-salka-medium",
        "csukuangfj/vits-piper-is_IS-steinn-medium",
        "csukuangfj/vits-piper-is_IS-ugla-medium",
    ],
    _get_vits_piper,
)
834
+
835
# Italian model ids; every id is mapped to `_get_vits_piper`.
italian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-it_IT-riccardo-x_low",
        "csukuangfj/vits-piper-it_IT-paola-medium",
    ],
    _get_vits_piper,
)
839
+
840
# Georgian model ids; every id is mapped to `_get_vits_piper`.
georgian_models = dict.fromkeys(
    ["csukuangfj/vits-piper-ka_GE-natia-medium"],
    _get_vits_piper,
)
843
+
844
# Kazakh model ids; every id is mapped to `_get_vits_piper`.
kazakh_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-kk_KZ-iseke-x_low",
        "csukuangfj/vits-piper-kk_KZ-issai-high",
        "csukuangfj/vits-piper-kk_KZ-raya-x_low",
    ],
    _get_vits_piper,
)
849
+
850
# Luxembourgish model ids; every id is mapped to `_get_vits_piper`.
luxembourgish_models = dict.fromkeys(
    ["csukuangfj/vits-piper-lb_LU-marylux-medium"],
    _get_vits_piper,
)
853
+
854
# Nepali model ids; every id is mapped to `_get_vits_piper`.
nepali_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ne_NP-google-medium",
        "csukuangfj/vits-piper-ne_NP-google-x_low",
        "csukuangfj/vits-mimic3-ne_NP-ne-google_low",
    ],
    _get_vits_piper,
)
859
+
860
# Dutch model ids; every id is mapped to `_get_vits_piper`.
dutch_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-nl-css10",
        "csukuangfj/vits-piper-nl_BE-nathalie-medium",
        "csukuangfj/vits-piper-nl_BE-nathalie-x_low",
        "csukuangfj/vits-piper-nl_BE-rdh-medium",
        "csukuangfj/vits-piper-nl_BE-rdh-x_low",
        # not registered: "csukuangfj/vits-piper-nl_NL-mls-medium"
        # not registered: "csukuangfj/vits-piper-nl_NL-mls_5809-low"
        # not registered: "csukuangfj/vits-piper-nl_NL-mls_7432-low"
    ],
    _get_vits_piper,
)
870
+
871
# Norwegian model ids; every id is mapped to `_get_vits_piper`.
norwegian_models = dict.fromkeys(
    ["csukuangfj/vits-piper-no_NO-talesyntese-medium"],
    _get_vits_piper,
)
874
+
875
# Polish model ids; every id is mapped to `_get_vits_piper`.
polish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-pl-mai_female",
        "csukuangfj/vits-piper-pl_PL-darkman-medium",
        "csukuangfj/vits-piper-pl_PL-gosia-medium",
        "csukuangfj/vits-piper-pl_PL-mc_speech-medium",
        # not registered: "csukuangfj/vits-piper-pl_PL-mls_6892-low"
        "csukuangfj/vits-mimic3-pl_PL-m-ailabs_low",
    ],
    _get_vits_piper,
)
883
+
884
# Portuguese model ids; every id is mapped to `_get_vits_piper`.
portuguese_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-pt-cv",
        "csukuangfj/vits-piper-pt_BR-edresson-low",
        "csukuangfj/vits-piper-pt_BR-faber-medium",
        "csukuangfj/vits-piper-pt_PT-tugao-medium",
    ],
    _get_vits_piper,
)
890
+
891
# Romanian model ids; every id is mapped to `_get_vits_piper`.
romanian_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-ro-cv",
        "csukuangfj/vits-piper-ro_RO-mihai-medium",
    ],
    _get_vits_piper,
)
895
+
896
+
897
# Slovak model ids; every id is mapped to `_get_vits_piper`.
slovak_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-sk-cv",
        "csukuangfj/vits-piper-sk_SK-lili-medium",
    ],
    _get_vits_piper,
)
901
+
902
# Serbian model ids; every id is mapped to `_get_vits_piper`.
serbian_models = dict.fromkeys(
    ["csukuangfj/vits-piper-sr_RS-serbski_institut-medium"],
    _get_vits_piper,
)
905
+
906
# Swedish model ids; every id is mapped to `_get_vits_piper`.
swedish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-sv-cv",
        "csukuangfj/vits-piper-sv_SE-nst-medium",
    ],
    _get_vits_piper,
)
910
+
911
# Swahili model ids; every id is mapped to `_get_vits_piper`.
swahili_models = dict.fromkeys(
    ["csukuangfj/vits-piper-sw_CD-lanfrica-medium"],
    _get_vits_piper,
)
914
+
915
# Turkish model ids; every id is mapped to `_get_vits_piper`.
turkish_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-tr_TR-dfki-medium",
        "csukuangfj/vits-piper-tr_TR-fahrettin-medium",
        "csukuangfj/vits-piper-tr_TR-fettah-medium|1 speaker",
    ],
    _get_vits_piper,
)
920
+
921
# Vietnamese model ids; every id is mapped to `_get_vits_piper`.
vietnamese_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-vi_VN-25hours_single-low",
        "csukuangfj/vits-piper-vi_VN-vais1000-medium",
        "csukuangfj/vits-piper-vi_VN-vivos-x_low",
        "csukuangfj/vits-mimic3-vi_VN-vais1000_low",
    ],
    _get_vits_piper,
)
927
+
928
# Bulgarian model ids; every id is mapped to `_get_vits_piper`.
bulgarian_models = dict.fromkeys(
    ["csukuangfj/vits-coqui-bg-cv"],
    _get_vits_piper,
)
931
+
932
# Estonian model ids; every id is mapped to `_get_vits_piper`.
estonian_models = dict.fromkeys(
    ["csukuangfj/vits-coqui-et-cv"],
    _get_vits_piper,
)
935
+
936
# Irish model ids; every id is mapped to `_get_vits_piper`.
irish_models = dict.fromkeys(
    ["csukuangfj/vits-coqui-ga-cv"],
    _get_vits_piper,
)
939
+
940
# Croatian model ids; every id is mapped to `_get_vits_piper`.
croatian_models = dict.fromkeys(
    ["csukuangfj/vits-coqui-hr-cv"],
    _get_vits_piper,
)
943
+
944
# Lithuanian model ids; every id is mapped to `_get_vits_piper`.
lithuanian_models = dict.fromkeys(
    ["csukuangfj/vits-coqui-lt-cv"],
    _get_vits_piper,
)
947
+
948
# Latvian model ids; every id is mapped to `_get_vits_piper`.
latvian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-lv_LV-aivars-medium",
        "csukuangfj/vits-coqui-lv-cv",
    ],
    _get_vits_piper,
)
952
+
953
# Maltese model ids; every id is mapped to `_get_vits_piper`.
maltese_models = dict.fromkeys(
    ["csukuangfj/vits-coqui-mt-cv"],
    _get_vits_piper,
)
956
+
957
# Slovenian model ids; every id is mapped to `_get_vits_piper`.
slovenian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-sl_SI-artur-medium",
        "csukuangfj/vits-coqui-sl-cv",
    ],
    _get_vits_piper,
)
961
+
962
# Bengali (Bangla) model ids; every id is mapped to `_get_vits_piper`.
bengali_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-bn-custom_female",
        "csukuangfj/vits-mimic3-bn-multi_low",
    ],
    _get_vits_piper,
)
967
+
968
# Min-nan model ids; unlike most registries here, these map to `_get_vits_mms`.
min_nan_models = dict.fromkeys(
    ["csukuangfj/vits-mms-nan"],
    _get_vits_mms,
)
971
+
972
# Thai model ids; unlike most registries here, these map to `_get_vits_mms`.
thai_models = dict.fromkeys(
    ["csukuangfj/vits-mms-tha"],
    _get_vits_mms,
)
975
+
976
# Persian model ids; every id is mapped to `_get_vits_piper`.
persian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-fa_IR-amir-medium",
        "csukuangfj/vits-piper-fa_IR-gyro-medium",
        "csukuangfj/vits-mimic3-fa-haaniye_low",
    ],
    _get_vits_piper,
)
981
+
982
# Korean model ids; every id is mapped to `_get_vits_piper`.
korean_models = dict.fromkeys(
    ["csukuangfj/vits-mimic3-ko_KO-kss_low"],
    _get_vits_piper,
)
985
+
986
+
987
# Afrikaans model ids; every id is mapped to `_get_vits_piper`.
afrikaans_models = dict.fromkeys(
    ["csukuangfj/vits-mimic3-af_ZA-google-nwu_low"],
    _get_vits_piper,
)
990
+
991
# Gujarati model ids; every id is mapped to `_get_vits_piper`.
gujarati_models = dict.fromkeys(
    ["csukuangfj/vits-mimic3-gu_IN-cmu-indic_low"],
    _get_vits_piper,
)
994
+
995
# Tswana model ids; every id is mapped to `_get_vits_piper`.
tswana_models = dict.fromkeys(
    ["csukuangfj/vits-mimic3-tn_ZA-google-nwu_low"],
    _get_vits_piper,
)
998
+
999
# Welsh model ids; every id is mapped to `_get_vits_piper`.
welsh_models = dict.fromkeys(
    ["csukuangfj/vits-piper-cy_GB-gwryw_gogleddol-medium|1 speaker"],
    _get_vits_piper,
)
1002
+
1003
# Language label -> list of the model ids (dict keys) registered for that
# language. The (label, registry) table below fixes the insertion order of
# the resulting dict; each list is a snapshot taken when this statement runs.
language_to_models = {
    label: list(registry.keys())
    for label, registry in (
        ("English", english_models),
        ("Chinese (Mandarin, 普通话)", chinese_models),
        ("Chinese+English", chinese_english_models),
        ("Persian+English", persian_english_models),
        ("Cantonese (粤语)", cantonese_models),
        ("Min-nan (闽南话)", min_nan_models),
        ("Arabic", arabic_models),
        ("Afrikaans", afrikaans_models),
        ("Bengali", bengali_models),
        ("Bulgarian", bulgarian_models),
        ("Catalan", catalan_models),
        ("Croatian", croatian_models),
        ("Czech", czech_models),
        ("Danish", danish_models),
        ("Dutch", dutch_models),
        ("Estonian", estonian_models),
        ("Finnish", finnish_models),
        ("French", french_models),
        ("Georgian", georgian_models),
        ("German", german_models),
        ("Greek", greek_models),
        ("Gujarati", gujarati_models),
        ("Hungarian", hungarian_models),
        ("Icelandic", icelandic_models),
        ("Irish", irish_models),
        ("Italian", italian_models),
        ("Kazakh", kazakh_models),
        ("Korean", korean_models),
        ("Latvian", latvian_models),
        ("Lithuanian", lithuanian_models),
        ("Luxembourgish", luxembourgish_models),
        ("Maltese", maltese_models),
        ("Nepali", nepali_models),
        ("Norwegian", norwegian_models),
        ("Persian", persian_models),
        ("Polish", polish_models),
        ("Portuguese", portuguese_models),
        ("Romanian", romanian_models),
        ("Russian", russian_models),
        ("Serbian", serbian_models),
        ("Slovak", slovak_models),
        ("Slovenian", slovenian_models),
        ("Spanish", spanish_models),
        ("Swahili", swahili_models),
        ("Swedish", swedish_models),
        ("Thai", thai_models),
        ("Tswana", tswana_models),
        ("Turkish", turkish_models),
        ("Ukrainian", ukrainian_models),
        ("Vietnamese", vietnamese_models),
        ("Welsh", welsh_models),
    )
}