{
"model_type": "vui",
"library_name": "vui",
"pipeline_tag": "text-to-speech",
"license": "mit",
"language": ["en"],
"architectures": ["VuiForConditionalGeneration"],
"model_files": {
"base": "vui-100m-base.pt",
"abraham": "vui-abraham-100m.pt",
"cohost": "vui-cohost-100m.pt",
"cohost_alt": "ckpts-vui-cohost-100m.pt",
"tokenizer": "fluac-22hz-22khz.pt"
},
"model_variants": {
"vui-100m-base": {
"description": "Base checkpoint trained on 40k hours of audio conversations",
"file": "vui-100m-base.pt",
"size_mb": 198
},
"vui-abraham-100m": {
"description": "Single speaker model with context awareness",
"file": "vui-abraham-100m.pt",
"size_mb": 198
},
"vui-cohost-100m": {
"description": "Two speakers that can interact with each other",
"file": "vui-cohost-100m.pt",
"size_mb": 198
}
},
"tokenizer_config": {
"audio_tokenizer": "fluac",
"sample_rate": "22khz",
"file": "fluac-22hz-22khz.pt",
"size_mb": 307
},
"training_data": {
"hours": 40000,
"type": "audio_conversations"
},
"capabilities": [
"text-to-speech",
"conversational-speech",
"voice-cloning",
"on-device-inference"
],
"torch_dtype": "float32",
"framework": "pytorch"
}