hausa_english_blend / vocab_extension_info.json
thiomajid's picture
Upload folder using huggingface_hub
6ff3d25 verified
{
"base_tokenizer": "HuggingFaceTB/SmolLM2-135M",
"data_splits": [
{
"dataset_url": "thiomajid/english_hausa_mix",
"text_column": "text",
"split": "train",
"samples": "all",
"subset": null
},
{
"dataset_url": "thiomajid/english_hausa_mix",
"text_column": "text",
"split": "test",
"samples": "all",
"subset": null
}
],
"text_column": "text",
"base_output_dir": "./base_tokenizer",
"vocab_size": 64000,
"output_dir": "./merged_tokenizer",
"batch_size": 1000,
"hub_id": "thiomajid/hausa_english_blend"
}