manaestras committed on
Commit
dca87ad
·
verified ·
1 Parent(s): 77e56c6

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +17 -0
tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GPT2LMHeadModel"
4
+ ],
5
+ "model_max_length": 1048576,
6
+ "tokenizer_class": "HYTokenizer",
7
+ "auto_map": {
8
+ "AutoTokenizer": [
9
+ "tokenization_hy.HYTokenizer",
10
+ null
11
+ ]
12
+ },
13
+ "model_type": "gpt2",
14
+ "additional_special_tokens": ["<|startoftext|>", "<|extra_0|>", "<|extra_4|>", "<|extra_5|>", "<|eos|>"],
15
+ "pad_token": "<|pad|>",
16
+ "chat_template": "{% set context = {'has_head': true} %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set _ = context.update({'has_head': false}) %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and not context.has_head %}{% set content = '<|startoftext|>' + content %}{% endif %}{% if loop.index0 == 1 and context.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}"
17
+ }