yujiepan committed (verified) · Commit 0c24726 · Parent(s): 17dcccf

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,213 @@
---
library_name: transformers
pipeline_tag: text-generation
inference: true
widget:
- text: Hello!
  example_title: Hello world
  group: Python
base_model:
- microsoft/Phi-4-mini-flash-reasoning
---

This tiny model is for debugging purposes. It is randomly initialized, using a config adapted from [microsoft/Phi-4-mini-flash-reasoning](https://huggingface.co/microsoft/Phi-4-mini-flash-reasoning).

### Example usage:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.random.manual_seed(0)

model_id = "tiny-random/phi4-flash"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

messages = [{"role": "user", "content": "How to solve 3*x^2+4*x+5=1?"}]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)

outputs = model.generate(
    **inputs.to(model.device),
    max_new_tokens=600,
    temperature=0.6,
    top_p=0.95,
    do_sample=True,
)
# Decode only the newly generated tokens, skipping the prompt.
outputs = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])
print(outputs[0])
```

### Code to create this repo:

```python
import json
from pathlib import Path

import accelerate
import torch
from huggingface_hub import file_exists, hf_hub_download
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoProcessor,
    GenerationConfig,
    set_seed,
)

source_model_id = "microsoft/Phi-4-mini-flash-reasoning"
save_folder = "/tmp/tiny-random/phi4-flash"

processor = AutoProcessor.from_pretrained(source_model_id, trust_remote_code=True)
processor.save_pretrained(save_folder)

# Shrink the source config to a tiny, debug-sized model.
with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
    config_json = json.load(f)
for key in ['AutoConfig', 'AutoModelForCausalLM']:
    config_json['auto_map'][key] = f'{source_model_id}--' + config_json['auto_map'][key]
automap = config_json['auto_map']
config_json['hidden_size'] = 64
config_json['intermediate_size'] = 64
config_json['num_attention_heads'] = 2
config_json['num_hidden_layers'] = 4
config_json['num_key_value_heads'] = 2
config_json['tie_word_embeddings'] = True
config_json['sliding_window'] = 512
config_json['use_cache'] = True
config_json['mb_per_layer'] = 2  # every other layer is Mamba; the first layer is Mamba

with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
    json.dump(config_json, f, indent=2)
config = AutoConfig.from_pretrained(
    save_folder,
    trust_remote_code=True,
)
print(config)
torch.set_default_dtype(torch.bfloat16)
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
torch.set_default_dtype(torch.float32)
if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
    model.generation_config = GenerationConfig.from_pretrained(
        source_model_id, trust_remote_code=True,
    )
set_seed(42)
model = model.cpu()  # CPU is more stable than GPU for reproducible random init across machines
with torch.no_grad():
    for name, p in sorted(model.named_parameters()):
        torch.nn.init.normal_(p, 0, 0.2)
        print(name, p.shape)
model.save_pretrained(save_folder)
print(model)

# Restore the remote-code auto_map and keep sliding_window an int
# (bugfix for: "'<' not supported between instances of 'int' and 'list'").
with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
    config_json = json.load(f)
config_json['auto_map'] = automap
config_json['sliding_window'] = 512
with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
    json.dump(config_json, f, indent=2)
for python_file in Path(save_folder).glob('*.py'):
    if python_file.name.startswith('modeling_') or python_file.name.startswith('configuration_'):
        python_file.unlink()
```

### Printing the model:

```text
Phi4FlashForCausalLM(
  (model): Phi4FlashModel(
    (embed_tokens): Embedding(200064, 64, padding_idx=199999)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0): SambaYDecoderLayer(
        (mlp): SambaYMLP(
          (fc1): Linear(in_features=64, out_features=128, bias=False)
          (fc2): Linear(in_features=64, out_features=64, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (attn): Phi3Mamba(
          (in_proj): Linear(in_features=64, out_features=256, bias=False)
          (conv1d): Conv1d(128, 128, kernel_size=(4,), stride=(1,), padding=(3,), groups=128)
          (act): SiLU()
          (x_proj): Linear(in_features=128, out_features=36, bias=False)
          (dt_proj): Linear(in_features=4, out_features=128, bias=True)
          (out_proj): Linear(in_features=128, out_features=64, bias=False)
        )
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      )
      (1): SambaYDecoderLayer(
        (mlp): SambaYMLP(
          (fc1): Linear(in_features=64, out_features=128, bias=False)
          (fc2): Linear(in_features=64, out_features=64, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (attn): SambaYFlashAttention2(
          (out_proj): Linear(in_features=64, out_features=64, bias=True)
          (Wqkv): Linear(in_features=64, out_features=192, bias=True)
          (inner_cross_attn): FlashDiffCustomAttention(
            (subln): SambaYRMSNorm()
          )
        )
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      )
      (2): SambaYDecoderLayer(
        (mlp): SambaYMLP(
          (fc1): Linear(in_features=64, out_features=128, bias=False)
          (fc2): Linear(in_features=64, out_features=64, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (attn): Phi3Mamba(
          (in_proj): Linear(in_features=64, out_features=256, bias=False)
          (conv1d): Conv1d(128, 128, kernel_size=(4,), stride=(1,), padding=(3,), groups=128)
          (act): SiLU()
          (x_proj): Linear(in_features=128, out_features=36, bias=False)
          (dt_proj): Linear(in_features=4, out_features=128, bias=True)
          (out_proj): Linear(in_features=128, out_features=64, bias=False)
        )
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      )
      (3): SambaYDecoderLayer(
        (mlp): SambaYMLP(
          (fc1): Linear(in_features=64, out_features=128, bias=False)
          (fc2): Linear(in_features=64, out_features=64, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (attn): SambaYFlashAttention2(
          (out_proj): Linear(in_features=64, out_features=64, bias=True)
          (Wqkv): Linear(in_features=64, out_features=192, bias=True)
          (inner_cross_attn): FlashDiffCustomAttention(
            (subln): SambaYRMSNorm()
          )
        )
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      )
    )
    (final_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=64, out_features=200064, bias=False)
)
```
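
The printout shows the `mb_per_layer=2` interleaving: even-indexed layers (0, 2) use `Phi3Mamba` and odd-indexed layers (1, 3) use `SambaYFlashAttention2`. A minimal sketch of that pattern (illustrative only, not code from the repo):

```python
# Illustrative sketch: the Mamba/attention interleaving visible in the
# printout above, where layer i is Mamba when i % mb_per_layer == 0.
num_hidden_layers = 4
mb_per_layer = 2

layer_kinds = [
    "Phi3Mamba" if i % mb_per_layer == 0 else "SambaYFlashAttention2"
    for i in range(num_hidden_layers)
]
print(layer_kinds)
# ['Phi3Mamba', 'SambaYFlashAttention2', 'Phi3Mamba', 'SambaYFlashAttention2']
```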
added_tokens.json ADDED
@@ -0,0 +1,12 @@
{
  "<|/tool_call|>": 200026,
  "<|/tool|>": 200024,
  "<|assistant|>": 200019,
  "<|end|>": 200020,
  "<|system|>": 200022,
  "<|tag|>": 200028,
  "<|tool_call|>": 200025,
  "<|tool_response|>": 200027,
  "<|tool|>": 200023,
  "<|user|>": 200021
}
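
These are the chat-role and tool tokens appended to the base vocabulary. A quick check that the tokenizer resolves them to the ids above (a sketch, reusing the repo id from the README):

```python
# Sketch: verify the added chat tokens map to the ids listed above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("tiny-random/phi4-flash", trust_remote_code=True)
print(tok.convert_tokens_to_ids(["<|user|>", "<|assistant|>", "<|end|>"]))
# Expected: [200021, 200019, 200020]
```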
chat_template.jinja ADDED
@@ -0,0 +1 @@
{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
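
The template wraps each message as `<|role|>content<|end|>`, inlines tool definitions for system messages that carry them, and appends `<|assistant|>` when a generation prompt is requested. A minimal sketch rendering it with plain `jinja2` (assumed installed; `apply_chat_template` performs the equivalent rendering):

```python
# Sketch: render the chat template above with plain jinja2 to see the prompt format.
from jinja2 import Template

with open("chat_template.jinja", encoding="utf-8") as f:
    template = Template(f.read())

prompt = template.render(
    messages=[{"role": "user", "content": "How to solve 3*x^2+4*x+5=1?"}],
    add_generation_prompt=True,
    eos_token="<|endoftext|>",
)
print(prompt)  # <|user|>How to solve 3*x^2+4*x+5=1?<|end|><|assistant|>
```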
config.json ADDED
@@ -0,0 +1,41 @@
{
  "architectures": [
    "Phi4FlashForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-4-mini-flash-reasoning--configuration_phi4flash.Phi4FlashConfig",
    "AutoModelForCausalLM": "microsoft/Phi-4-mini-flash-reasoning--modeling_phi4flash.Phi4FlashForCausalLM",
    "AutoTokenizer": "Xenova/gpt-4o"
  },
  "bos_token_id": 199999,
  "embd_pdrop": 0.0,
  "eos_token_id": 199999,
  "hidden_act": "silu",
  "hidden_size": 64,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "layer_norm_eps": 1e-05,
  "lm_head_bias": false,
  "mamba_conv_bias": true,
  "mamba_d_conv": 4,
  "mamba_d_state": 16,
  "mamba_dt_rank": 4,
  "mamba_expand": 2,
  "mamba_proj_bias": false,
  "max_position_embeddings": 262144,
  "mb_per_layer": 2,
  "mlp_bias": false,
  "model_type": "phi4flash",
  "num_attention_heads": 2,
  "num_hidden_layers": 4,
  "num_key_value_heads": 2,
  "pad_token_id": 199999,
  "resid_pdrop": 0.0,
  "rope_theta": 10000.0,
  "sliding_window": 512,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.54.0.dev0",
  "use_cache": true,
  "vocab_size": 200064
}
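
Several module shapes in the README's model printout follow from these values: the attention head dim is `hidden_size / num_attention_heads`, the Mamba inner width is `mamba_expand * hidden_size` (the 128-channel `conv1d`), and `x_proj` projects to `mamba_dt_rank + 2 * mamba_d_state` under the usual Mamba parameterization. A small sanity-check sketch:

```python
# Sketch: derive module shapes seen in the printout from config.json values.
import json

with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)

head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]   # 64 // 2 = 32
mamba_inner = cfg["mamba_expand"] * cfg["hidden_size"]        # 2 * 64 = 128 (conv1d channels)
x_proj_out = cfg["mamba_dt_rank"] + 2 * cfg["mamba_d_state"]  # 4 + 2*16 = 36
print(head_dim, mamba_inner, x_proj_out)
```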
generation_config.json ADDED
@@ -0,0 +1,10 @@
{
  "_from_model_config": true,
  "bos_token_id": 199999,
  "eos_token_id": [
    200020,
    199999
  ],
  "pad_token_id": 199999,
  "transformers_version": "4.54.0.dev0"
}
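
Since `eos_token_id` is a list, generation stops on either `<|end|>` (200020) or `<|endoftext|>` (199999). Loading it back to confirm (sketch):

```python
# Sketch: generation halts at whichever of the two eos ids is produced first.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("tiny-random/phi4-flash")
print(gen_cfg.eos_token_id)  # [200020, 199999]
```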
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f18cf02c0c75b4bcd621ebbd2267c20c491e56bcaa0d4bb376638e38c2b7a82e
size 25921976
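
The pointer's `size` of 25,921,976 bytes is consistent with roughly 13M bfloat16 parameters (2 bytes each), dominated by the 200064×64 embedding. A sketch for inspecting the checkpoint once Git LFS has fetched it (assumes the `safetensors` package):

```python
# Sketch: count parameters in the downloaded checkpoint; ~25.9 MB / 2 bytes
# per bfloat16 value gives roughly 13M parameters.
from safetensors import safe_open

total = 0
with safe_open("model.safetensors", framework="pt") as f:
    for name in f.keys():
        shape = f.get_slice(name).get_shape()
        n = 1
        for dim in shape:
            n *= dim
        total += n
print(total)
```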
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:382cc235b56c725945e149cc25f191da667c836655efd0857b004320e90e91ea
size 15524095
tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "199999": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "200018": {
      "content": "<|endofprompt|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "200019": {
      "content": "<|assistant|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "200020": {
      "content": "<|end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "200021": {
      "content": "<|user|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "200022": {
      "content": "<|system|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    },
    "200023": {
      "content": "<|tool|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "200024": {
      "content": "<|/tool|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "200025": {
      "content": "<|tool_call|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "200026": {
      "content": "<|/tool_call|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "200027": {
      "content": "<|tool_response|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "200028": {
      "content": "<|tag|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": {},
  "model_max_length": 65536,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}
vocab.json ADDED
The diff for this file is too large to render. See raw diff