JingyaHuang committed on
Commit
80edaeb
·
1 Parent(s): 08a8fde

update with dtype in the config

Browse files
README.md CHANGED
@@ -7,4 +7,4 @@ To build the model:
7
 
8
  ```bash
9
  optimum-cli export neuron --model hf-internal-testing/tiny-random-t5 --task text2text-generation --batch_size 1 --sequence_length 18 --num_beams 4 tiny_random_t5_neuronx/
10
- ```
 
7
 
8
  ```bash
9
  optimum-cli export neuron --model hf-internal-testing/tiny-random-t5 --task text2text-generation --batch_size 1 --sequence_length 18 --num_beams 4 tiny_random_t5_neuronx/
10
+ ```
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/tmp/tmp1v7faz4n/encoder/config.json",
3
  "bos_token_id": 0,
4
  "classifier_dropout": 0.0,
5
  "d_ff": 37,
@@ -16,84 +16,14 @@
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
- "neuron": {
20
- "auto_cast": "matmul",
21
- "auto_cast_type": "bf16",
22
- "compiler_type": "neuronx-cc",
23
- "compiler_version": "2.11.0.34+c5231f848",
24
- "decoder_input_names": [
25
- "decoder_input_ids",
26
- "decoder_attention_mask",
27
- "encoder_hidden_states",
28
- "attention_mask",
29
- "beam_idx",
30
- "beam_scores"
31
- ],
32
- "decoder_output_names": [
33
- "next_tokens",
34
- "past.0.self.key",
35
- "past.1.self.key",
36
- "past.2.self.key",
37
- "past.3.self.key",
38
- "past.4.self.key",
39
- "past.0.self.value",
40
- "past.1.self.value",
41
- "past.2.self.value",
42
- "past.3.self.value",
43
- "past.4.self.value",
44
- "past.0.cross.key",
45
- "past.1.cross.key",
46
- "past.2.cross.key",
47
- "past.3.cross.key",
48
- "past.4.cross.key",
49
- "past.0.cross.value",
50
- "past.1.cross.value",
51
- "past.2.cross.value",
52
- "past.3.cross.value",
53
- "past.4.cross.value"
54
- ],
55
- "disable_fallback": false,
56
- "disable_fast_relayout": false,
57
- "dynamic_batch_size": false,
58
- "encoder_input_names": [
59
- "input_ids",
60
- "attention_mask"
61
- ],
62
- "encoder_output_names": [
63
- "present.0.self.key",
64
- "present.1.self.key",
65
- "present.2.self.key",
66
- "present.3.self.key",
67
- "present.4.self.key",
68
- "present.0.self.value",
69
- "present.1.self.value",
70
- "present.2.self.value",
71
- "present.3.self.value",
72
- "present.4.self.value",
73
- "present.0.cross.key",
74
- "present.1.cross.key",
75
- "present.2.cross.key",
76
- "present.3.cross.key",
77
- "present.4.cross.key",
78
- "present.0.cross.value",
79
- "present.1.cross.value",
80
- "present.2.cross.value",
81
- "present.3.cross.value",
82
- "present.4.cross.value"
83
- ],
84
- "static_batch_size": 1,
85
- "static_num_beams": 1,
86
- "static_sequence_length": 64
87
- },
88
  "num_decoder_layers": 5,
89
  "num_heads": 4,
90
  "num_layers": 5,
91
  "pad_token_id": 0,
92
  "relative_attention_max_distance": 128,
93
  "relative_attention_num_buckets": 8,
94
- "task": "text2text-generation",
95
- "torchscript": true,
96
- "transformers_version": "4.35.0",
97
  "use_cache": true,
98
  "vocab_size": 1103
99
  }
 
1
  {
2
+ "_attn_implementation_autoset": true,
3
  "bos_token_id": 0,
4
  "classifier_dropout": 0.0,
5
  "d_ff": 37,
 
16
  "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "num_decoder_layers": 5,
20
  "num_heads": 4,
21
  "num_layers": 5,
22
  "pad_token_id": 0,
23
  "relative_attention_max_distance": 128,
24
  "relative_attention_num_buckets": 8,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.51.0",
 
27
  "use_cache": true,
28
  "vocab_size": 1103
29
  }
decoder/config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "_commit_hash": "2f582cd79ed5795b71539951d237945bc1c5ac7e",
3
- "_name_or_path": "hf-internal-testing/tiny-random-t5",
4
  "bos_token_id": 0,
5
  "classifier_dropout": 0.0,
6
  "d_ff": 37,
@@ -21,8 +20,10 @@
21
  "auto_cast": null,
22
  "auto_cast_type": null,
23
  "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.12.54.0+f631c2365",
25
  "dynamic_batch_size": false,
 
 
26
  "input_names": [
27
  "decoder_input_ids",
28
  "decoder_attention_mask",
@@ -31,6 +32,7 @@
31
  "beam_idx",
32
  "beam_scores"
33
  ],
 
34
  "model_type": "t5-decoder",
35
  "optlevel": "2",
36
  "output_attentions": false,
@@ -62,7 +64,9 @@
62
  ],
63
  "static_batch_size": 1,
64
  "static_num_beams": 4,
65
- "static_sequence_length": 18
 
 
66
  },
67
  "num_decoder_layers": 5,
68
  "num_heads": 4,
@@ -70,9 +74,9 @@
70
  "pad_token_id": 0,
71
  "relative_attention_max_distance": 128,
72
  "relative_attention_num_buckets": 8,
73
- "task": "text2text-generation",
74
  "torchscript": true,
75
- "transformers_version": "4.11.0.dev0",
76
  "use_cache": true,
77
  "vocab_size": 1103
78
  }
 
1
  {
2
+ "_attn_implementation_autoset": true,
 
3
  "bos_token_id": 0,
4
  "classifier_dropout": 0.0,
5
  "d_ff": 37,
 
20
  "auto_cast": null,
21
  "auto_cast_type": null,
22
  "compiler_type": "neuronx-cc",
23
+ "compiler_version": "2.19.8089.0+8ab9f450",
24
  "dynamic_batch_size": false,
25
+ "float_dtype": "fp32",
26
+ "inline_weights_to_neff": false,
27
  "input_names": [
28
  "decoder_input_ids",
29
  "decoder_attention_mask",
 
32
  "beam_idx",
33
  "beam_scores"
34
  ],
35
+ "int_dtype": "int64",
36
  "model_type": "t5-decoder",
37
  "optlevel": "2",
38
  "output_attentions": false,
 
64
  ],
65
  "static_batch_size": 1,
66
  "static_num_beams": 4,
67
+ "static_sequence_length": 18,
68
+ "task": "text2text-generation",
69
+ "tensor_parallel_size": 1
70
  },
71
  "num_decoder_layers": 5,
72
  "num_heads": 4,
 
74
  "pad_token_id": 0,
75
  "relative_attention_max_distance": 128,
76
  "relative_attention_num_buckets": 8,
77
+ "torch_dtype": "float32",
78
  "torchscript": true,
79
+ "transformers_version": "4.51.0",
80
  "use_cache": true,
81
  "vocab_size": 1103
82
  }
decoder/model.neuron CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16fc7412146e0cf515099112ee32d3399c5bf8890fc57b77216a8edb3118c488
3
- size 783754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29bba6b4ab3e0d3a51f998c8e35887f7083a7d49c88322d2eb1d75930af66000
3
+ size 936999
encoder/config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
- "_commit_hash": "2f582cd79ed5795b71539951d237945bc1c5ac7e",
3
- "_name_or_path": "hf-internal-testing/tiny-random-t5",
4
  "bos_token_id": 0,
5
  "classifier_dropout": 0.0,
6
  "d_ff": 37,
@@ -21,12 +20,15 @@
21
  "auto_cast": null,
22
  "auto_cast_type": null,
23
  "compiler_type": "neuronx-cc",
24
- "compiler_version": "2.12.54.0+f631c2365",
25
  "dynamic_batch_size": false,
 
 
26
  "input_names": [
27
  "input_ids",
28
  "attention_mask"
29
  ],
 
30
  "model_type": "t5-encoder",
31
  "optlevel": "2",
32
  "output_attentions": false,
@@ -55,7 +57,9 @@
55
  ],
56
  "static_batch_size": 1,
57
  "static_num_beams": 4,
58
- "static_sequence_length": 18
 
 
59
  },
60
  "num_decoder_layers": 5,
61
  "num_heads": 4,
@@ -63,9 +67,9 @@
63
  "pad_token_id": 0,
64
  "relative_attention_max_distance": 128,
65
  "relative_attention_num_buckets": 8,
66
- "task": "text2text-generation",
67
  "torchscript": true,
68
- "transformers_version": "4.11.0.dev0",
69
  "use_cache": true,
70
  "vocab_size": 1103
71
  }
 
1
  {
2
+ "_attn_implementation_autoset": true,
 
3
  "bos_token_id": 0,
4
  "classifier_dropout": 0.0,
5
  "d_ff": 37,
 
20
  "auto_cast": null,
21
  "auto_cast_type": null,
22
  "compiler_type": "neuronx-cc",
23
+ "compiler_version": "2.19.8089.0+8ab9f450",
24
  "dynamic_batch_size": false,
25
+ "float_dtype": "fp32",
26
+ "inline_weights_to_neff": false,
27
  "input_names": [
28
  "input_ids",
29
  "attention_mask"
30
  ],
31
+ "int_dtype": "int64",
32
  "model_type": "t5-encoder",
33
  "optlevel": "2",
34
  "output_attentions": false,
 
57
  ],
58
  "static_batch_size": 1,
59
  "static_num_beams": 4,
60
+ "static_sequence_length": 18,
61
+ "task": "text2text-generation",
62
+ "tensor_parallel_size": 1
63
  },
64
  "num_decoder_layers": 5,
65
  "num_heads": 4,
 
67
  "pad_token_id": 0,
68
  "relative_attention_max_distance": 128,
69
  "relative_attention_num_buckets": 8,
70
+ "torch_dtype": "float32",
71
  "torchscript": true,
72
+ "transformers_version": "4.51.0",
73
  "use_cache": true,
74
  "vocab_size": 1103
75
  }
encoder/model.neuron CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54d269f5f2f9f40833871e9e9d310c29351b1a7e05b360d7ddeb1addc151f227
3
- size 404354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06985bf02c880acb6fe126cf49167fb3e630b47c38fcba767fcd7c1677435e0
3
+ size 485543
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  "decoder_start_token_id": 0,
5
  "eos_token_id": 1,
6
  "pad_token_id": 0,
7
- "transformers_version": "4.35.0"
8
  }
 
4
  "decoder_start_token_id": 0,
5
  "eos_token_id": 1,
6
  "pad_token_id": 0,
7
+ "transformers_version": "4.51.0"
8
  }
special_tokens_map.json CHANGED
@@ -101,7 +101,25 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
  }
tokenizer.json CHANGED
@@ -944,7 +944,8 @@
944
  {
945
  "type": "Metaspace",
946
  "replacement": "▁",
947
- "add_prefix_space": true
 
948
  }
949
  ]
950
  },
@@ -1005,7 +1006,8 @@
1005
  "decoder": {
1006
  "type": "Metaspace",
1007
  "replacement": "▁",
1008
- "add_prefix_space": true
 
1009
  },
1010
  "model": {
1011
  "type": "Unigram",
 
944
  {
945
  "type": "Metaspace",
946
  "replacement": "▁",
947
+ "prepend_scheme": "always",
948
+ "split": true
949
  }
950
  ]
951
  },
 
1006
  "decoder": {
1007
  "type": "Metaspace",
1008
  "replacement": "▁",
1009
+ "prepend_scheme": "always",
1010
+ "split": true
1011
  },
1012
  "model": {
1013
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
@@ -927,11 +928,12 @@
927
  "<extra_id_98>",
928
  "<extra_id_99>"
929
  ],
930
- "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
 
933
  "model_max_length": 1024,
934
  "pad_token": "<pad>",
935
- "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"
937
  }
 
1
  {
2
+ "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
928
  "<extra_id_98>",
929
  "<extra_id_99>"
930
  ],
931
+ "clean_up_tokenization_spaces": false,
932
  "eos_token": "</s>",
933
  "extra_ids": 100,
934
+ "extra_special_tokens": {},
935
  "model_max_length": 1024,
936
  "pad_token": "<pad>",
937
+ "tokenizer_class": "T5TokenizerFast",
938
  "unk_token": "<unk>"
939
  }