itazap HF Staff committed on
Commit
ed852e6
·
verified ·
1 Parent(s): 3ed28f0

Overwrite with snapshot bb8c23be2 from checkpoint itazap/blt-1b

Browse files
Files changed (1) hide show
  1. config.json +8 -4
config.json CHANGED
@@ -31,9 +31,10 @@
31
  "num_attention_heads": 12,
32
  "num_key_value_heads": null,
33
  "max_position_embeddings": 8192,
34
- "rms_norm_eps": 1e-05,
35
  "dropout": 0.0,
36
  "rope_theta": 10000.0,
 
37
  "attn_bias_type": "local_block_causal",
38
  "intermediate_size": 2048
39
  },
@@ -47,7 +48,7 @@
47
  "num_attention_heads": 16,
48
  "num_key_value_heads": null,
49
  "num_hidden_layers": 1,
50
- "rms_norm_eps": 1e-05,
51
  "dropout": 0.0,
52
  "max_position_embeddings": 24576,
53
  "rope_theta": 500000.0,
@@ -55,6 +56,7 @@
55
  "rope_type": "default"
56
  },
57
  "hidden_act": "silu",
 
58
  "intermediate_size": 2816
59
  },
60
  "decoder_config": {
@@ -66,7 +68,7 @@
66
  "num_attention_heads": 16,
67
  "num_key_value_heads": null,
68
  "num_hidden_layers": 9,
69
- "rms_norm_eps": 1e-05,
70
  "dropout": 0.0,
71
  "max_position_embeddings": 24576,
72
  "rope_theta": 500000.0,
@@ -74,6 +76,7 @@
74
  "rope_type": "default"
75
  },
76
  "hidden_act": "silu",
 
77
  "intermediate_size": 2816
78
  },
79
  "global_config": {
@@ -81,7 +84,7 @@
81
  "num_attention_heads": 16,
82
  "num_key_value_heads": null,
83
  "num_hidden_layers": 25,
84
- "rms_norm_eps": 1e-05,
85
  "dropout": 0.0,
86
  "max_position_embeddings": 4096,
87
  "rope_theta": 500000.0,
@@ -89,6 +92,7 @@
89
  "rope_type": "default"
90
  },
91
  "hidden_act": "silu",
 
92
  "intermediate_size": 5632
93
  },
94
  "tie_word_embeddings": false
 
31
  "num_attention_heads": 12,
32
  "num_key_value_heads": null,
33
  "max_position_embeddings": 8192,
34
+ "norm_eps": 1e-05,
35
  "dropout": 0.0,
36
  "rope_theta": 10000.0,
37
+ "attn_impl": "xformers",
38
  "attn_bias_type": "local_block_causal",
39
  "intermediate_size": 2048
40
  },
 
48
  "num_attention_heads": 16,
49
  "num_key_value_heads": null,
50
  "num_hidden_layers": 1,
51
+ "norm_eps": 1e-05,
52
  "dropout": 0.0,
53
  "max_position_embeddings": 24576,
54
  "rope_theta": 500000.0,
 
56
  "rope_type": "default"
57
  },
58
  "hidden_act": "silu",
59
+ "_attn_implementation": "sdpa",
60
  "intermediate_size": 2816
61
  },
62
  "decoder_config": {
 
68
  "num_attention_heads": 16,
69
  "num_key_value_heads": null,
70
  "num_hidden_layers": 9,
71
+ "norm_eps": 1e-05,
72
  "dropout": 0.0,
73
  "max_position_embeddings": 24576,
74
  "rope_theta": 500000.0,
 
76
  "rope_type": "default"
77
  },
78
  "hidden_act": "silu",
79
+ "_attn_implementation": "sdpa",
80
  "intermediate_size": 2816
81
  },
82
  "global_config": {
 
84
  "num_attention_heads": 16,
85
  "num_key_value_heads": null,
86
  "num_hidden_layers": 25,
87
+ "norm_eps": 1e-05,
88
  "dropout": 0.0,
89
  "max_position_embeddings": 4096,
90
  "rope_theta": 500000.0,
 
92
  "rope_type": "default"
93
  },
94
  "hidden_act": "silu",
95
+ "_attn_implementation": "sdpa",
96
  "intermediate_size": 5632
97
  },
98
  "tie_word_embeddings": false