|
{ |
|
"metadata": { |
|
"ParamSize": 390, |
|
"ParamBytes": 1441894400.0, |
|
"BitsPerParam": 4.068705088590267 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 64389120, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
50304, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 64389120, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98d23cfe5512e29951cc6b2f76d1caf7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 29414400, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
50304, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2012160, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 2012160 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 2017280 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 2022400 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 10869760 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 11146240 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 28840960 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 29393920 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 29399040 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 29404160 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 29409280 |
|
} |
|
], |
|
"md5sum": "a921989fbcf09e8fdef7bc0d840232b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 27381760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 8847360 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 9123840 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 26818560 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 27371520 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 27376640 |
|
} |
|
], |
|
"md5sum": "130d1f45cebd0853d77473b8659a71ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73ce835f1b612e65c6a291ee2e806cdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "73ad222c4095c71f043fbe672e28e1da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 64389120, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
50304, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 64389120, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b22f7767d9dd755464ea66463aed56d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 32793600, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.norm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
50304, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2012160, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 5401600 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 5406720 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 5411840 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 14259200 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 14535680 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 32230400 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 32783360 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 32788480 |
|
} |
|
], |
|
"md5sum": "603ed240df48b3a37fa6065f0f7a99dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e144edf9c98bff173385e8a15819ad3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "48fb26a8d1fe22be3597c0c62bf0085a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "235d6932ce55f47b1fabec44f9d9f1de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0235b262486df6faf22eeba523c09244" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "88168bc312a4a6ebc31abb70b6ab8ed3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "849180aaf2afe8dcf7bb0a929d9cf4ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b6b058f8df7e734e05fb1fb624fcb9b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "400418ac9e5d166e5a7f10a8fbd8a1ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "9186d2689c56b5b655779dc790426463" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d90065923fb4584413485a3d96848f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "baf09c7c670e5cf7e7d6f4dc8d606d70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "78fee06cca6d44f0c9ee5a8743d2ee06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72aaac15f1a7fc2ad8cab9d7cc6ce068" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "3675c19e05e3a5057ee2e15e25e19b6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "d33b874643a048baaaf5f6d280d75bc1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9b5ac1af5ca51f409f444d39c1279e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "0cfbde3cfd564dec5a9e52c16e599b53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "245f61f85f505dad4ee771dcf0b8bcd2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96c11360bba3055374d33505356b1df4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "bc7e3f6e8c99fb220b557bb3882b93f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "adf866ae4171b163730cbeaf9a4fc485" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a00a9570f1d60743ce22215e1c0bba9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "728953a6902e4bd4f107f3d9d60fdcd9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "8f9f9b5e6038a1680ca7b69cc7037a18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6560d32247d59e8ffea7c5ed89e2b2ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "f04de0c64cfebfda5473467c61803a82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "7417930cb4fbd8c51bd30e0740e96526" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fcd70282fc189c4fc33153751b5f0e6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "822a79040fd65bb7606fbd175b83071d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "3b0d686b256b47b5c8af984bb75e12f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1e38c225d8a389dc62eb802254b5058" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "fad5ee217ddb40b9c79d3e079f350e64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a59930546dfe9d29f46bcb6ab0ee8618" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 26593280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 13209600 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 16793600 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 16896000 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 16901120 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 16906240 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 25753600 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 26030080 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26583040 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26588160 |
|
} |
|
], |
|
"md5sum": "969800b8417c99aa6f4c0a80393bdd98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2613741cf96673eb2051bb4c7115b06e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "012804524ab3a5fca7e9fbcac06b0573" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "0e28641be310c7a942949641679262cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1620200927280c0b92e2fcd7df838e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "fb2b065a50cb04e8eea2acf482de10bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "995e7052fde2e824b15cee47663c04a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 17694720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f97608bc020459a8ed50c272d9794fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 33351680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13516800 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13521920 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 13527040 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22374400 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 22650880 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23203840 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 23208960 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 23214080 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 33044480 |
|
} |
|
], |
|
"md5sum": "eb6739fe6326d703e816132a4705d927" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 30771200, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 3276800 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3379200 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 3384320 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
864 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8847360, |
|
"byteOffset": 3389440 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
54 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12236800 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
13824, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 17694720, |
|
"byteOffset": 12513280 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
13824, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 552960, |
|
"byteOffset": 30208000 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30760960 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 30766080 |
|
} |
|
], |
|
"md5sum": "cb776036c5508bc8ad612628c8ba41a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "compressed-shard", |
|
"nbytes": 13516800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
7680, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9830400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
7680, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 307200, |
|
"byteOffset": 9830400 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2560, |
|
320 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3276800, |
|
"byteOffset": 10137600 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2560, |
|
20 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 13414400 |
|
} |
|
], |
|
"md5sum": "0101d6595436a80f5ee2682bb8b72c25" |
|
} |
|
] |
|
} |