numen-tech's picture
Add weights
4dcd421
{
"metadata": {
"ParamSize": 390,
"ParamBytes": 1441894400.0,
"BitsPerParam": 4.068705088590267
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "compressed-shard",
"nbytes": 64389120,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
50304,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 64389120,
"byteOffset": 0
}
],
"md5sum": "98d23cfe5512e29951cc6b2f76d1caf7"
},
{
"dataPath": "params_shard_1.bin",
"format": "compressed-shard",
"nbytes": 29414400,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
50304,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2012160,
"byteOffset": 0
},
{
"name": "model.layers.29.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2012160
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2017280
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 2022400
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 10869760
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 11146240
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 28840960
},
{
"name": "model.layers.29.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 29393920
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 29399040
},
{
"name": "model.layers.30.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 29404160
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 29409280
}
],
"md5sum": "a921989fbcf09e8fdef7bc0d840232b8"
},
{
"dataPath": "params_shard_2.bin",
"format": "compressed-shard",
"nbytes": 27381760,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 8847360
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 9123840
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 26818560
},
{
"name": "model.layers.30.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27371520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27376640
}
],
"md5sum": "130d1f45cebd0853d77473b8659a71ed"
},
{
"dataPath": "params_shard_3.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "73ce835f1b612e65c6a291ee2e806cdd"
},
{
"dataPath": "params_shard_4.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.31.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.31.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "73ad222c4095c71f043fbe672e28e1da"
},
{
"dataPath": "params_shard_5.bin",
"format": "compressed-shard",
"nbytes": 64389120,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
50304,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 64389120,
"byteOffset": 0
}
],
"md5sum": "0b22f7767d9dd755464ea66463aed56d"
},
{
"dataPath": "params_shard_6.bin",
"format": "compressed-shard",
"nbytes": 32793600,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.norm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
50304,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2012160,
"byteOffset": 3389440
},
{
"name": "model.layers.0.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 5401600
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 5406720
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 5411840
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 14259200
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 14535680
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 32230400
},
{
"name": "model.layers.0.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 32783360
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 32788480
}
],
"md5sum": "603ed240df48b3a37fa6065f0f7a99dd"
},
{
"dataPath": "params_shard_7.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "e144edf9c98bff173385e8a15819ad3c"
},
{
"dataPath": "params_shard_8.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.1.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.1.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "48fb26a8d1fe22be3597c0c62bf0085a"
},
{
"dataPath": "params_shard_9.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.10.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.10.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "235d6932ce55f47b1fabec44f9d9f1de"
},
{
"dataPath": "params_shard_10.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "0235b262486df6faf22eeba523c09244"
},
{
"dataPath": "params_shard_11.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.11.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.11.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "88168bc312a4a6ebc31abb70b6ab8ed3"
},
{
"dataPath": "params_shard_12.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.12.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.12.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "849180aaf2afe8dcf7bb0a929d9cf4ef"
},
{
"dataPath": "params_shard_13.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "b6b058f8df7e734e05fb1fb624fcb9b8"
},
{
"dataPath": "params_shard_14.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.13.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.13.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "400418ac9e5d166e5a7f10a8fbd8a1ab"
},
{
"dataPath": "params_shard_15.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.14.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.14.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "9186d2689c56b5b655779dc790426463"
},
{
"dataPath": "params_shard_16.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "9d90065923fb4584413485a3d96848f4"
},
{
"dataPath": "params_shard_17.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.15.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.15.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "baf09c7c670e5cf7e7d6f4dc8d606d70"
},
{
"dataPath": "params_shard_18.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.16.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.16.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "78fee06cca6d44f0c9ee5a8743d2ee06"
},
{
"dataPath": "params_shard_19.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "72aaac15f1a7fc2ad8cab9d7cc6ce068"
},
{
"dataPath": "params_shard_20.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.17.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.17.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "3675c19e05e3a5057ee2e15e25e19b6c"
},
{
"dataPath": "params_shard_21.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.18.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.18.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "d33b874643a048baaaf5f6d280d75bc1"
},
{
"dataPath": "params_shard_22.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "e9b5ac1af5ca51f409f444d39c1279e8"
},
{
"dataPath": "params_shard_23.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.19.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.19.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "0cfbde3cfd564dec5a9e52c16e599b53"
},
{
"dataPath": "params_shard_24.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.2.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.2.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "245f61f85f505dad4ee771dcf0b8bcd2"
},
{
"dataPath": "params_shard_25.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "96c11360bba3055374d33505356b1df4"
},
{
"dataPath": "params_shard_26.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.20.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.20.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "bc7e3f6e8c99fb220b557bb3882b93f2"
},
{
"dataPath": "params_shard_27.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.21.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.21.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "adf866ae4171b163730cbeaf9a4fc485"
},
{
"dataPath": "params_shard_28.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "7a00a9570f1d60743ce22215e1c0bba9"
},
{
"dataPath": "params_shard_29.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.22.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.22.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "728953a6902e4bd4f107f3d9d60fdcd9"
},
{
"dataPath": "params_shard_30.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.23.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.23.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "8f9f9b5e6038a1680ca7b69cc7037a18"
},
{
"dataPath": "params_shard_31.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "6560d32247d59e8ffea7c5ed89e2b2ab"
},
{
"dataPath": "params_shard_32.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.24.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.24.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "f04de0c64cfebfda5473467c61803a82"
},
{
"dataPath": "params_shard_33.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.25.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.25.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "7417930cb4fbd8c51bd30e0740e96526"
},
{
"dataPath": "params_shard_34.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "fcd70282fc189c4fc33153751b5f0e6a"
},
{
"dataPath": "params_shard_35.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.26.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.26.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "822a79040fd65bb7606fbd175b83071d"
},
{
"dataPath": "params_shard_36.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.27.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.27.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "3b0d686b256b47b5c8af984bb75e12f7"
},
{
"dataPath": "params_shard_37.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "f1e38c225d8a389dc62eb802254b5058"
},
{
"dataPath": "params_shard_38.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.28.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.28.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "fad5ee217ddb40b9c79d3e079f350e64"
},
{
"dataPath": "params_shard_39.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "a59930546dfe9d29f46bcb6ab0ee8618"
},
{
"dataPath": "params_shard_40.bin",
"format": "compressed-shard",
"nbytes": 26593280,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 3379200
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 13209600
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 13516800
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 16793600
},
{
"name": "model.layers.3.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 16896000
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 16901120
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 16906240
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 25753600
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 26030080
},
{
"name": "model.layers.3.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26583040
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26588160
}
],
"md5sum": "969800b8417c99aa6f4c0a80393bdd98"
},
{
"dataPath": "params_shard_41.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "2613741cf96673eb2051bb4c7115b06e"
},
{
"dataPath": "params_shard_42.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.4.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.4.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "012804524ab3a5fca7e9fbcac06b0573"
},
{
"dataPath": "params_shard_43.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.5.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.5.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "0e28641be310c7a942949641679262cc"
},
{
"dataPath": "params_shard_44.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "f1620200927280c0b92e2fcd7df838e5"
},
{
"dataPath": "params_shard_45.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.6.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.6.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "fb2b065a50cb04e8eea2acf482de10bd"
},
{
"dataPath": "params_shard_46.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.7.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.7.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "995e7052fde2e824b15cee47663c04a6"
},
{
"dataPath": "params_shard_47.bin",
"format": "compressed-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "2f97608bc020459a8ed50c272d9794fa"
},
{
"dataPath": "params_shard_48.bin",
"format": "compressed-shard",
"nbytes": 33351680,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
},
{
"name": "model.layers.8.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13516800
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13521920
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 13527040
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22374400
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 22650880
},
{
"name": "model.layers.8.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23203840
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23208960
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 23214080
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 33044480
}
],
"md5sum": "eb6739fe6326d703e816132a4705d927"
},
{
"dataPath": "params_shard_49.bin",
"format": "compressed-shard",
"nbytes": 30771200,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 3276800
},
{
"name": "model.layers.9.input_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3379200
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3384320
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3389440
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2560,
54
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12236800
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 12513280
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
13824,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 552960,
"byteOffset": 30208000
},
{
"name": "model.layers.9.post_attention_layernorm.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30760960
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 30766080
}
],
"md5sum": "cb776036c5508bc8ad612628c8ba41a7"
},
{
"dataPath": "params_shard_50.bin",
"format": "compressed-shard",
"nbytes": 13516800,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
7680,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 307200,
"byteOffset": 9830400
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 10137600
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2560,
20
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 13414400
}
],
"md5sum": "0101d6595436a80f5ee2682bb8b72c25"
}
]
}