feihu.hf committed on
Commit
1b183a8
1 Parent(s): 2303ef2

update weights

Browse files
config.json CHANGED
@@ -8,7 +8,7 @@
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 13696,
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 35,
14
  "model_type": "qwen2",
 
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 14436,
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 35,
14
  "model_type": "qwen2",
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5c7d6bdded3e62e7786ec46f79bdaf04605cc6cd174cd5678068249c950d524
3
- size 3981821832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dae54bb1ace1fa5fbf4381acf3b151f88da07e556419d9ab1dc2be718f8bbcd
3
+ size 3980417584
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5f84db35ce352f487c8a6c640358e0ded3d22dcd197118f8a14b18bcf9cc935
3
- size 3988050688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658b8b195751b07b866a3bebaa56ec9f0f74d392173eb56a69f147bff72f0ed8
3
+ size 3967789304
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8f5d5772ee940d9631f6f0350af2d1300f146b76c8555114376d5cb6aac9a00
3
- size 1707574440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cebc8cf4f30c67a07134c5b6f1394d38acf985cbd819ab5ddab8baca5f9afe33
3
+ size 1933732848
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 9677281280
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -228,16 +228,16 @@
228
  "model.layers.13.self_attn.v_proj.qzeros": "model-00001-of-00003.safetensors",
229
  "model.layers.13.self_attn.v_proj.scales": "model-00001-of-00003.safetensors",
230
  "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
231
- "model.layers.14.mlp.down_proj.bias": "model-00001-of-00003.safetensors",
232
- "model.layers.14.mlp.down_proj.g_idx": "model-00001-of-00003.safetensors",
233
- "model.layers.14.mlp.down_proj.qweight": "model-00001-of-00003.safetensors",
234
- "model.layers.14.mlp.down_proj.qzeros": "model-00001-of-00003.safetensors",
235
- "model.layers.14.mlp.down_proj.scales": "model-00001-of-00003.safetensors",
236
- "model.layers.14.mlp.gate_proj.bias": "model-00001-of-00003.safetensors",
237
- "model.layers.14.mlp.gate_proj.g_idx": "model-00001-of-00003.safetensors",
238
- "model.layers.14.mlp.gate_proj.qweight": "model-00001-of-00003.safetensors",
239
- "model.layers.14.mlp.gate_proj.qzeros": "model-00001-of-00003.safetensors",
240
- "model.layers.14.mlp.gate_proj.scales": "model-00001-of-00003.safetensors",
241
  "model.layers.14.mlp.up_proj.bias": "model-00002-of-00003.safetensors",
242
  "model.layers.14.mlp.up_proj.g_idx": "model-00002-of-00003.safetensors",
243
  "model.layers.14.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
@@ -1152,7 +1152,7 @@
1152
  "model.layers.36.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1153
  "model.layers.36.self_attn.v_proj.qzeros": "model-00002-of-00003.safetensors",
1154
  "model.layers.36.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1155
- "model.layers.37.input_layernorm.weight": "model-00002-of-00003.safetensors",
1156
  "model.layers.37.mlp.down_proj.bias": "model-00002-of-00003.safetensors",
1157
  "model.layers.37.mlp.down_proj.g_idx": "model-00002-of-00003.safetensors",
1158
  "model.layers.37.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
@@ -1163,12 +1163,12 @@
1163
  "model.layers.37.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
1164
  "model.layers.37.mlp.gate_proj.qzeros": "model-00002-of-00003.safetensors",
1165
  "model.layers.37.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
1166
- "model.layers.37.mlp.up_proj.bias": "model-00002-of-00003.safetensors",
1167
- "model.layers.37.mlp.up_proj.g_idx": "model-00002-of-00003.safetensors",
1168
- "model.layers.37.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
1169
- "model.layers.37.mlp.up_proj.qzeros": "model-00002-of-00003.safetensors",
1170
- "model.layers.37.mlp.up_proj.scales": "model-00002-of-00003.safetensors",
1171
- "model.layers.37.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
1172
  "model.layers.37.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
1173
  "model.layers.37.self_attn.k_proj.g_idx": "model-00002-of-00003.safetensors",
1174
  "model.layers.37.self_attn.k_proj.qweight": "model-00002-of-00003.safetensors",
@@ -1189,43 +1189,43 @@
1189
  "model.layers.37.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1190
  "model.layers.37.self_attn.v_proj.qzeros": "model-00002-of-00003.safetensors",
1191
  "model.layers.37.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1192
- "model.layers.38.input_layernorm.weight": "model-00002-of-00003.safetensors",
1193
- "model.layers.38.mlp.down_proj.bias": "model-00002-of-00003.safetensors",
1194
- "model.layers.38.mlp.down_proj.g_idx": "model-00002-of-00003.safetensors",
1195
- "model.layers.38.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
1196
- "model.layers.38.mlp.down_proj.qzeros": "model-00002-of-00003.safetensors",
1197
- "model.layers.38.mlp.down_proj.scales": "model-00002-of-00003.safetensors",
1198
- "model.layers.38.mlp.gate_proj.bias": "model-00002-of-00003.safetensors",
1199
- "model.layers.38.mlp.gate_proj.g_idx": "model-00002-of-00003.safetensors",
1200
- "model.layers.38.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
1201
- "model.layers.38.mlp.gate_proj.qzeros": "model-00002-of-00003.safetensors",
1202
- "model.layers.38.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
1203
- "model.layers.38.mlp.up_proj.bias": "model-00002-of-00003.safetensors",
1204
- "model.layers.38.mlp.up_proj.g_idx": "model-00002-of-00003.safetensors",
1205
- "model.layers.38.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
1206
- "model.layers.38.mlp.up_proj.qzeros": "model-00002-of-00003.safetensors",
1207
- "model.layers.38.mlp.up_proj.scales": "model-00002-of-00003.safetensors",
1208
- "model.layers.38.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
1209
- "model.layers.38.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
1210
- "model.layers.38.self_attn.k_proj.g_idx": "model-00002-of-00003.safetensors",
1211
- "model.layers.38.self_attn.k_proj.qweight": "model-00002-of-00003.safetensors",
1212
- "model.layers.38.self_attn.k_proj.qzeros": "model-00002-of-00003.safetensors",
1213
- "model.layers.38.self_attn.k_proj.scales": "model-00002-of-00003.safetensors",
1214
- "model.layers.38.self_attn.o_proj.bias": "model-00002-of-00003.safetensors",
1215
- "model.layers.38.self_attn.o_proj.g_idx": "model-00002-of-00003.safetensors",
1216
- "model.layers.38.self_attn.o_proj.qweight": "model-00002-of-00003.safetensors",
1217
- "model.layers.38.self_attn.o_proj.qzeros": "model-00002-of-00003.safetensors",
1218
- "model.layers.38.self_attn.o_proj.scales": "model-00002-of-00003.safetensors",
1219
- "model.layers.38.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
1220
- "model.layers.38.self_attn.q_proj.g_idx": "model-00002-of-00003.safetensors",
1221
- "model.layers.38.self_attn.q_proj.qweight": "model-00002-of-00003.safetensors",
1222
- "model.layers.38.self_attn.q_proj.qzeros": "model-00002-of-00003.safetensors",
1223
- "model.layers.38.self_attn.q_proj.scales": "model-00002-of-00003.safetensors",
1224
- "model.layers.38.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
1225
- "model.layers.38.self_attn.v_proj.g_idx": "model-00002-of-00003.safetensors",
1226
- "model.layers.38.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1227
- "model.layers.38.self_attn.v_proj.qzeros": "model-00002-of-00003.safetensors",
1228
- "model.layers.38.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1229
  "model.layers.39.input_layernorm.weight": "model-00003-of-00003.safetensors",
1230
  "model.layers.39.mlp.down_proj.bias": "model-00003-of-00003.safetensors",
1231
  "model.layers.39.mlp.down_proj.g_idx": "model-00003-of-00003.safetensors",
@@ -1243,11 +1243,11 @@
1243
  "model.layers.39.mlp.up_proj.qzeros": "model-00003-of-00003.safetensors",
1244
  "model.layers.39.mlp.up_proj.scales": "model-00003-of-00003.safetensors",
1245
  "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
1246
- "model.layers.39.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
1247
- "model.layers.39.self_attn.k_proj.g_idx": "model-00002-of-00003.safetensors",
1248
- "model.layers.39.self_attn.k_proj.qweight": "model-00002-of-00003.safetensors",
1249
- "model.layers.39.self_attn.k_proj.qzeros": "model-00002-of-00003.safetensors",
1250
- "model.layers.39.self_attn.k_proj.scales": "model-00002-of-00003.safetensors",
1251
  "model.layers.39.self_attn.o_proj.bias": "model-00003-of-00003.safetensors",
1252
  "model.layers.39.self_attn.o_proj.g_idx": "model-00003-of-00003.safetensors",
1253
  "model.layers.39.self_attn.o_proj.qweight": "model-00003-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 9881774080
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
 
228
  "model.layers.13.self_attn.v_proj.qzeros": "model-00001-of-00003.safetensors",
229
  "model.layers.13.self_attn.v_proj.scales": "model-00001-of-00003.safetensors",
230
  "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
231
+ "model.layers.14.mlp.down_proj.bias": "model-00002-of-00003.safetensors",
232
+ "model.layers.14.mlp.down_proj.g_idx": "model-00002-of-00003.safetensors",
233
+ "model.layers.14.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
234
+ "model.layers.14.mlp.down_proj.qzeros": "model-00002-of-00003.safetensors",
235
+ "model.layers.14.mlp.down_proj.scales": "model-00002-of-00003.safetensors",
236
+ "model.layers.14.mlp.gate_proj.bias": "model-00002-of-00003.safetensors",
237
+ "model.layers.14.mlp.gate_proj.g_idx": "model-00002-of-00003.safetensors",
238
+ "model.layers.14.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
239
+ "model.layers.14.mlp.gate_proj.qzeros": "model-00002-of-00003.safetensors",
240
+ "model.layers.14.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
241
  "model.layers.14.mlp.up_proj.bias": "model-00002-of-00003.safetensors",
242
  "model.layers.14.mlp.up_proj.g_idx": "model-00002-of-00003.safetensors",
243
  "model.layers.14.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
 
1152
  "model.layers.36.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1153
  "model.layers.36.self_attn.v_proj.qzeros": "model-00002-of-00003.safetensors",
1154
  "model.layers.36.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1155
+ "model.layers.37.input_layernorm.weight": "model-00003-of-00003.safetensors",
1156
  "model.layers.37.mlp.down_proj.bias": "model-00002-of-00003.safetensors",
1157
  "model.layers.37.mlp.down_proj.g_idx": "model-00002-of-00003.safetensors",
1158
  "model.layers.37.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
 
1163
  "model.layers.37.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
1164
  "model.layers.37.mlp.gate_proj.qzeros": "model-00002-of-00003.safetensors",
1165
  "model.layers.37.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
1166
+ "model.layers.37.mlp.up_proj.bias": "model-00003-of-00003.safetensors",
1167
+ "model.layers.37.mlp.up_proj.g_idx": "model-00003-of-00003.safetensors",
1168
+ "model.layers.37.mlp.up_proj.qweight": "model-00003-of-00003.safetensors",
1169
+ "model.layers.37.mlp.up_proj.qzeros": "model-00003-of-00003.safetensors",
1170
+ "model.layers.37.mlp.up_proj.scales": "model-00003-of-00003.safetensors",
1171
+ "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
1172
  "model.layers.37.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
1173
  "model.layers.37.self_attn.k_proj.g_idx": "model-00002-of-00003.safetensors",
1174
  "model.layers.37.self_attn.k_proj.qweight": "model-00002-of-00003.safetensors",
 
1189
  "model.layers.37.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1190
  "model.layers.37.self_attn.v_proj.qzeros": "model-00002-of-00003.safetensors",
1191
  "model.layers.37.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1192
+ "model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors",
1193
+ "model.layers.38.mlp.down_proj.bias": "model-00003-of-00003.safetensors",
1194
+ "model.layers.38.mlp.down_proj.g_idx": "model-00003-of-00003.safetensors",
1195
+ "model.layers.38.mlp.down_proj.qweight": "model-00003-of-00003.safetensors",
1196
+ "model.layers.38.mlp.down_proj.qzeros": "model-00003-of-00003.safetensors",
1197
+ "model.layers.38.mlp.down_proj.scales": "model-00003-of-00003.safetensors",
1198
+ "model.layers.38.mlp.gate_proj.bias": "model-00003-of-00003.safetensors",
1199
+ "model.layers.38.mlp.gate_proj.g_idx": "model-00003-of-00003.safetensors",
1200
+ "model.layers.38.mlp.gate_proj.qweight": "model-00003-of-00003.safetensors",
1201
+ "model.layers.38.mlp.gate_proj.qzeros": "model-00003-of-00003.safetensors",
1202
+ "model.layers.38.mlp.gate_proj.scales": "model-00003-of-00003.safetensors",
1203
+ "model.layers.38.mlp.up_proj.bias": "model-00003-of-00003.safetensors",
1204
+ "model.layers.38.mlp.up_proj.g_idx": "model-00003-of-00003.safetensors",
1205
+ "model.layers.38.mlp.up_proj.qweight": "model-00003-of-00003.safetensors",
1206
+ "model.layers.38.mlp.up_proj.qzeros": "model-00003-of-00003.safetensors",
1207
+ "model.layers.38.mlp.up_proj.scales": "model-00003-of-00003.safetensors",
1208
+ "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
1209
+ "model.layers.38.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
1210
+ "model.layers.38.self_attn.k_proj.g_idx": "model-00003-of-00003.safetensors",
1211
+ "model.layers.38.self_attn.k_proj.qweight": "model-00003-of-00003.safetensors",
1212
+ "model.layers.38.self_attn.k_proj.qzeros": "model-00003-of-00003.safetensors",
1213
+ "model.layers.38.self_attn.k_proj.scales": "model-00003-of-00003.safetensors",
1214
+ "model.layers.38.self_attn.o_proj.bias": "model-00003-of-00003.safetensors",
1215
+ "model.layers.38.self_attn.o_proj.g_idx": "model-00003-of-00003.safetensors",
1216
+ "model.layers.38.self_attn.o_proj.qweight": "model-00003-of-00003.safetensors",
1217
+ "model.layers.38.self_attn.o_proj.qzeros": "model-00003-of-00003.safetensors",
1218
+ "model.layers.38.self_attn.o_proj.scales": "model-00003-of-00003.safetensors",
1219
+ "model.layers.38.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
1220
+ "model.layers.38.self_attn.q_proj.g_idx": "model-00003-of-00003.safetensors",
1221
+ "model.layers.38.self_attn.q_proj.qweight": "model-00003-of-00003.safetensors",
1222
+ "model.layers.38.self_attn.q_proj.qzeros": "model-00003-of-00003.safetensors",
1223
+ "model.layers.38.self_attn.q_proj.scales": "model-00003-of-00003.safetensors",
1224
+ "model.layers.38.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
1225
+ "model.layers.38.self_attn.v_proj.g_idx": "model-00003-of-00003.safetensors",
1226
+ "model.layers.38.self_attn.v_proj.qweight": "model-00003-of-00003.safetensors",
1227
+ "model.layers.38.self_attn.v_proj.qzeros": "model-00003-of-00003.safetensors",
1228
+ "model.layers.38.self_attn.v_proj.scales": "model-00003-of-00003.safetensors",
1229
  "model.layers.39.input_layernorm.weight": "model-00003-of-00003.safetensors",
1230
  "model.layers.39.mlp.down_proj.bias": "model-00003-of-00003.safetensors",
1231
  "model.layers.39.mlp.down_proj.g_idx": "model-00003-of-00003.safetensors",
 
1243
  "model.layers.39.mlp.up_proj.qzeros": "model-00003-of-00003.safetensors",
1244
  "model.layers.39.mlp.up_proj.scales": "model-00003-of-00003.safetensors",
1245
  "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
1246
+ "model.layers.39.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
1247
+ "model.layers.39.self_attn.k_proj.g_idx": "model-00003-of-00003.safetensors",
1248
+ "model.layers.39.self_attn.k_proj.qweight": "model-00003-of-00003.safetensors",
1249
+ "model.layers.39.self_attn.k_proj.qzeros": "model-00003-of-00003.safetensors",
1250
+ "model.layers.39.self_attn.k_proj.scales": "model-00003-of-00003.safetensors",
1251
  "model.layers.39.self_attn.o_proj.bias": "model-00003-of-00003.safetensors",
1252
  "model.layers.39.self_attn.o_proj.g_idx": "model-00003-of-00003.safetensors",
1253
  "model.layers.39.self_attn.o_proj.qweight": "model-00003-of-00003.safetensors",