kernelpool committed on
Commit
10ccfdb
·
verified ·
1 Parent(s): c81b7fe

Add files using upload-large-folder tool

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  license: mit
3
- library_name: mlx
4
  pipeline_tag: text-generation
 
5
  base_model: moonshotai/Kimi-Linear-48B-A3B-Instruct
6
  tags:
7
  - mlx
 
1
  ---
2
  license: mit
 
3
  pipeline_tag: text-generation
4
+ library_name: mlx
5
  base_model: moonshotai/Kimi-Linear-48B-A3B-Instruct
6
  tags:
7
  - mlx
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2117c23724d1acf37a48274fa18afd0af6ba3f983cea3bc3300d8c56d7e73a63
3
- size 4947256852
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7deaebb926e0a206665bb0af15f5f8e33e9fade3801959ce0dce65e401205c
3
+ size 4947256914
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be99124a72f97775fce931af4561cf68b1586003baa92afdbdfc1e91fbd00e07
3
- size 5014526051
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78d860c933f7181b92ba0ce72750ea16641c77d5ce12d6684e80c2dca76278e7
3
+ size 5014526093
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4983135555dafbf519a765496d1e061248dccc6717f3684a46990192f04f1c8c
3
- size 5023048926
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e8de06cb33f18b7d1264bac5693a69af07276826c04d9c42071f7d9d312be43
3
+ size 5023048944
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:430bb8eb3bd17d630f55aa6a72a43be54fe55e850b82f9d822ed5560d6e17330
3
- size 5053096011
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a477d0afea613c87f3050ba41efd88ba44c90fa19c52aed159dfce943d820c43
3
+ size 5053096059
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90378c6265fdbe4ec3e8c988e4d6e169ccf8d495965a5b62b1a7291a6db356ac
3
- size 5014526163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3cb6251042aed978b1a3df3918ec210f0201643ebac930fca666d897d29abf4
3
+ size 5014526203
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e916c0a9e6a7aae6a9b2353b6691cac6c64bd0e577b0febb9a8e60981a323b0
3
- size 5014526121
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85c5d95c16641c9058d7b0c5d9fe3014aadc38e9417c691cb67b5a0008a2f55f
3
+ size 5014526165
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf3ae62eb01a6b13d1d85d8681e4d8783f1becb9eda7dee48df20928f83e59f2
3
- size 5053095989
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d479ff1516fdb4dbc5f97beda7192a3ff7fe701ced606efb1296ff46ed46b625
3
+ size 5053096041
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d00b721e82f9c59f26d572073de0d80ca66ce6e35cd3d938a0beb21d22c4738
3
- size 4797697864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5842792f58103e7e6aaf3e49e6920a212e71b21a7e560d3d2ab700c81e69e67
3
+ size 4797697900
model.safetensors.index.json CHANGED
@@ -192,10 +192,10 @@
192
  "model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00004-of-00008.safetensors",
193
  "model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00004-of-00008.safetensors",
194
  "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
195
- "model.layers.11.self_attn.kv_a_norm.weight": "model-00004-of-00008.safetensors",
196
- "model.layers.11.self_attn.kv_a_proj.biases": "model-00004-of-00008.safetensors",
197
- "model.layers.11.self_attn.kv_a_proj.scales": "model-00004-of-00008.safetensors",
198
- "model.layers.11.self_attn.kv_a_proj.weight": "model-00004-of-00008.safetensors",
199
  "model.layers.11.self_attn.kv_b_proj.biases": "model-00004-of-00008.safetensors",
200
  "model.layers.11.self_attn.kv_b_proj.scales": "model-00004-of-00008.safetensors",
201
  "model.layers.11.self_attn.kv_b_proj.weight": "model-00004-of-00008.safetensors",
@@ -400,10 +400,10 @@
400
  "model.layers.15.mlp.switch_mlp.up_proj.scales": "model-00005-of-00008.safetensors",
401
  "model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00005-of-00008.safetensors",
402
  "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
403
- "model.layers.15.self_attn.kv_a_norm.weight": "model-00005-of-00008.safetensors",
404
- "model.layers.15.self_attn.kv_a_proj.biases": "model-00005-of-00008.safetensors",
405
- "model.layers.15.self_attn.kv_a_proj.scales": "model-00005-of-00008.safetensors",
406
- "model.layers.15.self_attn.kv_a_proj.weight": "model-00005-of-00008.safetensors",
407
  "model.layers.15.self_attn.kv_b_proj.biases": "model-00005-of-00008.safetensors",
408
  "model.layers.15.self_attn.kv_b_proj.scales": "model-00005-of-00008.safetensors",
409
  "model.layers.15.self_attn.kv_b_proj.weight": "model-00005-of-00008.safetensors",
@@ -608,10 +608,10 @@
608
  "model.layers.19.mlp.switch_mlp.up_proj.scales": "model-00006-of-00008.safetensors",
609
  "model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00006-of-00008.safetensors",
610
  "model.layers.19.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
611
- "model.layers.19.self_attn.kv_a_norm.weight": "model-00006-of-00008.safetensors",
612
- "model.layers.19.self_attn.kv_a_proj.biases": "model-00006-of-00008.safetensors",
613
- "model.layers.19.self_attn.kv_a_proj.scales": "model-00006-of-00008.safetensors",
614
- "model.layers.19.self_attn.kv_a_proj.weight": "model-00006-of-00008.safetensors",
615
  "model.layers.19.self_attn.kv_b_proj.biases": "model-00006-of-00008.safetensors",
616
  "model.layers.19.self_attn.kv_b_proj.scales": "model-00006-of-00008.safetensors",
617
  "model.layers.19.self_attn.kv_b_proj.weight": "model-00006-of-00008.safetensors",
@@ -873,10 +873,10 @@
873
  "model.layers.23.mlp.switch_mlp.up_proj.scales": "model-00007-of-00008.safetensors",
874
  "model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00007-of-00008.safetensors",
875
  "model.layers.23.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
876
- "model.layers.23.self_attn.kv_a_norm.weight": "model-00007-of-00008.safetensors",
877
- "model.layers.23.self_attn.kv_a_proj.biases": "model-00007-of-00008.safetensors",
878
- "model.layers.23.self_attn.kv_a_proj.scales": "model-00007-of-00008.safetensors",
879
- "model.layers.23.self_attn.kv_a_proj.weight": "model-00007-of-00008.safetensors",
880
  "model.layers.23.self_attn.kv_b_proj.biases": "model-00007-of-00008.safetensors",
881
  "model.layers.23.self_attn.kv_b_proj.scales": "model-00007-of-00008.safetensors",
882
  "model.layers.23.self_attn.kv_b_proj.weight": "model-00007-of-00008.safetensors",
@@ -1024,10 +1024,10 @@
1024
  "model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00008-of-00008.safetensors",
1025
  "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00008-of-00008.safetensors",
1026
  "model.layers.26.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
1027
- "model.layers.26.self_attn.kv_a_norm.weight": "model-00008-of-00008.safetensors",
1028
- "model.layers.26.self_attn.kv_a_proj.biases": "model-00008-of-00008.safetensors",
1029
- "model.layers.26.self_attn.kv_a_proj.scales": "model-00008-of-00008.safetensors",
1030
- "model.layers.26.self_attn.kv_a_proj.weight": "model-00008-of-00008.safetensors",
1031
  "model.layers.26.self_attn.kv_b_proj.biases": "model-00008-of-00008.safetensors",
1032
  "model.layers.26.self_attn.kv_b_proj.scales": "model-00008-of-00008.safetensors",
1033
  "model.layers.26.self_attn.kv_b_proj.weight": "model-00008-of-00008.safetensors",
@@ -1061,10 +1061,10 @@
1061
  "model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00001-of-00008.safetensors",
1062
  "model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1063
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
1064
- "model.layers.3.self_attn.kv_a_norm.weight": "model-00001-of-00008.safetensors",
1065
- "model.layers.3.self_attn.kv_a_proj.biases": "model-00001-of-00008.safetensors",
1066
- "model.layers.3.self_attn.kv_a_proj.scales": "model-00001-of-00008.safetensors",
1067
- "model.layers.3.self_attn.kv_a_proj.weight": "model-00001-of-00008.safetensors",
1068
  "model.layers.3.self_attn.kv_b_proj.biases": "model-00001-of-00008.safetensors",
1069
  "model.layers.3.self_attn.kv_b_proj.scales": "model-00001-of-00008.safetensors",
1070
  "model.layers.3.self_attn.kv_b_proj.weight": "model-00001-of-00008.safetensors",
@@ -1269,10 +1269,10 @@
1269
  "model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00003-of-00008.safetensors",
1270
  "model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00003-of-00008.safetensors",
1271
  "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
1272
- "model.layers.7.self_attn.kv_a_norm.weight": "model-00002-of-00008.safetensors",
1273
- "model.layers.7.self_attn.kv_a_proj.biases": "model-00002-of-00008.safetensors",
1274
- "model.layers.7.self_attn.kv_a_proj.scales": "model-00002-of-00008.safetensors",
1275
- "model.layers.7.self_attn.kv_a_proj.weight": "model-00002-of-00008.safetensors",
1276
  "model.layers.7.self_attn.kv_b_proj.biases": "model-00002-of-00008.safetensors",
1277
  "model.layers.7.self_attn.kv_b_proj.scales": "model-00002-of-00008.safetensors",
1278
  "model.layers.7.self_attn.kv_b_proj.weight": "model-00002-of-00008.safetensors",
 
192
  "model.layers.11.mlp.switch_mlp.up_proj.scales": "model-00004-of-00008.safetensors",
193
  "model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00004-of-00008.safetensors",
194
  "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
195
+ "model.layers.11.self_attn.kv_a_layernorm.weight": "model-00004-of-00008.safetensors",
196
+ "model.layers.11.self_attn.kv_a_proj_with_mqa.biases": "model-00004-of-00008.safetensors",
197
+ "model.layers.11.self_attn.kv_a_proj_with_mqa.scales": "model-00004-of-00008.safetensors",
198
+ "model.layers.11.self_attn.kv_a_proj_with_mqa.weight": "model-00004-of-00008.safetensors",
199
  "model.layers.11.self_attn.kv_b_proj.biases": "model-00004-of-00008.safetensors",
200
  "model.layers.11.self_attn.kv_b_proj.scales": "model-00004-of-00008.safetensors",
201
  "model.layers.11.self_attn.kv_b_proj.weight": "model-00004-of-00008.safetensors",
 
400
  "model.layers.15.mlp.switch_mlp.up_proj.scales": "model-00005-of-00008.safetensors",
401
  "model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00005-of-00008.safetensors",
402
  "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
403
+ "model.layers.15.self_attn.kv_a_layernorm.weight": "model-00005-of-00008.safetensors",
404
+ "model.layers.15.self_attn.kv_a_proj_with_mqa.biases": "model-00005-of-00008.safetensors",
405
+ "model.layers.15.self_attn.kv_a_proj_with_mqa.scales": "model-00005-of-00008.safetensors",
406
+ "model.layers.15.self_attn.kv_a_proj_with_mqa.weight": "model-00005-of-00008.safetensors",
407
  "model.layers.15.self_attn.kv_b_proj.biases": "model-00005-of-00008.safetensors",
408
  "model.layers.15.self_attn.kv_b_proj.scales": "model-00005-of-00008.safetensors",
409
  "model.layers.15.self_attn.kv_b_proj.weight": "model-00005-of-00008.safetensors",
 
608
  "model.layers.19.mlp.switch_mlp.up_proj.scales": "model-00006-of-00008.safetensors",
609
  "model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00006-of-00008.safetensors",
610
  "model.layers.19.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
611
+ "model.layers.19.self_attn.kv_a_layernorm.weight": "model-00006-of-00008.safetensors",
612
+ "model.layers.19.self_attn.kv_a_proj_with_mqa.biases": "model-00006-of-00008.safetensors",
613
+ "model.layers.19.self_attn.kv_a_proj_with_mqa.scales": "model-00006-of-00008.safetensors",
614
+ "model.layers.19.self_attn.kv_a_proj_with_mqa.weight": "model-00006-of-00008.safetensors",
615
  "model.layers.19.self_attn.kv_b_proj.biases": "model-00006-of-00008.safetensors",
616
  "model.layers.19.self_attn.kv_b_proj.scales": "model-00006-of-00008.safetensors",
617
  "model.layers.19.self_attn.kv_b_proj.weight": "model-00006-of-00008.safetensors",
 
873
  "model.layers.23.mlp.switch_mlp.up_proj.scales": "model-00007-of-00008.safetensors",
874
  "model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00007-of-00008.safetensors",
875
  "model.layers.23.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
876
+ "model.layers.23.self_attn.kv_a_layernorm.weight": "model-00007-of-00008.safetensors",
877
+ "model.layers.23.self_attn.kv_a_proj_with_mqa.biases": "model-00007-of-00008.safetensors",
878
+ "model.layers.23.self_attn.kv_a_proj_with_mqa.scales": "model-00007-of-00008.safetensors",
879
+ "model.layers.23.self_attn.kv_a_proj_with_mqa.weight": "model-00007-of-00008.safetensors",
880
  "model.layers.23.self_attn.kv_b_proj.biases": "model-00007-of-00008.safetensors",
881
  "model.layers.23.self_attn.kv_b_proj.scales": "model-00007-of-00008.safetensors",
882
  "model.layers.23.self_attn.kv_b_proj.weight": "model-00007-of-00008.safetensors",
 
1024
  "model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00008-of-00008.safetensors",
1025
  "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00008-of-00008.safetensors",
1026
  "model.layers.26.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
1027
+ "model.layers.26.self_attn.kv_a_layernorm.weight": "model-00008-of-00008.safetensors",
1028
+ "model.layers.26.self_attn.kv_a_proj_with_mqa.biases": "model-00008-of-00008.safetensors",
1029
+ "model.layers.26.self_attn.kv_a_proj_with_mqa.scales": "model-00008-of-00008.safetensors",
1030
+ "model.layers.26.self_attn.kv_a_proj_with_mqa.weight": "model-00008-of-00008.safetensors",
1031
  "model.layers.26.self_attn.kv_b_proj.biases": "model-00008-of-00008.safetensors",
1032
  "model.layers.26.self_attn.kv_b_proj.scales": "model-00008-of-00008.safetensors",
1033
  "model.layers.26.self_attn.kv_b_proj.weight": "model-00008-of-00008.safetensors",
 
1061
  "model.layers.3.mlp.switch_mlp.up_proj.scales": "model-00001-of-00008.safetensors",
1062
  "model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1063
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
1064
+ "model.layers.3.self_attn.kv_a_layernorm.weight": "model-00001-of-00008.safetensors",
1065
+ "model.layers.3.self_attn.kv_a_proj_with_mqa.biases": "model-00001-of-00008.safetensors",
1066
+ "model.layers.3.self_attn.kv_a_proj_with_mqa.scales": "model-00001-of-00008.safetensors",
1067
+ "model.layers.3.self_attn.kv_a_proj_with_mqa.weight": "model-00001-of-00008.safetensors",
1068
  "model.layers.3.self_attn.kv_b_proj.biases": "model-00001-of-00008.safetensors",
1069
  "model.layers.3.self_attn.kv_b_proj.scales": "model-00001-of-00008.safetensors",
1070
  "model.layers.3.self_attn.kv_b_proj.weight": "model-00001-of-00008.safetensors",
 
1269
  "model.layers.7.mlp.switch_mlp.up_proj.scales": "model-00003-of-00008.safetensors",
1270
  "model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00003-of-00008.safetensors",
1271
  "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
1272
+ "model.layers.7.self_attn.kv_a_layernorm.weight": "model-00002-of-00008.safetensors",
1273
+ "model.layers.7.self_attn.kv_a_proj_with_mqa.biases": "model-00002-of-00008.safetensors",
1274
+ "model.layers.7.self_attn.kv_a_proj_with_mqa.scales": "model-00002-of-00008.safetensors",
1275
+ "model.layers.7.self_attn.kv_a_proj_with_mqa.weight": "model-00002-of-00008.safetensors",
1276
  "model.layers.7.self_attn.kv_b_proj.biases": "model-00002-of-00008.safetensors",
1277
  "model.layers.7.self_attn.kv_b_proj.scales": "model-00002-of-00008.safetensors",
1278
  "model.layers.7.self_attn.kv_b_proj.weight": "model-00002-of-00008.safetensors",