yzhangcs committed on
Commit
63b33f0
1 Parent(s): 3c83818

Upload GLAForCausalLM

Browse files
Files changed (4) hide show
  1. config.json +6 -1
  2. generation_config.json +1 -1
  3. model.safetensors +1 -1
  4. tokenizer.json +1 -0
config.json CHANGED
@@ -7,9 +7,11 @@
7
  "bos_token_id": 1,
8
  "clamp_min": null,
9
  "conv_size": 4,
 
10
  "eos_token_id": 2,
11
  "expand_k": 0.5,
12
  "expand_v": 1,
 
13
  "fuse_cross_entropy": true,
14
  "fuse_norm": true,
15
  "hidden_act": "swish",
@@ -19,16 +21,19 @@
19
  "intermediate_size": null,
20
  "max_position_embeddings": 2048,
21
  "model_type": "gla",
 
22
  "num_heads": 4,
23
  "num_hidden_layers": 24,
 
24
  "rms_norm_eps": 1e-06,
25
  "share_conv_kernel": true,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
- "transformers_version": "4.39.1",
29
  "use_cache": true,
30
  "use_gk": true,
31
  "use_gv": false,
 
32
  "use_short_conv": false,
33
  "vocab_size": 32000
34
  }
 
7
  "bos_token_id": 1,
8
  "clamp_min": null,
9
  "conv_size": 4,
10
+ "elementwise_affine": true,
11
  "eos_token_id": 2,
12
  "expand_k": 0.5,
13
  "expand_v": 1,
14
+ "feature_map": null,
15
  "fuse_cross_entropy": true,
16
  "fuse_norm": true,
17
  "hidden_act": "swish",
 
21
  "intermediate_size": null,
22
  "max_position_embeddings": 2048,
23
  "model_type": "gla",
24
+ "norm_eps": 1e-06,
25
  "num_heads": 4,
26
  "num_hidden_layers": 24,
27
+ "num_kv_heads": null,
28
  "rms_norm_eps": 1e-06,
29
  "share_conv_kernel": true,
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "bfloat16",
32
+ "transformers_version": "4.40.2",
33
  "use_cache": true,
34
  "use_gk": true,
35
  "use_gv": false,
36
+ "use_output_gate": true,
37
  "use_short_conv": false,
38
  "vocab_size": 32000
39
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.39.1"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "transformers_version": "4.40.2"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6737ad095f49a580fc19ab640bdde4f69f56a969988d84798c35c5fd9c25a96c
3
  size 2731063560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f086b8b8db79b9f466788f99f3e4d86951cc5eae8e37e4c7f04912ecb64d4eb2
3
  size 2731063560
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,