ai-forever committed on
Commit
64b1153
1 Parent(s): 0101459

Align model config with tokenizer settings

Browse files
Files changed (3) hide show
  1. config.json +3 -2
  2. generation_config.json +3 -2
  3. tokenizer_config.json +3 -1
config.json CHANGED
@@ -4,9 +4,9 @@
4
  "GPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
- "bos_token_id": 50256,
8
  "embd_pdrop": 0.1,
9
- "eos_token_id": 50256,
10
  "initializer_range": 0.02,
11
  "layer_norm_epsilon": 1e-05,
12
  "model_type": "gpt2",
@@ -16,6 +16,7 @@
16
  "n_inner": null,
17
  "n_layer": 40,
18
  "n_positions": 2048,
 
19
  "reorder_and_upcast_attn": false,
20
  "resid_pdrop": 0.1,
21
  "scale_attn_by_inverse_layer_idx": false,
 
4
  "GPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
+ "bos_token_id": 2,
8
  "embd_pdrop": 0.1,
9
+ "eos_token_id": 3,
10
  "initializer_range": 0.02,
11
  "layer_norm_epsilon": 1e-05,
12
  "model_type": "gpt2",
 
16
  "n_inner": null,
17
  "n_layer": 40,
18
  "n_positions": 2048,
19
+ "pad_token_id": 0,
20
  "reorder_and_upcast_attn": false,
21
  "resid_pdrop": 0.1,
22
  "scale_attn_by_inverse_layer_idx": false,
generation_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 50256,
4
- "eos_token_id": 50256,
 
5
  "transformers_version": "4.27.1"
6
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 3,
5
+ "pad_token_id": 0,
6
  "transformers_version": "4.27.1"
7
  }
tokenizer_config.json CHANGED
@@ -20,7 +20,7 @@
20
  },
21
  "errors": "replace",
22
  "mask_token": "<mask>",
23
- "model_max_length": 1000000000000000019884624838656,
24
  "pad_token": {
25
  "__type": "AddedToken",
26
  "content": "<pad>",
@@ -29,7 +29,9 @@
29
  "rstrip": false,
30
  "single_word": false
31
  },
 
32
  "tokenizer_class": "GPT2Tokenizer",
 
33
  "unk_token": {
34
  "__type": "AddedToken",
35
  "content": "<|endoftext|>",
 
20
  },
21
  "errors": "replace",
22
  "mask_token": "<mask>",
23
+ "model_max_length": 2048,
24
  "pad_token": {
25
  "__type": "AddedToken",
26
  "content": "<pad>",
 
29
  "rstrip": false,
30
  "single_word": false
31
  },
32
+ "padding_side": "left",
33
  "tokenizer_class": "GPT2Tokenizer",
34
+ "truncation_side": "left",
35
  "unk_token": {
36
  "__type": "AddedToken",
37
  "content": "<|endoftext|>",