harouzie committed
Commit 82770a5 (parent: e0027bc)

Training in progress, step 500
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+  "<mask>": 64000
+}
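PhoBERT's base BPE vocabulary ends at id 63999, so the <mask> entry added here lands at id 64000 and lifts the total vocabulary to 64001 (matching the vocab_size change in config.json below). A minimal sketch to confirm the mapping, assuming transformers is installed and that vinai/phobert-base is the source checkpoint named in the config diff:

# Sketch: confirm the <mask> token added in added_tokens.json resolves to id 64000.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("vinai/phobert-base")
print(tok.mask_token, tok.mask_token_id)  # expected: <mask> 64000
print(len(tok))                           # expected: 64001, i.e. the vocab_size below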
bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
config.json CHANGED
@@ -4,7 +4,7 @@
     "EncoderDecoderModel"
   ],
   "decoder": {
-    "_name_or_path": "roberta-base",
+    "_name_or_path": "vinai/phobert-base",
     "add_cross_attention": true,
     "architectures": [
       "RobertaForMaskedLM"
@@ -26,6 +26,7 @@
     "finetuning_task": null,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
     "hidden_act": "gelu",
     "hidden_dropout_prob": 0.1,
     "hidden_size": 768,
@@ -44,7 +45,7 @@
     "layer_norm_eps": 1e-05,
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 514,
+    "max_position_embeddings": 258,
     "min_length": 0,
     "model_type": "roberta",
     "no_repeat_ngram_size": 0,
@@ -72,7 +73,7 @@
     "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
-    "tokenizer_class": null,
+    "tokenizer_class": "PhobertTokenizer",
     "top_k": 50,
     "top_p": 1.0,
     "torch_dtype": null,
@@ -82,12 +83,12 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 50265
+    "vocab_size": 64001
   },
   "decoder_start_token_id": 0,
   "early_stopping": true,
   "encoder": {
-    "_name_or_path": "roberta-base",
+    "_name_or_path": "vinai/phobert-base",
     "add_cross_attention": false,
     "architectures": [
       "RobertaForMaskedLM"
@@ -109,6 +110,7 @@
     "finetuning_task": null,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
     "hidden_act": "gelu",
     "hidden_dropout_prob": 0.1,
     "hidden_size": 768,
@@ -127,7 +129,7 @@
     "layer_norm_eps": 1e-05,
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 514,
+    "max_position_embeddings": 258,
     "min_length": 0,
     "model_type": "roberta",
     "no_repeat_ngram_size": 0,
@@ -155,7 +157,7 @@
     "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
-    "tokenizer_class": null,
+    "tokenizer_class": "PhobertTokenizer",
     "top_k": 50,
     "top_p": 1.0,
     "torch_dtype": null,
@@ -165,7 +167,7 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 50265
+    "vocab_size": 64001
   },
   "eos_token_id": 2,
   "is_encoder_decoder": true,
@@ -178,5 +180,5 @@
   "tie_encoder_decoder": true,
   "torch_dtype": "float32",
   "transformers_version": null,
-  "vocab_size": 50265
+  "vocab_size": 64001
 }
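The changes above swap both halves of the EncoderDecoderModel from roberta-base to vinai/phobert-base: vocab_size becomes 64001, max_position_embeddings drops to 258, tokenizer_class is pinned to PhobertTokenizer, and gradient_checkpointing is recorded explicitly. A hedged sketch of how a config like this is typically produced with the transformers API; the warm-start call and the tied encoder/decoder mirror values in the diff, while the save path and exact token-id assignments are assumptions:

# Sketch: build an encoder-decoder from two PhoBERT checkpoints so that its
# config resembles the new config.json above. Requires transformers and torch.
from transformers import AutoTokenizer, EncoderDecoderModel

tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

model = EncoderDecoderModel.from_encoder_decoder_pretrained(
    "vinai/phobert-base",        # encoder
    "vinai/phobert-base",        # decoder (cross-attention is added for it)
    tie_encoder_decoder=True,    # "tie_encoder_decoder": true in the diff
)

# Generation-related fields visible in the merged config.
model.config.decoder_start_token_id = tokenizer.bos_token_id  # 0 in this diff
model.config.eos_token_id = tokenizer.eos_token_id            # 2 in this diff
model.config.pad_token_id = tokenizer.pad_token_id
model.config.early_stopping = True

model.config.save_pretrained("phobert2phobert")  # hypothetical output dir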
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eaf26529644227dac37cd3229f6b05813d48ed14c35b16952c597a56127b9fc8
-size 614771365
+oid sha256:1a203030b90c83185e77226eeaed08ad2c9c2447371c48a319e9b019226ead81
+size 656232805
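The model weights live in Git LFS, so only the pointer (a sha256 oid plus byte size) is versioned here; the checkpoint grows from roughly 615 MB to 656 MB once the PhoBERT-based weights with the 64,001-token vocabulary replace the roberta-base ones. A small sketch, assuming a locally downloaded pytorch_model.bin, to check that a file matches this pointer:

# Sketch: verify a local pytorch_model.bin against the LFS pointer above
# (oid = sha256 of the real file, size = its byte count). Path is an assumption.
import hashlib
import os

path = "pytorch_model.bin"
expected_oid = "1a203030b90c83185e77226eeaed08ad2c9c2447371c48a319e9b019226ead81"
expected_size = 656232805

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

print(os.path.getsize(path) == expected_size and digest.hexdigest() == expected_oid)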
runs/May12_16-01-38_5165d9a7ee5b/1683907656.4304566/events.out.tfevents.1683907656.5165d9a7ee5b.168.7 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aca4cea1cafa9886991b2d68b5fe7e156b8b1fc222d99b8fb92558d24fcf9b18
+size 6199
runs/May12_16-01-38_5165d9a7ee5b/events.out.tfevents.1683907656.5165d9a7ee5b.168.6 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca4db850db7d5ef9d0bbe47c6592a3958fb7f503e8acad68c1fe66c9c69c69da
+size 8771
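These events.out.tfevents.* files are TensorBoard logs written by the Trainer during the run (the extra timestamped subdirectory appears to hold the hyperparameter dump). They can be browsed with tensorboard --logdir runs, or read programmatically; a sketch assuming the tensorboard package is installed and that a scalar tag such as train/loss exists, which may differ in the actual run:

# Sketch: read training scalars out of the tfevents files with TensorBoard's
# event reader. The tag name "train/loss" is an assumption; inspect Tags() first.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/May12_16-01-38_5165d9a7ee5b")
acc.Reload()
print(acc.Tags())                     # tags actually logged by this run
for ev in acc.Scalars("train/loss"):  # hypothetical tag
    print(ev.step, ev.value)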
special_tokens_map.json CHANGED
@@ -2,13 +2,7 @@
   "bos_token": "<s>",
   "cls_token": "<s>",
   "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "mask_token": "<mask>",
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "unk_token": "<unk>"
tokenizer_config.json CHANGED
@@ -1,15 +1,12 @@
 {
-  "add_prefix_space": false,
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",
   "eos_token": "</s>",
-  "errors": "replace",
   "mask_token": "<mask>",
-  "model_max_length": 512,
+  "model_max_length": 256,
   "pad_token": "<pad>",
   "sep_token": "</s>",
-  "tokenizer_class": "RobertaTokenizer",
-  "trim_offsets": true,
+  "tokenizer_class": "PhobertTokenizer",
   "unk_token": "<unk>"
 }
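With tokenizer_class now PhobertTokenizer and model_max_length reduced to 256 (PhoBERT's sequence limit, consistent with max_position_embeddings: 258 in config.json), inputs longer than 256 tokens have to be truncated. A minimal sketch, again assuming vinai/phobert-base as the source checkpoint and an arbitrary sample sentence:

# Sketch: the tokenizer described by the new tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("vinai/phobert-base")
print(type(tok).__name__)    # expected: PhobertTokenizer
print(tok.model_max_length)  # expected: 256

enc = tok(
    "Xin chào thế giới",     # PhoBERT expects word-segmented Vietnamese input
    truncation=True,
    max_length=256,
)
print(len(enc["input_ids"]))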
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b217cfb83ec0d94f3082e68a6c1d84fd3e01fb1ae97b6fe977764a9822ac8ec
+oid sha256:6ca12c74006d536fad46377ac4ad99cf43ea95fbe535bb6d77fdb10d1e6999b4
 size 3771
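training_args.bin is the pickled TrainingArguments object the run was launched with; only its hash changes here, while the serialized size stays at 3771 bytes. A hedged sketch for inspecting it, assuming a compatible transformers version is importable so the class can be unpickled (the weights_only flag exists on PyTorch 1.13+, and recent releases require it to be False for arbitrary pickled objects):

# Sketch: inspect the pickled TrainingArguments stored in training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)  # TrainingArguments (or Seq2SeqTrainingArguments)
print(args.per_device_train_batch_size, args.learning_rate, args.num_train_epochs)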
vocab.txt ADDED
The diff for this file is too large to render. See raw diff