ayjays132 committed on
Commit
87ba2de
1 Parent(s): eb0b287

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Phillnet.png filter=lfs diff=lfs merge=lfs -text
37
  Model_Overview.png filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Phillnet.png filter=lfs diff=lfs merge=lfs -text
37
  Model_Overview.png filter=lfs diff=lfs merge=lfs -text
38
+ vocab.json filter=lfs diff=lfs merge=lfs -text
optimizer_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baeabefd42201c6174aa5e7478f82e9aaec22e292c465e8dd44a7a8238f9ada8
3
+ size 2838829242
special_tokens_map.json CHANGED
@@ -108,13 +108,7 @@
108
  "rstrip": false,
109
  "single_word": false
110
  },
111
- "pad_token": {
112
- "content": "<pad>",
113
- "lstrip": false,
114
- "normalized": false,
115
- "rstrip": false,
116
- "single_word": false
117
- },
118
  "unk_token": {
119
  "content": "<unk>",
120
  "lstrip": false,
 
108
  "rstrip": false,
109
  "single_word": false
110
  },
111
+ "pad_token": "</s>",
 
 
 
 
 
 
112
  "unk_token": {
113
  "content": "<unk>",
114
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,7 +1,19 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -1064,6 +1076,15 @@
1064
  "rstrip": false,
1065
  "normalized": true,
1066
  "special": false
 
 
 
 
 
 
 
 
 
1067
  }
1068
  ],
1069
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 0,
14
+ "pad_type_id": 0,
15
+ "pad_token": "<pad>"
16
+ },
17
  "added_tokens": [
18
  {
19
  "id": 0,
 
1076
  "rstrip": false,
1077
  "normalized": true,
1078
  "special": false
1079
+ },
1080
+ {
1081
+ "id": 32114,
1082
+ "content": "[PAD]",
1083
+ "single_word": false,
1084
+ "lstrip": false,
1085
+ "rstrip": false,
1086
+ "normalized": false,
1087
+ "special": true
1088
  }
1089
  ],
1090
  "normalizer": {
tokenizer_config.json CHANGED
@@ -1,4 +1,10 @@
1
  {
 
 
 
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
@@ -943,6 +949,14 @@
943
  "rstrip": false,
944
  "single_word": false,
945
  "special": false
 
 
 
 
 
 
 
 
946
  }
947
  },
948
  "additional_special_tokens": [
@@ -1047,24 +1061,40 @@
1047
  "<extra_id_98>",
1048
  "<extra_id_99>"
1049
  ],
 
1050
  "clean_up_tokenization_spaces": true,
 
1051
  "eos_token": "</s>",
1052
  "extra_ids": 100,
1053
  "max_length": 1024,
1054
  "model_max_length": 1024,
1055
- "pad_token": "<pad>",
1056
- "pad_token_type_id": 0,
1057
- "padding_side": "right",
1058
- "tokenizer_class": "T5Tokenizer",
1059
- "unk_token": "<unk>",
1060
- "enable_token_classification": true,
1061
  "normalization_rules": {
1062
  "enable": true,
1063
  "lowercase": true,
1064
- "strip_accents": true,
1065
  "pre_tokenization": {
1066
  "punctuation_split": true,
1067
  "split_digits": true
1068
- }
1069
- }
1070
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
+ "adaptive_tokenization": {
3
+ "enable": true,
4
+ "max_tokens": 100000,
5
+ "min_frequency": 3,
6
+ "retain_variant_forms": true
7
+ },
8
  "added_tokens_decoder": {
9
  "0": {
10
  "content": "<pad>",
 
949
  "rstrip": false,
950
  "single_word": false,
951
  "special": false
952
+ },
953
+ "32114": {
954
+ "content": "[PAD]",
955
+ "lstrip": false,
956
+ "normalized": false,
957
+ "rstrip": false,
958
+ "single_word": false,
959
+ "special": true
960
  }
961
  },
962
  "additional_special_tokens": [
 
1061
  "<extra_id_98>",
1062
  "<extra_id_99>"
1063
  ],
1064
+ "bpe_dropout": 0.1,
1065
  "clean_up_tokenization_spaces": true,
1066
+ "enable_token_classification": true,
1067
  "eos_token": "</s>",
1068
  "extra_ids": 100,
1069
  "max_length": 1024,
1070
  "model_max_length": 1024,
 
 
 
 
 
 
1071
  "normalization_rules": {
1072
  "enable": true,
1073
  "lowercase": true,
1074
+ "nmt_normalization": true,
1075
  "pre_tokenization": {
1076
  "punctuation_split": true,
1077
  "split_digits": true
1078
+ },
1079
+ "strip_accents": true
1080
+ },
1081
+ "pad_to_multiple_of": null,
1082
+ "pad_token": "</s>",
1083
+ "pad_token_type_id": 0,
1084
+ "padding_side": "right",
1085
+ "spe_tokenization": {
1086
+ "coverage": 0.9995,
1087
+ "enable": true,
1088
+ "nbest_size": 64
1089
+ },
1090
+ "stride": 0,
1091
+ "subword_regularization": {
1092
+ "alpha": 0.1,
1093
+ "dropout": 0.1,
1094
+ "enable": true
1095
+ },
1096
+ "tokenizer_class": "T5Tokenizer",
1097
+ "truncation_side": "right",
1098
+ "truncation_strategy": "longest_first",
1099
+ "unk_token": "<unk>"
1100
+ }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff