{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "[STOP]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "[UNK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "[SPACE]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
} | |
], | |
"normalizer": null, | |
"pre_tokenizer": { | |
"type": "Whitespace" | |
}, | |
"post_processor": null, | |
"decoder": null, | |
"model": { | |
"type": "BPE", | |
"language": "nl", | |
"dropout": null, | |
"unk_token": "[UNK]", | |
"continuing_subword_prefix": null, | |
"end_of_word_suffix": null, | |
"fuse_unk": false, | |
"byte_fallback": false, | |
"vocab": { | |
"[STOP]": 0, | |
"[UNK]": 1, | |
"[SPACE]": 2, | |
"!": 3, | |
"\"": 4, | |
"$": 5, | |
"&": 6, | |
"'": 7, | |
"(": 8, | |
")": 9, | |
"*": 10, | |
",": 11, | |
"-": 12, | |
".": 13, | |
"/": 14, | |
"0": 15, | |
"1": 16, | |
"2": 17, | |
"3": 18, | |
"4": 19, | |
"5": 20, | |
"6": 21, | |
"7": 22, | |
"8": 23, | |
"9": 24, | |
":": 25, | |
";": 26, | |
"<": 27, | |
"=": 28, | |
">": 29, | |
"?": 30, | |
"A": 31, | |
"B": 32, | |
"C": 33, | |
"D": 34, | |
"E": 35, | |
"F": 36, | |
"G": 37, | |
"H": 38, | |
"I": 39, | |
"J": 40, | |
"K": 41, | |
"L": 42, | |
"M": 43, | |
"N": 44, | |
"O": 45, | |
"P": 46, | |
"Q": 47, | |
"R": 48, | |
"S": 49, | |
"T": 50, | |
"U": 51, | |
"V": 52, | |
"W": 53, | |
"X": 54, | |
"Y": 55, | |
"Z": 56, | |
"a": 57, | |
"b": 58, | |
"c": 59, | |
"d": 60, | |
"e": 61, | |
"f": 62, | |
"g": 63, | |
"h": 64, | |
"i": 65, | |
"j": 66, | |
"k": 67, | |
"l": 68, | |
"m": 69, | |
"n": 70, | |
"o": 71, | |
"p": 72, | |
"q": 73, | |
"r": 74, | |
"s": 75, | |
"t": 76, | |
"u": 77, | |
"v": 78, | |
"w": 79, | |
"x": 80, | |
"y": 81, | |
"z": 82, | |
"©": 83, | |
"«": 84, | |
"°": 85, | |
"»": 86, | |
"¿": 87, | |
"Ó": 88, | |
"Ö": 89, | |
"Ü": 90, | |
"ß": 91, | |
"à": 92, | |
"á": 93, | |
"ä": 94, | |
"ç": 95, | |
"è": 96, | |
"é": 97, | |
"ê": 98, | |
"ë": 99, | |
"í": 100, | |
"î": 101, | |
"ï": 102, | |
"ñ": 103, | |
"ò": 104, | |
"ó": 105, | |
"ô": 106, | |
"ö": 107, | |
"ú": 108, | |
"û": 109, | |
"ü": 110, | |
"č": 111, | |
"ę": 112, | |
"ł": 113, | |
"œ": 114, | |
"ř": 115, | |
"ś": 116, | |
"ƒ": 117, | |
"α": 118, | |
"π": 119, | |
"–": 120, | |
"‘": 121, | |
"’": 122, | |
"“": 123, | |
"”": 124, | |
"•": 125, | |
"…": 126, | |
"Ω": 127, | |
"ℵ": 128, | |
"en": 129, | |
"er": 130, | |
"ij": 131, | |
"de": 132, | |
"et": 133, | |
"aa": 134, | |
"an": 135, | |
"el": 136, | |
"in": 137, | |
"st": 138, | |
"ch": 139, | |
"aar": 140, | |
"oo": 141, | |
"at": 142, | |
"een": 143, | |
"ge": 144, | |
"on": 145, | |
"ie": 146, | |
"te": 147, | |
"het": 148, | |
"al": 149, | |
"ver": 150, | |
"op": 151, | |
"ijn": 152, | |
"van": 153, | |
"ze": 154, | |
"gen": 155, | |
"oe": 156, | |
"wa": 157, | |
"ee": 158, | |
"it": 159, | |
"den": 160, | |
"oor": 161, | |
"hij": 162, | |
"dat": 163, | |
"cht": 164, | |
"der": 165, | |
"is": 166, | |
"iet": 167, | |
"zijn": 168, | |
"he": 169, | |
"om": 170, | |
"be": 171, | |
"aan": 172, | |
"je": 173, | |
"ou": 174, | |
"ken": 175, | |
"niet": 176, | |
"ik": 177, | |
"ar": 178, | |
"eer": 179, | |
"or": 180, | |
"sch": 181, | |
"was": 182, | |
"le": 183, | |
"die": 184, | |
"met": 185, | |
"ad": 186, | |
"ijk": 187, | |
"zi": 188, | |
"ing": 189, | |
"re": 190, | |
"ur": 191, | |
"uit": 192, | |
"we": 193, | |
"had": 194, | |
"il": 195, | |
"to": 196, | |
"ig": 197, | |
"ven": 198, | |
"voor": 199, | |
"zei": 200, | |
"ol": 201, | |
"no": 202, | |
"acht": 203, | |
"am": 204, | |
"maar": 205, | |
"ten": 206, | |
"als": 207, | |
"naar": 208, | |
"us": 209, | |
"ien": 210, | |
"gr": 211, | |
"hem": 212, | |
"gel": 213, | |
"un": 214, | |
"af": 215, | |
"vr": 216, | |
"over": 217, | |
"id": 218, | |
"haar": 219, | |
"of": 220, | |
"zo": 221, | |
"ste": 222, | |
"and": 223, | |
"Hij": 224, | |
"men": 225, | |
"sp": 226, | |
"dr": 227, | |
"la": 228, | |
"waar": 229, | |
"arr": 230, | |
"Harr": 231, | |
"lijk": 232, | |
"Harry": 233, | |
"zich": 234, | |
"ter": 235, | |
"ond": 236, | |
".’": 237, | |
"aal": 238, | |
"ui": 239, | |
"wer": 240, | |
"ier": 241, | |
"nog": 242, | |
"door": 243, | |
"Ik": 244, | |
"dan": 245, | |
"ro": 246, | |
"ook": 247, | |
"aat": 248, | |
"heb": 249, | |
"ben": 250, | |
"bl": 251, | |
"ag": 252, | |
"bij": 253, | |
"ak": 254 | |
}, | |
"merges": [ | |
"e n", | |
"e r", | |
"i j", | |
"d e", | |
"e t", | |
"a a", | |
"a n", | |
"e l", | |
"i n", | |
"s t", | |
"c h", | |
"aa r", | |
"o o", | |
"a t", | |
"e en", | |
"g e", | |
"o n", | |
"i e", | |
"t e", | |
"h et", | |
"a l", | |
"v er", | |
"o p", | |
"ij n", | |
"v an", | |
"z e", | |
"g en", | |
"o e", | |
"w a", | |
"e e", | |
"i t", | |
"d en", | |
"oo r", | |
"h ij", | |
"d at", | |
"ch t", | |
"d er", | |
"i s", | |
"i et", | |
"z ijn", | |
"h e", | |
"o m", | |
"b e", | |
"aa n", | |
"j e", | |
"o u", | |
"k en", | |
"n iet", | |
"i k", | |
"a r", | |
"e er", | |
"o r", | |
"s ch", | |
"wa s", | |
"l e", | |
"d ie", | |
"m et", | |
"a d", | |
"ij k", | |
"z i", | |
"in g", | |
"r e", | |
"u r", | |
"u it", | |
"w e", | |
"h ad", | |
"i l", | |
"t o", | |
"i g", | |
"v en", | |
"v oor", | |
"ze i", | |
"o l", | |
"n o", | |
"a cht", | |
"a m", | |
"m aar", | |
"t en", | |
"al s", | |
"n aar", | |
"u s", | |
"i en", | |
"g r", | |
"he m", | |
"g el", | |
"u n", | |
"a f", | |
"v r", | |
"o ver", | |
"i d", | |
"h aar", | |
"o f", | |
"z o", | |
"st e", | |
"an d", | |
"H ij", | |
"m en", | |
"s p", | |
"d r", | |
"l a", | |
"w aar", | |
"ar r", | |
"H arr", | |
"l ijk", | |
"Harr y", | |
"zi ch", | |
"t er", | |
"on d", | |
". ’", | |
"aa l", | |
"u i", | |
"w er", | |
"i er", | |
"no g", | |
"d oor", | |
"I k", | |
"d an", | |
"r o", | |
"oo k", | |
"aa t", | |
"he b", | |
"b en", | |
"b l", | |
"a g", | |
"b ij", | |
"a k" | |
] | |
} | |
} |