Li committed on
Commit
35e02a7
1 Parent(s): 621f3fa

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -27
README.md CHANGED
@@ -1,55 +1,28 @@
1
  ---
2
-
3
  language:
4
-
5
  - zh
6
-
7
  license:
8
-
9
  - apache-2.0
10
-
11
  ---
12
-
13
  ```python
14
-
15
  import jieba_fast
16
-
17
  from transformers import BertTokenizer
18
-
19
  from transformers import BigBirdModel
20
-
21
  class JiebaTokenizer(BertTokenizer):
22
-
23
  def __init__(
24
-
25
  self, pre_tokenizer=lambda x: jieba_fast.cut(x, HMM=False), *args, **kwargs
26
-
27
  ):
28
-
29
  super().__init__(*args, **kwargs)
30
-
31
  self.pre_tokenizer = pre_tokenizer
32
-
33
  def _tokenize(self, text, *arg, **kwargs):
34
-
35
  split_tokens = []
36
-
37
  for text in self.pre_tokenizer(text):
38
-
39
  if text in self.vocab:
40
-
41
  split_tokens.append(text)
42
-
43
  else:
44
-
45
  split_tokens.extend(super()._tokenize(text))
46
-
47
  return split_tokens
48
-
49
  model = BigBirdModel.from_pretrained('Lowin/chinese-bigbird-small')
50
-
51
  tokenizer = JiebaTokenizer.from_pretrained('Lowin/chinese-bigbird-small')
52
-
53
  ```
54
-
55
  https://github.com/LowinLi/chinese-bigbird
 
1
  ---
 
2
  language:
 
3
  - zh
 
4
  license:
 
5
  - apache-2.0
 
6
  ---
 
7
  ```python
 
8
  import jieba_fast
 
9
  from transformers import BertTokenizer
 
10
  from transformers import BigBirdModel
 
11
  class JiebaTokenizer(BertTokenizer):
 
12
  def __init__(
 
13
  self, pre_tokenizer=lambda x: jieba_fast.cut(x, HMM=False), *args, **kwargs
 
14
  ):
 
15
  super().__init__(*args, **kwargs)
 
16
  self.pre_tokenizer = pre_tokenizer
 
17
  def _tokenize(self, text, *arg, **kwargs):
 
18
  split_tokens = []
 
19
  for text in self.pre_tokenizer(text):
 
20
  if text in self.vocab:
 
21
  split_tokens.append(text)
 
22
  else:
 
23
  split_tokens.extend(super()._tokenize(text))
 
24
  return split_tokens
 
25
  model = BigBirdModel.from_pretrained('Lowin/chinese-bigbird-small')
 
26
  tokenizer = JiebaTokenizer.from_pretrained('Lowin/chinese-bigbird-small')
 
27
  ```
 
28
  https://github.com/LowinLi/chinese-bigbird