update README
Browse files
- README.md +13 -6
- config.json +0 -1
README.md
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
---
|
2 |
language: "mn"
|
3 |
tags:
|
|
|
4 |
- mongolian
|
5 |
-
-
|
6 |
---
|
7 |
|
8 |
-
# BERT-
|
9 |
[Link to Official Mongolian-BERT repo](https://github.com/tugstugi/mongolian-bert)
|
10 |
|
11 |
## Model description
|
@@ -18,21 +19,27 @@ This repository is based on the following open source projects: [google-research
|
|
18 |
#### How to use
|
19 |
|
20 |
```python
|
21 |
-
from transformers import pipeline, AutoTokenizer,
|
22 |
|
23 |
-
tokenizer = AutoTokenizer.from_pretrained('tugstugi/bert-large-mongolian-uncased')
|
24 |
-
model =
|
25 |
|
26 |
## declare task ##
|
27 |
pipe = pipeline(task="fill-mask", model=model, tokenizer=tokenizer)
|
28 |
|
29 |
## example ##
|
30 |
-
input_ = '
|
31 |
|
32 |
output_ = pipe(input_)
|
33 |
for i in range(len(output_)):
|
34 |
print(output_[i])
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
```
|
37 |
|
38 |
|
|
|
1 |
---
|
2 |
language: "mn"
|
3 |
tags:
|
4 |
+
- bert
|
5 |
- mongolian
|
6 |
+
- uncased
|
7 |
---
|
8 |
|
9 |
+
# BERT-LARGE-MONGOLIAN-UNCASED
|
10 |
[Link to Official Mongolian-BERT repo](https://github.com/tugstugi/mongolian-bert)
|
11 |
|
12 |
## Model description
|
|
|
19 |
#### How to use
|
20 |
|
21 |
```python
|
22 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM
|
23 |
|
24 |
+
tokenizer = AutoTokenizer.from_pretrained('tugstugi/bert-large-mongolian-uncased', use_fast=False)
|
25 |
+
model = AutoModelForMaskedLM.from_pretrained('tugstugi/bert-large-mongolian-uncased')
|
26 |
|
27 |
## declare task ##
|
28 |
pipe = pipeline(task="fill-mask", model=model, tokenizer=tokenizer)
|
29 |
|
30 |
## example ##
|
31 |
+
input_ = 'Монгол улсын [MASK] Улаанбаатар хотоос ярьж байна.'
|
32 |
|
33 |
output_ = pipe(input_)
|
34 |
for i in range(len(output_)):
|
35 |
print(output_[i])
|
36 |
|
37 |
+
## output ##
|
38 |
+
# {'sequence': 'монгол улсын нийслэл улаанбаатар хотоос ярьж байна.', 'score': 0.7867621183395386, 'token': 849, 'token_str': 'нийслэл'}
|
39 |
+
# {'sequence': 'монгол улсын ерөнхийлөгч улаанбаатар хотоос ярьж байна.', 'score': 0.14303277432918549, 'token': 244, 'token_str': 'ерөнхийлөгч'}
|
40 |
+
# {'sequence': 'монгол улсын ерөнхийлөгчийг улаанбаатар хотоос ярьж байна.', 'score': 0.011642335914075375, 'token': 8373, 'token_str': 'ерөнхийлөгчийг'}
|
41 |
+
# {'sequence': 'монгол улсын иргэд улаанбаатар хотоос ярьж байна.', 'score': 0.006592822726815939, 'token': 247, 'token_str': 'иргэд'}
|
42 |
+
# {'sequence': 'монгол улсын нийслэлийг улаанбаатар хотоос ярьж байна.', 'score': 0.006165097933262587, 'token': 15501, 'token_str': 'нийслэлийг'}
|
43 |
```
|
44 |
|
45 |
|
config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/content/model-uncased-large-32k-3000000",
|
3 |
"architectures": [
|
4 |
"BertForMaskedLM"
|
5 |
],
|
|
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
"BertForMaskedLM"
|
4 |
],
|