TwentyNine
committed on
Commit
•
908620a
1
Parent(s):
88c2b0e
Correct incorrect code in model card.
Browse files
README.md
CHANGED
@@ -16,7 +16,7 @@ The following is adapted from [slone/nllb-rus-tyv-v1](https://huggingface.co/slo
|
|
16 |
|
17 |
```Python
|
18 |
# the version of transformers is important!
|
19 |
-
!pip install sentencepiece transformers==4.33
|
20 |
import torch
|
21 |
from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
|
22 |
|
@@ -39,14 +39,14 @@ def fix_tokenizer(tokenizer, new_lang):
|
|
39 |
MODEL_URL = "TwentyNine/nllb-ain-kana-latin-converter-v1"
|
40 |
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
|
41 |
tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
|
42 |
-
fix_tokenizer(tokenizer, '
|
43 |
fix_tokenizer(tokenizer, 'ain_Latn')
|
44 |
|
45 |
-
def
|
46 |
text,
|
47 |
model,
|
48 |
tokenizer,
|
49 |
-
src_lang='
|
50 |
tgt_lang='ain_Latn',
|
51 |
max_length='auto',
|
52 |
num_beams=4,
|
@@ -69,8 +69,12 @@ def translate(
|
|
69 |
out = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
70 |
if isinstance(text, str) and n_out is None:
|
71 |
return out[0]
|
72 |
-
return
|
73 |
|
74 |
-
|
75 |
# 'pon seta ku=kor rusuy'
|
|
|
|
|
|
|
|
|
76 |
```
|
|
|
16 |
|
17 |
```Python
|
18 |
# the version of transformers is important!
|
19 |
+
!pip install sentencepiece transformers==4.33 > /dev/null
|
20 |
import torch
|
21 |
from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
|
22 |
|
|
|
39 |
MODEL_URL = "TwentyNine/nllb-ain-kana-latin-converter-v1"
|
40 |
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
|
41 |
tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
|
42 |
+
fix_tokenizer(tokenizer, 'ain_Japn')
|
43 |
fix_tokenizer(tokenizer, 'ain_Latn')
|
44 |
|
45 |
+
def convert(
|
46 |
text,
|
47 |
model,
|
48 |
tokenizer,
|
49 |
+
src_lang='ain_Japn',
|
50 |
tgt_lang='ain_Latn',
|
51 |
max_length='auto',
|
52 |
num_beams=4,
|
|
|
69 |
out = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
70 |
if isinstance(text, str) and n_out is None:
|
71 |
return out[0]
|
72 |
+
return
|
73 |
|
74 |
+
convert("ポイ　セタ　クコン　ルスイ", model=model, tokenizer=tokenizer)
|
75 |
# 'pon seta ku=kor rusuy'
|
76 |
+
|
77 |
+
convert("タント がっこう　オルン　パイェ", model=model, tokenizer=tokenizer)
|
78 |
+
# 'tanto がっこう or un paye'
|
79 |
+
# ideal: 'tanto GAKKO or un paye' or 'tanto GAKKOU or un paye'
|
80 |
```
|