Demosthene-OR
committed on
Commit
•
17f4cff
1
Parent(s):
054d05d
Reprise de lang id par DL
Browse files
- main_dl.py +4 -5
- requirements.txt +3 -2
main_dl.py
CHANGED
@@ -14,7 +14,7 @@ import string
|
|
14 |
import re
|
15 |
import json
|
16 |
import csv
|
17 |
-
|
18 |
from sklearn.preprocessing import LabelEncoder
|
19 |
from tensorflow import keras
|
20 |
# import keras
|
@@ -244,7 +244,7 @@ def load_all_data():
|
|
244 |
rnn_en_fr, rnn_fr_en, transformer_en_fr, transformer_fr_en = load_all_data()
|
245 |
|
246 |
# ==== Language identifier ====
|
247 |
-
|
248 |
def encode_text(textes):
|
249 |
global tokenizer
|
250 |
|
@@ -273,7 +273,7 @@ def init_dl_identifier():
|
|
273 |
merge = Merge(dataPath+"/dl_id_lang_split", dataPath, "dl_tiktoken_id_language_model.h5").merge(cleanup=False)
|
274 |
dl_model = keras.models.load_model(dataPath+"/dl_tiktoken_id_language_model.h5")
|
275 |
return
|
276 |
-
|
277 |
def lang_id_dl(sentences):
|
278 |
global dl_model, label_encoder, lan_to_language
|
279 |
|
@@ -290,8 +290,7 @@ def lang_id_dl(sentences):
|
|
290 |
@api.get('/', name="Vérification que l'API fonctionne")
|
291 |
def check_api():
|
292 |
load_all_data()
|
293 |
-
|
294 |
-
# init_dl_identifier()
|
295 |
return {'message': "L'API fonctionne"}
|
296 |
|
297 |
@api.get('/small_vocab/rnn', name="Traduction par RNN")
|
|
|
14 |
import re
|
15 |
import json
|
16 |
import csv
|
17 |
+
import tiktoken
|
18 |
from sklearn.preprocessing import LabelEncoder
|
19 |
from tensorflow import keras
|
20 |
# import keras
|
|
|
244 |
rnn_en_fr, rnn_fr_en, transformer_en_fr, transformer_fr_en = load_all_data()
|
245 |
|
246 |
# ==== Language identifier ====
|
247 |
+
|
248 |
def encode_text(textes):
|
249 |
global tokenizer
|
250 |
|
|
|
273 |
merge = Merge(dataPath+"/dl_id_lang_split", dataPath, "dl_tiktoken_id_language_model.h5").merge(cleanup=False)
|
274 |
dl_model = keras.models.load_model(dataPath+"/dl_tiktoken_id_language_model.h5")
|
275 |
return
|
276 |
+
'''
|
277 |
def lang_id_dl(sentences):
|
278 |
global dl_model, label_encoder, lan_to_language
|
279 |
|
|
|
290 |
@api.get('/', name="Vérification que l'API fonctionne")
|
291 |
def check_api():
|
292 |
load_all_data()
|
293 |
+
init_dl_identifier()
|
|
|
294 |
return {'message': "L'API fonctionne"}
|
295 |
|
296 |
@api.get('/small_vocab/rnn', name="Traduction par RNN")
|
requirements.txt
CHANGED
@@ -12,7 +12,7 @@ pydantic==2.6.1
|
|
12 |
pydantic-core==2.16.2
|
13 |
python-dateutil==2.8.2
|
14 |
pytz==2024.1
|
15 |
-
requests==2.
|
16 |
six==1.16.0
|
17 |
sniffio==1.3.0
|
18 |
starlette==0.36.3
|
@@ -27,4 +27,5 @@ sentencepiece==0.1.99
|
|
27 |
filesplit==4.0.1
|
28 |
pydot==2.0.0
|
29 |
graphviz==0.20.1
|
30 |
-
scikit-learn==1.1.3
|
|
|
|
12 |
pydantic-core==2.16.2
|
13 |
python-dateutil==2.8.2
|
14 |
pytz==2024.1
|
15 |
+
requests==2.26.0
|
16 |
six==1.16.0
|
17 |
sniffio==1.3.0
|
18 |
starlette==0.36.3
|
|
|
27 |
filesplit==4.0.1
|
28 |
pydot==2.0.0
|
29 |
graphviz==0.20.1
|
30 |
+
scikit-learn==1.1.3
|
31 |
+
tiktoken
|