lingbionlp committed on
Commit
645d04b
1 Parent(s): acdbc7f

Upload 10 files

Browse files
Files changed (3) hide show
  1. src/ml_ner.py +8 -17
  2. src/nn_model.py +1 -13
  3. src/tagging_text.py +2 -2
src/ml_ner.py CHANGED
@@ -8,7 +8,7 @@ Created on Fri Jun 12 16:41:54 2020
8
  import io
9
  import time
10
  import numpy as np
11
- from keras import backend as K
12
  def ml_intext(infile):
13
  fin=open(infile,'r',encoding='utf-8')
14
  alltexts=fin.read().strip().split('\n\n')
@@ -462,7 +462,7 @@ def combine_strategy(test_decode_temp, T=0.8):
462
  return fout.getvalue()
463
 
464
 
465
- def model_predict(session,ml_input,nn_model,ml_input_txt,ml_input_index,Threshold):
466
  if nn_model.model_type=='cnn':
467
  #startTime=time.time()
468
  test_set,test_label = ml_intext_fn(ml_input)
@@ -482,7 +482,6 @@ def model_predict(session,ml_input,nn_model,ml_input_txt,ml_input_index,Threshol
482
  input_test.append(test_x[3])
483
  # print('ml-model-represent:',time.time()-startTime)
484
  # startTime=time.time()
485
- K.set_session(session)
486
  test_pre = nn_model.model.predict(input_test)
487
  # print('ml-model-predict:',time.time()-startTime)
488
 
@@ -492,10 +491,6 @@ def model_predict(session,ml_input,nn_model,ml_input_txt,ml_input_index,Threshol
492
  test_x,test_y=nn_model.rep.load_data(test_set,test_label,word_max_len=nn_model.maxlen)
493
  #print('ml-model-represent:',time.time()-startTime)
494
  #startTime=time.time()
495
- #K.set_session(session)
496
- #with session.as_default():
497
- #with session.graph.as_default():
498
- #print('......session')
499
  test_pre = nn_model.model.predict(test_x)
500
  #print('ml-model-modedpred:',time.time()-startTime)
501
  # startTime=time.time()
@@ -527,19 +522,15 @@ def model_predict_old(ml_input,nn_model,ml_input_txt,ml_input_index,Threshold):
527
 
528
  if nn_model.fea_dict['pos'] == 1:
529
  input_test.append(test_x[3])
530
- K.set_session(nn_model.session)
531
- with nn_model.session.as_default():
532
- with nn_model.session.graph.as_default():
533
- test_pre = nn_model.model.predict(input_test,batch_size=256)
534
 
535
  elif nn_model.model_type=='bert' or nn_model.model_type=='bioformer':
536
 
537
  test_set,test_label = ml_intext_fn(ml_input)
538
  test_x,test_y=nn_model.rep.load_data(test_set,test_label,word_max_len=nn_model.maxlen)
539
- K.set_session(nn_model.session)
540
- with nn_model.session.as_default():
541
- with nn_model.session.graph.as_default():
542
- test_pre = nn_model.model.predict(test_x,batch_size=128)
543
 
544
  test_score=output_result(test_pre, nn_model.rep.label_2_index,Top_N=3)
545
  #print('test_score:',test_score)
@@ -562,7 +553,7 @@ def output_txt(ml_input_txt):
562
 
563
  return fout.getvalue()
564
 
565
- def ml_tagging(session,ssplit_token,ml_model,Threshold):
566
  # startTime=time.time()
567
  ml_input, ml_input_txt,ml_input_index=build_ngram_testset_filted(ssplit_token)
568
  # print('ml-ngrambuild:',time.time()-startTime)
@@ -570,7 +561,7 @@ def ml_tagging(session,ssplit_token,ml_model,Threshold):
570
  #print(ml_input)
571
  # startTime=time.time()
572
  if len(ml_input_index)>0:
573
- ml_pre_tsv=model_predict(session,ml_input,ml_model,ml_input_txt,ml_input_index,Threshold)
574
  else:
575
  ml_pre_tsv=output_txt(ml_input_txt)
576
  # print('ml-modelpred:',time.time()-startTime)
 
8
  import io
9
  import time
10
  import numpy as np
11
+
12
  def ml_intext(infile):
13
  fin=open(infile,'r',encoding='utf-8')
14
  alltexts=fin.read().strip().split('\n\n')
 
462
  return fout.getvalue()
463
 
464
 
465
+ def model_predict(ml_input,nn_model,ml_input_txt,ml_input_index,Threshold):
466
  if nn_model.model_type=='cnn':
467
  #startTime=time.time()
468
  test_set,test_label = ml_intext_fn(ml_input)
 
482
  input_test.append(test_x[3])
483
  # print('ml-model-represent:',time.time()-startTime)
484
  # startTime=time.time()
 
485
  test_pre = nn_model.model.predict(input_test)
486
  # print('ml-model-predict:',time.time()-startTime)
487
 
 
491
  test_x,test_y=nn_model.rep.load_data(test_set,test_label,word_max_len=nn_model.maxlen)
492
  #print('ml-model-represent:',time.time()-startTime)
493
  #startTime=time.time()
 
 
 
 
494
  test_pre = nn_model.model.predict(test_x)
495
  #print('ml-model-modedpred:',time.time()-startTime)
496
  # startTime=time.time()
 
522
 
523
  if nn_model.fea_dict['pos'] == 1:
524
  input_test.append(test_x[3])
525
+
526
+ test_pre = nn_model.model.predict(input_test,batch_size=256)
 
 
527
 
528
  elif nn_model.model_type=='bert' or nn_model.model_type=='bioformer':
529
 
530
  test_set,test_label = ml_intext_fn(ml_input)
531
  test_x,test_y=nn_model.rep.load_data(test_set,test_label,word_max_len=nn_model.maxlen)
532
+
533
+ test_pre = nn_model.model.predict(test_x,batch_size=128)
 
 
534
 
535
  test_score=output_result(test_pre, nn_model.rep.label_2_index,Top_N=3)
536
  #print('test_score:',test_score)
 
553
 
554
  return fout.getvalue()
555
 
556
+ def ml_tagging(ssplit_token,ml_model,Threshold):
557
  # startTime=time.time()
558
  ml_input, ml_input_txt,ml_input_index=build_ngram_testset_filted(ssplit_token)
559
  # print('ml-ngrambuild:',time.time()-startTime)
 
561
  #print(ml_input)
562
  # startTime=time.time()
563
  if len(ml_input_index)>0:
564
+ ml_pre_tsv=model_predict(ml_input,ml_model,ml_input_txt,ml_input_index,Threshold)
565
  else:
566
  ml_pre_tsv=output_txt(ml_input_txt)
567
  # print('ml-modelpred:',time.time()-startTime)
src/nn_model.py CHANGED
@@ -8,12 +8,10 @@ Created on Thu Mar 26 09:04:13 2020
8
  import time
9
  import sys
10
  import numpy as np
11
- import tensorflow as tf
12
  import keras
13
  from src.nn_represent import CNN_RepresentationLayer,BERT_RepresentationLayer
14
  from keras.layers import *
15
  from keras.models import Model
16
- from keras import backend as K
17
  from keras_bert import load_trained_model_from_checkpoint
18
 
19
 
@@ -37,7 +35,7 @@ class bioTag_CNN():
37
  self.charfile=model_files['charfile']
38
  self.labelfile=model_files['labelfile']
39
  self.posfile=model_files['posfile']
40
- self.session = K.get_session()
41
  vocab={'char':self.charfile,'label':self.labelfile,'pos':self.posfile}
42
  print('loading w2v model.....')
43
  self.rep = CNN_RepresentationLayer(self.w2vfile,vocab_file=vocab, frequency=400000)
@@ -94,8 +92,6 @@ class bioTag_CNN():
94
  self.model = Model(inputs=all_fea, outputs=output)
95
  def load_model(self,model_file):
96
  self.model.load_weights(model_file)
97
- self.session = K.get_session()
98
- print(self.session)
99
  #self.model.summary()
100
  print('load cnn model done!')
101
 
@@ -107,7 +103,6 @@ class bioTag_BERT():
107
  checkpoint_path = model_files['checkpoint_path']
108
  vocab_path = model_files['vocab_path']
109
  self.label_file=model_files['labelfile']
110
- self.session = tf.Session()
111
 
112
  self.rep = BERT_RepresentationLayer( vocab_path, self.label_file)
113
 
@@ -124,8 +119,6 @@ class bioTag_BERT():
124
 
125
  def load_model(self,model_file):
126
  self.model.load_weights(model_file)
127
- self.session = K.get_session()
128
- print(self.session)
129
  #self.model.summary()
130
 
131
  class bioTag_Bioformer():
@@ -152,11 +145,6 @@ class bioTag_Bioformer():
152
 
153
  def load_model(self,model_file):
154
  self.model.load_weights(model_file)
155
- #self.model._make_predict_function()
156
- #session = K.get_session()
157
- #print(session)
158
  #self.model.summary()
159
- session=''
160
- return session
161
  print('load bioformer model done!')
162
 
 
8
  import time
9
  import sys
10
  import numpy as np
 
11
  import keras
12
  from src.nn_represent import CNN_RepresentationLayer,BERT_RepresentationLayer
13
  from keras.layers import *
14
  from keras.models import Model
 
15
  from keras_bert import load_trained_model_from_checkpoint
16
 
17
 
 
35
  self.charfile=model_files['charfile']
36
  self.labelfile=model_files['labelfile']
37
  self.posfile=model_files['posfile']
38
+
39
  vocab={'char':self.charfile,'label':self.labelfile,'pos':self.posfile}
40
  print('loading w2v model.....')
41
  self.rep = CNN_RepresentationLayer(self.w2vfile,vocab_file=vocab, frequency=400000)
 
92
  self.model = Model(inputs=all_fea, outputs=output)
93
  def load_model(self,model_file):
94
  self.model.load_weights(model_file)
 
 
95
  #self.model.summary()
96
  print('load cnn model done!')
97
 
 
103
  checkpoint_path = model_files['checkpoint_path']
104
  vocab_path = model_files['vocab_path']
105
  self.label_file=model_files['labelfile']
 
106
 
107
  self.rep = BERT_RepresentationLayer( vocab_path, self.label_file)
108
 
 
119
 
120
  def load_model(self,model_file):
121
  self.model.load_weights(model_file)
 
 
122
  #self.model.summary()
123
 
124
  class bioTag_Bioformer():
 
145
 
146
  def load_model(self,model_file):
147
  self.model.load_weights(model_file)
 
 
 
148
  #self.model.summary()
 
 
149
  print('load bioformer model done!')
150
 
src/tagging_text.py CHANGED
@@ -18,7 +18,7 @@ import time
18
  import json
19
 
20
  #hybrid method
21
- def bioTag(session,text,biotag_dic,ml_model,onlyLongest=False, abbrRecog=False, Threshold=0.95):
22
 
23
  # startTime=time.time()
24
  ssplit_token=ssplit_token_pos_lemma(text)
@@ -31,7 +31,7 @@ def bioTag(session,text,biotag_dic,ml_model,onlyLongest=False, abbrRecog=False,
31
  # print('dict ner:',time.time()-startTime)
32
 
33
  # startTime=time.time()
34
- ml_tsv=ml_tagging(session,ssplit_token,ml_model,Threshold)
35
  #print('ml_tsv:\n',ml_tsv)
36
  # print('ml ner:',time.time()-startTime)
37
 
 
18
  import json
19
 
20
  #hybrid method
21
+ def bioTag(text,biotag_dic,ml_model,onlyLongest=False, abbrRecog=False, Threshold=0.95):
22
 
23
  # startTime=time.time()
24
  ssplit_token=ssplit_token_pos_lemma(text)
 
31
  # print('dict ner:',time.time()-startTime)
32
 
33
  # startTime=time.time()
34
+ ml_tsv=ml_tagging(ssplit_token,ml_model,Threshold)
35
  #print('ml_tsv:\n',ml_tsv)
36
  # print('ml ner:',time.time()-startTime)
37