# -*- coding: utf-8 -*-
"""
Created on Tue Nov 22 09:54:41 2022

@author: luol2

Streamlit demo page for PhenoTagger.

The page loads the dictionary and the Bioformer tagging model once, lets the
user tune three tagging options in the sidebar, accepts free text in a form
and shows every tagged mention as a tab-separated line:
``start<TAB>end<TAB>mention<TAB>concept id<TAB>score``.
"""

import argparse
import json
import os
import re
import sys
import time

import nltk
import streamlit as st

from src.dic_ner import dic_ont
from src.nn_model import bioTag_CNN, bioTag_BERT, bioTag_Bioformer
from src.tagging_text import bioTag

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Upper bound on the number of words that are sent to the tagger.
MAX_WORDS = 500

# Must stay the first Streamlit command of the script.
# NOTE(review): the menu URLs and the "About" text look like the placeholder
# values from the Streamlit documentation -- confirm and replace.
st.set_page_config(
    page_title="PhenoTagger",
    page_icon=":shark:",
    # layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://www.extremelycoolapp.com/help',
        'Report a bug': "https://www.extremelycoolapp.com/bug",
        'About': "# This is a header. This is an *extremely* cool app!"
    }
)
st.title('PhenoTagger Demo')

# `st.cache` is deprecated in newer Streamlit releases; prefer
# `st.cache_resource` when the installed version offers it and fall back to
# the legacy decorator otherwise, so the script runs on both.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Build the dictionary and the Bioformer model; cached across reruns.

    Returns:
        A 4-tuple ``(nn_model, biotag_dic, test_tag, session)`` where
        ``nn_model`` is the ``bioTag_Bioformer`` instance, ``biotag_dic`` is
        the ``dic_ont`` instance, ``test_tag`` is a fixed debug marker string
        and ``session`` is whatever ``nn_model.load_model`` returns (it is
        handed to ``bioTag`` unchanged).
    """
    ontfiles = {'dic_file': './dict_new/noabb_lemma.dic',
                'word_hpo_file': './dict_new/word_id_map.json',
                'hpo_word_file': './dict_new/id_word_map.json'}

    # Disabled CNN variant, kept for reference:
    # if para_set['model_type']=='cnn':
    #     vocabfiles={'w2vfile':'../vocab/bio_embedding_intrinsic.d200',
    #                 'charfile':'../vocab/char.vocab',
    #                 'labelfile':'../dict_new/lable.vocab',
    #                 'posfile':'../vocab/pos.vocab'}
    #     modelfile='../models/cnn_p5n5_b128_95_hponew1.h5'
    # elif para_set['model_type']=='bioformer':
    vocabfiles = {'labelfile': './dict_new/lable.vocab',
                  'config_path': './vocab/bioformer-cased-v1.0/bert_config.json',
                  'checkpoint_path': './vocab/bioformer-cased-v1.0/bioformer-cased-v1.0-model.ckpt-2000000',
                  'vocab_path': './vocab/bioformer-cased-v1.0/vocab.txt'}
    modelfile = './vocab/bioformer_p5n5_b64_1e-5_95_hponew3.h5'
    # else:
    #     print('Model type is wrong, please select cnn or bioformer.')
    #     sys.exit()

    biotag_dic = dic_ont(ontfiles)

    # if para_set['model_type']=='cnn':
    #     nn_model=bioTag_CNN(vocabfiles)
    #     nn_model.load_model(modelfile)
    # elif para_set['model_type']=='bioformer':
    nn_model = bioTag_Bioformer(vocabfiles)
    session = nn_model.load_model(modelfile)
    test_tag = '1232'
    return nn_model, biotag_dic, test_tag, session


def _truncate_to_words(text, max_words):
    """Cut *text* after its first *max_words* words.

    A word is a ``\\w+`` match, the same definition used for counting.

    Returns:
        ``(kept_text, word_count)`` where ``word_count`` is the number of
        words in the original *text* and ``kept_text`` is *text* itself when
        it is within the limit, otherwise the prefix ending at the last
        character of word number *max_words*.
    """
    words = list(re.finditer(r"\w+", text))
    if len(words) <= max_words:
        return text, len(words)
    return text[:words[max_words - 1].end()], len(words)


# hyper-parameter
st.sidebar.header("Hyperparameter Settings")
sbform = st.sidebar.form("Hyper-paramiters")
# para_model=sbform.selectbox('Model', ['cnn', 'bioformer'])
# index=1 selects 'False', which keeps the app's previous effective default
# (only the longest concept is returned) now that the option is mapped
# correctly onto `onlyLongest` below.
para_overlap = sbform.selectbox('Return overlapping concepts', ['True', 'False'], index=1)
para_abbr = sbform.selectbox('Identify abbreviations', ['True', 'False'])
para_threshold = sbform.slider('Threshold:', min_value=0.5, max_value=0.95, value=0.95, step=0.05)
sbform.form_submit_button("Setting")

nn_model, biotag_dic, test_tag, session = load_model()

with st.form(key="my_form"):
    st.write('parameters:', para_overlap, para_abbr, para_threshold)

    input_text = st.text_area(
        "Paste your text below (max 500 words)",
        height=510,
    )

    # Enforce the word limit on words, not on characters.
    input_text, res = _truncate_to_words(input_text, MAX_WORDS)
    if res > MAX_WORDS:
        st.warning(
            "⚠️ Your text contains "
            + str(res)
            + " words."
            + f" Only the first {MAX_WORDS} words will be reviewed. Stay tuned as increased allowance is coming! 😊"
        )

    submit_button = st.form_submit_button(label="✨ Get me the data!")

# The select boxes deliver the strings 'True' / 'False'; turn them into bools.
return_overlap = para_overlap == 'True'
para_abbr = para_abbr == 'True'

para_set = {
    # 'model_type': para_model,  # cnn or bioformer
    # False: return overlapping concepts, True: only the longest one. This is
    # the opposite of the "Return overlapping concepts" choice, hence `not`.
    'onlyLongest': not return_overlap,
    'abbrRecog': para_abbr,  # False: don't identify abbr, True: identify abbr
    'ML_Threshold': para_threshold,  # the threshold of the deep learning model
}

if not submit_button:
    st.stop()

if not input_text.strip():
    # Nothing to tag; do not send blank input to the model.
    st.warning("Please paste some text before submitting.")
    st.stop()

st.markdown("**Results:**\n")
# print('dic...........:',biotag_dic.keys())
print('........:', test_tag)
print('........!!!!!!:', input_text)
print('...input:', input_text)
tag_result = bioTag(session, input_text, biotag_dic, nn_model,
                    onlyLongest=para_set['onlyLongest'],
                    abbrRecog=para_set['abbrRecog'],
                    Threshold=para_set['ML_Threshold'])

for ele in tag_result:
    # Each result is indexed as: start offset, end offset, concept id, score.
    start, last, concept_id, score = ele[0], ele[1], ele[2], ele[3]
    mention = input_text[int(start):int(last)]
    # str() keeps the line printable even if offsets/scores are not strings.
    fields = (start, last, mention, concept_id, score)
    output = "\t".join(str(field) for field in fields) + "\n"
    st.info(output)