import streamlit as st
import streamlit.components.v1 as components
import requests
import spacy
import hashlib

nlp = spacy.load("en_core_web_md")
# add the entity-fishing pipeline component (declared through entry_points in setup.py)
nlp.add_pipe("entityfishing")

st.title('Entity Linking Demo')
article = st.text_area('Article to analyze:', value=open("example.txt").read())
seen_entities = []

if st.button('Submit'):
    print(article)
    good_ents = []
    doc = nlp(article)
    for ent in doc.ents:
        # keep only linked ORG/PERSON/GPE entities we have not already seen
        if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
            continue
        seen_entities.append(ent.text)
        print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
        # ask Wikidata for the entity's image claim (property P18)
        r = requests.get(
            "https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity="
            + ent._.kb_qid
        )
        data = r.json()["claims"]
        if "P18" in data:
            data = data["P18"][0]["mainsnak"]
            # Wikimedia Commons stores files under /<first md5 hex char>/<first two hex chars>/<file name>
            img_name = data["datavalue"]["value"].replace(" ", "_")
            img_name_hash = hashlib.md5(img_name.encode("utf-8")).hexdigest()
            a = img_name_hash[0]
            b = img_name_hash[1]
            url = f"https://upload.wikimedia.org/wikipedia/commons/{a}/{a}{b}/{img_name}"
            good_ents.append((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score, url))

    # render one column per linked entity, each showing its image and name
    if good_ents:
        cols = st.columns(len(good_ents))
        for i, ent in enumerate(good_ents):
            with cols[i]:
                # ent[5] is the Commons image URL built above
                # st.image(ent[5]) would also work
                components.html(f"<img src='{ent[5]}' width='100' height='100'>", height=110, width=110)
                st.caption(ent[0])