import hashlib

import requests
import streamlit as st
import streamlit.components.v1 as components

from entity_extraction import extract_entities
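
# Note: extract_entities (local module) is assumed to return spaCy-style
# entity spans whose ._.kb_qid, ._.url_wikidata, and ._.nerd_score extension
# attributes were filled in by the linking pipeline (inferred from the
# attribute access below).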

st.title('Entity Linking Demo')
st.markdown("""Links named entities in an article to
Wikidata entries, allowing us to pull their images.

*Note: only trained on entities from before May 2020*""")

article = st.text_area('Article to analyze:', value=open("example.txt").read())

if st.button('Submit'):
    with st.spinner(text="Extracting..."):
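        # Entities that both linked to Wikidata and resolved to a Commons
        # image, as (text, label, QID, Wikidata URL, linker score, image URL).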
        good_ents = []
        ents = extract_entities(article)
        for ent in ents:
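            # Ask the Wikidata API for the entity's P18 (image) claim.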
            r = requests.get(
                "https://www.wikidata.org/w/api.php",
                params={
                    "action": "wbgetclaims",
                    "format": "json",
                    "property": "P18",
                    "entity": ent._.kb_qid,
                },
            )
            data = r.json()["claims"]
if "P18" in data.keys(): |
|
data = data["P18"][0]["mainsnak"] |
|
                img_name = data["datavalue"]["value"].replace(" ", "_")
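                # Commons shards uploads into directories derived from the MD5
                # of the filename: .../<first hex digit>/<first two hex digits>/<filename>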
                img_name_hash = hashlib.md5(img_name.encode("utf-8")).hexdigest()
                a = img_name_hash[0]
                b = img_name_hash[1]
                url = f"https://upload.wikimedia.org/wikipedia/commons/{a}/{a}{b}/{img_name}"
                good_ents.append((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score, url))
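
    # Render each entity as a circular thumbnail captioned with its surface
    # text. st.columns(0) raises an exception, so only build the grid when
    # at least one entity resolved to an image.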
    if good_ents:
        cols = st.columns(len(good_ents))
        for i, ent in enumerate(good_ents):
            with cols[i]:
components.html(f"<image style='border-radius: 50%;object-fit:cover;width:100px;height:100px' src='{ent[-1]}'/>", height=110, width=110) |
|
st.caption(ent[0]) |
|
|