Entity_Linking_Demo / entity_extraction.py
ghomasHudson's picture
Added cli standalone
8014fee
raw
history blame
1.11 kB
import spacy
# Load the spaCy "en_core_web_md" pipeline once at import time;
# extract_entities() reuses this single instance for every call.
nlp = spacy.load("en_core_web_md")
# Add the "entityfishing" component to the pipeline.
# NOTE(review): presumably this component is what populates the `kb_qid`,
# `nerd_score` and `url_wikidata` extension attributes read further down,
# and its factory must be registered by an installed package - confirm.
nlp.add_pipe("entityfishing")
def extract_entities(article):
    """Find Wikidata references for the entities mentioned in an article.

    The text is run through the module-level ``nlp`` pipeline and the
    entities in ``doc.ents`` are filtered, in document order, so that each
    linked entity is reported at most once.  An entity is dropped when:

    * it has no ``kb_qid`` extension value, or its label is not one of
      ``ORG``, ``PERSON`` or ``GPE``;
    * its exact text was already recorded by an earlier accepted mention;
    * its ``nerd_score`` is missing or below 0.5;
    * it is a one-word mention equal to the last word of an earlier
      multi-word ``PERSON`` mention (e.g. "Obama" after "Barack Obama");
    * its ``kb_qid`` was already returned for an earlier entity.

    Args:
        article: Text to analyse.

    Returns:
        A list with the retained entity objects from ``doc.ents``.
    """
    wanted_labels = ("ORG", "PERSON", "GPE")
    min_score = 0.5

    kept = []
    # Sets give O(1) membership tests; only "seen or not" matters here.
    seen_texts = set()
    seen_surnames = set()
    seen_qids = set()

    for ent in nlp(article).ents:
        qid = ent._.kb_qid
        if (
            qid is None
            or ent.label_ not in wanted_labels
            or ent.text in seen_texts
        ):
            continue

        # A linked entity may carry no score at all; treat that as "not
        # confident enough" instead of letting `None < 0.5` raise TypeError.
        score = ent._.nerd_score
        if score is None or score < min_score:
            continue

        words = ent.text.split()
        if len(words) == 1:
            # Single name: skip it if it repeats a known person's surname.
            if ent.text in seen_surnames:
                continue
        elif words and ent.label_ == "PERSON":
            # Multipart name: remember the surname for later short mentions.
            seen_surnames.add(words[-1])

        # Recorded before the QID check on purpose: a repeated surface form
        # is rejected early even when its QID turned out to be a duplicate.
        seen_texts.add(ent.text)

        if qid in seen_qids:
            continue
        seen_qids.add(qid)
        kept.append(ent)

    return kept
if __name__ == "__main__":
    # Standalone CLI: read one article from stdin, then list every entity
    # that extract_entities() retained.
    linked = extract_entities(input("article: "))
    print()
    print("ENTITIES:")
    for item in linked:
        # One row per entity: surface text, label, Wikidata URL attribute.
        print(item.text, "\t", item.label_, "\t", item._.url_wikidata)