Spaces:

nsstt
/

Entity_Linking_Demo

App Files Files Community

Entity_Linking_Demo / entity_extraction.py

ghomasHudson's picture

Remove plural names

8b1561c over 1 year ago

history blame contribute delete

1.4 kB

	import re

	import spacy

	# Load the "en_core_web_md" spaCy pipeline once at import time; the resulting
	# `nlp` object is reused by every call to extract_entities().
	nlp = spacy.load("en_core_web_md")
	# Append the "entityfishing" component to the pipeline.
	# NOTE(review): presumably this component is registered by the spacyfishing
	# package and is what populates the `ent._.kb_qid`, `ent._.nerd_score` and
	# `ent._.url_wikidata` extensions read further down -- confirm, since this
	# file never imports that package explicitly.
	nlp.add_pipe("entityfishing")

	# Possessive suffix: an apostrophe (straight or typographic) followed by "s",
	# attached to the end of a token. The look-behind keeps an opening quote
	# (as in "'see you'") from being mistaken for a possessive.
	_POSSESSIVE_RE = re.compile(r"(?<=\S)['’]s\b")


	def remove_plural_names(article):
	    """Strip possessive ``'s`` / ``’s`` suffixes and normalise whitespace.

	    Despite its name, this function removes *possessive* endings (for
	    example ``"John's"`` becomes ``"John"``), not plurals.

	    Args:
	        article: Text to clean.

	    Returns:
	        The text with every run of whitespace (including newlines) collapsed
	        to a single space, leading/trailing whitespace dropped, and
	        possessive suffixes removed. Punctuation following the suffix is
	        kept (``"Obama's,"`` becomes ``"Obama,"``).
	    """
	    # split()/join() reproduces the whitespace collapsing callers already get.
	    normalised = " ".join(article.split())
	    return _POSSESSIVE_RE.sub("", normalised)


	def extract_entities(article, *, min_score=0.5, labels=("ORG", "PERSON", "GPE")):
	    """Find Wikidata references for the entities mentioned in *article*.

	    The article is first cleaned with ``remove_plural_names`` and then run
	    through the module-level ``nlp`` pipeline. Entities are returned in
	    document order, each at most once.

	    An entity is dropped when any of the following holds:

	    * it has no ``kb_qid``, its label is not in *labels*, or the same
	      surface text was already accepted past the score check;
	    * its ``nerd_score`` is unset or below *min_score*;
	    * it is a single-token name equal to the last token of a multi-token
	      PERSON entity seen earlier (e.g. "Obama" after "Barack Obama");
	    * its ``kb_qid`` was already returned.

	    Args:
	        article: Raw article text.
	        min_score: Minimum ``nerd_score`` an entity needs to be kept.
	        labels: Entity labels (``ent.label_`` values) that are eligible.

	    Returns:
	        A list of the entity objects yielded by ``doc.ents`` that survived
	        the filters above.
	    """
	    allowed_labels = frozenset(labels)
	    ents = []
	    # Sets give O(1) membership tests; only membership is ever queried.
	    seen_texts = set()
	    seen_surnames = set()
	    seen_qids = set()

	    doc = nlp(remove_plural_names(article))
	    for ent in doc.ents:
	        qid = ent._.kb_qid
	        if qid is None or ent.label_ not in allowed_labels or ent.text in seen_texts:
	            continue

	        score = ent._.nerd_score
	        # NOTE(review): the score may presumably be unset (None) even when a
	        # QID is present; such entities are skipped instead of raising
	        # TypeError on the comparison -- confirm against the linker's docs.
	        if score is None or score < min_score:
	            continue

	        name_parts = ent.text.split()
	        if len(name_parts) == 1:
	            # Single name: skip if it repeats a known person's surname.
	            if ent.text in seen_surnames:
	                continue
	        elif name_parts and ent.label_ == "PERSON":
	            # Multipart name: remember the surname for later short mentions.
	            seen_surnames.add(name_parts[-1])

	        # The text is recorded before the QID check on purpose, so a repeat
	        # of this surface form is rejected early on later mentions.
	        seen_texts.add(ent.text)

	        if qid in seen_qids:
	            continue
	        seen_qids.add(qid)
	        ents.append(ent)
	    return ents


	if __name__ == "__main__":
	    # Interactive use: read one article from stdin, then list every linked
	    # entity as "text <tab> label <tab> wikidata-url", one per line.
	    article_text = input("article: ")
	    linked = extract_entities(article_text)
	    print()
	    print("ENTITIES:")
	    for entity in linked:
	        columns = (entity.text, "\t", entity.label_, "\t", entity._.url_wikidata)
	        print(*columns)