ghomasHudson committed on
Commit
8b1561c
1 Parent(s): 8014fee

Remove plural names

Browse files
Files changed (1) hide show
  1. entity_extraction.py +10 -0
entity_extraction.py CHANGED
@@ -3,6 +3,15 @@ import spacy
3
  nlp = spacy.load("en_core_web_md")
4
  nlp.add_pipe("entityfishing")
5
 
 
 
 
 
 
 
 
 
 
6
 
7
  def extract_entities(article):
8
  '''Find wikidata refs for article entities'''
@@ -11,6 +20,7 @@ def extract_entities(article):
11
  seen_surnames = []
12
  seen_qids = []
13
 
 
14
  doc = nlp(article)
15
  for ent in doc.ents:
16
  if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
 
3
  nlp = spacy.load("en_core_web_md")
4
  nlp.add_pipe("entityfishing")
5
 
6
def remove_plural_names(article):
    '''Strip possessive suffixes from the words of an article.

    Despite the name, this removes the possessive marker (an apostrophe,
    straight or typographic, followed by "s") so that a token such as
    "Trump's" is reduced to "Trump".

    The suffix is removed only when it directly follows a word character
    and ends at a word boundary. A quoted word that merely starts with
    "s" (for example 'special') is therefore left intact, where a plain
    substring replace would have damaged it. Contractions such as "it's"
    are indistinguishable from possessives here and are also shortened.

    Runs of whitespace (including newlines) are collapsed to single
    spaces, as the words are split and re-joined.

    Args:
        article: The article text as a string.

    Returns:
        The whitespace-normalised text with possessive suffixes removed.
    '''
    # Imported locally so this function does not depend on the module's
    # import block.
    import re

    normalised = " ".join(article.split())
    # (?<=\w) : the apostrophe must be attached to a preceding word char.
    # \b      : the "s" must end the word ("Obama's," matches, "'special'" does not).
    return re.sub(r"(?<=\w)['’]s\b", "", normalised)
14
+
15
 
16
  def extract_entities(article):
17
  '''Find wikidata refs for article entities'''
 
20
  seen_surnames = []
21
  seen_qids = []
22
 
23
+ article = remove_plural_names(article)
24
  doc = nlp(article)
25
  for ent in doc.ents:
26
  if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities: