Update modules/semantic_analysis.py
Browse files
modules/semantic_analysis.py
CHANGED
@@ -183,14 +183,16 @@ def visualize_semantic_relations(doc, lang):
|
|
183 |
G = nx.Graph()
|
184 |
word_freq = defaultdict(int)
|
185 |
lemma_to_word = {}
|
|
|
186 |
|
187 |
-    # Count frequencies of lemmas and map lemmas to their most common word form
|
188 |
for token in doc:
|
189 |
if token.pos_ in ['NOUN', 'VERB']:
|
190 |
lemma = token.lemma_.lower()
|
191 |
word_freq[lemma] += 1
|
192 |
if lemma not in lemma_to_word or token.text.lower() == lemma:
|
193 |
lemma_to_word[lemma] = token.text
|
|
|
194 |
|
195 |
# Get top 20 most frequent lemmas
|
196 |
top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
|
@@ -198,7 +200,7 @@ def visualize_semantic_relations(doc, lang):
|
|
198 |
# Add nodes
|
199 |
for lemma in top_lemmas:
|
200 |
word = lemma_to_word[lemma]
|
201 |
-        G.add_node(word, pos=
|
202 |
|
203 |
# Add edges
|
204 |
for token in doc:
|
@@ -242,7 +244,6 @@ def visualize_semantic_relations(doc, lang):
|
|
242 |
|
243 |
return fig
|
244 |
|
245 |
-
|
246 |
############################################################################################################################################
|
247 |
def perform_semantic_analysis(text, nlp, lang):
|
248 |
doc = nlp(text)
|
|
|
183 |
G = nx.Graph()
|
184 |
word_freq = defaultdict(int)
|
185 |
lemma_to_word = {}
|
186 |
+
lemma_to_pos = {}
|
187 |
|
188 |
+
# Count frequencies of lemmas and map lemmas to their most common word form and POS
|
189 |
for token in doc:
|
190 |
if token.pos_ in ['NOUN', 'VERB']:
|
191 |
lemma = token.lemma_.lower()
|
192 |
word_freq[lemma] += 1
|
193 |
if lemma not in lemma_to_word or token.text.lower() == lemma:
|
194 |
lemma_to_word[lemma] = token.text
|
195 |
+
lemma_to_pos[lemma] = token.pos_
|
196 |
|
197 |
# Get top 20 most frequent lemmas
|
198 |
top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
|
|
|
200 |
# Add nodes
|
201 |
for lemma in top_lemmas:
|
202 |
word = lemma_to_word[lemma]
|
203 |
+
G.add_node(word, pos=lemma_to_pos[lemma])
|
204 |
|
205 |
# Add edges
|
206 |
for token in doc:
|
|
|
244 |
|
245 |
return fig
|
246 |
|
|
|
247 |
############################################################################################################################################
|
248 |
def perform_semantic_analysis(text, nlp, lang):
|
249 |
doc = nlp(text)
|