Update modules/semantic_analysis.py
Browse files- modules/semantic_analysis.py +35 -18
modules/semantic_analysis.py
CHANGED
@@ -4,6 +4,7 @@ import spacy
|
|
4 |
import networkx as nx
|
5 |
import matplotlib.pyplot as plt
|
6 |
from collections import Counter
|
|
|
7 |
|
8 |
# Remove the global nlp model loading
|
9 |
|
@@ -180,50 +181,66 @@ def visualize_context_graph(doc, lang):
|
|
180 |
|
181 |
def visualize_semantic_relations(doc, lang):
|
182 |
G = nx.Graph()
|
183 |
-
word_freq =
|
184 |
-
|
185 |
|
|
|
186 |
for token in doc:
|
187 |
-
if token.pos_ in ['NOUN', 'VERB']
|
188 |
-
|
|
|
|
|
|
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
for token in doc:
|
191 |
-
if token.
|
192 |
-
if token.head.
|
193 |
-
|
|
|
|
|
|
|
194 |
|
195 |
-
fig, ax = plt.subplots(figsize=(36, 27))
|
196 |
-
pos = nx.spring_layout(G, k=0.7, iterations=50)
|
197 |
|
198 |
node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]
|
199 |
|
200 |
nx.draw(G, pos, node_color=node_colors, with_labels=True,
|
201 |
-
node_size=10000,
|
202 |
-
font_size=16,
|
203 |
font_weight='bold',
|
204 |
arrows=True,
|
205 |
-
arrowsize=30,
|
206 |
-
width=3,
|
207 |
edge_color='gray',
|
208 |
-
ax=ax)
|
209 |
|
210 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
211 |
-
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)
|
212 |
|
213 |
title = {
|
214 |
'es': "Relaciones Semánticas Relevantes",
|
215 |
'en': "Relevant Semantic Relations",
|
216 |
'fr': "Relations Sémantiques Pertinentes"
|
217 |
}
|
218 |
-
ax.set_title(title[lang], fontsize=24, fontweight='bold')
|
219 |
ax.axis('off')
|
220 |
|
221 |
legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
|
222 |
label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
|
223 |
for pos in ['NOUN', 'VERB']]
|
224 |
-
ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)
|
225 |
|
226 |
-
return fig
|
227 |
|
228 |
|
229 |
############################################################################################################################################
|
|
|
4 |
import networkx as nx
|
5 |
import matplotlib.pyplot as plt
|
6 |
from collections import Counter
|
7 |
+
from collections import defaultdict
|
8 |
|
9 |
# Remove the global nlp model loading
|
10 |
|
|
|
181 |
|
182 |
def visualize_semantic_relations(doc, lang):
    """Draw a graph of dependency links between the most frequent nouns and verbs.

    The 20 most frequent NOUN/VERB lemmas in ``doc`` become nodes (labelled
    with a surface form of the lemma and coloured by part of speech). An edge
    is added whenever a token and its syntactic head both belong to that set;
    the edge is labelled with the token's dependency relation.

    Args:
        doc: A parsed document whose tokens expose ``pos_``, ``lemma_``,
            ``text``, ``head`` and ``dep_`` (presumably a spaCy ``Doc`` --
            confirm against the caller).
        lang: Language code used to pick the title and the legend labels.
            Only ``'es'``, ``'en'`` and ``'fr'`` have a title defined here.

    Returns:
        The matplotlib figure containing the drawn graph.

    Raises:
        KeyError: If ``lang`` has no entry in the title table (or in
            ``POS_TRANSLATIONS``).
    """
    G = nx.Graph()
    word_freq = Counter()
    lemma_to_word = {}
    lemma_to_pos = {}

    # Count lemma frequencies and remember, per lemma, a display form and a POS.
    for token in doc:
        if token.pos_ in ('NOUN', 'VERB'):
            lemma = token.lemma_.lower()
            word_freq[lemma] += 1
            # The POS has to come from the token: vocabulary entries are
            # context-independent and carry no part-of-speech tag. The first
            # NOUN/VERB tag seen for a lemma is the one used for colouring.
            lemma_to_pos.setdefault(lemma, token.pos_)
            # Keep the first surface form seen, but prefer a form that is
            # spelled exactly like the lemma whenever one shows up.
            if lemma not in lemma_to_word or token.text.lower() == lemma:
                lemma_to_word[lemma] = token.text

    # Top 20 most frequent lemmas (ties keep first-seen order).
    top_lemmas = [lemma for lemma, _ in word_freq.most_common(20)]
    top_lemma_set = set(top_lemmas)

    # Add nodes
    for lemma in top_lemmas:
        G.add_node(lemma_to_word[lemma], pos=lemma_to_pos[lemma])

    # Add edges between a token and its head when both are top lemmas.
    for token in doc:
        token_lemma = token.lemma_.lower()
        head_lemma = token.head.lemma_.lower()
        if token_lemma in top_lemma_set and head_lemma in top_lemma_set:
            source = lemma_to_word[token_lemma]
            target = lemma_to_word[head_lemma]
            if source != target:  # Avoid self-loops
                G.add_edge(source, target, label=token.dep_)

    fig, ax = plt.subplots(figsize=(36, 27))
    pos = nx.spring_layout(G, k=0.7, iterations=50)

    node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]

    nx.draw(G, pos, node_color=node_colors, with_labels=True,
            node_size=10000,
            font_size=16,
            font_weight='bold',
            arrows=True,
            arrowsize=30,
            width=3,
            edge_color='gray',
            ax=ax)

    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)

    title = {
        'es': "Relaciones Semánticas Relevantes",
        'en': "Relevant Semantic Relations",
        'fr': "Relations Sémantiques Pertinentes"
    }
    ax.set_title(title[lang], fontsize=24, fontweight='bold')
    ax.axis('off')

    # One legend swatch per part of speech that can appear as a node.
    legend_elements = [plt.Rectangle((0, 0), 1, 1, fc=POS_COLORS.get(pos_tag, '#CCCCCC'), edgecolor='none',
                                     label=f"{POS_TRANSLATIONS[lang].get(pos_tag, pos_tag)}")
                       for pos_tag in ['NOUN', 'VERB']]
    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)

    return fig
|
244 |
|
245 |
|
246 |
############################################################################################################################################
|