AIdeaText committed on
Commit
ed063c4
1 Parent(s): 43b44fb

Update modules/semantic_analysis.py

Browse files
Files changed (1) hide show
  1. modules/semantic_analysis.py +35 -18
modules/semantic_analysis.py CHANGED
@@ -4,6 +4,7 @@ import spacy
4
  import networkx as nx
5
  import matplotlib.pyplot as plt
6
  from collections import Counter
 
7
 
8
  # Remove the global nlp model loading
9
 
@@ -180,50 +181,66 @@ def visualize_context_graph(doc, lang):
180
 
181
  def visualize_semantic_relations(doc, lang):
182
  G = nx.Graph()
183
- word_freq = Counter(token.text.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'])
184
- top_words = [word for word, _ in word_freq.most_common(20)] # Top 20 most frequent nouns and verbs
185
 
 
186
  for token in doc:
187
- if token.pos_ in ['NOUN', 'VERB'] and token.text.lower() in top_words:
188
- G.add_node(token.text, pos=token.pos_)
 
 
 
189
 
 
 
 
 
 
 
 
 
 
190
  for token in doc:
191
- if token.pos_ in ['NOUN', 'VERB'] and token.text.lower() in top_words:
192
- if token.head.pos_ in ['NOUN', 'VERB'] and token.head.text.lower() in top_words:
193
- G.add_edge(token.text, token.head.text, label=token.dep_)
 
 
 
194
 
195
- fig, ax = plt.subplots(figsize=(36, 27)) # Create a figure and axis
196
- pos = nx.spring_layout(G, k=0.7, iterations=50) # Adjusted layout
197
 
198
  node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]
199
 
200
  nx.draw(G, pos, node_color=node_colors, with_labels=True,
201
- node_size=10000, # Increased node size
202
- font_size=16, # Increased font size
203
  font_weight='bold',
204
  arrows=True,
205
- arrowsize=30, # Increased arrow size
206
- width=3, # Increased edge width
207
  edge_color='gray',
208
- ax=ax) # Draw on the axis
209
 
210
  edge_labels = nx.get_edge_attributes(G, 'label')
211
- nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax) # Increased edge label font size
212
 
213
  title = {
214
  'es': "Relaciones Semánticas Relevantes",
215
  'en': "Relevant Semantic Relations",
216
  'fr': "Relations Sémantiques Pertinentes"
217
  }
218
- ax.set_title(title[lang], fontsize=24, fontweight='bold') # Set title on the axis
219
  ax.axis('off')
220
 
221
  legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
222
  label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
223
  for pos in ['NOUN', 'VERB']]
224
- ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16) # Add legend to the axis
225
 
226
- return fig # Return the figure instead of plt
227
 
228
 
229
  ############################################################################################################################################
 
4
  import networkx as nx
5
  import matplotlib.pyplot as plt
6
  from collections import Counter
7
+ from collections import defaultdict
8
 
9
  # Remove the global nlp model loading
10
 
 
181
 
182
  def visualize_semantic_relations(doc, lang):
183
  G = nx.Graph()
184
+ word_freq = defaultdict(int)
185
+ lemma_to_word = {}
186
 
187
+ # Count frequencies of lemmas and map lemmas to their most common word form
188
  for token in doc:
189
+ if token.pos_ in ['NOUN', 'VERB']:
190
+ lemma = token.lemma_.lower()
191
+ word_freq[lemma] += 1
192
+ if lemma not in lemma_to_word or token.text.lower() == lemma:
193
+ lemma_to_word[lemma] = token.text
194
 
195
+ # Get top 20 most frequent lemmas
196
+ top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
197
+
198
+ # Add nodes
199
+ for lemma in top_lemmas:
200
+ word = lemma_to_word[lemma]
201
+ G.add_node(word, pos=doc.vocab[lemma].pos_)
202
+
203
+ # Add edges
204
  for token in doc:
205
+ if token.lemma_.lower() in top_lemmas:
206
+ if token.head.lemma_.lower() in top_lemmas:
207
+ source = lemma_to_word[token.lemma_.lower()]
208
+ target = lemma_to_word[token.head.lemma_.lower()]
209
+ if source != target: # Avoid self-loops
210
+ G.add_edge(source, target, label=token.dep_)
211
 
212
+ fig, ax = plt.subplots(figsize=(36, 27))
213
+ pos = nx.spring_layout(G, k=0.7, iterations=50)
214
 
215
  node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]
216
 
217
  nx.draw(G, pos, node_color=node_colors, with_labels=True,
218
+ node_size=10000,
219
+ font_size=16,
220
  font_weight='bold',
221
  arrows=True,
222
+ arrowsize=30,
223
+ width=3,
224
  edge_color='gray',
225
+ ax=ax)
226
 
227
  edge_labels = nx.get_edge_attributes(G, 'label')
228
+ nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)
229
 
230
  title = {
231
  'es': "Relaciones Semánticas Relevantes",
232
  'en': "Relevant Semantic Relations",
233
  'fr': "Relations Sémantiques Pertinentes"
234
  }
235
+ ax.set_title(title[lang], fontsize=24, fontweight='bold')
236
  ax.axis('off')
237
 
238
  legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
239
  label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
240
  for pos in ['NOUN', 'VERB']]
241
+ ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)
242
 
243
+ return fig
244
 
245
 
246
  ############################################################################################################################################