AIdeaText committed
Commit
738b9a7
1 Parent(s): 091bb1e

Update modules/semantic_analysis.py

Files changed (1)
  1. modules/semantic_analysis.py +71 -48
modules/semantic_analysis.py CHANGED
@@ -1,4 +1,4 @@
- #syntax_analysis.py
+ #semantic_analysis.py
  import streamlit as st
  import spacy
  import networkx as nx
@@ -84,71 +84,94 @@ POS_TRANSLATIONS = {
  }
  }
  ########################################################################################################################################
- def count_pos(doc):
-     return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
- #######################################################################################################################################
- def create_syntax_graph(doc, lang):
-     G = nx.DiGraph()
-     pos_counts = count_pos(doc)
-     word_nodes = {}
-     word_colors = {}
-
-     for token in doc:
-         if token.pos_ != 'PUNCT':
-             lower_text = token.text.lower()
-             if lower_text not in word_nodes:
-                 node_id = len(word_nodes)
-                 word_nodes[lower_text] = node_id
-                 color = POS_COLORS.get(token.pos_, '#FFFFFF')
-                 word_colors[lower_text] = color
-                 G.add_node(node_id,
-                            label=f"{token.text}\n[{POS_TRANSLATIONS[lang].get(token.pos_, token.pos_)}]",
-                            pos=token.pos_,
-                            size=pos_counts[token.pos_] * 500,
-                            color=color)
-
-             if token.dep_ != "ROOT" and token.head.pos_ != 'PUNCT':
-                 head_id = word_nodes.get(token.head.text.lower())
-                 if head_id is not None:
-                     G.add_edge(head_id, word_nodes[lower_text], label=token.dep_)
-
-     return G, word_colors
- ####################################################################################################################################
- def visualize_syntax_graph(doc, lang):
+
+ def extract_entities(doc):
+     entities = {
+         "Personas": [],
+         "Conceptos": [],
+         "Lugares": [],
+         "Fechas": []
+     }
+
+     for ent in doc.ents:
+         if ent.label_ == "PER":
+             entities["Personas"].append(ent.text)
+         elif ent.label_ in ["LOC", "GPE"]:
+             entities["Lugares"].append(ent.text)
+         elif ent.label_ == "DATE":
+             entities["Fechas"].append(ent.text)
+         else:
+             entities["Conceptos"].append(ent.text)
+
+     return entities
+
+ def visualize_context_graph(doc, lang):
+     G = nx.Graph()
+     entities = extract_entities(doc)
+
+     # Add nodes
+     for category, items in entities.items():
+         for item in items:
+             G.add_node(item, category=category)
+
+     # Add edges
+     for sent in doc.sents:
+         sent_entities = [ent.text for ent in sent.ents if ent.text in G.nodes()]
+         for i in range(len(sent_entities)):
+             for j in range(i+1, len(sent_entities)):
+                 G.add_edge(sent_entities[i], sent_entities[j])
+
+     # Visualize
+     plt.figure(figsize=(20, 15))
+     pos = nx.spring_layout(G, k=0.5, iterations=50)
+
+     color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
+     node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
+
+     nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=3000, font_size=8, font_weight='bold')
+
+     # Add a legend
+     legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none') for color in color_map.values()]
+     plt.legend(legend_elements, color_map.keys(), loc='upper left', bbox_to_anchor=(1, 1))
+
+     plt.title("Análisis de Contexto" if lang == 'es' else "Context Analysis" if lang == 'en' else "Analyse de Contexte", fontsize=20)
+     plt.axis('off')
+
+     return plt
+
+ def visualize_semantic_relations(doc, lang):
+     # This function can keep the logic already in visualize_syntax_graph,
+     # with some modifications to focus on semantic relations
      G, word_colors = create_syntax_graph(doc, lang)
 
-     plt.figure(figsize=(24, 18)) # Increase figure size
-     pos = nx.spring_layout(G, k=0.9, iterations=50) # Adjust layout parameters
+     plt.figure(figsize=(24, 18))
+     pos = nx.spring_layout(G, k=0.9, iterations=50)
 
      node_colors = [data['color'] for _, data in G.nodes(data=True)]
      node_sizes = [data['size'] for _, data in G.nodes(data=True)]
 
      nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=node_sizes, arrows=True,
-             arrowsize=20, width=2, edge_color='gray') # Adjust node and edge appearance
+             arrowsize=20, width=2, edge_color='gray')
 
      nx.draw_networkx_labels(G, pos, {node: data['label'] for node, data in G.nodes(data=True)},
-                             font_size=10, font_weight='bold') # Increase font size and make bold
+                             font_size=10, font_weight='bold')
 
      edge_labels = nx.get_edge_attributes(G, 'label')
      nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
 
-     plt.title("Syntactic Analysis" if lang == 'en' else "Analyse Syntaxique" if lang == 'fr' else "Análisis Sintáctico",
-               fontsize=20, fontweight='bold') # Increase title font size
+     plt.title("Análisis de Relaciones Semánticas" if lang == 'es' else "Semantic Relations Analysis" if lang == 'en' else "Analyse des Relations Sémantiques",
+               fontsize=20, fontweight='bold')
      plt.axis('off')
 
      legend_elements = [plt.Rectangle((0,0),1,1, facecolor=color, edgecolor='none',
                                       label=f"{POS_TRANSLATIONS[lang][pos]} ({count_pos(doc)[pos]})")
                         for pos, color in POS_COLORS.items() if pos in set(nx.get_node_attributes(G, 'pos').values())]
-     plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12) # Increase legend font size
+     plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)
 
      return plt
- ################################################################################################################################
- def visualize_syntax(text, nlp, lang):
-     max_tokens = 5000
-     doc = nlp(text)
-     if len(doc) > max_tokens:
-         doc = nlp(text[:max_tokens])
-         print(f"Warning: The input text is too long. Only the first {max_tokens} tokens will be visualized.")
-     return visualize_syntax_graph(doc, lang)
-
-     pass
+
+ def perform_semantic_analysis(text, nlp, lang):
+     doc = nlp(text)
+     context_graph = visualize_context_graph(doc, lang)
+     relations_graph = visualize_semantic_relations(doc, lang)
+     return context_graph, relations_graph
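
For reference, a minimal sketch of how the new functions might be wired into a Streamlit page follows. It is illustrative only and not part of this commit: the spaCy model name es_core_news_sm, the import path modules.semantic_analysis, and the widget label are assumptions, and the entity labels that extract_entities handles (PER, LOC, GPE, DATE) depend on which spaCy pipeline is actually loaded.

# Illustrative usage sketch - not part of this commit
import matplotlib.pyplot as plt
import spacy
import streamlit as st

from modules.semantic_analysis import visualize_context_graph, visualize_semantic_relations

nlp = spacy.load("es_core_news_sm")  # assumed model; any pipeline with NER and sentence boundaries works

text = st.text_area("Text to analyze")
if text:
    doc = nlp(text)

    # Each visualizer calls plt.figure() internally and returns the pyplot module,
    # so the figure it drew is the current one immediately after the call.
    visualize_context_graph(doc, 'es')
    st.pyplot(plt.gcf())

    visualize_semantic_relations(doc, 'es')
    st.pyplot(plt.gcf())

Because both visualizers return the pyplot module rather than a Figure object, a caller that goes through perform_semantic_analysis would need to capture each figure (for example via plt.gcf() or plt.get_fignums()) before the next one is drawn.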