Update modules/semantic_analysis.py
Browse files- modules/semantic_analysis.py +17 -31
modules/semantic_analysis.py
CHANGED
@@ -88,6 +88,13 @@ POS_TRANSLATIONS = {
|
|
88 |
def count_pos(doc):
|
89 |
return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
def extract_entities(doc):
|
92 |
entities = {
|
93 |
"Personas": [],
|
@@ -97,7 +104,7 @@ def extract_entities(doc):
|
|
97 |
}
|
98 |
|
99 |
for ent in doc.ents:
|
100 |
-
if ent.label_ == "
|
101 |
entities["Personas"].append(ent.text)
|
102 |
elif ent.label_ in ["LOC", "GPE"]:
|
103 |
entities["Lugares"].append(ent.text)
|
@@ -112,6 +119,8 @@ def visualize_context_graph(doc, lang):
|
|
112 |
G = nx.Graph()
|
113 |
entities = extract_entities(doc)
|
114 |
|
|
|
|
|
115 |
# Add nodes
|
116 |
for category, items in entities.items():
|
117 |
for item in items:
|
@@ -120,47 +129,28 @@ def visualize_context_graph(doc, lang):
|
|
120 |
# Add edges
|
121 |
for sent in doc.sents:
|
122 |
sent_entities = [ent for ent in sent.ents if ent.text in G.nodes()]
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
if ent != person:
|
127 |
-
G.add_edge(person.text, ent.text)
|
128 |
|
129 |
# Visualize
|
130 |
plt.figure(figsize=(20, 15))
|
131 |
pos = nx.spring_layout(G, k=0.5, iterations=50)
|
132 |
|
133 |
-
color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
|
134 |
node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
|
135 |
|
136 |
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=3000, font_size=8, font_weight='bold')
|
137 |
|
138 |
# Add a legend
|
139 |
-
legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none')
|
140 |
-
|
|
|
141 |
|
142 |
plt.title("Análisis del Contexto" if lang == 'es' else "Context Analysis" if lang == 'en' else "Analyse du Contexte", fontsize=20)
|
143 |
plt.axis('off')
|
144 |
|
145 |
return plt
|
146 |
|
147 |
-
def create_semantic_graph(doc, lang):
|
148 |
-
G = nx.Graph()
|
149 |
-
pos_counts = count_pos(doc)
|
150 |
-
|
151 |
-
for token in doc:
|
152 |
-
if token.pos_ != 'PUNCT':
|
153 |
-
G.add_node(token.text,
|
154 |
-
pos=token.pos_,
|
155 |
-
color=POS_COLORS.get(token.pos_, '#CCCCCC'), # Color gris por defecto
|
156 |
-
size=pos_counts.get(token.pos_, 1) * 100) # Tamaño mínimo si no hay conteo
|
157 |
-
|
158 |
-
for token in doc:
|
159 |
-
if token.dep_ != "ROOT" and token.head.text in G.nodes and token.text in G.nodes:
|
160 |
-
G.add_edge(token.head.text, token.text, label=token.dep_)
|
161 |
-
|
162 |
-
return G, pos_counts
|
163 |
-
|
164 |
def visualize_semantic_relations(doc, lang):
|
165 |
G = nx.Graph()
|
166 |
word_freq = Counter(token.text.lower() for token in doc if token.pos_ not in ['PUNCT', 'SPACE'])
|
@@ -200,8 +190,4 @@ def perform_semantic_analysis(text, nlp, lang):
|
|
200 |
doc = nlp(text)
|
201 |
context_graph = visualize_context_graph(doc, lang)
|
202 |
relations_graph = visualize_semantic_relations(doc, lang)
|
203 |
-
|
204 |
-
# Extraer entidades para mostrar en forma de lista
|
205 |
-
entities = extract_entities(doc)
|
206 |
-
|
207 |
-
return context_graph, relations_graph, entities
|
|
|
88 |
def count_pos(doc):
|
89 |
return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
|
90 |
|
91 |
+
import spacy
|
92 |
+
import networkx as nx
|
93 |
+
import matplotlib.pyplot as plt
|
94 |
+
from collections import Counter
|
95 |
+
|
96 |
+
# Mantén las definiciones de POS_COLORS y POS_TRANSLATIONS que ya tienes
|
97 |
+
|
98 |
def extract_entities(doc):
|
99 |
entities = {
|
100 |
"Personas": [],
|
|
|
104 |
}
|
105 |
|
106 |
for ent in doc.ents:
|
107 |
+
if ent.label_ == "PERSON":
|
108 |
entities["Personas"].append(ent.text)
|
109 |
elif ent.label_ in ["LOC", "GPE"]:
|
110 |
entities["Lugares"].append(ent.text)
|
|
|
119 |
G = nx.Graph()
|
120 |
entities = extract_entities(doc)
|
121 |
|
122 |
+
color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
|
123 |
+
|
124 |
# Add nodes
|
125 |
for category, items in entities.items():
|
126 |
for item in items:
|
|
|
129 |
# Add edges
|
130 |
for sent in doc.sents:
|
131 |
sent_entities = [ent for ent in sent.ents if ent.text in G.nodes()]
|
132 |
+
for i in range(len(sent_entities)):
|
133 |
+
for j in range(i+1, len(sent_entities)):
|
134 |
+
G.add_edge(sent_entities[i].text, sent_entities[j].text)
|
|
|
|
|
135 |
|
136 |
# Visualize
|
137 |
plt.figure(figsize=(20, 15))
|
138 |
pos = nx.spring_layout(G, k=0.5, iterations=50)
|
139 |
|
|
|
140 |
node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
|
141 |
|
142 |
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=3000, font_size=8, font_weight='bold')
|
143 |
|
144 |
# Add a legend
|
145 |
+
legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none', label=category)
|
146 |
+
for category, color in color_map.items()]
|
147 |
+
plt.legend(handles=legend_elements, loc='upper left', bbox_to_anchor=(1, 1))
|
148 |
|
149 |
plt.title("Análisis del Contexto" if lang == 'es' else "Context Analysis" if lang == 'en' else "Analyse du Contexte", fontsize=20)
|
150 |
plt.axis('off')
|
151 |
|
152 |
return plt
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
def visualize_semantic_relations(doc, lang):
|
155 |
G = nx.Graph()
|
156 |
word_freq = Counter(token.text.lower() for token in doc if token.pos_ not in ['PUNCT', 'SPACE'])
|
|
|
190 |
doc = nlp(text)
|
191 |
context_graph = visualize_context_graph(doc, lang)
|
192 |
relations_graph = visualize_semantic_relations(doc, lang)
|
193 |
+
return context_graph, relations_graph
|
|
|
|
|
|
|
|