test2 / modules /semantic_analysis.py
AIdeaText's picture
Update modules/semantic_analysis.py
738b9a7 verified
raw
history blame
No virus
6.13 kB
#semantic_analysis.py
import streamlit as st
import spacy
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
# NOTE: no spaCy model is loaded at module level; callers pass the loaded pipeline in as `nlp`.
# Fill colour (hex RGB) used for each Universal POS tag when drawing nodes
# and legend swatches. Insertion order determines legend order.
POS_COLORS = dict(
    ADJ='#FFA07A',    # Light Salmon
    ADP='#98FB98',    # Pale Green
    ADV='#87CEFA',    # Light Sky Blue
    AUX='#DDA0DD',    # Plum
    CCONJ='#F0E68C',  # Khaki
    DET='#FFB6C1',    # Light Pink
    INTJ='#FF6347',   # Tomato
    NOUN='#90EE90',   # Light Green
    NUM='#FAFAD2',    # Light Goldenrod Yellow
    PART='#D3D3D3',   # Light Gray
    PRON='#FFA500',   # Orange
    PROPN='#20B2AA',  # Light Sea Green
    SCONJ='#DEB887',  # Burlywood
    SYM='#7B68EE',    # Medium Slate Blue
    VERB='#FF69B4',   # Hot Pink
    X='#A9A9A9',      # Dark Gray
)
# Human-readable names of the Universal POS tags, keyed first by UI language
# code ('es', 'en', 'fr') and then by tag. Used for legend labels.
# Accented characters are written as real Unicode text (the file must be
# saved as UTF-8); the previous values were mojibake.
POS_TRANSLATIONS = {
    'es': {
        'ADJ': 'Adjetivo',
        'ADP': 'Adposición',
        'ADV': 'Adverbio',
        'AUX': 'Auxiliar',
        'CCONJ': 'Conjunción Coordinante',
        'DET': 'Determinante',
        'INTJ': 'Interjección',
        'NOUN': 'Sustantivo',
        'NUM': 'Número',
        'PART': 'Partícula',
        'PRON': 'Pronombre',
        'PROPN': 'Nombre Propio',
        'SCONJ': 'Conjunción Subordinante',
        'SYM': 'Símbolo',
        'VERB': 'Verbo',
        'X': 'Otro',
    },
    'en': {
        'ADJ': 'Adjective',
        'ADP': 'Adposition',
        'ADV': 'Adverb',
        'AUX': 'Auxiliary',
        'CCONJ': 'Coordinating Conjunction',
        'DET': 'Determiner',
        'INTJ': 'Interjection',
        'NOUN': 'Noun',
        'NUM': 'Number',
        'PART': 'Particle',
        'PRON': 'Pronoun',
        'PROPN': 'Proper Noun',
        'SCONJ': 'Subordinating Conjunction',
        'SYM': 'Symbol',
        'VERB': 'Verb',
        'X': 'Other',
    },
    'fr': {
        'ADJ': 'Adjectif',
        'ADP': 'Adposition',
        'ADV': 'Adverbe',
        'AUX': 'Auxiliaire',
        'CCONJ': 'Conjonction de Coordination',
        'DET': 'Déterminant',
        'INTJ': 'Interjection',
        'NOUN': 'Nom',
        'NUM': 'Nombre',
        'PART': 'Particule',
        'PRON': 'Pronom',
        'PROPN': 'Nom Propre',
        'SCONJ': 'Conjonction de Subordination',
        'SYM': 'Symbole',
        'VERB': 'Verbe',
        'X': 'Autre',
    },
}
########################################################################################################################################
def extract_entities(doc):
    """Group the named entities of a parsed document into four buckets.

    Args:
        doc: Object exposing ``ents``, an iterable of entities that each
            carry a ``label_`` string and a ``text`` string (a spaCy ``Doc``
            when called from this module).

    Returns:
        dict: Maps "Personas", "Lugares", "Fechas" and "Conceptos" to lists
        of entity texts in document order. Every entity whose label is not
        a person, place or date label is filed under "Conceptos".
    """
    # spaCy pipelines do not share one label scheme: some tag people as
    # "PER" and others (notably the English ones) as "PERSON". Accept both,
    # otherwise people silently end up in "Conceptos".
    person_labels = {"PER", "PERSON"}
    place_labels = {"LOC", "GPE"}

    entities = {
        "Personas": [],
        "Conceptos": [],
        "Lugares": [],
        "Fechas": [],
    }
    for ent in doc.ents:
        label = ent.label_
        if label in person_labels:
            bucket = "Personas"
        elif label in place_labels:
            bucket = "Lugares"
        elif label == "DATE":
            bucket = "Fechas"
        else:
            bucket = "Conceptos"
        entities[bucket].append(ent.text)
    return entities
def visualize_context_graph(doc, lang):
    """Draw a co-occurrence graph of the named entities found in ``doc``.

    Each distinct entity text becomes a node coloured by the bucket that
    ``extract_entities`` assigned to it; two nodes are linked when their
    entities appear in the same sentence.

    Args:
        doc: Parsed document exposing ``ents`` and ``sents`` (each sentence
            exposing its own ``ents``).
        lang: UI language code. 'es' and 'en' select the Spanish and English
            title; any other value falls back to the French title.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        its current figure.
    """
    G = nx.Graph()
    entities = extract_entities(doc)

    # One node per distinct entity text, tagged with its bucket.
    for category, items in entities.items():
        for item in items:
            G.add_node(item, category=category)

    # Link every pair of entities that share a sentence.
    for sent in doc.sents:
        # De-duplicate while keeping order: an entity mentioned twice in the
        # same sentence would otherwise be paired with itself and drawn as a
        # self-loop.
        names = list(dict.fromkeys(
            ent.text for ent in sent.ents if ent.text in G.nodes()
        ))
        for idx, first in enumerate(names):
            for second in names[idx + 1:]:
                G.add_edge(first, second)

    # Visualize
    plt.figure(figsize=(20, 15))
    pos = nx.spring_layout(G, k=0.5, iterations=50)
    color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
    node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
    nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=3000, font_size=8, font_weight='bold')

    # Legend: one colour swatch per entity bucket.
    legend_elements = [plt.Rectangle((0, 0), 1, 1, fc=color, edgecolor='none') for color in color_map.values()]
    plt.legend(legend_elements, color_map.keys(), loc='upper left', bbox_to_anchor=(1, 1))

    titles = {'es': "Análisis de Contexto", 'en': "Context Analysis"}
    plt.title(titles.get(lang, "Analyse de Contexte"), fontsize=20)
    plt.axis('off')
    return plt
def visualize_semantic_relations(doc, lang):
    """Draw the graph returned by ``create_syntax_graph`` with a POS legend.

    Nodes are drawn using their ``color``, ``size`` and ``label`` attributes
    and edges are annotated with their ``label`` attribute. The legend lists
    every POS tag present on the graph's nodes (``pos`` attribute) together
    with its translated name and its count from ``count_pos``.

    NOTE(review): ``create_syntax_graph`` and ``count_pos`` are not defined
    in the visible part of this module; they must be in scope at call time.

    Args:
        doc: Parsed document forwarded to ``create_syntax_graph`` and
            ``count_pos``.
        lang: UI language code. It must be a key of ``POS_TRANSLATIONS``
            whenever the graph has POS-tagged nodes. For the title, any
            value other than 'es' or 'en' falls back to French.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        its current figure.
    """
    # This view reuses the graph built for the syntax visualisation; only
    # the presentation is geared towards semantic relations. The second
    # element of the returned pair is not needed here.
    G, _ = create_syntax_graph(doc, lang)

    plt.figure(figsize=(24, 18))
    layout = nx.spring_layout(G, k=0.9, iterations=50)
    node_colors = [data['color'] for _, data in G.nodes(data=True)]
    node_sizes = [data['size'] for _, data in G.nodes(data=True)]
    nx.draw(G, layout, with_labels=False, node_color=node_colors, node_size=node_sizes, arrows=True,
            arrowsize=20, width=2, edge_color='gray')
    nx.draw_networkx_labels(G, layout, {node: data['label'] for node, data in G.nodes(data=True)},
                            font_size=10, font_weight='bold')
    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, layout, edge_labels=edge_labels, font_size=8)

    titles = {'es': "Análisis de Relaciones Semánticas", 'en': "Semantic Relations Analysis"}
    plt.title(titles.get(lang, "Analyse des Relations Sémantiques"),
              fontsize=20, fontweight='bold')
    plt.axis('off')

    # Loop-invariant work hoisted out of the legend comprehension: the tags
    # present on the graph and the per-tag counts are computed once.
    # Assumes count_pos(doc) is a pure function of doc — TODO confirm.
    present_tags = set(nx.get_node_attributes(G, 'pos').values())
    legend_elements = []
    if present_tags:
        tag_counts = count_pos(doc)
        legend_elements = [
            plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor='none',
                          label=f"{POS_TRANSLATIONS[lang][tag]} ({tag_counts[tag]})")
            for tag, color in POS_COLORS.items() if tag in present_tags
        ]
    plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)
    return plt
def perform_semantic_analysis(text, nlp, lang):
    """Parse ``text`` and render both semantic-analysis figures.

    Args:
        text: Raw text to analyse.
        nlp: Callable that turns ``text`` into the parsed document consumed
            by the visualizers (the loaded spaCy pipeline).
        lang: UI language code forwarded to both visualizers.

    Returns:
        tuple: ``(context_graph, relations_graph)`` — two distinct
        matplotlib ``Figure`` objects, one per visualization.
    """
    doc = nlp(text)
    # Both visualizers return the pyplot module itself. Returning those
    # values directly would hand the caller the same object twice, through
    # which only the most recently drawn figure is reachable. Capture each
    # figure right after it is drawn, while it is still the current one.
    context_graph = visualize_context_graph(doc, lang).gcf()
    relations_graph = visualize_semantic_relations(doc, lang).gcf()
    return context_graph, relations_graph