File size: 3,772 Bytes
568dae8
5b5c9f7
 
 
 
 
 
ba1dfbc
 
 
 
 
 
 
 
5b5c9f7
 
 
0ad9955
ad66243
 
0ad9955
6fe1667
 
 
 
5b5c9f7
 
 
a43be03
 
75be026
 
 
 
 
 
 
 
 
 
 
a43be03
5b5c9f7
75be026
5b5c9f7
be2df01
5b5c9f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a58fbcf
 
5b5c9f7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

# app.py
import os
# Both variables are written to the process environment before the
# third-party imports that follow, so they are already in place when those
# libraries are loaded. Keep these assignments above the other imports.
# NOTE(review): TF_ENABLE_ONEDNN_OPTS=0 presumably disables TensorFlow's
# oneDNN custom ops — confirm that TensorFlow is actually pulled in somewhere.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
# NOTE(review): KMP_DUPLICATE_LIB_OK=TRUE presumably works around the OpenMP
# "duplicate runtime already initialized" abort — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'

import streamlit as st

# Configure the page to use the full width.
# This call is deliberately placed right after the Streamlit import and before
# any other st.* call in the script.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit command — verify against the pinned Streamlit version before
# moving it.
st.set_page_config(
    page_title="AIdeaText",
    layout="wide",
    page_icon="random"        # NOTE(review): presumably a random emoji favicon — confirm
    )

import spacy
from spacy import displacy
import re
import pydantic
import numpy as np 
import thinc

#st.write(f"spaCy version: {spacy.__version__}")
#st.write(f"Pydantic version: {pydantic.__version__}")
#st.write(f"NumPy version: {np.__version__}")
#st.write(f"Thinc version: {thinc.__version__}")

from modules.syntax_analysis import highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS, visualize_syntax

@st.cache_resource
def load_spacy_model():
    """Return the large Spanish spaCy pipeline.

    The function is wrapped in ``st.cache_resource`` so the value it returns
    can be shared instead of being rebuilt each time the function is called.

    Returns:
        The pipeline object produced by ``spacy.load("es_core_news_lg")``.
    """
    model_name = "es_core_news_lg"
    pipeline = spacy.load(model_name)
    return pipeline

#@st.cache_resource
#def load_spacy_model():
#    try:
#        nlp = spacy.load("es_core_news_lg")
#    except IOError:
#        st.info("Downloading spaCy model...")
#        spacy.cli.download("es_core_news_lg")
#        nlp = spacy.load("es_core_news_lg")
#    return nlp

# Load spaCy model
# Go through the cached loader defined in this file rather than calling
# spacy.load() directly: Streamlit re-executes the script on every user
# interaction, and a direct call would rebuild the pipeline on each rerun.
nlp = load_spacy_model()

# Page heading, rendered as a level-3 Markdown header.
st.markdown("### AIdeaText")

# First horizontal band: free-text box holding the passage to analyse.
# The widget's current content is kept in `sentence_input` for the handlers below.
sentence_input = st.text_area(
    label="Ingresa un texto para analizar (max 5,000 words):",
    height=150,
)

if st.button("Analizar texto"):
    # Runs the whole analysis when the button is pressed: repeated-word
    # highlighting, one dependency arc diagram per sentence, and a network
    # graph of the full text.
    if not (sentence_input and sentence_input.strip()):
        # Empty or whitespace-only input: give feedback instead of silently
        # rendering nothing.
        st.warning("Ingresa un texto para analizar.")
    else:
        doc = nlp(sentence_input)

        # Second horizontal band: Highlighted Repeated Words
        with st.expander("Palabras repetidas", expanded=True):
            word_colors = get_repeated_words_colors(doc)
            highlighted_text = highlight_repeated_words(doc, word_colors)
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Legend for grammatical categories: one coloured chip per POS tag
            # that has an entry in POS_TRANSLATIONS, in POS_COLORS order.
            st.markdown("##### Legenda: Categorías gramaticales")
            legend_items = "".join(
                f"<div style='margin-right: 10px;'>"
                f"<span style='background-color: {color}; padding: 2px 5px;'>"
                f"{POS_TRANSLATIONS[pos]}</span></div>"
                for pos, color in POS_COLORS.items()
                if pos in POS_TRANSLATIONS
            )
            legend_html = (
                "<div style='display: flex; flex-wrap: wrap;'>"
                f"{legend_items}"
                "</div>"
            )
            st.markdown(legend_html, unsafe_allow_html=True)

        # Third horizontal band: Arc Diagram
        with st.expander("Análisis sintáctico: Diagrama de arco", expanded=True):
            # Patterns are loop-invariant, so compile them once up front.
            svg_open_tag = re.compile(r'<svg[^>]*>')
            group_translate = re.compile(
                r'<g [^>]*transform="translate\((\d+),(\d+)\)"'
            )

            for number, sent in enumerate(doc.sents, start=1):
                st.subheader(f"Sentence {number}")
                html = displacy.render(sent, style="dep", options={"distance": 100})
                # Reduce the height of the SVG (only affects markup that
                # carries exactly these height values).
                html = html.replace('height="375"', 'height="200"')
                html = svg_open_tag.sub(
                    lambda m: m.group(0).replace('height="450"', 'height="300"'),
                    html,
                )
                # Reduce the top margin: keep the x offset and force the y
                # offset to 50. The whole matched prefix is rewritten, so any
                # attributes that preceded `transform` on the <g> are dropped.
                html = group_translate.sub(
                    lambda m: f'<g transform="translate({m.group(1)},50)"',
                    html,
                )
                st.write(html, unsafe_allow_html=True)

        # Fourth horizontal band: Network graph
        with st.expander("Análisis sintáctico: Diagrama de red", expanded=True):
            # visualize_syntax receives the raw text (not the parsed doc) and
            # its result is handed directly to st.pyplot.
            fig = visualize_syntax(sentence_input)
            st.pyplot(fig)