File size: 6,053 Bytes
5b5c9f7
 
 
 
 
 
e0f08c6
 
 
ba1dfbc
 
 
 
 
e0f08c6
 
5b5c9f7
20c3897
5b5c9f7
a43be03
e0f08c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d9ee0d
e0f08c6
 
 
 
 
 
 
 
 
 
1d9ee0d
e0f08c6
 
 
 
 
 
 
 
 
 
1d9ee0d
e0f08c6
 
 
 
 
 
 
 
 
 
 
 
 
 
86eef11
 
 
 
e0f08c6
86eef11
 
 
 
e0f08c6
 
5b5c9f7
e0f08c6
5b5c9f7
e0f08c6
 
5b5c9f7
 
 
 
e0f08c6
 
 
 
 
 
 
 
 
 
 
5b5c9f7
 
e0f08c6
5b5c9f7
 
 
e0f08c6
5b5c9f7
 
86eef11
e0f08c6
afd5df1
5b5c9f7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# app.py
import os
# Both flags are written to the process environment before the third-party
# imports below (streamlit, spacy) are executed.
# NOTE(review): TF_ENABLE_ONEDNN_OPTS=0 presumably switches off TensorFlow's
# oneDNN custom ops; confirm that a dependency actually loads TensorFlow.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
# NOTE(review): KMP_DUPLICATE_LIB_OK=TRUE presumably works around the OpenMP
# "duplicate runtime" abort; confirm it is still required.
os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'

import streamlit as st
import spacy
from spacy import displacy
import re

# Configure the page: browser-tab title "AIdeaText", layout="wide" so the
# content uses the full width, and page_icon="random".
# NOTE(review): this call sits between the third-party imports and the
# project import that follows, presumably because Streamlit expects
# set_page_config to run before any other Streamlit command; confirm before
# moving it.
st.set_page_config(
    page_title="AIdeaText",
    layout="wide",
    page_icon="random"
)

from modules.syntax_analysis import visualize_syntax, highlight_repeated_words, get_repeated_words_colors, POS_COLORS, POS_TRANSLATIONS

@st.cache_resource
def load_spacy_models():
    """Load one spaCy pipeline per supported language.

    Returns:
        dict: Maps the language codes 'es', 'en' and 'fr' to the object
        returned by ``spacy.load`` for the matching ``*_lg`` package.
    """
    # Language code -> name of the spaCy package to load for it.
    package_by_lang = {
        'es': "es_core_news_lg",
        'en': "en_core_web_lg",
        'fr': "fr_core_news_lg",
    }
    return {code: spacy.load(package) for code, package in package_by_lang.items()}

# Build the language-code -> spaCy pipeline mapping
nlp_models = load_spacy_models()

# Sidebar language picker: display name -> language code
languages = {'Español': 'es', 'English': 'en', 'Français': 'fr'}
selected_lang = st.sidebar.selectbox(
    "Select Language / Seleccione el idioma / Choisissez la langue",
    list(languages),
)
# Code of the chosen language; used to index the models and the translations
lang_code = languages[selected_lang]

# Translations: UI strings keyed first by language code ('es', 'en', 'fr') and
# then by string id. Every language provides the same set of ids.
translations = {
    'es': {
        'title': "AIdeaText - Análisis morfológico y sintáctico",
        'input_label': "Ingrese un texto para analizar (máx. 5,000 palabras):",
        'input_placeholder': "El objetivo de esta aplicación es que mejore sus habilidades de redacción. Para ello, después de ingresar su texto y presionar el botón obtendrá tres vistas horizontales. La primera, le indicará las palabras que se repiten por categoría gramatical; la segunda, un diagrama de arco le indicará las conexiones sintácticas en cada oración; y la tercera, es un grafo en el cual visualizará la configuración de su texto.",
        'analyze_button': "Analizar texto",
        'repeated_words': "Palabras repetidas",
        'legend': "Leyenda: Categorías gramaticales",
        'arc_diagram': "Análisis sintáctico: Diagrama de arco",
        'network_diagram': "Análisis sintáctico: Diagrama de red",
        'sentence': "Oración"
    },
    'en': {
        'title': "AIdeaText - Morphological and Syntactic Analysis",
        'input_label': "Enter a text to analyze (max 5,000 words):",
        'input_placeholder': "The goal of this app is for you to improve your writing skills. To do this, after entering your text and pressing the button you will get three horizontal views. The first will indicate the words that are repeated by grammatical category; second, an arc diagram will indicate the syntactic connections in each sentence; and the third is a graph in which you will visualize the configuration of your text.",
        'analyze_button': "Analyze text",
        'repeated_words': "Repeated words",
        'legend': "Legend: Grammatical categories",
        'arc_diagram': "Syntactic analysis: Arc diagram",
        'network_diagram': "Syntactic analysis: Network diagram",
        'sentence': "Sentence"
    },
    'fr': {
        'title': "AIdeaText - Analyse morphologique et syntaxique",
        'input_label': "Entrez un texte à analyser (max 5 000 mots) :",
        'input_placeholder': "Le but de cette application est d'améliorer vos compétences en rédaction. Pour ce faire, après avoir saisi votre texte et appuyé sur le bouton vous obtiendrez trois vues horizontales. Le premier indiquera les mots répétés par catégorie grammaticale; deuxièmement, un diagramme en arcs indiquera les connexions syntaxiques dans chaque phrase; et le troisième est un graphique dans lequel vous visualiserez la configuration de votre texte.",
        'analyze_button': "Analyser le texte",
        'repeated_words': "Mots répétés",
        'legend': "Légende : Catégories grammaticales",
        'arc_diagram': "Analyse syntaxique : Diagramme en arc",
        'network_diagram': "Analyse syntaxique : Diagramme de réseau",
        'sentence': "Phrase"
    }
}

# UI strings for the language chosen in the sidebar
t = translations[lang_code]

st.markdown("### " + t['title'])

# Seed the stored text with an empty string when the key is not present yet
if 'input_text' not in st.session_state:
    st.session_state['input_text'] = ""

# Text area pre-filled with the stored text; the placeholder holds the
# usage instructions
sentence_input = st.text_area(
    t['input_label'],
    height=150,
    placeholder=t['input_placeholder'],
    value=st.session_state['input_text'],
)

# Write the widget's current content back to the session state
st.session_state['input_text'] = sentence_input

# Run the analysis when the "analyze" button is pressed. Three views are
# rendered: repeated words (plus a colour legend), one arc diagram per
# sentence, and a network graph of the text.
if st.button(t['analyze_button']):
    if not sentence_input or not sentence_input.strip():
        # Empty or whitespace-only text has nothing to analyze. Tell the user
        # instead of silently doing nothing; `translations` has no dedicated
        # "empty input" string, so the localized input label is reused.
        st.warning(t['input_label'])
    else:
        # Parse the text with the pipeline of the selected language
        doc = nlp_models[lang_code](sentence_input)

        # Highlighted repeated words (the helper returns HTML markup)
        with st.expander(t['repeated_words'], expanded=True):
            word_colors = get_repeated_words_colors(doc)
            highlighted_text = highlight_repeated_words(doc, word_colors)
            st.markdown(highlighted_text, unsafe_allow_html=True)

        # Legend for grammatical categories: one coloured chip per POS tag
        # that has an entry in POS_TRANSLATIONS
        st.markdown(f"##### {t['legend']}")
        legend_items = "".join(
            f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[pos]}</span></div>"
            for pos, color in POS_COLORS.items()
            if pos in POS_TRANSLATIONS
        )
        legend_html = f"<div style='display: flex; flex-wrap: wrap;'>{legend_items}</div>"
        st.markdown(legend_html, unsafe_allow_html=True)

        # Arc diagram: one dependency rendering per sentence, numbered from 1
        with st.expander(t['arc_diagram'], expanded=True):
            for number, sent in enumerate(doc.sents, start=1):
                st.subheader(f"{t['sentence']} {number}")
                html = displacy.render(sent, style="dep", options={"distance": 100})
                # Post-process the generated SVG: shrink the known height
                # values and reset the vertical offset of translated groups
                html = html.replace('height="375"', 'height="200"')
                html = re.sub(r'<svg[^>]*>', lambda m: m.group(0).replace('height="450"', 'height="300"'), html)
                html = re.sub(r'<g [^>]*transform="translate\((\d+),(\d+)\)"', lambda m: f'<g transform="translate({m.group(1)},50)"', html)
                st.write(html, unsafe_allow_html=True)

        # Network graph built from the raw text by the project helper
        with st.expander(t['network_diagram'], expanded=True):
            fig = visualize_syntax(sentence_input, nlp_models[lang_code], lang_code)
            st.pyplot(fig)