Update modules/morpho_analysis.py
Browse files- modules/morpho_analysis.py +18 -2
modules/morpho_analysis.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
# /modules/morpho_analysis.py
|
2 |
import spacy
|
3 |
from collections import Counter
|
|
|
|
|
4 |
|
5 |
# Define colors for grammatical categories
|
6 |
POS_COLORS = {
|
@@ -79,6 +81,7 @@ POS_TRANSLATIONS = {
|
|
79 |
}
|
80 |
}
|
81 |
|
|
|
82 |
def get_repeated_words_colors(doc):
|
83 |
word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
|
84 |
repeated_words = {word: count for word, count in word_counts.items() if count > 1}
|
@@ -89,7 +92,8 @@ def get_repeated_words_colors(doc):
|
|
89 |
word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
|
90 |
|
91 |
return word_colors
|
92 |
-
|
|
|
93 |
def highlight_repeated_words(doc, word_colors):
|
94 |
highlighted_text = []
|
95 |
for token in doc:
|
@@ -98,4 +102,16 @@ def highlight_repeated_words(doc, word_colors):
|
|
98 |
highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
|
99 |
else:
|
100 |
highlighted_text.append(token.text)
|
101 |
-
return ' '.join(highlighted_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# /modules/morpho_analysis.py
|
2 |
import spacy
|
3 |
from collections import Counter
|
4 |
+
from spacy import displacy
|
5 |
+
import re
|
6 |
|
7 |
# Define colors for grammatical categories
|
8 |
POS_COLORS = {
|
|
|
81 |
}
|
82 |
}
|
83 |
|
84 |
+
#############################################################################################
|
85 |
def get_repeated_words_colors(doc):
|
86 |
word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
|
87 |
repeated_words = {word: count for word, count in word_counts.items() if count > 1}
|
|
|
92 |
word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
|
93 |
|
94 |
return word_colors
|
95 |
+
|
96 |
+
######################################################################################################
|
97 |
def highlight_repeated_words(doc, word_colors):
|
98 |
highlighted_text = []
|
99 |
for token in doc:
|
|
|
102 |
highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
|
103 |
else:
|
104 |
highlighted_text.append(token.text)
|
105 |
+
return ' '.join(highlighted_text)
|
106 |
+
|
107 |
+
#################################################################################################
|
108 |
+
# Compiled once at import time; both patterns are reused for every sentence.
_SVG_OPEN_TAG_RE = re.compile(r'<svg[^>]*>')
# Group 1 keeps everything up to the x offset (including any attributes that
# precede ``transform``); group 2 keeps the closing of the translate() call.
_GROUP_TRANSLATE_RE = re.compile(r'(<g [^>]*transform="translate\(\d+,)\d+(\)")')


def _compact_arc_svg(html):
    """Return *html* with the hard-coded SVG heights and group y-offsets reduced."""
    # Plain-text swap of one specific height wherever it appears in the markup.
    html = html.replace('height="375"', 'height="200"')
    # Only touch the height attribute inside the opening <svg ...> tag.
    html = _SVG_OPEN_TAG_RE.sub(
        lambda m: m.group(0).replace('height="450"', 'height="300"'),
        html,
    )
    # Force the y component of every <g ... transform="translate(x,y)"> to 50
    # while leaving the x component and the tag's other attributes untouched.
    return _GROUP_TRANSLATE_RE.sub(r'\g<1>50\g<2>', html)


def generate_arc_diagram(doc, lang_code):
    """Render one dependency arc diagram per sentence of *doc*.

    Args:
        doc: Parsed document exposing a ``sents`` iterable; each sentence is
            passed to ``displacy.render`` with ``style="dep"``.
        lang_code: Currently unused; kept so existing callers keep working.

    Returns:
        A list of markup strings, one per sentence, in sentence order. The
        list is empty when ``doc.sents`` yields nothing.
    """
    arc_diagrams = []
    for sent in doc.sents:
        # jupyter=False makes render() return the markup as a string even when
        # a notebook environment is auto-detected; otherwise it would display
        # the diagram and return None, breaking the post-processing below.
        html = displacy.render(
            sent,
            style="dep",
            jupyter=False,
            options={"distance": 100},
        )
        arc_diagrams.append(_compact_arc_svg(html))
    return arc_diagrams
|