AIdeaText committed on
Commit
8a20582
1 Parent(s): 60e4a0a

Update modules/morpho_analysis.py

Browse files
Files changed (1) hide show
  1. modules/morpho_analysis.py +18 -2
modules/morpho_analysis.py CHANGED
@@ -1,6 +1,8 @@
1
  # /modules/morpho_analysis.py
2
  import spacy
3
  from collections import Counter
 
 
4
 
5
  # Define colors for grammatical categories
6
  POS_COLORS = {
@@ -79,6 +81,7 @@ POS_TRANSLATIONS = {
79
  }
80
  }
81
 
 
82
  def get_repeated_words_colors(doc):
83
  word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
84
  repeated_words = {word: count for word, count in word_counts.items() if count > 1}
@@ -89,7 +92,8 @@ def get_repeated_words_colors(doc):
89
  word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
90
 
91
  return word_colors
92
-
 
93
  def highlight_repeated_words(doc, word_colors):
94
  highlighted_text = []
95
  for token in doc:
@@ -98,4 +102,16 @@ def highlight_repeated_words(doc, word_colors):
98
  highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
99
  else:
100
  highlighted_text.append(token.text)
101
- return ' '.join(highlighted_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /modules/morpho_analysis.py
2
  import spacy
3
  from collections import Counter
4
+ from spacy import displacy
5
+ import re
6
 
7
  # Define colors for grammatical categories
8
  POS_COLORS = {
 
81
  }
82
  }
83
 
84
+ #############################################################################################
85
  def get_repeated_words_colors(doc):
86
  word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
87
  repeated_words = {word: count for word, count in word_counts.items() if count > 1}
 
92
  word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
93
 
94
  return word_colors
95
+
96
+ ######################################################################################################
97
  def highlight_repeated_words(doc, word_colors):
98
  highlighted_text = []
99
  for token in doc:
 
102
  highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
103
  else:
104
  highlighted_text.append(token.text)
105
+ return ' '.join(highlighted_text)
106
+
107
+ #################################################################################################
108
def generate_arc_diagram(doc, lang_code):
    """Render one compacted dependency-arc diagram per sentence of ``doc``.

    Each sentence in ``doc.sents`` is rendered with ``displacy.render`` using
    the ``"dep"`` style, and the returned markup is then post-processed to
    reduce the diagram's vertical footprint.

    Args:
        doc: Parsed document exposing ``.sents`` (presumably a spaCy ``Doc``
            with sentence boundaries set -- confirm against the caller).
        lang_code: Currently unused; kept so existing callers keep working.

    Returns:
        list[str]: One markup string per sentence, in document order. The
        list is empty when ``doc`` has no sentences.
    """
    arc_diagrams = []
    for sent in doc.sents:
        # jupyter=False forces displacy to return the markup as a string.
        # With auto-detection, running inside a notebook makes render()
        # display the diagram and return None, which would break the string
        # post-processing below.
        html = displacy.render(
            sent,
            style="dep",
            jupyter=False,
            options={"distance": 100},
        )

        # Shrink the two fixed heights this code knows about; any other
        # height value is left untouched.
        html = html.replace('height="375"', 'height="200"')
        html = re.sub(
            r'<svg[^>]*>',
            lambda m: m.group(0).replace('height="450"', 'height="300"'),
            html,
        )

        # Pin the vertical offset of translated <g> groups to 50. Only the
        # y value is rewritten, so attributes that precede ``transform`` on
        # the tag (e.g. ``class``) are kept instead of being dropped.
        html = re.sub(
            r'(<g [^>]*transform="translate\(\d+,)\d+(\)")',
            r'\g<1>50\g<2>',
            html,
        )
        arc_diagrams.append(html)
    return arc_diagrams