vineelpratap committed on
Commit
2bc2fff
1 Parent(s): 9981498

Update utils/text_norm.py

Browse files
Files changed (1) hide show
  1. utils/text_norm.py +8 -3
utils/text_norm.py CHANGED
@@ -6,7 +6,12 @@ from utils.norm_config import norm_config
6
 
7
 
8
  def text_normalize(
9
- text, iso_code, lower_case=True, remove_numbers=True, remove_brackets=False
 
 
 
 
 
10
  ):
11
 
12
  """Given a text, normalize it by changing to lower case, removing punctuations, removing words that only contain digits and removing extra spaces
@@ -95,7 +100,7 @@ def text_normalize(
95
 
96
  normalized_text = unidecode(normalized_text)
97
 
98
- # Remove extra spaces
99
- normalized_text = re.sub(r"\s+", " ", normalized_text).strip()
100
 
101
  return normalized_text
 
6
 
7
 
8
  def text_normalize(
9
+ text,
10
+ iso_code="xxx",
11
+ lower_case=True,
12
+ remove_numbers=False,
13
+ remove_brackets=False,
14
+ rm_extra_spaces=False,
15
  ):
16
 
17
  """Given a text, normalize it by changing to lower case, removing punctuations, removing words that only contain digits and removing extra spaces
 
100
 
101
  normalized_text = unidecode(normalized_text)
102
 
103
+ if rm_extra_spaces:
104
+ normalized_text = re.sub(r"\s+", " ", normalized_text).strip()
105
 
106
  return normalized_text