Oysiyl committed on
Commit
41f1fc2
1 Parent(s): 066d18b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -48
app.py CHANGED
@@ -24,60 +24,60 @@ def create_speaker_embedding(speaker_model, waveform: np.ndarray) -> np.ndarray:
24
  def remove_special_characters_s(text: Text) -> Text:
25
  chars_to_remove_regex = '[\=\´\–\“\”\…\=]'
26
  # remove special characters
27
- text = re.sub(chars_to_remove_regex, '', text).lower()
28
- text = re.sub("‘", "'", text).lower()
29
- text = re.sub("’", "'", text).lower()
30
- text = re.sub("´", "'", text).lower()
31
  text = text.lower()
32
  return text
33
 
34
 
35
  def dutch_to_english(text: Text) -> Text:
36
  replacements = [
37
- ("à", "a"),
38
- ("ç", "c"),
39
- ("è", "e"),
40
- ("ë", "e"),
41
- ("í", "i"),
42
- ("ï", "i"),
43
- ("ö", "o"),
44
- ("ü", "u"),
45
- ('&', "en"),
46
- ('á','a'),
47
- ('ä','a'),
48
- ('î','i'),
49
- ('ó','o'),
50
- ('ö','o'),
51
- ('ú','u'),
52
- ('û','u'),
53
- ('ă','a'),
54
- ('ć','c'),
55
- ('đ','d'),
56
- ('š','s'),
57
- ('ţ','t'),
58
- ('j', 'y'),
59
- ('k', 'k'),
60
- ('ci', 'si'),
61
- ('ce', 'se'),
62
- ('ca', 'ka'),
63
- ('co', 'ko'),
64
- ('cu', 'ku'),
65
- (' sch', ' sg'),
66
- ('sch ', 's '),
67
- ('ch', 'g'),
68
- ('eeuw', 'eaw'),
69
- ('ee', 'ea'),
70
- ('aai','ay'),
71
- ('oei', 'ooy'),
72
- ('ooi', 'oay'),
73
- ('ieuw', 'eew'),
74
- ('ie', 'ee'),
75
- ('oo', 'oa'),
76
- ('oe', 'oo'),
77
- ('ei', '\\i\\'),
78
- ('ij', 'i'),
79
- ('\\i\\', 'i')
80
- ]
81
 
82
  for src, dst in replacements:
83
  text = text.replace(src, dst)
 
24
  def remove_special_characters_s(text: Text) -> Text:
25
  chars_to_remove_regex = '[\=\´\–\“\”\…\=]'
26
  # remove special characters
27
+ text = re.sub(chars_to_remove_regex, '', text)
28
+ text = re.sub("‘", "'", text)
29
+ text = re.sub("’", "'", text)
30
+ text = re.sub("´", "'", text)
31
  text = text.lower()
32
  return text
33
 
34
 
35
  def dutch_to_english(text: Text) -> Text:
36
  replacements = [
37
+ ("à", "a"),
38
+ ("ç", "c"),
39
+ ("è", "e"),
40
+ ("ë", "e"),
41
+ ("í", "i"),
42
+ ("ï", "i"),
43
+ ("ö", "o"),
44
+ ("ü", "u"),
45
+ ('&', "en"),
46
+ ('á','a'),
47
+ ('ä','a'),
48
+ ('î','i'),
49
+ ('ó','o'),
50
+ ('ö','o'),
51
+ ('ú','u'),
52
+ ('û','u'),
53
+ ('ă','a'),
54
+ ('ć','c'),
55
+ ('đ','d'),
56
+ ('š','s'),
57
+ ('ţ','t'),
58
+ ('j', 'y'),
59
+ ('k', 'k'),
60
+ ('ci', 'si'),
61
+ ('ce', 'se'),
62
+ ('ca', 'ka'),
63
+ ('co', 'ko'),
64
+ ('cu', 'ku'),
65
+ (' sch', ' sg'),
66
+ ('sch ', 's '),
67
+ ('ch', 'g'),
68
+ ('eeuw', 'eaw'),
69
+ ('ee', 'ea'),
70
+ ('aai','ay'),
71
+ ('oei', 'ooy'),
72
+ ('ooi', 'oay'),
73
+ ('ieuw', 'eew'),
74
+ ('ie', 'ee'),
75
+ ('oo', 'oa'),
76
+ ('oe', 'oo'),
77
+ ('ei', '\\i\\'),
78
+ ('ij', 'i'),
79
+ ('\\i\\', 'i')
80
+ ]
81
 
82
  for src, dst in replacements:
83
  text = text.replace(src, dst)