# coding: utf-8
# Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py
import re

from TTS.tts.utils.text.korean.ko_dictionary import english_dictionary, etc_dictionary


def normalize(text: str) -> str:
    """Normalize raw text through a fixed sequence of clean-up steps.

    The steps run in this order:

    1. Strip leading/trailing whitespace.
    2. Delete every character that falls in the listed CJK radical /
       ideograph ranges.
    3. Replace substrings found in ``etc_dictionary``.
    4. Replace runs of ASCII letters found in ``english_dictionary``.
    5. Lowercase the result.

    Args:
        text: The input string.

    Returns:
        The normalized string.
    """
    text = text.strip()
    text = re.sub("[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text)
    text = normalize_with_dictionary(text, etc_dictionary)
    text = normalize_english(text)
    # Lowercasing is done last, so the dictionary lookups above see the
    # original casing of the input.
    text = text.lower()
    return text


def normalize_with_dictionary(text: str, dic) -> str:
    """Replace every occurrence of a key of ``dic`` in ``text`` with its value.

    All replacements happen in a single left-to-right pass, so text inserted
    by one replacement is never re-scanned for further keys.

    Args:
        text: The string to process.
        dic: Mapping from literal substrings to their replacement strings.

    Returns:
        ``text`` with all matching keys replaced, or ``text`` unchanged when
        none of the keys occur in it.
    """
    # Only keys that actually occur can match; an empty key would match at
    # every position, so it is skipped.
    present = [key for key in dic if key and key in text]
    if not present:
        return text
    # Regex alternation picks the first alternative that matches, not the
    # longest. Put longer keys first so a short key that is a prefix of a
    # longer one cannot shadow it.
    present.sort(key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in present))
    return pattern.sub(lambda match: dic[match.group()], text)


def normalize_english(text: str) -> str:
    """Replace runs of ASCII letters that appear in ``english_dictionary``.

    Each maximal run of ``A-Z``/``a-z`` characters is looked up as-is
    (case-sensitively); runs without a dictionary entry are left untouched.

    Args:
        text: The string to process.

    Returns:
        ``text`` with known English words replaced by their dictionary value.
    """

    def fn(m):
        word = m.group()
        # Fall back to the word itself when there is no entry.
        return english_dictionary.get(word, word)

    return re.sub("([A-Za-z]+)", fn, text)