Shadhil's picture
voice-clone with single audio sample input
9b2107c
raw
history blame
958 Bytes
from jamo import hangul_to_jamo
from TTS.tts.utils.text.korean.korean import normalize
# Shared G2p converter instance. Starts as None and is created lazily by
# korean_text_to_phonemes() on its first call, then reused afterwards.
g2p = None
def korean_text_to_phonemes(text, character: str = "hangeul") -> str:
    """Convert Korean text into a phoneme string.

    The text is first passed through ``normalize`` and then through the
    shared G2p converter, which is created on first use and cached in the
    module-level ``g2p`` variable.

    If ``character`` is ``"english"``, the converted text is passed through
    ``anyascii`` and returned. For any other value, the converted text is
    run through ``hangul_to_jamo`` and the resulting jamo are joined into a
    single string.

    The jamo output may render the same as the input, but it is different
    in Unicode, for example:

        input  = '하늘'  (U+D558 U+B298), i.e. 하 + 늘
        output = '하늘' (U+1112 U+1161 U+1102 U+1173 U+11AF),
                 i.e. ᄒ + ᅡ + ᄂ + ᅳ + ᆯ
    """
    global g2p  # pylint: disable=global-statement
    if g2p is None:
        # Deferred import: g2pkk is only loaded when the converter is needed.
        from g2pkk import G2p

        g2p = G2p()

    if character != "english":
        # Default path: split syllable blocks into jamo,
        # e.g. '하늘' --> 'ᄒ', 'ᅡ', 'ᄂ', 'ᅳ', 'ᆯ', and concatenate them.
        phonemes = g2p(normalize(text))
        return "".join(hangul_to_jamo(phonemes))

    # "english" path: anyascii is imported only when this mode is requested.
    from anyascii import anyascii

    return anyascii(g2p(normalize(text)))