Shadhil's picture
voice-clone with single audio sample input
9b2107c
raw
history blame
1.14 kB
from typing import List
import jieba
import pypinyin
from .pinyinToPhonemes import PINYIN_DICT
def _chinese_character_to_pinyin(text: str) -> List[str]:
    """Convert ``text`` into a flat list of pinyin strings.

    ``pypinyin.pinyin`` returns one inner list per converted unit; this helper
    concatenates those inner lists into a single list, preserving order.

    The conversion is requested with:

    * ``style=pypinyin.Style.TONE3`` -- tone written as a trailing digit,
    * ``heteronym=False`` -- a single reading per character,
    * ``neutral_tone_with_five=True`` -- the neutral tone is written as ``5``.

    Args:
        text: The text to convert.

    Returns:
        The flattened list of strings produced by ``pypinyin.pinyin``.
    """
    nested = pypinyin.pinyin(
        text,
        style=pypinyin.Style.TONE3,
        heteronym=False,
        neutral_tone_with_five=True,
    )
    flat: List[str] = []
    for group in nested:
        flat.extend(group)
    return flat
def _chinese_pinyin_to_phoneme(pinyin: str) -> str:
    """Map one tone-suffixed pinyin token to its phoneme string plus tone.

    The last character of ``pinyin`` is taken as the tone marker and everything
    before it as the syllable. The syllable is looked up in ``PINYIN_DICT`` and
    the first entry of the matching value is used. A syllable that is not in
    the dictionary contributes an empty string, so only the tone marker is
    returned for it.

    Args:
        pinyin: A non-empty pinyin token whose final character is the tone.

    Returns:
        The phoneme string for the syllable followed by the tone character.

    Raises:
        IndexError: If ``pinyin`` is empty.
    """
    syllable, tone_mark = pinyin[:-1], pinyin[-1]
    candidates = PINYIN_DICT.get(syllable, [""])
    return candidates[0] + tone_mark
def chinese_text_to_phonemes(text: str, seperator: str = "|") -> str:
    """Convert Chinese text into a separator-joined string of phoneme symbols.

    Steps:

    1. Segment ``text`` with ``jieba.cut`` (HMM disabled) and re-join the
       segments with single spaces.
    2. Convert the spaced text to pinyin tokens.
    3. For each token ending in one of the digits ``1``-``5``, treat it as
       pinyin and replace it with its phoneme string; keep any other token
       (punctuation, spaces, other text) unchanged.
    4. Split every resulting piece into individual characters and join all
       characters with ``seperator``.

    Note:
        The pinyin test only inspects the final character, so a non-pinyin
        token that happens to end in ``1``-``5`` is also routed through the
        phoneme lookup.

    Args:
        text: The input text.
        seperator: String placed between consecutive output symbols. The
            spelling of the parameter name is kept for backward compatibility.

    Returns:
        All output symbols joined by ``seperator``.
    """
    words = jieba.cut(text, HMM=False)
    spaced_text = " ".join(words)
    tokens: List[str] = _chinese_character_to_pinyin(spaced_text)

    symbols: List[str] = []
    for tok in tokens:
        # TODO transform to is_pinyin()
        looks_like_pinyin = tok[-1] in "12345"
        # Anything that is not pinyin (punctuation or other) passes through as-is.
        piece = _chinese_pinyin_to_phoneme(tok) if looks_like_pinyin else tok
        # Extending with a string appends it one character at a time.
        symbols.extend(piece)
    return seperator.join(symbols)