try:
    import cn2an
except ImportError:
    print("The 'cn2an' module is not installed. Please install it using 'pip install cn2an'.")
    exit(1)

try:
    import jieba
except ImportError:
    print("The 'jieba' module is not installed. Please install it using 'pip install jieba'.")
    exit(1)

import re
import wave

import numpy as np
import jieba.posseg as pseg


def save_audio(file_name, audio, rate=24000):
    """
    Save an audio array as a 16-bit mono WAV file.
    :param file_name: output file name
    :param audio: float audio array in [-1, 1]
    :param rate: sample rate in Hz
    :return: full path of the saved file
    """
    import os

    from config import DEFAULT_DIR

    audio = (audio * 32767).astype(np.int16)

    # Create the default output directory if it does not exist
    if not os.path.exists(DEFAULT_DIR):
        os.makedirs(DEFAULT_DIR)

    full_path = os.path.join(DEFAULT_DIR, file_name)
    with wave.open(full_path, "w") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(rate)
        wf.writeframes(audio.tobytes())
    return full_path
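

# Usage sketch (hypothetical values; assumes config.DEFAULT_DIR is writable):
#   tone = np.sin(2 * np.pi * 440 * np.arange(24000) / 24000)  # 1 s, 440 Hz
#   path = save_audio("tone.wav", tone)  # 16-bit mono WAV at 24 kHz
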

def combine_audio(wavs):
    """
    Concatenate multiple audio segments into one.
    :param wavs: list of audio arrays, each shaped (1, samples)
    :return: normalized combined audio
    """
    wavs = [normalize_audio(w) for w in wavs]  # normalize each segment first
    combined_audio = np.concatenate(wavs, axis=1)  # concatenate along the time axis
    return normalize_audio(combined_audio)  # normalize again after combining


def normalize_audio(audio):
    """
    Normalize audio array to be between -1 and 1
    :param audio: Input audio array
    :return: Normalized audio array
    """
    audio = np.clip(audio, -1, 1)
    max_val = np.max(np.abs(audio))
    if max_val > 0:
        audio = audio / max_val
    return audio


def combine_audio_with_crossfade(audio_arrays, crossfade_duration=0.1, rate=24000):
    """
    Combine audio arrays with a crossfade to avoid clicks at the junctions.
    :param audio_arrays: List of audio arrays to combine
    :param crossfade_duration: Duration of the crossfade in seconds
    :param rate: Sample rate of the audio
    :return: Combined audio array
    """
    crossfade_samples = int(crossfade_duration * rate)
    combined_audio = np.array([], dtype=np.float32)

    for i in range(len(audio_arrays)):
        audio_arrays[i] = np.squeeze(audio_arrays[i])  # Ensure all arrays are 1D
        if i == 0:
            # Copy so the in-place crossfade below does not mutate the caller's array
            combined_audio = audio_arrays[i].copy()
        else:
            # Apply a crossfade between the end of the current combined audio
            # and the start of the next array; cap the overlap by both lengths
            overlap = min(len(combined_audio), len(audio_arrays[i]), crossfade_samples)
            crossfade_end = combined_audio[-overlap:]
            crossfade_start = audio_arrays[i][:overlap]
            # Crossfade by linearly blending the audio samples
            t = np.linspace(0, 1, overlap)
            crossfaded = crossfade_end * (1 - t) + crossfade_start * t
            # Replace the end of the current combined audio with the crossfaded audio
            combined_audio[-overlap:] = crossfaded
            # Append the rest of the new array
            combined_audio = np.concatenate((combined_audio, audio_arrays[i][overlap:]))
    return combined_audio
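

# Usage sketch (hypothetical data): crossfading two 1-second segments with a
# 100 ms overlap yields 24000 + 24000 - 2400 = 45600 samples, because the
# overlapping region is shared rather than appended.
#   t = np.arange(24000) / 24000
#   a = np.sin(2 * np.pi * 440 * t)
#   b = np.sin(2 * np.pi * 660 * t)
#   mixed = combine_audio_with_crossfade([a, b], crossfade_duration=0.1)
#   assert len(mixed) == 45600
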

def remove_chinese_punctuation(text):
    """
    Replace Chinese punctuation in the text with ","
    :param text:
    :return:
    """
    chinese_punctuation_pattern = r"[:;!(),【】『』「」《》\-‘“’”:,;!\(\)\[\]><\-·]"
    text = re.sub(chinese_punctuation_pattern, ',', text)
    # Collapse runs of two or more "。"/"," into a single "。"
    text = re.sub(r'[。,]{2,}', '。', text)
    # Strip a leading or trailing ","
    text = re.sub(r'^,|,$', '', text)
    return text


def remove_english_punctuation(text):
    """
    Replace punctuation in the text with ","
    :param text:
    :return:
    """
    punctuation_pattern = r"[:;!(),【】『』「」《》\-‘“’”:,;!\(\)\[\]><\-·]"
    text = re.sub(punctuation_pattern, ',', text)
    # Collapse runs of two or more ","/"." into a single "."
    text = re.sub(r'[,\.]{2,}', '.', text)
    # Strip a leading or trailing ","
    text = re.sub(r'^,|,$', '', text)
    return text
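

# Example (doctest-style sketch): punctuation is first mapped to commas, runs
# of separators are collapsed, and leading/trailing commas are stripped.
#   >>> remove_chinese_punctuation("“你好!”(测试)")
#   '你好。测试'
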

def text_normalize(text):
    """
    Normalize text (PaddlePaddle version).
    :param text:
    :return:
    """
    from zh_normalization import TextNormalizer

    # ref: https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    tx = TextNormalizer()
    sentences = tx.normalize(text)
    _txt = ''.join(sentences)
    return _txt


def convert_numbers_to_chinese(text):
    """
    Convert Arabic numerals in the text to Chinese numerals, e.g. 123 -> 一百二十三
    :param text:
    :return:
    """
    return cn2an.transform(text, "an2cn")
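

# Example (doctest-style sketch, relies on cn2an's "an2cn" transform mode):
#   >>> convert_numbers_to_chinese("我有123元")
#   '我有一百二十三元'
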

def detect_language(sentence):
    # ref: https://github.com/2noise/ChatTTS/blob/main/ChatTTS/utils/infer_utils.py#L55
    chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]')
    english_word_pattern = re.compile(r'\b[A-Za-z]+\b')

    chinese_chars = chinese_char_pattern.findall(sentence)
    english_words = english_word_pattern.findall(sentence)

    if len(chinese_chars) > len(english_words):
        return "zh"
    else:
        return "en"

def split_text(text, min_length=60):
    """
    Split text into sentences of at least min_length characters.
    :param text:
    :param min_length:
    :return:
    """
    # Short-sentence delimiters
    sentence_delimiters = re.compile(r'([。?!\.]+)')
    # One or more consecutive newlines force a paragraph break
    paragraph_delimiters = re.compile(r'(\s*\n\s*)+')

    paragraphs = re.split(paragraph_delimiters, text)
    result = []

    for paragraph in paragraphs:
        if not paragraph.strip():
            continue  # skip empty paragraphs

        # Paragraphs shorter than the threshold become chunks as-is
        if len(paragraph.strip()) < min_length:
            result.append(paragraph.strip())
            continue

        # Longer paragraphs are split into sentences and regrouped
        sentences = re.split(sentence_delimiters, paragraph)
        current_sentence = ''

        for sentence in sentences:
            if re.match(sentence_delimiters, sentence):
                current_sentence += sentence.strip()
                if len(current_sentence) >= min_length:
                    result.append(current_sentence.strip())
                    current_sentence = ''
            else:
                current_sentence += sentence.strip()

        if current_sentence:
            if len(current_sentence) < min_length and len(result) > 0:
                result[-1] += current_sentence
            else:
                result.append(current_sentence)

    if detect_language(text[:1024]) == "zh":
        result = [normalize_zh(_.strip()) for _ in result if _.strip()]
    else:
        result = [normalize_en(_.strip()) for _ in result if _.strip()]
    return result
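

# Usage sketch (illustrative; the exact chunking depends on min_length and the
# normalize_zh/normalize_en post-processing):
#   chunks = split_text("第一句话。第二句话。第三句话。", min_length=10)
#   # -> a list of chunks, each built from whole sentences and, where
#   #    possible, at least min_length characters long
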

def normalize_en(text):
    # Text is no longer normalized outside of ChatTTS
    # from tn.english.normalizer import Normalizer
    # normalizer = Normalizer()
    # text = normalizer.normalize(text)
    # text = remove_english_punctuation(text)
    return text


def normalize_zh(text):
    # Text is no longer normalized outside of ChatTTS
    # from tn.chinese.normalizer import Normalizer
    # normalizer = Normalizer()
    # text = normalizer.normalize(text)
    # text = remove_chinese_punctuation(text)
    text = process_ddd(text)
    return text


def batch_split(items, batch_size=5):
    """
    Split items into batches of size batch_size.
    :param items:
    :param batch_size:
    :return:
    """
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
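

# Example (doctest-style sketch): the final batch may be shorter.
#   >>> batch_split([1, 2, 3, 4, 5, 6, 7], batch_size=3)
#   [[1, 2, 3], [4, 5, 6], [7]]
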

# Read a txt file, detecting the file encoding automatically
def read_long_text(file_path):
    """
    Read a long text file, detecting the file encoding automatically.
    :param file_path: path to the file
    :return: file contents
    """
    encodings = ['utf-8', 'gbk', 'iso-8859-1', 'utf-16']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                return file.read()
        except (UnicodeDecodeError, LookupError):
            continue
    raise ValueError("Unable to detect the file encoding")
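

# Usage sketch (hypothetical path):
#   text = read_long_text("input/article.txt")  # tries utf-8, gbk, iso-8859-1, utf-16 in turn
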

def replace_tokens(text):
    # Drop [UNK] markers entirely
    remove_tokens = ['UNK']
    for token in remove_tokens:
        text = re.sub(r'\[' + re.escape(token) + r'\]', '', text)

    # Protect control tokens from punctuation/normalization steps
    # by rewriting [token] as uutokenuu
    tokens = ['uv_break', 'laugh', 'lbreak']
    for token in tokens:
        text = re.sub(r'\[' + re.escape(token) + r'\]', f'uu{token}uu', text)
    text = text.replace('_', '')
    return text


def restore_tokens(text):
    # Rewrite uutokenuu back to [token]
    tokens = ['uvbreak', 'laugh', 'UNK', 'lbreak']
    for token in tokens:
        text = re.sub(r'uu' + re.escape(token) + r'uu', f'[{token}]', text)
    text = text.replace('[uvbreak]', '[uv_break]')
    return text
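

# Example (doctest-style sketch): replace_tokens/restore_tokens round-trip the
# control tokens, while [UNK] is dropped for good.
#   >>> replace_tokens("你好[uv_break]世界[UNK]")
#   '你好uuuvbreakuu世界'
#   >>> restore_tokens("你好uuuvbreakuu世界")
#   '你好[uv_break]世界'
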

def process_ddd(text):
    """
    Normalize the particles "地" and "得" by replacing both with "的".
    Rationale: "地" and "得" mainly appear before/after verbs and adjectives. This
    method does not follow the grammar strictly, because misuse is common in real
    text. It also depends on jieba's segmentation accuracy, so some cases may be
    missed, e.g. 小红帽疑惑地问.
    :param text: input text
    :return: processed text
    """
    word_list = [(word, flag) for word, flag in pseg.cut(text, use_paddle=False)]
    # print(word_list)
    processed_words = []
    for i, (word, flag) in enumerate(word_list):
        if word in ["地", "得"]:
            # Check previous and next word's flag
            # prev_flag = word_list[i - 1][1] if i > 0 else None
            # next_flag = word_list[i + 1][1] if i + 1 < len(word_list) else None
            # if prev_flag in ['v', 'a'] or next_flag in ['v', 'a']:
            if flag in ['uv', 'ud']:
                # uv/ud are jieba's POS tags for the structural particles 地/得
                processed_words.append("的")
            else:
                processed_words.append(word)
        else:
            processed_words.append(word)
    return ''.join(processed_words)


def replace_space_between_chinese(text):
    # Remove whitespace between two adjacent Chinese characters
    return re.sub(r'(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])', '', text)
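

# Example (doctest-style sketch): only whitespace between two Chinese
# characters is removed; spaces next to Latin text survive.
#   >>> replace_space_between_chinese("你好 世界 hello world")
#   '你好世界 hello world'
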

if __name__ == '__main__':
    # txts = [
    #     "快速地跑过红色的大门",
    #     "笑得很开心,学得很好",
    #     "小红帽疑惑地问?",
    #     "大灰狼慌张地回答",
    #     "哦,这是为了更好地听你说话。",
    #     "大灰狼不耐烦地说:“为了更好地抱你。”",
    #     "他跑得很快,工作做得非常认真,这是他努力地结果。得到",
    # ]
    # for txt in txts:
    #     print(txt, '-->', process_ddd(txt))
    txts = [
        "电影中梁朝伟扮演的陈永仁的编号27149",
        "这块黄金重达324.75克 我们班的最高总分为583分",
        "12~23 -1.5~2",
        "居维埃·拉色别德①、杜梅里②、卡特法日③,",
    ]
    for txt in txts:
        print(txt, '-->', text_normalize(txt))
        # print(txt, '-->', convert_numbers_to_chinese(txt))