import random

import torch
import gradio as gr
import numpy as np

from tools.logger import get_logger

logger = get_logger(" WebUI ")

import ChatTTS

chat = ChatTTS.Chat(get_logger("ChatTTS"))

# Voice presets: each display name maps to the seed used to sample that voice.
voices = {
    "默认": {"seed": 2},
    "音色1": {"seed": 1111},
    "音色2": {"seed": 2222},
    "音色3": {"seed": 3333},
    "音色4": {"seed": 4444},
    "音色5": {"seed": 5555},
    "音色6": {"seed": 6666},
    "音色7": {"seed": 7777},
    "音色8": {"seed": 8888},
    "音色9": {"seed": 9999},
    "音色10": {"seed": 11111},
}

# Key of the preset used when an unknown voice name is supplied.
_DEFAULT_VOICE = "默认"

# Sample rate (Hz) reported alongside every audio chunk yielded to the UI.
_SAMPLE_RATE = 24000

# Largest value representable by a signed 16-bit PCM sample.
_INT16_MAX = 32767


def generate_seed():
    """Return a Gradio update that sets a component to a random seed.

    The seed is drawn uniformly from 1 to 100000000 (both inclusive).
    """
    return gr.update(value=random.randint(1, 100000000))


def on_voice_change(vocie_selection):
    """Return the seed associated with the selected voice preset.

    Args:
        vocie_selection: Key into ``voices``. (The parameter name keeps its
            original spelling so keyword callers continue to work.)

    Returns:
        The preset's seed. When the key is not in ``voices`` (for example
        ``None``), the seed of the default preset is returned instead of
        failing with a ``TypeError`` on ``None['seed']``.
    """
    preset = voices.get(vocie_selection, voices[_DEFAULT_VOICE])
    return preset["seed"]


def refine_text(text, audio_seed_input, text_seed_input, refine_text_flag):
    """Optionally run the ChatTTS text-refinement pass over ``text``.

    Args:
        text: Input text.
        audio_seed_input: Unused here; kept so the signature stays
            compatible with existing callers.
        text_seed_input: Seed applied to torch's RNG before refinement.
        refine_text_flag: When falsy, ``text`` is returned unchanged.

    Returns:
        The refined text. If ``chat.infer`` returns a list, its first element
        is returned; otherwise the result is returned as-is.
    """
    if not refine_text_flag:
        return text

    # Only the text seed matters for this call: seeding with the audio seed
    # first (as the code used to) was immediately overwritten.
    torch.manual_seed(text_seed_input)
    params_refine_text = {'prompt': '[oral_2][laugh_0][break_6]'}

    refined = chat.infer(
        text,
        skip_refine_text=False,
        refine_text_only=True,
        params_refine_text=params_refine_text,
    )
    return refined[0] if isinstance(refined, list) else refined


def _float_to_int16(audio):
    """Convert floating-point audio samples to 16-bit PCM.

    Samples are assumed to be nominally in [-1.0, 1.0]. They are scaled by
    32767; if the absolute peak exceeds 1.0 the signal is scaled down so the
    peak lands exactly on 32767, which avoids int16 wrap-around. An empty
    array is returned as an empty int16 array instead of raising on
    ``max()`` of a zero-size array.
    """
    audio = np.asarray(audio)
    if audio.size == 0:
        return audio.astype(np.int16)

    peak = float(np.abs(audio).max())
    scale = _INT16_MAX / peak if peak > 1.0 else float(_INT16_MAX)
    return (audio * scale).astype(np.int16)


def generate_audio(text, temperature, top_P, top_K, audio_seed_input, text_seed_input, stream):
    """Synthesize speech for ``text`` and yield it as int16 PCM.

    This is a generator. With ``stream`` truthy it yields one
    ``(sample_rate, samples)`` tuple per chunk produced by ``chat.infer``;
    otherwise it yields a single tuple for the whole utterance. Nothing is
    yielded when ``text`` is empty.

    Args:
        text: Text to synthesize.
        temperature: Forwarded to ``chat.infer`` via ``params_infer_code``.
        top_P: Forwarded to ``chat.infer`` via ``params_infer_code``.
        top_K: Forwarded to ``chat.infer`` via ``params_infer_code``.
        audio_seed_input: Seed applied to torch's RNG before the speaker
            embedding is sampled.
        text_seed_input: Seed applied to torch's RNG before inference.
        stream: Whether to request and emit streamed chunks.

    Yields:
        ``(24000, numpy.ndarray[int16])`` tuples.
    """
    if not text:
        return None

    # Seed before sampling so the same audio seed selects the same speaker
    # (presumably sample_random_speaker draws from torch's RNG).
    torch.manual_seed(audio_seed_input)
    rand_spk = chat.sample_random_speaker()
    params_infer_code = {
        'spk_emb': rand_spk,
        'temperature': temperature,
        'top_P': top_P,
        'top_K': top_K,
    }

    torch.manual_seed(text_seed_input)
    wav = chat.infer(
        text,
        skip_refine_text=True,
        params_infer_code=params_infer_code,
        stream=stream,
    )

    if stream:
        for gen in wav:
            # gen[0] is expected to be a 2-D (1, n_samples) array; the first
            # row holds the samples of this chunk.
            chunk = np.asarray(gen[0], dtype=np.float64)[0]
            if chunk.size == 0:
                # Nothing to play for an empty chunk.
                continue
            yield _SAMPLE_RATE, _float_to_int16(chunk)
        return

    audio_data = np.array(wav[0]).flatten()
    yield _SAMPLE_RATE, _float_to_int16(audio_data)