"""Text-to-speech helper around Bark: builds the voice-prompt table and exposes `gen_tts`."""

import numpy as np

from bark import SAMPLE_RATE, generate_audio, preload_models
from bark.generation import SUPPORTED_LANGS

# When True, the models are not preloaded and `gen_tts` returns silence
# instead of calling `generate_audio`.
DEBUG_MODE = False

if not DEBUG_MODE:
    # Runs at import time so the first `gen_tts` call does not pay for it.
    _ = preload_models(text_use_gpu=False, text_use_small=True)

# Labels in presentation order, and the mapping from each label to the
# `history_prompt` value handed to `generate_audio`.
AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
PROMPT_LOOKUP = {}
# Each SUPPORTED_LANGS entry is unpacked as a pair; only the second element
# is used (presumably the language code -- confirm against bark.generation).
for _, lang in SUPPORTED_LANGS:
    for n in range(10):
        label = f"Speaker {n} ({lang})"
        AVAILABLE_PROMPTS.append(label)
        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
PROMPT_LOOKUP["Unconditional"] = None
PROMPT_LOOKUP["Announcer"] = "announcer"


def gen_tts(text, history_prompt):
    """Synthesize speech for *text* and return it as 16-bit integer samples.

    Args:
        text: The text to speak. Must be non-empty.
        history_prompt: A label from ``AVAILABLE_PROMPTS``; it is translated
            through ``PROMPT_LOOKUP`` before being passed to
            ``generate_audio``.

    Returns:
        A ``(SAMPLE_RATE, samples)`` tuple where ``samples`` is a NumPy
        ``int16`` array.

    Raises:
        ValueError: If *text* is ``None`` or empty.
        KeyError: If *history_prompt* is not a key of ``PROMPT_LOOKUP``.
    """
    if not text:  # text is None or empty
        raise ValueError("text cannot be None or empty")

    history_prompt = PROMPT_LOOKUP[history_prompt]

    if DEBUG_MODE:
        # SAMPLE_RATE zero-valued samples stand in for real output.
        audio_arr = np.zeros(SAMPLE_RATE)
    else:
        # NOTE: temperature controls (text_temp / waveform_temp) are not
        # currently wired through from the caller.
        audio_arr = generate_audio(text, history_prompt=history_prompt)

    # Assumes `generate_audio` yields floats nominally in [-1, 1] -- TODO
    # confirm. Clip before scaling so that an overshooting sample saturates
    # instead of overflowing the int16 cast and flipping sign.
    audio_arr = (np.clip(audio_arr, -1.0, 1.0) * 32767).astype(np.int16)
    return SAMPLE_RATE, audio_arr