"""Gradio web demo that synthesizes English speech with MeloTTS."""

import io
import os
import tempfile

import gradio as gr
import nltk

# NOTE(review): the two commented-out commands below look like one-time
# resource downloads (unidic dictionary, NLTK tagger) -- confirm whether they
# are still required on a fresh environment before deleting them.
# os.system("python -m unidic download")
from melo.api import TTS  # noqa: E402

# nltk.download("averaged_perceptron_tagger_eng")

# Get device
device = "auto"
model = TTS(language="EN", device=device)

# Mapping from speaker name to the numeric speaker id expected by the model.
speaker_ids = model.hps.data.spk2id

# Offer exactly the speakers the loaded model knows about, so the UI can never
# submit a name that is missing from ``speaker_ids``.
SPEAKER_CHOICES = list(speaker_ids.keys())
PREFERRED_SPEAKER = "EN-US"
DEFAULT_SPEAKER = (
    PREFERRED_SPEAKER if PREFERRED_SPEAKER in SPEAKER_CHOICES else SPEAKER_CHOICES[0]
)


def inference(
    text: str,
    speed: float,
    speaker: str,
    progress=gr.Progress(track_tqdm=True),
) -> str:
    """Synthesize ``text`` to a WAV file and return the file's path.

    Args:
        text: The text to speak. Must contain at least one non-blank character.
        speed: Speed value forwarded to ``model.tts_to_file``.
        speaker: A key of ``speaker_ids`` selecting the voice.
        progress: Gradio progress tracker; declared so Gradio mirrors tqdm
            progress bars into the UI. It is not used directly.

    Returns:
        Path to a newly created WAV file holding the synthesized audio.

    Raises:
        gr.Error: If ``text`` is blank or ``speaker`` is not a known speaker.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if speaker not in SPEAKER_CHOICES:
        raise gr.Error(
            f"Unknown speaker {speaker!r}; choose one of: {', '.join(SPEAKER_CHOICES)}"
        )

    # A unique file per request keeps overlapping requests from overwriting
    # each other's audio and does not rely on a writable working directory.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        model.tts_to_file(
            text, speaker_ids[speaker], out_path, speed=speed, format="wav"
        )
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        if os.path.exists(out_path):
            os.remove(out_path)
        raise
    return out_path


def build_demo() -> gr.Interface:
    """Build the Gradio interface wired to :func:`inference`."""
    return gr.Interface(
        title="Text-to-Speech",
        description="Convert English text to speech",
        fn=inference,
        inputs=[
            gr.Textbox(label="Text to Synthesize"),
            gr.Slider(minimum=0.5, maximum=3.0, value=1.0, label="Speed"),
            gr.Dropdown(
                label="Speaker",
                choices=SPEAKER_CHOICES,
                value=DEFAULT_SPEAKER,
            ),
        ],
        outputs=[gr.Audio()],
        examples=[
            [
                "Hello, my name is Chi-ku-wa-bu. "
                "I am a text-to-speech system designed to assist you. "
                "How can I help you today?",
                1.0,
                DEFAULT_SPEAKER,
            ],
        ],
        cache_examples=False,
    )


if __name__ == "__main__":
    demo = build_demo()
    demo.queue().launch()