"""Gradio demo that synthesizes English speech from text with MeloTTS."""

import os

import gradio as gr
import nltk

# The unidic download runs before the melo import below, which is why that
# import carries an E402 (module-import-not-at-top) waiver.
os.system("python -m unidic download")

from melo.api import TTS  # noqa: E402

nltk.download("averaged_perceptron_tagger_eng")

# Get device
device = "auto"
model = TTS(language="EN", device=device)
# Mapping from speaker name (e.g. "EN-US") to the id passed to tts_to_file.
speaker_ids = model.hps.data.spk2id


def inference(
    text: str, speed: float, speaker: str, progress=gr.Progress(track_tqdm=True)
) -> str:
    """Synthesize ``text`` to a WAV file and return the file's path.

    Args:
        text: Text to synthesize.
        speed: Speed value forwarded to ``model.tts_to_file``.
        speaker: Speaker name; must be a key of ``speaker_ids``.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        Path of the written audio file (always ``"audio.wav"``).

    Raises:
        gr.Error: If the speaker lookup or the synthesis fails; the message
            is the string form of the underlying exception.
    """
    # NOTE(review): every call writes to the same fixed path, so overlapping
    # requests would overwrite each other's output -- confirm this is acceptable.
    out_path = "audio.wav"
    try:
        model.tts_to_file(
            text,
            speaker_ids[speaker],
            out_path,
            speed=speed,
            format="wav",
        )
    except Exception as e:
        # gr.Error must be *raised* for Gradio to surface it as an error;
        # returning it would pass an exception object to the Audio output.
        raise gr.Error(str(e)) from e
    return out_path


if __name__ == "__main__":
    theme = gr.themes.Soft(
        primary_hue=gr.themes.colors.emerald, secondary_hue=gr.themes.colors.emerald
    )
    sample_text = (
        "Hello, my name is Chi-ku-wa-bu. "
        "I am a text-to-speech system designed to assist you. "
        "How can I help you today?"
    )
    demo = gr.Interface(
        title="Text-to-Speech",
        description="Convert English text to speech",
        fn=inference,
        inputs=[
            gr.Textbox(label="Text to Synthesize", value=sample_text),
            gr.Slider(minimum=0.5, maximum=3.0, value=1.0, label="Speed"),
            gr.Dropdown(
                label="Speaker",
                choices=["EN-US", "EN-BR", "EN_INDIA", "EN-AU", "EN-Default"],
                value="EN-US",
            ),
        ],
        # NOTE(review): presets the output to "audio.wav" -- presumably the file
        # must already exist when the app starts; confirm.
        outputs=[gr.Audio(value="audio.wav")],
        examples=[
            [
                sample_text,
                1.0,
                "EN-US",
            ],
        ],
        cache_examples=False,
        theme=theme,
    )
    demo.queue().launch()