import io
import os

import gradio as gr
import nltk

# os.system("python -m unidic download")
from melo.api import TTS  # noqa: E402

# First-run setup: uncomment if NLTK reports that the English POS tagger
# resource is missing.
# nltk.download("averaged_perceptron_tagger_eng")

# Device selector handed to MeloTTS ("auto" presumably lets the library pick
# the best available backend -- confirm against the MeloTTS docs).
device = "auto"

# Load the English model once at import time so every request reuses it.
model = TTS(device=device, language="EN")

# Speaker-name -> speaker-id lookup published by the model's hyper-parameters.
speaker_ids = model.hps.data.spk2id


def inference(
        text: str, speed: float, speaker: str, progress=gr.Progress(track_tqdm=True)
) -> str:
    """Synthesize ``text`` into a WAV file and return the file's path.

    Args:
        text: Text to synthesize.
        speed: Speed value forwarded to ``model.tts_to_file``.
        speaker: Key into the module-level ``speaker_ids`` lookup.
        progress: Gradio progress tracker. It is not used in the body; it is
            declared with ``track_tqdm=True`` so Gradio can presumably mirror
            tqdm progress bars in the UI.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If ``speaker`` is not a speaker known to the loaded model.
    """
    import tempfile

    # Fail with a readable UI message instead of an opaque lookup error.
    if speaker not in speaker_ids:
        raise gr.Error(f"Unknown speaker: {speaker!r}")

    # Each request gets its own output file. A fixed "audio.wav" would be
    # overwritten by overlapping or back-to-back requests from the queue.
    # The file is intentionally left on disk (delete=False) so that it still
    # exists when the returned path is consumed.
    with tempfile.NamedTemporaryFile(
            prefix="tts_", suffix=".wav", delete=False
    ) as tmp:
        out_path = tmp.name

    model.tts_to_file(text, speaker_ids[speaker], out_path, speed=speed, format='wav')
    return out_path


if __name__ == "__main__":
    # Offer exactly the speakers the loaded model exposes. A hand-maintained
    # list can drift from the model and let the user pick a name that
    # `inference` cannot resolve.
    speaker_choices = list(speaker_ids.keys())
    # Keep "EN-US" as the default when the model provides it; otherwise fall
    # back to the first available speaker (or no preselection at all).
    if "EN-US" in speaker_choices:
        default_speaker = "EN-US"
    else:
        default_speaker = next(iter(speaker_choices), None)

    demo = gr.Interface(
        title="Text-to-Speech",
        description="Convert English text to speech",
        fn=inference,
        inputs=[
            gr.Textbox(label="Text to Synthesize"),
            gr.Slider(minimum=0.5, maximum=3.0, value=1.0, label="Speed"),
            gr.Dropdown(
                label="Speaker",
                choices=speaker_choices,
                value=default_speaker,
            ),
        ],
        outputs=[gr.Audio()],
        examples=[
            [
                "Hello, my name is Chi-ku-wa-bu. "
                "I am a text-to-speech system designed to assist you. "
                "How can I help you today?",
                1.0,
                default_speaker,
            ],
        ],
        cache_examples=False,
    )
    # Queue requests before serving the app.
    demo.queue().launch()