"""Music genre classification Gradio app.

Downloads a fastai learner from the Hugging Face Hub, converts an uploaded
audio file into a mel-spectrogram image, and classifies its genre.
"""

import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download

# Exported fastai learner fetched from the Hub (cached locally by hf_hub_download).
model = load_learner(
    hf_hub_download("kurianbenoy/music_genre_classification_baseline", "model.pkl")
)

EXAMPLES_PATH = Path("./examples")
labels = model.dls.vocab

interface_options = {
    "title": "Music Genre Classification",
    "description": " ",
    "interpretation": "default",
    "layout": "horizontal",
    # Audio from validation file
    "examples": ["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"],
    "allow_flagging": "never",
}

## Code from Dien Hoa Truong inference notebook:
## https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
N_FFT = 2048
HOP_LEN = 1024

# Single scratch path shared by the image-creation and prediction steps
# (was previously hard-coded as "temp.png" in three separate places).
TEMP_IMAGE = Path("temp.png")


def create_spectrogram(filename):
    """Return a [0, 1]-normalised mel-spectrogram (dB scale) for *filename*.

    The audio is loaded with torchaudio, converted to a 224-band mel power
    spectrogram, averaged across channels to mono, mapped to decibels, and
    min-max normalised so it can be saved as a grayscale image.
    """
    audio, sr = torchaudio.load(filename)
    specgram = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )(audio).mean(axis=0)  # average channels into a mono spectrogram
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)
    # Min-max normalise to [0, 1] before writing as an image.
    specgram = specgram - specgram.min()
    specgram = specgram / specgram.max()
    return specgram


def create_image(filename):
    """Render the spectrogram of *filename* to TEMP_IMAGE and return its path."""
    specgram = create_spectrogram(filename)
    # Fix: the original bound `dest = Path("temp.png")` but never used it and
    # returned nothing; now the destination path is used and returned.
    save_image(specgram, TEMP_IMAGE)
    return TEMP_IMAGE


# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
def predict(img):
    """Classify a spectrogram image; return a {label: probability} mapping."""
    img = PILImage.create(img)
    _pred, _pred_w_idx, probs = model.predict(img)
    # gradio doesn't support tensors, so converting to float
    labels_probs = {labels[i]: float(probs[i]) for i, _ in enumerate(labels)}
    return labels_probs


def end2endpipeline(filename):
    """Full pipeline: audio file path -> spectrogram image -> genre probabilities."""
    return predict(create_image(filename))


demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=5),
    **interface_options,
)

launch_options = {
    "enable_queue": True,
    "share": False,
    # thanks Alex for pointing this option to cache examples
    "cache_examples": True,
}

demo.launch(**launch_options)