import gradio as gr
from audiocraft.data.audio_utils import normalize_audio
from audiocraft.models import MusicGen
from audiotools import AudioSignal

from pyharp import ModelCard, build_endpoint, save_and_return_filepath

card = ModelCard(
    name='Micro Musicgen Jungle',
    description="The jungle version of the micro-musicgen model series. Use a prompt duration of 0 to generate unconditional audio. (WHICH WORKS BETTER) Outpainting is not really tested by me, I just thought it would be cool to have it here because you work with input audio.\n\n HAVE FUNNNNNNNNN",
    author='Aaron Abebe',
    tags=['musicgen', 'jungle', 'micro-musicgen', 'unconditional', 'generation']
)

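# Load the pretrained micro-musicgen-jungle weights from the Hugging Face Hub.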
model = MusicGen.get_pretrained("pharoAIsanders420/micro-musicgen-jungle")


def process_fn(input_audio_path, gen_duration, prompt_duration):
    """
    Process the input audio and generate new audio by sampling from the micro-musicgen-jungle model.
    Supports both unconditional and conditional generation.

    Args:
        input_audio_path (str): the audio filepath to be processed.
        gen_duration (int): the duration of the generated audio, in seconds.
        prompt_duration (int): the duration of the input conditioning audio, in seconds.

    Returns:
        output_audio_path (str): the filepath of the processed audio.
    """
    sig = AudioSignal(input_audio_path)
    y, sr = sig.audio_data[0], sig.sample_rate

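    # Sampling settings for generation: duration in seconds, temperature just
    # above 1.0, and a moderate classifier-free guidance coefficient (cfg_coef).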
    model.set_generation_params(
        duration=gen_duration,
        temperature=1.05,
        cfg_coef=3,
    )

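    # A prompt duration of 0 (or None) means fully unconditional sampling;
    # otherwise the tail of the input audio is used as a continuation prompt.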
    if prompt_duration is None or prompt_duration == 0:
        output = model.generate_unconditional(1)
    else:
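        # Condition on the last `prompt_duration` seconds of the input audio.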
        num_samples = int(prompt_duration * sr)
        if y.shape[1] < num_samples:
            raise ValueError("The existing audio is too short for the specified prompt duration.")

        start_sample = y.shape[1] - num_samples
        prompt_waveform = y[..., start_sample:]
        output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr)

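    # Loudness-normalize the generated audio before writing it back to disk.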
    output = normalize_audio(
        output,
        sample_rate=model.sample_rate, # musicgen outputs at 32kHz
        strategy="loudness",
        loudness_headroom_db=10,
        loudness_compressor=True,
    )
    # MusicGen outputs at the model's own sample rate (32 kHz), which can differ
    # from the input file's rate, so wrap the output in a fresh AudioSignal.
    sig = AudioSignal(output.cpu(), sample_rate=model.sample_rate)
    return save_and_return_filepath(sig)


with gr.Blocks() as demo:
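    # Input widgets, in the same order as process_fn's arguments:
    # (input_audio_path, gen_duration, prompt_duration).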
    inputs = [
        gr.Audio(
            label="Ignore Me: I only generate, I don't consume",
            type='filepath'
        ),
        gr.Slider(
            minimum=10,
            maximum=30,
            step=1,
            value=10,
            label="Generation Duration"
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=2,
            label="Input Conditioning Duration"
        ),
    ]

    output = gr.Audio(label='Audio Output', type='filepath')
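    # build_endpoint wires the inputs, output, processing function, and model
    # card into a HARP-compatible Gradio endpoint.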
    widgets = build_endpoint(inputs, output, process_fn, card)

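# Queue requests and launch with a public share link so HARP can reach the app.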
demo.queue()
demo.launch(share=True)