OpenSound committed on
Commit
eb39bd3
1 Parent(s): 5d104f9
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +12 -6
  2. audiotools/__pycache__/__init__.cpython-310.pyc +0 -0
  3. audiotools/core/__pycache__/__init__.cpython-310.pyc +0 -0
  4. audiotools/core/__pycache__/audio_signal.cpython-310.pyc +0 -0
  5. audiotools/core/__pycache__/display.cpython-310.pyc +0 -0
  6. audiotools/core/__pycache__/dsp.cpython-310.pyc +0 -0
  7. audiotools/core/__pycache__/effects.cpython-310.pyc +0 -0
  8. audiotools/core/__pycache__/ffmpeg.cpython-310.pyc +0 -0
  9. audiotools/core/__pycache__/loudness.cpython-310.pyc +0 -0
  10. audiotools/core/__pycache__/playback.cpython-310.pyc +0 -0
  11. audiotools/core/__pycache__/util.cpython-310.pyc +0 -0
  12. audiotools/core/__pycache__/whisper.cpython-310.pyc +0 -0
  13. audiotools/core/templates/__pycache__/__init__.cpython-310.pyc +0 -0
  14. audiotools/data/__pycache__/__init__.cpython-310.pyc +0 -0
  15. audiotools/data/__pycache__/datasets.cpython-310.pyc +0 -0
  16. audiotools/data/__pycache__/preprocess.cpython-310.pyc +0 -0
  17. audiotools/data/__pycache__/transforms.cpython-310.pyc +0 -0
  18. audiotools/metrics/__pycache__/__init__.cpython-310.pyc +0 -0
  19. audiotools/metrics/__pycache__/distance.cpython-310.pyc +0 -0
  20. audiotools/metrics/__pycache__/quality.cpython-310.pyc +0 -0
  21. audiotools/metrics/__pycache__/spectral.cpython-310.pyc +0 -0
  22. audiotools/ml/__pycache__/__init__.cpython-310.pyc +0 -0
  23. audiotools/ml/__pycache__/accelerator.cpython-310.pyc +0 -0
  24. audiotools/ml/__pycache__/decorators.cpython-310.pyc +0 -0
  25. audiotools/ml/__pycache__/experiment.cpython-310.pyc +0 -0
  26. audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc +0 -0
  27. audiotools/ml/layers/__pycache__/base.cpython-310.pyc +0 -0
  28. audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc +0 -0
  29. src/__pycache__/inference.cpython-310.pyc +0 -0
  30. src/models/__pycache__/blocks.cpython-310.pyc +0 -0
  31. src/models/__pycache__/conditioners.cpython-310.pyc +0 -0
  32. src/models/__pycache__/udit.cpython-310.pyc +0 -0
  33. src/models/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  34. src/models/utils/__pycache__/attention.cpython-310.pyc +0 -0
  35. src/models/utils/__pycache__/modules.cpython-310.pyc +0 -0
  36. src/models/utils/__pycache__/rotary.cpython-310.pyc +0 -0
  37. src/models/utils/__pycache__/span_mask.cpython-310.pyc +0 -0
  38. src/models/utils/__pycache__/timm.cpython-310.pyc +0 -0
  39. src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc +0 -0
  40. src/modules/dac/__pycache__/__init__.cpython-310.pyc +0 -0
  41. src/modules/dac/model/__pycache__/__init__.cpython-310.pyc +0 -0
  42. src/modules/dac/model/__pycache__/base.cpython-310.pyc +0 -0
  43. src/modules/dac/model/__pycache__/dac.cpython-310.pyc +0 -0
  44. src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc +0 -0
  45. src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc +0 -0
  46. src/modules/dac/nn/__pycache__/layers.cpython-310.pyc +0 -0
  47. src/modules/dac/nn/__pycache__/loss.cpython-310.pyc +0 -0
  48. src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc +0 -0
  49. src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  50. src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc +0 -0
app.py CHANGED
@@ -1,10 +1,11 @@
1
  import os
2
  import torch
3
  import random
4
- import spaces
5
  import numpy as np
6
  import gradio as gr
7
  import soundfile as sf
 
8
  from transformers import T5Tokenizer, T5EncoderModel
9
  from diffusers import DDIMScheduler
10
  from src.models.conditioners import MaskDiT
@@ -33,9 +34,12 @@ def load_models(config_name, ckpt_path, vae_path, device):
33
  unet.load_state_dict(torch.load(ckpt_path)['model'])
34
  unet.eval()
35
 
 
 
 
36
  # Load noise scheduler
37
  noise_scheduler = DDIMScheduler(**params['diff'])
38
-
39
  latents = torch.randn((1, 128, 128), device=device)
40
  noise = torch.randn_like(latents)
41
  timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
@@ -43,6 +47,7 @@ def load_models(config_name, ckpt_path, vae_path, device):
43
 
44
  return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params
45
 
 
46
  MAX_SEED = np.iinfo(np.int32).max
47
 
48
  # Model and config paths
@@ -57,6 +62,7 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
57
  autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
58
  device)
59
 
 
60
  @spaces.GPU
61
  def generate_audio(text, length,
62
  guidance_scale, guidance_rescale, ddim_steps, eta,
@@ -102,7 +108,7 @@ css = """
102
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
103
  with gr.Column(elem_id="col-container"):
104
  gr.Markdown("""
105
- # EzAudio Text-to-Audio Generator
106
  Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
107
  """)
108
 
@@ -125,10 +131,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
125
  with gr.Accordion("Advanced Settings", open=False):
126
  guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
127
  guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
128
- ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=100, label="DDIM Steps")
129
  eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
130
  seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
131
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)
132
 
133
  # Examples block
134
  gr.Examples(
@@ -147,4 +153,4 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
147
  )
148
 
149
  # Launch the Gradio demo
150
- demo.launch()
 
1
  import os
2
  import torch
3
  import random
4
+ # import spaces
5
  import numpy as np
6
  import gradio as gr
7
  import soundfile as sf
8
+ from accelerate import Accelerator
9
  from transformers import T5Tokenizer, T5EncoderModel
10
  from diffusers import DDIMScheduler
11
  from src.models.conditioners import MaskDiT
 
34
  unet.load_state_dict(torch.load(ckpt_path)['model'])
35
  unet.eval()
36
 
37
+ accelerator = Accelerator(mixed_precision="fp16")
38
+ unet = accelerator.prepare(unet)
39
+
40
  # Load noise scheduler
41
  noise_scheduler = DDIMScheduler(**params['diff'])
42
+
43
  latents = torch.randn((1, 128, 128), device=device)
44
  noise = torch.randn_like(latents)
45
  timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
 
47
 
48
  return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params
49
 
50
+
51
  MAX_SEED = np.iinfo(np.int32).max
52
 
53
  # Model and config paths
 
62
  autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
63
  device)
64
 
65
+
66
  @spaces.GPU
67
  def generate_audio(text, length,
68
  guidance_scale, guidance_rescale, ddim_steps, eta,
 
108
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
109
  with gr.Column(elem_id="col-container"):
110
  gr.Markdown("""
111
+ # EzAudio: High-quality Text-to-Audio Generator
112
  Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
113
  """)
114
 
 
131
  with gr.Accordion("Advanced Settings", open=False):
132
  guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
133
  guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
134
+ ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=50, label="DDIM Steps")
135
  eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
136
  seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
137
+ randomize_seed = gr.Checkbox(label="Randomize Seed (Disable Seed)", value=True)
138
 
139
  # Examples block
140
  gr.Examples(
 
153
  )
154
 
155
  # Launch the Gradio demo
156
+ demo.launch()
audiotools/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (432 Bytes). View file
 
audiotools/core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (283 Bytes). View file
 
audiotools/core/__pycache__/audio_signal.cpython-310.pyc ADDED
Binary file (45.4 kB). View file
 
audiotools/core/__pycache__/display.cpython-310.pyc ADDED
Binary file (6.36 kB). View file
 
audiotools/core/__pycache__/dsp.cpython-310.pyc ADDED
Binary file (11.6 kB). View file
 
audiotools/core/__pycache__/effects.cpython-310.pyc ADDED
Binary file (17.5 kB). View file
 
audiotools/core/__pycache__/ffmpeg.cpython-310.pyc ADDED
Binary file (5.59 kB). View file
 
audiotools/core/__pycache__/loudness.cpython-310.pyc ADDED
Binary file (8.44 kB). View file
 
audiotools/core/__pycache__/playback.cpython-310.pyc ADDED
Binary file (6.87 kB). View file
 
audiotools/core/__pycache__/util.cpython-310.pyc ADDED
Binary file (18.6 kB). View file
 
audiotools/core/__pycache__/whisper.cpython-310.pyc ADDED
Binary file (2.93 kB). View file
 
audiotools/core/templates/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (145 Bytes). View file
 
audiotools/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (234 Bytes). View file
 
audiotools/data/__pycache__/datasets.cpython-310.pyc ADDED
Binary file (17 kB). View file
 
audiotools/data/__pycache__/preprocess.cpython-310.pyc ADDED
Binary file (2.83 kB). View file
 
audiotools/data/__pycache__/transforms.cpython-310.pyc ADDED
Binary file (55.5 kB). View file
 
audiotools/metrics/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (310 Bytes). View file
 
audiotools/metrics/__pycache__/distance.cpython-310.pyc ADDED
Binary file (3.82 kB). View file
 
audiotools/metrics/__pycache__/quality.cpython-310.pyc ADDED
Binary file (4.45 kB). View file
 
audiotools/metrics/__pycache__/spectral.cpython-310.pyc ADDED
Binary file (7.43 kB). View file
 
audiotools/ml/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (321 Bytes). View file
 
audiotools/ml/__pycache__/accelerator.cpython-310.pyc ADDED
Binary file (6.65 kB). View file
 
audiotools/ml/__pycache__/decorators.cpython-310.pyc ADDED
Binary file (14.2 kB). View file
 
audiotools/ml/__pycache__/experiment.cpython-310.pyc ADDED
Binary file (3.32 kB). View file
 
audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (228 Bytes). View file
 
audiotools/ml/layers/__pycache__/base.cpython-310.pyc ADDED
Binary file (9.27 kB). View file
 
audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc ADDED
Binary file (3.87 kB). View file
 
src/__pycache__/inference.cpython-310.pyc ADDED
Binary file (4.24 kB). View file
 
src/models/__pycache__/blocks.cpython-310.pyc ADDED
Binary file (7.27 kB). View file
 
src/models/__pycache__/conditioners.cpython-310.pyc ADDED
Binary file (5.59 kB). View file
 
src/models/__pycache__/udit.cpython-310.pyc ADDED
Binary file (7.86 kB). View file
 
src/models/utils/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/src/models/utils/__pycache__/__init__.cpython-310.pyc and b/src/models/utils/__pycache__/__init__.cpython-310.pyc differ
 
src/models/utils/__pycache__/attention.cpython-310.pyc CHANGED
Binary files a/src/models/utils/__pycache__/attention.cpython-310.pyc and b/src/models/utils/__pycache__/attention.cpython-310.pyc differ
 
src/models/utils/__pycache__/modules.cpython-310.pyc CHANGED
Binary files a/src/models/utils/__pycache__/modules.cpython-310.pyc and b/src/models/utils/__pycache__/modules.cpython-310.pyc differ
 
src/models/utils/__pycache__/rotary.cpython-310.pyc CHANGED
Binary files a/src/models/utils/__pycache__/rotary.cpython-310.pyc and b/src/models/utils/__pycache__/rotary.cpython-310.pyc differ
 
src/models/utils/__pycache__/span_mask.cpython-310.pyc CHANGED
Binary files a/src/models/utils/__pycache__/span_mask.cpython-310.pyc and b/src/models/utils/__pycache__/span_mask.cpython-310.pyc differ
 
src/models/utils/__pycache__/timm.cpython-310.pyc CHANGED
Binary files a/src/models/utils/__pycache__/timm.cpython-310.pyc and b/src/models/utils/__pycache__/timm.cpython-310.pyc differ
 
src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc ADDED
Binary file (2.34 kB). View file
 
src/modules/dac/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (451 Bytes). View file
 
src/modules/dac/model/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (292 Bytes). View file
 
src/modules/dac/model/__pycache__/base.cpython-310.pyc ADDED
Binary file (7.19 kB). View file
 
src/modules/dac/model/__pycache__/dac.cpython-310.pyc ADDED
Binary file (10.6 kB). View file
 
src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc ADDED
Binary file (7.99 kB). View file
 
src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (227 Bytes). View file
 
src/modules/dac/nn/__pycache__/layers.cpython-310.pyc ADDED
Binary file (1.45 kB). View file
 
src/modules/dac/nn/__pycache__/loss.cpython-310.pyc ADDED
Binary file (11.6 kB). View file
 
src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc ADDED
Binary file (8.66 kB). View file
 
src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.84 kB). View file
 
src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.2 kB). View file