Drew Skillman committed on
Commit
2a76b54
1 Parent(s): f581b9c

Switch to a cached, pre-loaded model and stop using ZeroGPU

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. app.py +10 -4
.gitignore CHANGED
@@ -1 +1,3 @@
1
  .DS_Store
 
 
 
1
  .DS_Store
2
+ *.wav
3
+ *.mp3
app.py CHANGED
@@ -11,16 +11,20 @@ from pydub import AudioSegment
11
  from stable_audio_tools import get_pretrained_model
12
  from stable_audio_tools.inference.generation import generate_diffusion_cond
13
 
 
 
14
  # Load the model outside of the GPU-decorated function
15
  def load_model():
 
16
  print("Loading model...")
17
  model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")
18
  print("Model loaded successfully.")
19
  return model, model_config
20
 
21
  # Function to set up, generate, and process the audio
22
- @spaces.GPU(duration=120) # Allocate GPU only when this function is called
23
  def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
 
 
24
  print(f"Prompt received: {prompt}")
25
  print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
26
 
@@ -32,7 +36,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
32
  print(f"Hugging Face token: {hf_token}")
33
 
34
  # Use pre-loaded model and configuration
35
- model, model_config = load_model()
36
  sample_rate = model_config["sample_rate"]
37
  sample_size = model_config["sample_size"]
38
 
@@ -79,7 +83,8 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
79
  # Save to file
80
  torchaudio.save(unique_filename, output, sample_rate)
81
  print(f"Audio saved: {unique_filename}")
82
-
 
83
  # Convert WAV to MP3 using pydub without ffmpeg
84
  audio = AudioSegment.from_wav(unique_filename)
85
  full_path_mp3 = unique_filename.replace('wav', 'mp3')
@@ -89,6 +94,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
89
 
90
  # Return the path to the generated audio file
91
  return full_path_mp3
 
92
 
93
  # Setting up the Gradio Interface
94
  interface = gr.Interface(
@@ -117,4 +123,4 @@ with gr.Blocks() as demo:
117
  # Pre-load the model to avoid multiprocessing issues
118
  model, model_config = load_model()
119
 
120
- demo.launch()
 
11
  from stable_audio_tools import get_pretrained_model
12
  from stable_audio_tools.inference.generation import generate_diffusion_cond
13
 
14
+ global model, model_config
15
+
16
  # Load the model outside of the GPU-decorated function
17
  def load_model():
18
+ global model, model_config
19
  print("Loading model...")
20
  model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")
21
  print("Model loaded successfully.")
22
  return model, model_config
23
 
24
  # Function to set up, generate, and process the audio
 
25
  def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
26
+ global model, model_config
27
+
28
  print(f"Prompt received: {prompt}")
29
  print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
30
 
 
36
  print(f"Hugging Face token: {hf_token}")
37
 
38
  # Use pre-loaded model and configuration
39
+ #model, model_config = load_model()
40
  sample_rate = model_config["sample_rate"]
41
  sample_size = model_config["sample_size"]
42
 
 
83
  # Save to file
84
  torchaudio.save(unique_filename, output, sample_rate)
85
  print(f"Audio saved: {unique_filename}")
86
+ return unique_filename
87
+ '''
88
  # Convert WAV to MP3 using pydub without ffmpeg
89
  audio = AudioSegment.from_wav(unique_filename)
90
  full_path_mp3 = unique_filename.replace('wav', 'mp3')
 
94
 
95
  # Return the path to the generated audio file
96
  return full_path_mp3
97
+ '''
98
 
99
  # Setting up the Gradio Interface
100
  interface = gr.Interface(
 
123
  # Pre-load the model to avoid multiprocessing issues
124
  model, model_config = load_model()
125
 
126
+ demo.launch(share=True)