fffiloni committed on
Commit 5d9f0c4
1 Parent(s): fd67dba

Update app.py

Files changed (1)
  1. app.py +106 -97
app.py CHANGED
@@ -94,95 +94,106 @@ def get_caption(image_in):
 def get_magnet(prompt):
     amended_prompt = f"{prompt}"
     print(amended_prompt)
-    client = Client("https://fffiloni-magnet.hf.space/")
-    result = client.predict(
-        "facebook/audio-magnet-medium", # Literal['facebook/magnet-small-10secs', 'facebook/magnet-medium-10secs', 'facebook/magnet-small-30secs', 'facebook/magnet-medium-30secs', 'facebook/audio-magnet-small', 'facebook/audio-magnet-medium'] in 'Model' Radio component
-        "", # str in 'Model Path (custom models)' Textbox component
-        amended_prompt, # str in 'Input Text' Textbox component
-        3, # float in 'Temperature' Number component
-        0.9, # float in 'Top-p' Number component
-        10, # float in 'Max CFG coefficient' Number component
-        1, # float in 'Min CFG coefficient' Number component
-        20, # float in 'Decoding Steps (stage 1)' Number component
-        10, # float in 'Decoding Steps (stage 2)' Number component
-        10, # float in 'Decoding Steps (stage 3)' Number component
-        10, # float in 'Decoding Steps (stage 4)' Number component
-        "prod-stride1 (new!)", # Literal['max-nonoverlap', 'prod-stride1 (new!)'] in 'Span Scoring' Radio component
-        api_name="/predict_full"
-    )
-    print(result)
-    return result[1]
+    try:
+        client = Client("https://fffiloni-magnet.hf.space/")
+        result = client.predict(
+            "facebook/audio-magnet-medium", # Literal['facebook/magnet-small-10secs', 'facebook/magnet-medium-10secs', 'facebook/magnet-small-30secs', 'facebook/magnet-medium-30secs', 'facebook/audio-magnet-small', 'facebook/audio-magnet-medium'] in 'Model' Radio component
+            "", # str in 'Model Path (custom models)' Textbox component
+            amended_prompt, # str in 'Input Text' Textbox component
+            3, # float in 'Temperature' Number component
+            0.9, # float in 'Top-p' Number component
+            10, # float in 'Max CFG coefficient' Number component
+            1, # float in 'Min CFG coefficient' Number component
+            20, # float in 'Decoding Steps (stage 1)' Number component
+            10, # float in 'Decoding Steps (stage 2)' Number component
+            10, # float in 'Decoding Steps (stage 3)' Number component
+            10, # float in 'Decoding Steps (stage 4)' Number component
+            "prod-stride1 (new!)", # Literal['max-nonoverlap', 'prod-stride1 (new!)'] in 'Span Scoring' Radio component
+            api_name="/predict_full"
+        )
+        print(result)
+        return result[1]
+    except:
+        raise gr.Error("MAGNet space API is not ready, please try again in few minutes ")
 
 def get_audioldm(prompt):
-    client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
-    result = client.predict(
-        prompt, # str in 'Input text' Textbox component
-        "Low quality. Music.", # str in 'Negative prompt' Textbox component
-        10, # int | float (numeric value between 5 and 15) in 'Duration (seconds)' Slider component
-        3.5, # int | float (numeric value between 0 and 7) in 'Guidance scale' Slider component
-        45, # int | float in 'Seed' Number component
-        3, # int | float (numeric value between 1 and 5) in 'Number waveforms to generate' Slider component
-        fn_index=1
-    )
-    print(result)
-    audio_result = extract_audio(result)
-    return audio_result
+    try:
+        client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
+        result = client.predict(
+            prompt, # str in 'Input text' Textbox component
+            "Low quality. Music.", # str in 'Negative prompt' Textbox component
+            10, # int | float (numeric value between 5 and 15) in 'Duration (seconds)' Slider component
+            3.5, # int | float (numeric value between 0 and 7) in 'Guidance scale' Slider component
+            45, # int | float in 'Seed' Number component
+            3, # int | float (numeric value between 1 and 5) in 'Number waveforms to generate' Slider component
+            fn_index=1
+        )
+        print(result)
+        audio_result = extract_audio(result)
+        return audio_result
+    except:
+        raise gr.Error("AudioLDM space API is not ready, please try again in few minutes ")
 
 def get_audiogen(prompt):
-    client = Client("https://fffiloni-audiogen.hf.space/")
-    result = client.predict(
-        prompt,
-        10,
-        api_name="/infer"
-    )
-    return result
+    try:
+        client = Client("https://fffiloni-audiogen.hf.space/")
+        result = client.predict(
+            prompt,
+            10,
+            api_name="/infer"
+        )
+        return result
+    except:
+        raise gr.Error("AudioGen space API is not ready, please try again in few minutes ")
 
 def get_tango(prompt):
     try:
-        #client = Client("https://declare-lab-tango.hf.space/")
-        client = Client("https://fffiloni-tango.hf.space/", hf_token=hf_token)
-    except:
-        raise gr.Error("Tango space API is not ready, please try again in few minutes ")
-
-    result = client.predict(
+        client = Client("fffiloni/tango", hf_token=hf_token)
+        result = client.predict(
             prompt, # str representing string value in 'Prompt' Textbox component
             100, # int | float representing numeric value between 100 and 200 in 'Steps' Slider component
             4, # int | float representing numeric value between 1 and 10 in 'Guidance Scale' Slider component
             api_name="/predict"
-    )
-    print(result)
-    return result
+        )
+        print(result)
+        return result
+    except:
+        raise gr.Error("Tango space API is not ready, please try again in few minutes ")
+
+
 
 def get_tango2(prompt):
     try:
         client = Client("declare-lab/tango2")
-    except:
-        raise gr.Error("Tango2 space API is not ready, please try again in few minutes ")
-
-    result = client.predict(
+        result = client.predict(
             prompt,
             100,
             4,
             api_name="/predict"
-    )
-    print(result)
-    return result
+        )
+        print(result)
+        return result
+    except:
+        raise gr.Error("Tango2 space API is not ready, please try again in few minutes ")
+
+
 
 def get_stable_audio_open(prompt):
     try:
         client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
+        result = client.predict(
+            prompt=prompt,
+            seconds_total=30,
+            steps=100,
+            cfg_scale=7,
+            api_name="/predict"
+        )
+        print(result)
+        return result
     except:
         raise gr.Error("Stable Audio Open space API is not ready, please try again in few minutes ")
 
-    result = client.predict(
-        prompt=prompt,
-        seconds_total=30,
-        steps=100,
-        cfg_scale=7,
-        api_name="/predict"
-    )
-    print(result)
-    return result
+
 
 def blend_vsfx(video_in, audio_result):
     audioClip = AudioFileClip(audio_result)
@@ -203,46 +214,44 @@ def blend_vsfx(video_in, audio_result):
 def infer(video_in, chosen_model):
     image_in = extract_firstframe(video_in)
     caption = get_caption(image_in)
-    try:
-        if chosen_model == "MAGNet" :
-            audio_result = get_magnet(caption)
-        elif chosen_model == "AudioLDM-2" :
-            audio_result = get_audioldm(caption)
-        elif chosen_model == "AudioGen" :
-            audio_result = get_audiogen(caption)
-        elif chosen_model == "Tango" :
-            audio_result = get_tango(caption)
-        elif chosen_model == "Tango 2" :
-            audio_result = get_tango2(caption)
-        elif chosen_model == "Stable Audio Open" :
-            audio_result = get_stable_audio_open(caption)
-        final_res = blend_vsfx(video_in, audio_result)
-        return gr.update(value=caption, interactive=True), gr.update(interactive=True), audio_result, final_res
-    except:
-        raise gr.Error(f"an error occured with {chosen_model}")
+
+    if chosen_model == "MAGNet" :
+        audio_result = get_magnet(caption)
+    elif chosen_model == "AudioLDM-2" :
+        audio_result = get_audioldm(caption)
+    elif chosen_model == "AudioGen" :
+        audio_result = get_audiogen(caption)
+    elif chosen_model == "Tango" :
+        audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
+    final_res = blend_vsfx(video_in, audio_result)
+    return gr.update(value=caption, interactive=True), gr.update(interactive=True), audio_result, final_res
+
 
 
 
 def retry(edited_prompt, video_in, chosen_model):
     image_in = extract_firstframe(video_in)
     caption = edited_prompt
-    try:
-        if chosen_model == "MAGNet" :
-            audio_result = get_magnet(caption)
-        elif chosen_model == "AudioLDM-2" :
-            audio_result = get_audioldm(caption)
-        elif chosen_model == "AudioGen" :
-            audio_result = get_audiogen(caption)
-        elif chosen_model == "Tango" :
-            audio_result = get_tango(caption)
-        elif chosen_model == "Tango 2" :
-            audio_result = get_tango2(caption)
-        elif chosen_model == "Stable Audio Open" :
-            audio_result = get_stable_audio_open(caption)
-        final_res = blend_vsfx(video_in, audio_result)
-        return audio_result, final_res
-    except:
-        raise gr.Error(f"an error occured with {chosen_model}")
+
+    if chosen_model == "MAGNet" :
+        audio_result = get_magnet(caption)
+    elif chosen_model == "AudioLDM-2" :
+        audio_result = get_audioldm(caption)
+    elif chosen_model == "AudioGen" :
+        audio_result = get_audiogen(caption)
+    elif chosen_model == "Tango" :
+        audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
+    final_res = blend_vsfx(video_in, audio_result)
+    return audio_result, final_res
+
 
 
 def refresh():
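
For reference, the shape this commit converges on for every get_* helper is: open the gradio_client connection and run the whole predict call inside one try block, and surface any failure to the UI as a gr.Error instead of letting infer()/retry() catch it with a blanket handler, so the user can see which model backend failed. Below is a minimal sketch of that pattern, not part of the commit: the Space id "some-user/some-sfx-space" and the helper name get_audio_sfx are made up for illustration, and it uses except Exception where the diff itself uses a bare except.

import gradio as gr
from gradio_client import Client


def get_audio_sfx(prompt):
    # Hypothetical helper mirroring the error-handling shape used in this commit.
    try:
        # Connect to the remote Space and call its exposed endpoint.
        client = Client("some-user/some-sfx-space")  # made-up Space id
        result = client.predict(
            prompt,               # text prompt forwarded to the Space
            10,                   # example duration value, as get_audiogen passes
            api_name="/predict"   # endpoint name exposed by the target Space
        )
        print(result)
        return result             # filepath of the generated audio
    except Exception:
        # Same user-facing behaviour as the diff: a readable, model-specific error.
        raise gr.Error("Audio space API is not ready, please try again in few minutes ")

Because each helper now raises its own gr.Error, the dispatch code in infer() and retry() can drop the outer try/except that previously hid which backend actually failed.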