Spaces:
Runtime error
Runtime error
Add text input and generate audio output
Browse files
app.py
CHANGED
@@ -4,22 +4,30 @@ import scipy.io.wavfile
|
|
4 |
import torch
|
5 |
import torch.nn.functional as F
|
6 |
from whisperspeech.pipeline import Pipeline
|
|
|
7 |
|
8 |
-
def process_audio(audio_elem):
|
|
|
|
|
9 |
|
10 |
-
scipy.io.wavfile.write('test.mp3', 48000, audio_elem[1])
|
11 |
-
|
12 |
# print out details about it
|
13 |
pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
|
14 |
# save audio_elem as a file
|
15 |
speaker = pipe.extract_spk_emb("test.mp3")
|
16 |
-
|
17 |
-
print(speaker)
|
18 |
#save it locally
|
19 |
-
np.savez_compressed("speaker", features=
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
# Define Gradio interface
|
24 |
-
# Build the demo once, then start the server. gr.Interface is a finished app
# object, so it is launched directly instead of being re-entered as a
# context manager with launch() called inside the still-open block.
iface = gr.Interface(fn=process_audio, inputs="audio", outputs="file")
iface.launch()
|
|
|
4 |
import torch
|
5 |
import torch.nn.functional as F
|
6 |
from whisperspeech.pipeline import Pipeline
|
7 |
+
import time
|
8 |
|
9 |
+
# Lazily created WhisperSpeech pipeline, reused by every request so the
# model is not rebuilt on each call.
_PIPELINE = None


def _get_pipeline():
    """Return the shared WhisperSpeech pipeline, building it on first use."""
    global _PIPELINE
    if _PIPELINE is None:
        _PIPELINE = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
    return _PIPELINE


def process_audio(audio_elem, text="This is a test voice genereation"):
    """Extract a speaker embedding from a reference clip and speak ``text`` with it.

    Args:
        audio_elem: Two-item sequence whose first item is used as the sample
            rate and whose second item is used as the sample data when the
            clip is written to disk (assumed to be the value Gradio passes
            for an "audio" input -- confirm against the Gradio version in use).
        text: Text handed to the pipeline for speech generation.

    Returns:
        A ``(embedding_path, audio_path)`` tuple: ``"speaker.npz"`` holding
        the speaker embedding under the ``features`` key, and ``"test.wav"``
        holding the generated speech.

    Raises:
        gr.Error: If no reference audio was supplied or speech generation
            fails, so the problem is reported instead of returning a stale
            or missing output file.
    """
    if audio_elem is None:
        raise gr.Error("Please provide a reference audio clip.")
    sample_rate, samples = audio_elem

    # scipy.io.wavfile.write always emits WAV data, so the file is named
    # accordingly rather than with a misleading ".mp3" extension.
    reference_path = 'reference.wav'
    scipy.io.wavfile.write(reference_path, sample_rate, samples)

    pipe = _get_pipeline()
    speaker = pipe.extract_spk_emb(reference_path)

    # Detach and move the embedding to the CPU before converting to numpy;
    # savez_compressed appends ".npz", producing "speaker.npz".
    np.savez_compressed("speaker", features=speaker.detach().cpu().numpy())

    try:
        pipe.generate_to_file('test.wav', text, lang='en', cps=10.5, speaker=speaker)
    except Exception as e:
        # Keep the original console log, but do not fall through and return
        # an output path that may not exist or may be from an earlier request.
        print("Error: ", e)
        raise gr.Error(f"Speech generation failed: {e}") from e

    return "speaker.npz", "test.wav"
|
27 |
+
|
28 |
+
|
29 |
|
30 |
|
31 |
# Define Gradio interface
|
32 |
+
# Build the demo once, then start the server. gr.Interface is a finished app
# object, so it is launched directly instead of being re-entered as a
# context manager with launch() called inside the still-open block.
# Inputs: reference audio clip and the text to speak.
# Outputs: the saved speaker-embedding file and the generated audio.
iface = gr.Interface(fn=process_audio, inputs=["audio", "text"], outputs=["file", "audio"])
iface.launch()
|