CazC committed on
Commit
c607d95
1 Parent(s): 61fdf99

Add text input and generate audio output

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -4,22 +4,30 @@ import scipy.io.wavfile
4
  import torch
5
  import torch.nn.functional as F
6
  from whisperspeech.pipeline import Pipeline
 
7
 
8
- def process_audio(audio_elem):
 
 
9
 
10
- scipy.io.wavfile.write('test.mp3', 48000, audio_elem[1])
11
-
12
  # print out details about it
13
  pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
14
  # save audio_elem as a file
15
  speaker = pipe.extract_spk_emb("test.mp3")
16
- speaker = speaker.cpu().numpy() # Move tensor from GPU to CPU and convert to numpy array
17
- print(speaker)
18
  #save it locally
19
- np.savez_compressed("speaker", features=speaker)
20
- return "speaker.npz"
 
 
 
 
 
 
 
 
21
 
22
 
23
  # Define Gradio interface
24
- with gr.Interface(fn=process_audio, inputs="audio", outputs="file") as iface:
25
  iface.launch()
 
4
  import torch
5
  import torch.nn.functional as F
6
  from whisperspeech.pipeline import Pipeline
7
+ import time
8
 
9
+ def process_audio(audio_elem,text="This is a test voice genereation"):
10
+
11
+ scipy.io.wavfile.write('test.mp3', audio_elem[0], audio_elem[1])
12
 
 
 
13
  # print out details about it
14
  pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
15
  # save audio_elem as a file
16
  speaker = pipe.extract_spk_emb("test.mp3")
17
+ speaker2 = speaker.cpu().numpy() # Move tensor from GPU to CPU and convert to numpy array
 
18
  #save it locally
19
+ np.savez_compressed("speaker", features=speaker2)
20
+ try:
21
+
22
+ pipe.generate_to_file('test.wav', text, lang='en', cps=10.5, speaker=speaker)
23
+ except Exception as e:
24
+ print("Error: ", e)
25
+
26
+ return "speaker.npz", "test.wav"
27
+
28
+
29
 
30
 
31
  # Define Gradio interface
32
+ with gr.Interface(fn=process_audio, inputs=["audio","text"], outputs=["file",'audio']) as iface:
33
  iface.launch()