CazC committed on
Commit
61fdf99
1 Parent(s): a16d8d2
Files changed (2) hide show
  1. app.py +25 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import scipy.io.wavfile
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from whisperspeech.pipeline import Pipeline
7
+
8
_PIPELINE = None  # WhisperSpeech pipeline, created lazily and reused across requests


def _get_pipeline():
    """Return the shared WhisperSpeech pipeline, building it on first use."""
    global _PIPELINE
    if _PIPELINE is None:
        # Constructing the pipeline loads model weights; do it once, not per request.
        _PIPELINE = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
    return _PIPELINE


def process_audio(audio_elem):
    """Extract a speaker embedding from an audio clip and save it as ``.npz``.

    Args:
        audio_elem: ``(sample_rate, samples)`` pair, the form in which Gradio's
            ``"audio"`` input (numpy mode) hands audio to the callback, or
            ``None`` when the user submitted without providing audio.

    Returns:
        Path of a compressed NumPy archive named ``speaker.npz`` that stores
        the embedding under the key ``features``.

    Raises:
        ValueError: if no audio was provided.
    """
    import os
    import tempfile

    if audio_elem is None:
        raise ValueError("No audio provided: record or upload a clip first.")
    sample_rate, samples = audio_elem[0], audio_elem[1]

    # scipy writes WAV data, so use a .wav suffix, and use the clip's real
    # sample rate so the header matches the samples. A unique temp file keeps
    # concurrent requests from overwriting each other's input.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        scipy.io.wavfile.write(wav_path, sample_rate, samples)
        speaker = _get_pipeline().extract_spk_emb(wav_path)
    finally:
        os.remove(wav_path)

    # Move the tensor to the CPU and convert it to a NumPy array for saving.
    speaker = speaker.cpu().numpy()

    # Save into a per-request directory so the download keeps the name
    # "speaker.npz" without colliding with other requests. The directory is
    # left in place because the caller still has to read the file.
    out_path = os.path.join(tempfile.mkdtemp(), "speaker.npz")
    np.savez_compressed(out_path, features=speaker)
    return out_path
21
+
22
+
23
+ # Define Gradio interface
24
# gr.Interface is fully assembled by its constructor, so it does not need to
# be entered as a context manager; build it, then start the web server.
iface = gr.Interface(fn=process_audio, inputs="audio", outputs="file")
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ python ==3.10
2
+ WhisperSpeech==0.8
3
+ torch==2.0.1
4
+ gradio