jiuuee committed on
Commit
902bec4
1 Parent(s): 542e22d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -1,21 +1,38 @@
1
  import gradio as gr
 
2
  from nemo.collections.asr.models import ASRModel
3
 
4
  # Load the NeMo ASR model
5
  model = ASRModel.from_pretrained("nvidia/canary-1b")
6
  model.eval()
7
 
8
- def transcribe(audio):
 
 
 
9
  if audio is None:
10
- raise gr.InterfaceError("Please provide some input audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Perform speech recognition
13
  transcription = model.transcribe([audio])
14
 
15
  return transcription[0]
16
 
17
- audio_input = gr.inputs.Audio()
18
-
19
- iface = gr.Interface(transcribe, audio_input, "text", title="ASR with NeMo Canary Model")
20
- iface.launch()
21
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
  from nemo.collections.asr.models import ASRModel
4
 
5
  # Load the NeMo ASR model
6
  model = ASRModel.from_pretrained("nvidia/canary-1b")
7
  model.eval()
8
 
9
+ # Load the Silero VAD model (voice activity detection; a stand-in here, not a real keyword spotter)
10
+ kws_model = torch.hub.load('snakers4/silero-vad', 'silero_vad')
11
+
12
+ def detect_trigger(audio):
13
  if audio is None:
14
+ raise gr.InterfaceError("Please provide some input audio: either upload an audio file or use the microphone")
15
+
16
+ # Run the loaded model on the audio (note: silero-vad detects speech presence, not a specific trigger word)
17
+ is_triggered = kws_model(audio) # You need to adapt this line to the actual API of your keyword spotting model
18
+
19
+ return is_triggered
20
+
21
+ def transcribe_triggered(audio):
22
+ if audio is None:
23
+ raise gr.InterfaceError("Please provide some input audio: either upload an audio file or use the microphone")
24
+
25
+ # Check if trigger word is detected
26
+ is_triggered = detect_trigger(audio)
27
+ if not is_triggered:
28
+ return "Trigger word not detected."
29
 
30
  # Perform speech recognition
31
  transcription = model.transcribe([audio])
32
 
33
  return transcription[0]
34
 
35
+ audio_input = gr.components.Audio()
 
 
 
36
 
37
+ iface = gr.Interface(transcribe_triggered, audio_input, "text", title="ASR with NeMo Canary Model (Triggered by 'Hey Alexa')")
38
+ iface.launch()