Spaces:

jiuuee
/

my-alexa

Sleeping

jiuuee committed on May 2

Commit

902bec4

•

1 Parent(s): 542e22d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,21 +1,38 @@
 import gradio as gr
 from nemo.collections.asr.models import ASRModel
 # Load the NeMo ASR model
 model = ASRModel.from_pretrained("nvidia/canary-1b")
 model.eval()
-def transcribe(audio):
     if audio is None:
-        raise gr.InterfaceError("Please provide some input audio")
     # Perform speech recognition
     transcription = model.transcribe([audio])
     return transcription[0]
-audio_input = gr.inputs.Audio()
-iface = gr.Interface(transcribe, audio_input, "text", title="ASR with NeMo Canary Model")
-iface.launch()

 import gradio as gr
+import torch
 from nemo.collections.asr.models import ASRModel
 # Load the NeMo ASR model
 model = ASRModel.from_pretrained("nvidia/canary-1b")
 model.eval()
+# Load the Silero VAD model (voice-activity detection, used here as a stand-in for keyword spotting)
+kws_model = torch.hub.load('snakers4/silero-vad', 'silero_vad')
+def detect_trigger(audio):
     if audio is None:
+        raise gr.InterfaceError("Please provide some input audio: either upload an audio file or use the microphone")
+    # Perform keyword spotting
+    is_triggered = kws_model(audio)  # You need to adapt this line to the actual API of your keyword spotting model
+    return is_triggered
+def transcribe_triggered(audio):
+    if audio is None:
+        raise gr.InterfaceError("Please provide some input audio: either upload an audio file or use the microphone")
+    # Check if trigger word is detected
+    is_triggered = detect_trigger(audio)
+    if not is_triggered:
+        return "Trigger word not detected."
     # Perform speech recognition
     transcription = model.transcribe([audio])
     return transcription[0]
+audio_input = gr.components.Audio()
+iface = gr.Interface(transcribe_triggered, audio_input, "text", title="ASR with NeMo Canary Model (Triggered by 'Hey Alexa')")
+iface.launch()