jiuuee committed on
Commit 3f5328d
1 Parent(s): 09a2c5b

Update app.py

Files changed (1)
  1. app.py +28 -2
app.py CHANGED
@@ -1,4 +1,30 @@
- import gradio as gr
+ from nemo.collections.asr.models import EncDecMultiTaskModel
+
+ # Load the Canary-1B model
+ canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
+
+ # Define the input manifest file for ASR
+ input_manifest = {
+     "audio_filepath": "/path/to/audio.wav",
+     "duration": 1000,  # duration of the audio, can be set to `None` if using NeMo main branch
+     "taskname": "asr",
+     "source_lang": "en",  # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
+     "target_lang": "en",  # language of the text output, choices=['en','de','es','fr']
+     "pnc": "yes",  # whether to have PnC output, choices=['yes', 'no']
+     "answer": "na",
+ }
+
+ # Transcribe audio using the Canary-1B model
+ predicted_text = canary_model.transcribe(
+     input_manifest,
+     batch_size=16  # batch size to run the inference with
+ )
+
+ print("Predicted Text:", predicted_text)
+
+
+
+ '''import gradio as gr
  from nemo.collections.asr.models import ASRModel
  import librosa
 
@@ -33,4 +59,4 @@ def transcribe(audio):
  audio_input = gr.components.Audio()
 
  iface = gr.Interface(transcribe, audio_input, "text", title="ASR with NeMo Canary Model")
- iface.launch()
+ iface.launch()'''
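
Note on the added snippet: according to the nvidia/canary-1b model card, EncDecMultiTaskModel.transcribe() takes either a list of audio file paths or the path to a JSONL manifest file, so passing the input_manifest dict directly may not work. Below is a minimal sketch of the manifest-file variant; the manifest_path filename and the single-entry manifest are illustrative assumptions, not part of the commit.

# Hedged sketch, not the committed code: assumes transcribe() accepts a manifest
# file path (as shown on the nvidia/canary-1b model card) rather than a dict.
import json
from nemo.collections.asr.models import EncDecMultiTaskModel

canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')

# Write the manifest entry as one JSON object per line (JSONL); path is illustrative.
manifest_path = "input_manifest.json"
with open(manifest_path, "w") as f:
    f.write(json.dumps({
        "audio_filepath": "/path/to/audio.wav",
        "duration": 1000,   # can be set to `None` on the NeMo main branch
        "taskname": "asr",
        "source_lang": "en",
        "target_lang": "en",
        "pnc": "yes",
        "answer": "na",
    }) + "\n")

# Pass the manifest path (not the dict) to transcribe().
predicted_text = canary_model.transcribe(manifest_path, batch_size=16)
print("Predicted Text:", predicted_text)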