DrishtiSharma committed on
Commit
f1cf01a
1 Parent(s): 2d03bb7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -13
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
- import librosa
3
- from transformers import AutoFeatureExtractor, pipeline
4
 
5
 
6
  def load_and_fix_data(input_file, model_sampling_rate):
@@ -12,20 +12,26 @@ def load_and_fix_data(input_file, model_sampling_rate):
12
  return speech
13
 
14
 
15
- feature_extractor = AutoFeatureExtractor.from_pretrained(
16
- "anuragshas/wav2vec2-xls-r-1b-hi-with-lm"
17
- )
18
  sampling_rate = feature_extractor.sampling_rate
19
 
20
- asr = pipeline(
21
- "automatic-speech-recognition", model="anuragshas/wav2vec2-xls-r-1b-hi-with-lm"
22
- )
23
 
24
 
 
 
 
 
 
 
25
  def predict_and_ctc_lm_decode(input_file):
26
  speech = load_and_fix_data(input_file, sampling_rate)
27
  transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)
28
- return transcribed_text["text"]
 
 
 
 
29
 
30
 
31
  gr.Interface(
@@ -34,10 +40,10 @@ gr.Interface(
34
  gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")
35
  ],
36
  outputs=[gr.outputs.Textbox()],
37
- examples=[["example1.wav"]],
38
- title="Hindi ASR using Wav2Vec2-1B with LM",
39
- article="<p><center><img src='https://visitor-badge.glitch.me/badge?page_id=anuragshas/Hindi_ASR' alt='visitor badge'></center></p>",
40
- description="Built during Robust Speech Event",
41
  layout="horizontal",
42
  theme="huggingface",
43
  ).launch(enable_queue=True, cache_examples=True)
 
1
  import gradio as gr
2
+ import librosa
3
+ from transformers import AutoFeatureExtractor, AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
4
 
5
 
6
  def load_and_fix_data(input_file, model_sampling_rate):
 
12
  return speech
13
 
14
 
15
+ feature_extractor = AutoFeatureExtractor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
 
 
16
  sampling_rate = feature_extractor.sampling_rate
17
 
18
+ asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
 
 
19
 
20
 
21
+
22
+ model = AutoModelForSeq2SeqLM.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
23
+ tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
24
+
25
+ new_line = '\n'
26
+
27
  def predict_and_ctc_lm_decode(input_file):
28
  speech = load_and_fix_data(input_file, sampling_rate)
29
  transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)
30
+ transcribed_text = transcribed_text["text"]
31
+ input_ids = tokenizer('translate Spanish to Nahuatl: ' + transcribed_text, return_tensors='pt').input_ids
32
+ outputs = model.generate(input_ids, max_length=512)
33
+ outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
34
+ return f"Spanish Audio Transcription: {transcribed_text} {new_line} Nahuatl Translation :{outputs}"
35
 
36
 
37
  gr.Interface(
 
40
  gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")
41
  ],
42
  outputs=[gr.outputs.Textbox()],
43
+ examples=[["audio1.wav"], ["travel.wav"]],
44
+ title="Spanish-Audio-Transcriptions-to-Nahuatl-Translation",
45
+ description = "This is a Gradio demo of Spanish Audio Transcriptions to Nahuatl Translation. To use this, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and translated to Nahuatl language.",
46
+ #article="<p><center><img src='........e'></center></p>",
47
  layout="horizontal",
48
  theme="huggingface",
49
  ).launch(enable_queue=True, cache_examples=True)