DrishtiSharma committed on
Commit cf1be34
1 Parent(s): 4fdac5b

Update app.py

Files changed (1): app.py +2 -1
app.py CHANGED
@@ -22,6 +22,7 @@ asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-la
 model = AutoModelForSeq2SeqLM.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
 tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/t5-small-spanish-nahuatl')
 
+new_line = '\n'
 
 def predict_and_ctc_lm_decode(input_file):
     speech = load_and_fix_data(input_file, sampling_rate)
@@ -30,7 +31,7 @@ def predict_and_ctc_lm_decode(input_file):
     input_ids = tokenizer('translate Spanish to Nahuatl: ' + transcribed_text, return_tensors='pt').input_ids
     outputs = model.generate(input_ids, max_length=512)
     outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-    return f"Spanish Audio Transcription: {transcribed_text} & the corresponding Nahuatl Translation is :{outputs}"
+    return f"Spanish Audio Transcription: {transcribed_text} {new_line} The corresponding Nahuatl Translation is :{outputs}"
 
 
 gr.Interface(
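
The only behavioral change in this commit is the formatting of the returned string: the "&" separator between the Spanish transcription and the Nahuatl translation is replaced with a newline, carried in the new module-level new_line variable. Below is a minimal, self-contained sketch of just that formatting step; format_output and the placeholder strings are hypothetical names used only for illustration, while the f-string itself matches the patched app.py.

new_line = '\n'

def format_output(transcribed_text: str, outputs: str) -> str:
    # Same f-string as the patched return statement in app.py:
    # transcription and translation now end up on separate lines.
    return f"Spanish Audio Transcription: {transcribed_text} {new_line} The corresponding Nahuatl Translation is :{outputs}"

# Hypothetical usage with placeholder strings (not real model output):
print(format_output("<spanish transcription>", "<nahuatl translation>"))
# Prints:
# Spanish Audio Transcription: <spanish transcription> 
#  The corresponding Nahuatl Translation is :<nahuatl translation>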