ylacombe HF staff committed on
Commit
9944bc3
1 Parent(s): 92b3c01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -93,7 +93,7 @@ def text_to_text_translation(text, src_lang, tgt_lang):
93
  if src_lang == tgt_lang:
94
  return text
95
  text_inputs = processor(text = text, src_lang=src_lang, return_tensors="pt").to(device)
96
- output_tokens = text_to_text_model.generate(**text_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
97
  translated_text_from_text = processor.decode(output_tokens.tolist(), skip_special_tokens=True)
98
 
99
  return translated_text_from_text
@@ -237,14 +237,14 @@ def transcribe(numpy_array):
237
  array = torchaudio.functional.resample(torch.tensor(numpy_array[1]).float(), src_sr, tgt_sr)
238
 
239
  audio_inputs = processor(audios=array, return_tensors="pt").to(device)
240
- text = speech_to_text_model.generate(**audio_inputs, tgt_lang="eng")[0].cpu().numpy().squeeze()
241
  text = processor.decode(text.tolist(), skip_special_tokens=True).strip()
242
 
243
 
244
  src_lang = detect_language_from_audio(numpy_array)
245
 
246
  if src_lang != "eng":
247
- original_text = speech_to_text_model.generate(**audio_inputs, tgt_lang=src_lang)[0].cpu().numpy().squeeze()
248
  original_text = processor.decode(original_text.tolist(), skip_special_tokens=True).strip()
249
  else:
250
  original_text = text
 
93
  if src_lang == tgt_lang:
94
  return text
95
  text_inputs = processor(text = text, src_lang=src_lang, return_tensors="pt").to(device)
96
+ output_tokens = text_to_text_model.generate(**text_inputs, tgt_lang=tgt_lang, max_new_tokens=1024)[0].cpu().numpy().squeeze()
97
  translated_text_from_text = processor.decode(output_tokens.tolist(), skip_special_tokens=True)
98
 
99
  return translated_text_from_text
 
237
  array = torchaudio.functional.resample(torch.tensor(numpy_array[1]).float(), src_sr, tgt_sr)
238
 
239
  audio_inputs = processor(audios=array, return_tensors="pt").to(device)
240
+ text = speech_to_text_model.generate(**audio_inputs, tgt_lang="eng", max_new_tokens=1024)[0].cpu().numpy().squeeze()
241
  text = processor.decode(text.tolist(), skip_special_tokens=True).strip()
242
 
243
 
244
  src_lang = detect_language_from_audio(numpy_array)
245
 
246
  if src_lang != "eng":
247
+ original_text = speech_to_text_model.generate(**audio_inputs, tgt_lang=src_lang, max_new_tokens=1024)[0].cpu().numpy().squeeze()
248
  original_text = processor.decode(original_text.tolist(), skip_special_tokens=True).strip()
249
  else:
250
  original_text = text