Tonic committed on
Commit
aa13f09
1 Parent(s): a903ae8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -37,21 +37,23 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
37
  resample_audio = resampler(newsr=24000)
38
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
39
 
40
- # Normalize and write to a WAV file
41
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
42
  tmp_file_name = tmp_file.name
43
  audio_np = audio_data_resampled.numpy() # Convert to numpy array
44
 
45
- # Normalize if necessary
46
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
47
  audio_np = audio_np / np.max(np.abs(audio_np))
48
 
49
- # Ensure the audio data is 2D (num_samples, num_channels)
50
- if audio_np.ndim == 1:
51
- audio_np = np.expand_dims(audio_np, axis=1)
52
 
53
- # Write the file
54
- sf.write(tmp_file_name, audio_np, 24000)
 
 
 
 
 
55
 
56
  return tmp_file_name
57
 
 
37
  resample_audio = resampler(newsr=24000)
38
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
39
 
 
40
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
41
  tmp_file_name = tmp_file.name
42
  audio_np = audio_data_resampled.numpy() # Convert to numpy array
43
 
 
44
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
45
  audio_np = audio_np / np.max(np.abs(audio_np))
46
 
47
+ if audio_np.ndim > 1:
48
+ audio_np = audio_np[:,0]
 
49
 
50
+ audio_np = np.int16(audio_np * 32767)
51
+
52
+ with wave.open(tmp_file_name, 'w') as wav_file:
53
+ wav_file.setnchannels(1)
54
+ wav_file.setsampwidth(2)
55
+ wav_file.setframerate(24000)
56
+ wav_file.writeframes(audio_np.tobytes())
57
 
58
  return tmp_file_name
59