Tonic committed on
Commit
9c3ab74
1 Parent(s): 9c74b19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -0
app.py CHANGED
@@ -35,10 +35,21 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
35
 
36
  resample_audio = resampler(newsr=24000)
37
  audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
 
38
  audio_np = audio_data_resampled.cpu().numpy()
39
  audio_np = audio_np / np.max(np.abs(audio_np))
 
 
40
  audio_np = np.asarray(audio_np, dtype=np.float32)
 
 
41
  audio_stereo = np.stack((audio_np, audio_np), axis=-1)
 
 
 
 
 
 
42
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
43
  # Write the stereo data with a sample rate of 24000 Hz
44
  sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
 
35
 
36
  resample_audio = resampler(newsr=24000)
37
  audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
38
+ # Normalize audio
39
  audio_np = audio_data_resampled.cpu().numpy()
40
  audio_np = audio_np / np.max(np.abs(audio_np))
41
+
42
+ # Ensure audio data is in the correct format
43
  audio_np = np.asarray(audio_np, dtype=np.float32)
44
+
45
+ # Create stereo audio by duplicating the mono channel
46
  audio_stereo = np.stack((audio_np, audio_np), axis=-1)
47
+
48
+ # Debugging: Inspect the shape and dtype of the audio array
49
+ print("Audio Array Shape:", audio_stereo.shape)
50
+ print("Audio Array Dtype:", audio_stereo.dtype)
51
+
52
+ # Save to a temporary WAV file as stereo
53
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
54
  # Write the stereo data with a sample rate of 24000 Hz
55
  sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')