Tonic committed on
Commit
decaf77
1 Parent(s): 9c3ab74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -10
app.py CHANGED
@@ -35,23 +35,16 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
35
 
36
  resample_audio = resampler(newsr=24000)
37
  audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
38
- # Normalize audio
39
  audio_np = audio_data_resampled.cpu().numpy()
40
  audio_np = audio_np / np.max(np.abs(audio_np))
41
-
42
- # Ensure audio data is in the correct format
43
  audio_np = np.asarray(audio_np, dtype=np.float32)
44
 
45
- # Create stereo audio by duplicating the mono channel
46
  audio_stereo = np.stack((audio_np, audio_np), axis=-1)
 
47
 
48
- # Debugging: Inspect the shape and dtype of the audio array
49
- print("Audio Array Shape:", audio_stereo.shape)
50
- print("Audio Array Dtype:", audio_stereo.dtype)
51
-
52
- # Save to a temporary WAV file as stereo
53
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
54
- # Write the stereo data with a sample rate of 24000 Hz
55
  sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
56
  return tmp_file.name
57
 
 
35
 
36
  resample_audio = resampler(newsr=24000)
37
  audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
 
38
  audio_np = audio_data_resampled.cpu().numpy()
39
  audio_np = audio_np / np.max(np.abs(audio_np))
 
 
40
  audio_np = np.asarray(audio_np, dtype=np.float32)
41
 
 
42
  audio_stereo = np.stack((audio_np, audio_np), axis=-1)
43
+ audio_stereo = audio_stereo.reshape(-1, 2)
44
 
45
+ # print("Audio Array Shape:", audio_stereo.shape)
46
+ # print("Audio Array Dtype:", audio_stereo.dtype)
 
 
 
47
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
 
48
  sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
49
  return tmp_file.name
50