Tonic committed on
Commit
20975ce
1 Parent(s): 3796c5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -38,24 +38,25 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
38
  resample_audio = resampler(newsr=24000)
39
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
40
 
 
41
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
42
  tmp_file_name = tmp_file.name
43
  audio_np = audio_data_resampled.numpy() # Convert to numpy array
44
-
45
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
46
  audio_np = audio_np / np.max(np.abs(audio_np))
47
-
48
- if audio_np.ndim > 1:
49
- audio_np = audio_np[:,0]
50
-
51
  audio_np = np.int16(audio_np * 32767)
52
-
53
  with wave.open(tmp_file_name, 'w') as wav_file:
54
- wav_file.setnchannels(1)
55
- wav_file.setsampwidth(2)
56
  wav_file.setframerate(24000)
57
  wav_file.writeframes(audio_np.tobytes())
58
-
59
  return tmp_file_name
60
 
61
  with gr.Blocks() as demo:
 
38
  resample_audio = resampler(newsr=24000)
39
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
40
 
41
+ # Normalize and convert to 2D stereo if necessary
42
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
43
  tmp_file_name = tmp_file.name
44
  audio_np = audio_data_resampled.numpy() # Convert to numpy array
45
+
46
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
47
  audio_np = audio_np / np.max(np.abs(audio_np))
48
+
49
+ if audio_np.ndim == 1:
50
+ audio_np = np.stack((audio_np, audio_np), axis=-1)
51
+
52
  audio_np = np.int16(audio_np * 32767)
53
+
54
  with wave.open(tmp_file_name, 'w') as wav_file:
55
+ wav_file.setnchannels(2)
56
+ wav_file.setsampwidth(2)
57
  wav_file.setframerate(24000)
58
  wav_file.writeframes(audio_np.tobytes())
59
+
60
  return tmp_file_name
61
 
62
  with gr.Blocks() as demo: