Tonic committed on
Commit
5033513
1 Parent(s): 20975ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -19
app.py CHANGED
@@ -38,26 +38,14 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
38
  resample_audio = resampler(newsr=24000)
39
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
40
 
41
- # Normalize and convert to 2D stereo if necessary
42
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
43
- tmp_file_name = tmp_file.name
44
- audio_np = audio_data_resampled.numpy() # Convert to numpy array
45
 
46
- if audio_np.max() > 1.0 or audio_np.min() < -1.0:
47
- audio_np = audio_np / np.max(np.abs(audio_np))
48
-
49
- if audio_np.ndim == 1:
50
- audio_np = np.stack((audio_np, audio_np), axis=-1)
51
-
52
- audio_np = np.int16(audio_np * 32767)
53
-
54
- with wave.open(tmp_file_name, 'w') as wav_file:
55
- wav_file.setnchannels(2)
56
- wav_file.setsampwidth(2)
57
- wav_file.setframerate(24000)
58
- wav_file.writeframes(audio_np.tobytes())
59
-
60
- return tmp_file_name
61
 
62
  with gr.Blocks() as demo:
63
  gr.Markdown(title)
 
38
  resample_audio = resampler(newsr=24000)
39
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
40
 
41
+ # Normalize audio
42
+ audio_np = audio_data.numpy()
43
+ audio_np = audio_np / np.max(np.abs(audio_np))
 
44
 
45
+ # Save to a temporary WAV file
46
+ with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
47
+ sf.write(tmp_file.name, audio_np, 24000, 'PCM_16')
48
+ return tmp_file.name
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  with gr.Blocks() as demo:
51
  gr.Markdown(title)