gorkemgoknar committed
Commit 310916c
Parent: 42db371

add metrics text

Files changed (1):
  app.py (+27 -3)
app.py CHANGED
@@ -5,6 +5,7 @@ import random
 from zipfile import ZipFile
 import uuid
 
+import time
 import torch
 import torchaudio
 # By using XTTS you agree to CPML license https://coqui.ai/cpml
@@ -59,6 +60,7 @@ DEVICE_ASSERT_DETECTED=0
 DEVICE_ASSERT_PROMPT=None
 DEVICE_ASSERT_LANG=None
 
+
 def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup, no_lang_auto_detect, agree,):
     if agree == True:
         supported_languages=["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn"]
@@ -165,9 +167,18 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             global DEVICE_ASSERT_LANG
             #It will likely never come here as we restart space on first unrecoverable error now
             print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
+
+
+        metrics_text= ""
 
+        t_latent=time.time()
         gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
+        latent_calculation_time = time.time() - t_latent
+        metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"
+
         wav_chunks = []
+
+        t_inference=time.time()
 
         chunks = model.inference_stream(
             prompt,
@@ -175,14 +186,24 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             gpt_cond_latent,
             speaker_embedding,)
         try:
+            first_chunk=True
             for i, chunk in enumerate(chunks):
+                if first_chunk:
+                    first_chunk_time = time.time() - t_inference
+                    metrics_text+=f"Streaming: First chunk actual latency: {first_chunk_time:.2f} seconds\n"
+                    first_chunk=False
+
+
                 wav_chunks.append(chunk)
                 print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
+
                 out_file = f'{i}.wav'
                 write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
                 audio = AudioSegment.from_file(out_file)
                 audio.export(out_file, format='wav')
-                yield (None, out_file, None)
+
+                yield (None, out_file, metrics_text, None)
+
         except RuntimeError as e :
             if "device-side assert" in str(e):
                 # cannot do anything on cuda device side error, need to restart
@@ -212,6 +233,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
                 audio="output.wav",
             ),
             "sil.wav",
+            metrics_text,
             speaker_wav,
         )
     else:
@@ -220,6 +242,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             None,
             None,
             None,
+            None,
         )
 
 
@@ -439,10 +462,11 @@ gr.Interface(
     outputs=[
         gr.Video(label="Waveform Visual"),
         gr.Audio(label="Synthesised Audio", streaming=True, autoplay=True),
+        gr.Text(label="Metrics"),
         gr.Audio(label="Reference Audio Used"),
     ],
     title=title,
     description=description,
     article=article,
-    #examples=examples,
-).queue().launch(debug=True,show_api=False)
+    examples=examples,
+).queue().launch(debug=True,show_api=True)
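For readers skimming the diff: the change turns predict into a streaming generator that accumulates a metrics string and yields it next to every audio chunk. Below is a minimal, self-contained sketch of that pattern. The XTTS calls are replaced by hypothetical stand-ins (fake_inference_stream and a sleep for the latent step), so only the timing and yield structure mirrors the commit, not the model code itself.

import time

# Hypothetical stand-in for model.inference_stream(): yields "audio chunks"
# after a small delay, so the timing logic below has something to measure.
def fake_inference_stream(n_chunks=3, delay=0.1):
    for i in range(n_chunks):
        time.sleep(delay)
        yield f"chunk-{i}"

def predict_with_metrics():
    metrics_text = ""

    # 1) Time the speaker-embedding step (get_conditioning_latents in the Space).
    t_latent = time.time()
    time.sleep(0.05)  # stand-in for the latent computation
    metrics_text += f"Embedding calculation time: {time.time() - t_latent:.2f} seconds\n"

    # 2) Time the latency until the first streamed chunk arrives.
    t_inference = time.time()
    first_chunk = True
    for chunk in fake_inference_stream():
        if first_chunk:
            metrics_text += f"Streaming: First chunk actual latency: {time.time() - t_inference:.2f} seconds\n"
            first_chunk = False
        # One tuple slot per Gradio output component:
        # (Video, Audio, Text, Audio) after this commit.
        yield (None, chunk, metrics_text, None)

for out in predict_with_metrics():
    print(out)

Because the yielded tuple now has four slots, one per output component (Video, Audio, Text, Audio), every exit path must match that arity. That is why the commit also threads metrics_text into the error-path return and pads the agree == False return with a fourth None.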