reach-vb HF staff committed on
Commit
096c2f1
1 Parent(s): d9911b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -17
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import sys
2
- import os,stat
3
  import subprocess
4
  import random
5
  from zipfile import ZipFile
6
  import uuid
7
 
 
 
8
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
9
  os.environ["COQUI_TOS_AGREED"] = "1"
10
 
@@ -13,9 +15,18 @@ os.environ["COQUI_TOS_AGREED"] = "1"
13
  import langid
14
 
15
  import gradio as gr
 
 
 
16
  from TTS.api import TTS
 
 
 
 
17
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
18
  from huggingface_hub import HfApi
 
19
  # will use api to restart space on an unrecoverable error
20
  api = HfApi(token=HF_TOKEN)
21
  repo_id = "coqui/xtts"
@@ -29,8 +40,19 @@ os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
29
 
30
  # Load TTS
31
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
32
- tts.to("cuda")
33
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # This is for debugging purposes only
36
  DEVICE_ASSERT_DETECTED=0
@@ -143,14 +165,24 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
143
  global DEVICE_ASSERT_LANG
144
  #It will likely never come here as we restart space on first unrecoverable error now
145
  print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
146
-
147
- try:
148
- tts.tts_to_file(
149
- text=prompt,
150
- file_path="output.wav",
151
- language=language,
152
- speaker_wav=speaker_wav,
153
- )
 
 
 
 
 
 
 
 
 
 
154
  except RuntimeError as e :
155
  if "device-side assert" in str(e):
156
  # cannot do anything on cuda device side error, need to restart
@@ -168,13 +200,6 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
168
  else:
169
  print("RuntimeError: non device-side assert error:", str(e))
170
  raise e
171
- return (
172
- gr.make_waveform(
173
- audio="output.wav",
174
- ),
175
- "output.wav",
176
- speaker_wav,
177
- )
178
  else:
179
  gr.Warning("Please accept the Terms & Condition!")
180
  return (
 
1
  import sys
2
+ import io, os, stat
3
  import subprocess
4
  import random
5
  from zipfile import ZipFile
6
  import uuid
7
 
8
+ import torch
9
+ import torchaudio
10
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
11
  os.environ["COQUI_TOS_AGREED"] = "1"
12
 
 
15
  import langid
16
 
17
  import gradio as gr
18
+ from scipy.io.wavfile import write
19
+ from pydub import AudioSegment
20
+
21
  from TTS.api import TTS
22
+ from TTS.tts.configs.xtts_config import XttsConfig
23
+ from TTS.tts.models.xtts import Xtts
24
+ from TTS.utils.generic_utils import get_user_data_dir
25
+
26
  HF_TOKEN = os.environ.get("HF_TOKEN")
27
+
28
  from huggingface_hub import HfApi
29
+
30
  # will use api to restart space on an unrecoverable error
31
  api = HfApi(token=HF_TOKEN)
32
  repo_id = "coqui/xtts"
 
40
 
41
  # Load TTS
42
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
 
43
 
44
+ model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v1")
45
+ config = XttsConfig()
46
+ config.load_json(os.path.join(model_path, "config.json"))
47
+ model = Xtts.init_from_config(config)
48
+ model.load_checkpoint(
49
+ config,
50
+ checkpoint_path=os.path.join(model_path, "model.pth"),
51
+ vocab_path=os.path.join(model_path, "vocab.json"),
52
+ eval=True,
53
+ use_deepspeed=True
54
+ )
55
+ model.cuda()
56
 
57
  # This is for debugging purposes only
58
  DEVICE_ASSERT_DETECTED=0
 
165
  global DEVICE_ASSERT_LANG
166
  #It will likely never come here as we restart space on first unrecoverable error now
167
  print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
168
+
169
+ gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
170
+ wav_chunks = []
171
+
172
+ chunks = model.inference_stream(
173
+ prompt,
174
+ language,
175
+ gpt_cond_latent,
176
+ speaker_embedding,)
177
+ try:
178
+
179
+ for i, chunk in enumerate(chunks):
180
+ print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
181
+ out_file = f'{i}.wav'
182
+ write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
183
+ audio = AudioSegment.from_file(out_file)
184
+ audio.export(out_file, format='wav')
185
+ yield (gr.make_waveform(audio=out_file),out_file, speaker_wav)
186
  except RuntimeError as e :
187
  if "device-side assert" in str(e):
188
  # cannot do anything on cuda device side error, need to restart
 
200
  else:
201
  print("RuntimeError: non device-side assert error:", str(e))
202
  raise e
 
 
 
 
 
 
 
203
  else:
204
  gr.Warning("Please accept the Terms & Condition!")
205
  return (