jonathanagustin committed on
Commit
98b32f1
1 Parent(s): 0034b02

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +139 -66
app.py CHANGED
@@ -3,16 +3,26 @@ import tempfile
3
  import openai
4
  import requests
5
  import os
6
-
7
- def tts(input_text: str, model: str, voice: str, api_key: str) -> str:
 
 
 
 
 
 
 
 
8
  """
9
  Convert input text to speech using OpenAI's Text-to-Speech API.
10
 
11
  Parameters:
12
  input_text (str): The text to be converted to speech.
13
  model (str): The model to use for synthesis (e.g., 'tts-1', 'tts-1-hd').
14
- voice (str): The voice profile to use (e.g., 'alloy', 'echo', 'fable', etc.).
15
  api_key (str): OpenAI API key.
 
 
16
 
17
  Returns:
18
  str: File path to the generated audio file.
@@ -28,34 +38,61 @@ def tts(input_text: str, model: str, voice: str, api_key: str) -> str:
28
  if not input_text.strip():
29
  raise gr.Error("Input text cannot be empty.")
30
 
31
- openai.api_key = api_key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  try:
34
- response = openai.Audio.create(text=input_text, voice=voice, model=model)
35
- except openai.OpenAIError as e:
36
- # Catch-all for OpenAI exceptions
37
- raise gr.Error(f"An OpenAI error occurred: {e}")
38
- except Exception as e:
39
- # Catch any other exceptions
40
- raise gr.Error(f"An unexpected error occurred: {e}")
41
-
42
- if not hasattr(response, "audio"):
43
- raise gr.Error(
44
- "Invalid response from OpenAI API. The response does not contain audio content."
45
  )
46
-
47
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
48
- temp_file.write(response.audio)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  temp_file_path = temp_file.name
50
 
51
  return temp_file_path
52
 
 
53
  def main():
54
  """
55
  Main function to create and launch the Gradio interface.
56
  """
57
  MODEL_OPTIONS = ["tts-1", "tts-1-hd"]
58
  VOICE_OPTIONS = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
 
59
 
60
  # Predefine voice previews URLs
61
  VOICE_PREVIEW_URLS = {
@@ -81,58 +118,67 @@ def main():
81
  VOICE_PREVIEW_FILES[voice] = local_file_path
82
 
83
  # Set static paths for Gradio to serve
84
- # This needs to be done before creating the Gradio app
85
- gr.set_static_paths([PREVIEW_DIR])
86
 
87
  with gr.Blocks(title="OpenAI - Text to Speech") as demo:
 
88
  with gr.Row():
89
  with gr.Column(scale=1):
90
- gr.Markdown("### Voice Previews")
91
-
92
- # Create an audio component to play the samples
93
- preview_audio = gr.Audio(
94
- interactive=False,
95
- label="Preview Audio",
96
- value=None,
97
- visible=True,
98
- autoplay=True,
99
- )
100
-
101
- # A function to update the preview_audio component
102
- def play_voice_sample(voice):
103
- return gr.update(value=VOICE_PREVIEW_FILES[voice])
104
-
105
- # Create buttons for each voice inside a grid
106
- for voice in VOICE_OPTIONS:
107
- # Create a button for each voice
108
- voice_button = gr.Button(
109
- value=f"{voice.capitalize()}",
110
- variant="secondary",
111
- size="sm",
112
  )
113
 
114
- # Attach the click handler
115
- voice_button.click(
116
- fn=lambda v=voice: play_voice_sample(v),
117
- outputs=preview_audio,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  )
119
- with gr.Column(scale=1):
120
- api_key_input = gr.Textbox(
121
- label="OpenAI API Key",
122
- info="https://platform.openai.com/account/api-keys",
123
- type="password",
124
- placeholder="Enter your OpenAI API Key",
125
- )
126
- model_dropdown = gr.Dropdown(
127
- choices=MODEL_OPTIONS,
128
- label="Model",
129
- value="tts-1",
130
- )
131
- voice_dropdown = gr.Dropdown(
132
- choices=VOICE_OPTIONS,
133
- label="Voice Options",
134
- value="echo",
135
- )
136
 
137
  with gr.Column(scale=2):
138
  input_textbox = gr.Textbox(
@@ -140,6 +186,21 @@ def main():
140
  lines=10,
141
  placeholder="Type your text here...",
142
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  submit_button = gr.Button(
144
  "Convert Text to Speech",
145
  variant="primary",
@@ -148,19 +209,31 @@ def main():
148
  output_audio = gr.Audio(label="Output Audio")
149
 
150
  # Define the event handler for the submit button with error handling
151
- def on_submit(input_text, model, voice, api_key):
152
- audio_file = tts(input_text, model, voice, api_key)
 
 
 
 
153
  return audio_file
154
 
155
  # Trigger the conversion when the submit button is clicked
156
  submit_button.click(
157
  fn=on_submit,
158
- inputs=[input_textbox, model_dropdown, voice_dropdown, api_key_input],
 
 
 
 
 
 
 
159
  outputs=output_audio,
160
  )
161
 
162
  # Launch the Gradio app with error display enabled
163
  demo.launch(show_error=True)
164
 
 
165
  if __name__ == "__main__":
166
  main()
 
3
  import openai
4
  import requests
5
  import os
6
+ from functools import partial
7
+
8
+ def tts(
9
+ input_text: str,
10
+ model: str,
11
+ voice: str,
12
+ api_key: str,
13
+ response_format: str = "mp3",
14
+ speed: float = 1.0,
15
+ ) -> str:
16
  """
17
  Convert input text to speech using OpenAI's Text-to-Speech API.
18
 
19
  Parameters:
20
  input_text (str): The text to be converted to speech.
21
  model (str): The model to use for synthesis (e.g., 'tts-1', 'tts-1-hd').
22
+ voice (str): The voice to use when generating the audio.
23
  api_key (str): OpenAI API key.
24
+ response_format (str): Format of the output audio. Defaults to 'mp3'.
25
+ speed (float): Speed of the generated audio. Defaults to 1.0.
26
 
27
  Returns:
28
  str: File path to the generated audio file.
 
38
  if not input_text.strip():
39
  raise gr.Error("Input text cannot be empty.")
40
 
41
+ if len(input_text) > 4096:
42
+ raise gr.Error("Input text exceeds the maximum length of 4096 characters.")
43
+
44
+ if speed < 0.25 or speed > 4.0:
45
+ raise gr.Error("Speed must be between 0.25 and 4.0.")
46
+
47
+ headers = {
48
+ "Authorization": f"Bearer {api_key}",
49
+ "Content-Type": "application/json",
50
+ }
51
+
52
+ data = {
53
+ "model": model,
54
+ "input": input_text,
55
+ "voice": voice,
56
+ "response_format": response_format,
57
+ "speed": speed,
58
+ }
59
 
60
  try:
61
+ response = requests.post(
62
+ "https://api.openai.com/v1/audio/speech",
63
+ headers=headers,
64
+ json=data,
 
 
 
 
 
 
 
65
  )
66
+ response.raise_for_status()
67
+ except requests.exceptions.HTTPError as http_err:
68
+ raise gr.Error(f"HTTP error occurred: {http_err} - {response.text}")
69
+ except Exception as err:
70
+ raise gr.Error(f"An error occurred: {err}")
71
+
72
+ # The content will be the audio file content
73
+ audio_content = response.content
74
+
75
+ file_extension = response_format.lower()
76
+ # PCM is raw data, so it does not have a standard file extension
77
+ if file_extension == "pcm":
78
+ file_extension = "raw"
79
+
80
+ with tempfile.NamedTemporaryFile(
81
+ suffix=f".{file_extension}", delete=False
82
+ ) as temp_file:
83
+ temp_file.write(audio_content)
84
  temp_file_path = temp_file.name
85
 
86
  return temp_file_path
87
 
88
+
89
  def main():
90
  """
91
  Main function to create and launch the Gradio interface.
92
  """
93
  MODEL_OPTIONS = ["tts-1", "tts-1-hd"]
94
  VOICE_OPTIONS = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
95
+ RESPONSE_FORMAT_OPTIONS = ["mp3", "opus", "aac", "flac", "wav", "pcm"]
96
 
97
  # Predefine voice previews URLs
98
  VOICE_PREVIEW_URLS = {
 
118
  VOICE_PREVIEW_FILES[voice] = local_file_path
119
 
120
  # Set static paths for Gradio to serve
121
+ gr.static(PREVIEW_DIR)
 
122
 
123
  with gr.Blocks(title="OpenAI - Text to Speech") as demo:
124
+ gr.Markdown("# OpenAI Text-to-Speech Demo")
125
  with gr.Row():
126
  with gr.Column(scale=1):
127
+ with gr.Group():
128
+ preview_audio = gr.Audio(
129
+ interactive=False,
130
+ label="Preview Audio",
131
+ value=None,
132
+ visible=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  )
134
 
135
+ # Function to play the selected voice sample
136
+ def play_voice_sample(voice):
137
+ return gr.update(value=VOICE_PREVIEW_FILES[voice])
138
+
139
+ # Create buttons for each voice
140
+ for voice in VOICE_OPTIONS:
141
+ voice_button = gr.Button(
142
+ value=f"{voice.capitalize()}",
143
+ variant="secondary",
144
+ size="sm",
145
+ )
146
+ voice_button.click(
147
+ fn=partial(play_voice_sample, voice=voice),
148
+ outputs=preview_audio,
149
+ )
150
+
151
+ with gr.Column(scale=1):
152
+ api_key_input = gr.Textbox(
153
+ label="OpenAI API Key",
154
+ info="https://platform.openai.com/account/api-keys",
155
+ type="password",
156
+ placeholder="Enter your OpenAI API Key",
157
+ )
158
+ model_dropdown = gr.Dropdown(
159
+ choices=MODEL_OPTIONS,
160
+ label="Model",
161
+ value="tts-1",
162
+ info="Select tts-1 for speed or tts-1-hd for quality.",
163
+ )
164
+ voice_dropdown = gr.Dropdown(
165
+ choices=VOICE_OPTIONS,
166
+ label="Voice Options",
167
+ value="echo",
168
+ info="The voice to use when generating the audio.",
169
+ )
170
+ response_format_dropdown = gr.Dropdown(
171
+ choices=RESPONSE_FORMAT_OPTIONS,
172
+ label="Response Format",
173
+ value="mp3",
174
+ )
175
+ speed_slider = gr.Slider(
176
+ minimum=0.25,
177
+ maximum=4.0,
178
+ step=0.05,
179
+ label="Voice Speed",
180
+ value=1.0,
181
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  with gr.Column(scale=2):
184
  input_textbox = gr.Textbox(
 
186
  lines=10,
187
  placeholder="Type your text here...",
188
  )
189
+ # Add a character counter below the input textbox
190
+ char_count_text = gr.Markdown("0 / 4096")
191
+
192
+ # Function to update the character count
193
+ def update_char_count(input_text):
194
+ char_count = len(input_text)
195
+ return f"**{char_count} / 4096**"
196
+
197
+ # Update character count when the user stops typing
198
+ input_textbox.change(
199
+ fn=update_char_count,
200
+ inputs=input_textbox,
201
+ outputs=char_count_text,
202
+ )
203
+
204
  submit_button = gr.Button(
205
  "Convert Text to Speech",
206
  variant="primary",
 
209
  output_audio = gr.Audio(label="Output Audio")
210
 
211
  # Define the event handler for the submit button with error handling
212
+ def on_submit(
213
+ input_text, model, voice, api_key, response_format, speed
214
+ ):
215
+ audio_file = tts(
216
+ input_text, model, voice, api_key, response_format, speed
217
+ )
218
  return audio_file
219
 
220
  # Trigger the conversion when the submit button is clicked
221
  submit_button.click(
222
  fn=on_submit,
223
+ inputs=[
224
+ input_textbox,
225
+ model_dropdown,
226
+ voice_dropdown,
227
+ api_key_input,
228
+ response_format_dropdown,
229
+ speed_slider,
230
+ ],
231
  outputs=output_audio,
232
  )
233
 
234
  # Launch the Gradio app with error display enabled
235
  demo.launch(show_error=True)
236
 
237
+
238
  if __name__ == "__main__":
239
  main()