jonathanagustin committed on
Commit
ffb2d6a
‒
1 Parent(s): 9458259

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +200 -108
  3. requirements.txt +5 -2
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: OpenAI - Text to Speech
3
- emoji: 🗣️
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
 
1
  ---
2
  title: OpenAI - Text to Speech
3
+ emoji: 🔊
4
+ colorFrom: green
5
+ colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
app.py CHANGED
@@ -1,141 +1,233 @@
1
- """
2
- This script implements a Gradio interface for text-to-speech conversion using OpenAI's API.
3
- Users can input text, select a model and voice, and receive an audio output of the synthesized speech.
4
-
5
- Dependencies:
6
- - gradio
7
- - openai
8
-
9
- Usage:
10
- Run the script to launch a web interface for text-to-speech conversion.
11
-
12
- Note:
13
- - Ensure that you have installed the required packages:
14
- pip install gradio openai
15
- - Obtain a valid OpenAI API key with access to the necessary services.
16
- """
17
-
18
  import gradio as gr
19
  import tempfile
20
  import openai
21
- from typing import Tuple
22
-
23
-
24
- def tts(input_text: str, model: str, voice: str, api_key: str) -> str:
25
- """
26
- Convert input text to speech using OpenAI's Text-to-Speech API.
27
-
28
- :param input_text: The text to be converted to speech.
29
- :type input_text: str
30
- :param model: The model to use for synthesis (e.g., 'tts-1', 'tts-1-hd').
31
- :type model: str
32
- :param voice: The voice profile to use (e.g., 'alloy', 'echo', 'fable', etc.).
33
- :type voice: str
34
- :param api_key: OpenAI API key.
35
- :type api_key: str
36
- :return: File path to the generated audio file.
37
- :rtype: str
38
- :raises ValueError: If input parameters are invalid.
39
- :raises openai.error.OpenAIError: If API call fails.
40
- """
41
- if not input_text.strip():
42
- raise ValueError("Input text cannot be empty.")
43
-
44
- if not api_key.strip():
45
- raise ValueError("API key is required.")
46
-
47
- openai.api_key = api_key
48
-
49
- try:
50
- response = openai.audio.speech.create(
51
- input=input_text,
52
- voice=voice,
53
- model=model
54
- )
55
- except openai.error.OpenAIError as e:
56
- raise e
57
-
58
- if not hasattr(response, 'content'):
59
- raise Exception("Invalid response from OpenAI API. The response does not contain audio content.")
60
-
61
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
62
- temp_file.write(response.content)
63
- temp_file_path = temp_file.name
64
-
65
- return temp_file_path
66
-
67
-
68
- def on_convert_click(input_text: str, model: str, voice: str, api_key: str) -> Tuple[str, str]:
69
  """
70
- Callback function to handle the click event for text-to-speech conversion.
71
-
72
- :param input_text: Text input from the user.
73
- :type input_text: str
74
- :param model: Selected model.
75
- :type model: str
76
- :param voice: Selected voice.
77
- :type voice: str
78
- :param api_key: User's OpenAI API key.
79
- :type api_key: str
80
- :return: Tuple containing the file path to the generated audio file and an error message.
81
- :rtype: Tuple[str, str]
82
  """
83
- try:
84
- file_path = tts(input_text, model, voice, api_key)
85
- return file_path, ""
86
- except Exception as e:
87
- return None, str(e)
88
-
89
 
90
  def main():
91
  """
92
  Main function to create and launch the Gradio interface.
93
  """
94
- # Define model and voice options
95
  MODEL_OPTIONS = ["tts-1", "tts-1-hd"]
96
  VOICE_OPTIONS = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
97
-
98
- with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  with gr.Column(scale=1):
101
  api_key_input = gr.Textbox(
102
- label="API Key", type="password", placeholder="Enter your OpenAI API Key"
 
 
 
103
  )
104
  model_dropdown = gr.Dropdown(
105
- choices=MODEL_OPTIONS, label="Model", value="tts-1"
 
 
 
106
  )
107
  voice_dropdown = gr.Dropdown(
108
- choices=VOICE_OPTIONS, label="Voice Options", value="echo"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  )
 
110
  with gr.Column(scale=2):
111
  input_textbox = gr.Textbox(
112
- label="Input Text",
113
  lines=10,
114
- placeholder="Type your text here..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  )
116
- submit_button = gr.Button("Convert Text to Speech", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  with gr.Column(scale=1):
118
  output_audio = gr.Audio(label="Output Audio")
119
- error_output = gr.Textbox(
120
- label="Error Message", interactive=False, visible=False
121
- )
122
 
123
- # Define the event handler for the submit button
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  submit_button.click(
125
- fn=on_convert_click,
126
- inputs=[input_textbox, model_dropdown, voice_dropdown, api_key_input],
127
- outputs=[output_audio, error_output]
128
- )
129
-
130
- # Allow pressing Enter in the input textbox to trigger the conversion
131
- input_textbox.submit(
132
- fn=on_convert_click,
133
- inputs=[input_textbox, model_dropdown, voice_dropdown, api_key_input],
134
- outputs=[output_audio, error_output]
135
  )
136
 
137
- demo.launch()
138
-
139
 
140
  if __name__ == "__main__":
141
  main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import tempfile
3
  import openai
4
+ import requests
5
+ import os
6
+ from functools import partial
7
+
8
+ def tts(
9
+ input_text: str,
10
+ model: str,
11
+ voice: str,
12
+ api_key: str,
13
+ response_format: str = "mp3",
14
+ speed: float = 1.0,
15
+ ) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
+ [Function remains unchanged]
 
 
 
 
 
 
 
 
 
 
 
18
  """
19
+ # [Function body remains unchanged]
20
+ # ...
 
 
 
 
21
 
22
  def main():
23
  """
24
  Main function to create and launch the Gradio interface.
25
  """
 
26
  MODEL_OPTIONS = ["tts-1", "tts-1-hd"]
27
  VOICE_OPTIONS = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
28
+ RESPONSE_FORMAT_OPTIONS = ["mp3", "opus", "aac", "flac", "wav", "pcm"]
29
+
30
+ # Predefine voice previews URLs
31
+ VOICE_PREVIEW_URLS = {
32
+ voice: f"https://cdn.openai.com/API/docs/audio/{voice}.wav"
33
+ for voice in VOICE_OPTIONS
34
+ }
35
+
36
+ # Download audio previews to disk before initiating the interface
37
+ PREVIEW_DIR = "voice_previews"
38
+ os.makedirs(PREVIEW_DIR, exist_ok=True)
39
+
40
+ VOICE_PREVIEW_FILES = {}
41
+ for voice, url in VOICE_PREVIEW_URLS.items():
42
+ local_file_path = os.path.join(PREVIEW_DIR, f"{voice}.wav")
43
+ if not os.path.exists(local_file_path):
44
+ try:
45
+ response = requests.get(url)
46
+ response.raise_for_status()
47
+ with open(local_file_path, "wb") as f:
48
+ f.write(response.content)
49
+ except requests.exceptions.RequestException as e:
50
+ print(f"Failed to download {voice} preview: {e}")
51
+ VOICE_PREVIEW_FILES[voice] = local_file_path
52
+
53
+ # Set static paths for Gradio to serve
54
+ gr.set_static_paths(paths=[PREVIEW_DIR])
55
+
56
+ with gr.Blocks(title="OpenAI - Text to Speech") as demo:
57
  with gr.Row():
58
+ with gr.Column(scale=1):
59
+ def play_voice_sample(voice: str):
60
+ """
61
+ Play the preview audio sample for the selected voice.
62
+
63
+ :param voice: The name of the voice to preview.
64
+ :type voice: str
65
+ :return: Updated Gradio Audio component with the selected voice sample.
66
+ :rtype: gr.Audio
67
+ """
68
+ return gr.update(
69
+ value=VOICE_PREVIEW_FILES[voice],
70
+ label=f"Preview Voice: {voice.capitalize()}",
71
+ )
72
+ with gr.Group():
73
+
74
+ preview_audio = gr.Audio(
75
+ interactive=False,
76
+ label="Preview Voice: Echo",
77
+ value=VOICE_PREVIEW_FILES['echo'],
78
+ visible=True,
79
+ show_download_button=False,
80
+ show_share_button=False,
81
+ autoplay=False,
82
+ )
83
+
84
+ # Create buttons for each voice
85
+ for voice in VOICE_OPTIONS:
86
+ voice_button = gr.Button(
87
+ value=f"{voice.capitalize()}",
88
+ variant="secondary",
89
+ size="sm",
90
+ )
91
+ voice_button.click(
92
+ fn=partial(play_voice_sample, voice=voice),
93
+ outputs=preview_audio,
94
+ )
95
+
96
  with gr.Column(scale=1):
97
  api_key_input = gr.Textbox(
98
+ label="OpenAI API Key",
99
+ info="https://platform.openai.com/account/api-keys",
100
+ type="password",
101
+ placeholder="Enter your OpenAI API Key",
102
  )
103
  model_dropdown = gr.Dropdown(
104
+ choices=MODEL_OPTIONS,
105
+ label="Model",
106
+ value="tts-1",
107
+ info="Select tts-1 for speed or tts-1-hd for quality",
108
  )
109
  voice_dropdown = gr.Dropdown(
110
+ choices=VOICE_OPTIONS,
111
+ label="Voice Options",
112
+ value="echo",
113
+ )
114
+ response_format_dropdown = gr.Dropdown(
115
+ choices=RESPONSE_FORMAT_OPTIONS,
116
+ label="Response Format",
117
+ value="mp3",
118
+ )
119
+ speed_slider = gr.Slider(
120
+ minimum=0.25,
121
+ maximum=4.0,
122
+ step=0.05,
123
+ label="Voice Speed",
124
+ value=1.0,
125
  )
126
+
127
  with gr.Column(scale=2):
128
  input_textbox = gr.Textbox(
129
+ label="Input Text (0000 / 4096 chars)",
130
  lines=10,
131
+ placeholder="Type your text here...",
132
+ )
133
+
134
+ def update_label(input_text: str):
135
+ """
136
+ Update the label of the input textbox with the current character count.
137
+
138
+ :param input_text: The current text in the input textbox.
139
+ :type input_text: str
140
+ :return: Updated Gradio component with new label.
141
+ :rtype: gr.update
142
+ """
143
+ char_count = len(input_text)
144
+ new_label = f"Input Text ({char_count:04d} / 4096 chars)"
145
+ return gr.update(label=new_label)
146
+
147
+ # Update the label when the text changes, with progress hidden
148
+ input_textbox.change(
149
+ fn=update_label,
150
+ inputs=input_textbox,
151
+ outputs=input_textbox,
152
+ show_progress='hidden', # Hide the progress indicator
153
  )
154
+
155
+ # Initialize the submit button as non-interactive
156
+ submit_button = gr.Button(
157
+ "Enter OpenAI API Key",
158
+ variant="primary",
159
+ interactive=False,
160
+ )
161
+
162
+ # Function to update the submit button based on API Key input
163
+ def update_button(api_key):
164
+ """
165
+ Update the submit button's label and interactivity based on the API key input.
166
+
167
+ :param api_key: The current text in the API key input.
168
+ :type api_key: str
169
+ :return: Updated Gradio component for the submit button.
170
+ :rtype: gr.update
171
+ """
172
+ if api_key.strip():
173
+ # There is an API key, enable the submit button
174
+ return gr.update(value="Convert Text to Speech", interactive=True)
175
+ else:
176
+ # No API key, disable the submit button
177
+ return gr.update(value="Enter OpenAI API Key", interactive=False)
178
+
179
+ # Update the submit button whenever the API Key input changes
180
+ api_key_input.input(
181
+ fn=update_button,
182
+ inputs=api_key_input,
183
+ outputs=submit_button,
184
+ )
185
+
186
  with gr.Column(scale=1):
187
  output_audio = gr.Audio(label="Output Audio")
 
 
 
188
 
189
+ def on_submit(
190
+ input_text: str, model: str, voice: str, api_key: str, response_format: str, speed: float
191
+ ) -> str:
192
+ """
193
+ Event handler for the submit button; converts text to speech using the tts function.
194
+
195
+ :param input_text: The text to convert to speech.
196
+ :type input_text: str
197
+ :param model: The TTS model to use (e.g., 'tts-1', 'tts-1-hd').
198
+ :type model: str
199
+ :param voice: The voice profile to use (e.g., 'alloy', 'echo', etc.).
200
+ :type voice: str
201
+ :param api_key: OpenAI API key.
202
+ :type api_key: str
203
+ :param response_format: The audio format of the output file.
204
+ :type response_format: str
205
+ :param speed: The speed of the synthesized speech.
206
+ :type speed: float
207
+ :return: File path to the generated audio file.
208
+ :rtype: str
209
+ """
210
+ audio_file = tts(
211
+ input_text, model, voice, api_key, response_format, speed
212
+ )
213
+ return audio_file
214
+
215
+ # Trigger the conversion when the submit button is clicked
216
  submit_button.click(
217
+ fn=on_submit,
218
+ inputs=[
219
+ input_textbox,
220
+ model_dropdown,
221
+ voice_dropdown,
222
+ api_key_input,
223
+ response_format_dropdown,
224
+ speed_slider,
225
+ ],
226
+ outputs=output_audio,
227
  )
228
 
229
+ # Launch the Gradio app with error display enabled
230
+ demo.launch(show_error=True)
231
 
232
  if __name__ == "__main__":
233
  main()
requirements.txt CHANGED
@@ -1,2 +1,5 @@
1
- gradio
2
- openai
 
 
 
 
1
+ gradio[oauth]==4.44.0
2
+ openai==1.47.0
3
+ requests==2.31.0
4
+ spaces==0.30.2
5
+ uvicorn==0.30.6