jonathanagustin committed on
Commit
42966de
1 Parent(s): 6cd344a

Upload folder using huggingface_hub

Files changed (3)
  1. README.md +6 -9
  2. app.py +141 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,13 +1,10 @@
 ---
-title: Speech To Text
-emoji: 😻
-colorFrom: green
-colorTo: purple
+title: OpenAI - Text to Speech
+emoji: 🗣️
+colorFrom: blue
+colorTo: indigo
 sdk: gradio
-sdk_version: 4.26.0
+sdk_version: 4.44.0
 app_file: app.py
-pinned: false
-license: agpl-3.0
+pinned: true
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
app.py ADDED
@@ -0,0 +1,141 @@
"""
This script implements a Gradio interface for text-to-speech conversion using OpenAI's API.
Users can input text, select a model and voice, and receive an audio output of the synthesized speech.

Dependencies:
    - gradio
    - openai

Usage:
    Run the script to launch a web interface for text-to-speech conversion.

Note:
    - Ensure that you have installed the required packages:
          pip install gradio openai
    - Obtain a valid OpenAI API key with access to the necessary services.
"""

import tempfile
from typing import Optional, Tuple

import gradio as gr
import openai


def tts(input_text: str, model: str, voice: str, api_key: str) -> str:
    """
    Convert input text to speech using OpenAI's Text-to-Speech API.

    :param input_text: The text to be converted to speech.
    :type input_text: str
    :param model: The model to use for synthesis (e.g., 'tts-1', 'tts-1-hd').
    :type model: str
    :param voice: The voice profile to use (e.g., 'alloy', 'echo', 'fable', etc.).
    :type voice: str
    :param api_key: OpenAI API key.
    :type api_key: str
    :return: File path to the generated audio file.
    :rtype: str
    :raises ValueError: If input parameters are invalid.
    :raises openai.OpenAIError: If the API call fails.
    """
    if not input_text.strip():
        raise ValueError("Input text cannot be empty.")

    if not api_key.strip():
        raise ValueError("API key is required.")

    openai.api_key = api_key

    try:
        # openai>=1.0 exposes the speech endpoint as openai.audio.speech.create.
        response = openai.audio.speech.create(
            input=input_text,
            voice=voice,
            model=model
        )
    except openai.OpenAIError as e:
        # Errors are raised as openai.OpenAIError in the 1.x client (openai.error was removed);
        # re-raise so the caller can surface the message.
        raise e

    if not hasattr(response, 'content'):
        raise Exception("Invalid response from OpenAI API. The response does not contain audio content.")

    # Write the MP3 bytes to a temporary file and return its path for Gradio to play.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_file_path = temp_file.name

    return temp_file_path


def on_convert_click(input_text: str, model: str, voice: str, api_key: str) -> Tuple[Optional[str], str]:
    """
    Callback function to handle the click event for text-to-speech conversion.

    :param input_text: Text input from the user.
    :type input_text: str
    :param model: Selected model.
    :type model: str
    :param voice: Selected voice.
    :type voice: str
    :param api_key: User's OpenAI API key.
    :type api_key: str
    :return: Tuple containing the file path to the generated audio file (or None on failure) and an error message.
    :rtype: Tuple[Optional[str], str]
    """
    try:
        file_path = tts(input_text, model, voice, api_key)
        return file_path, ""
    except Exception as e:
        return None, str(e)


def main():
    """
    Main function to create and launch the Gradio interface.
    """
    # Define model and voice options
    MODEL_OPTIONS = ["tts-1", "tts-1-hd"]
    VOICE_OPTIONS = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):
                api_key_input = gr.Textbox(
                    label="API Key", type="password", placeholder="Enter your OpenAI API Key"
                )
                model_dropdown = gr.Dropdown(
                    choices=MODEL_OPTIONS, label="Model", value="tts-1"
                )
                voice_dropdown = gr.Dropdown(
                    choices=VOICE_OPTIONS, label="Voice Options", value="echo"
                )
            with gr.Column(scale=2):
                input_textbox = gr.Textbox(
                    label="Input Text",
                    lines=10,
                    placeholder="Type your text here..."
                )
                submit_button = gr.Button("Convert Text to Speech", variant="primary")
            with gr.Column(scale=1):
                output_audio = gr.Audio(label="Output Audio")
                # Kept visible so error messages returned by the callback are shown to the user.
                error_output = gr.Textbox(
                    label="Error Message", interactive=False
                )

        # Define the event handler for the submit button
        submit_button.click(
            fn=on_convert_click,
            inputs=[input_textbox, model_dropdown, voice_dropdown, api_key_input],
            outputs=[output_audio, error_output]
        )

        # Allow pressing Enter in the input textbox to trigger the conversion
        input_textbox.submit(
            fn=on_convert_click,
            inputs=[input_textbox, model_dropdown, voice_dropdown, api_key_input],
            outputs=[output_audio, error_output]
        )

    demo.launch()


if __name__ == "__main__":
    main()
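
For reference, a minimal usage sketch calling the tts() helper above directly from Python, without the Gradio UI. The import from app, the sample text, and reading the key from an OPENAI_API_KEY environment variable are illustrative assumptions, not part of the commit:

# Hypothetical direct call to tts() from app.py; assumes OPENAI_API_KEY is set.
import os

from app import tts

audio_path = tts(
    input_text="Hello from the command line.",
    model="tts-1",
    voice="alloy",
    api_key=os.environ["OPENAI_API_KEY"],
)
print(f"Generated audio saved to {audio_path}")
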
requirements.txt ADDED
@@ -0,0 +1,2 @@
gradio
openai
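
The Space config above declares sdk_version: 4.44.0 and app.py relies on the openai 1.x client surface (openai.audio.speech.create), so one option, an assumption rather than part of this commit, is to pin compatible versions in requirements.txt:

gradio==4.44.0  # matches the sdk_version declared in README.md (assumed pin)
openai>=1.0.0   # the 1.x client provides openai.audio.speech.create (assumed constraint)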