aheedsajid committed on
Commit
b209e56
1 Parent(s): 9f94ae0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -24
app.py CHANGED
@@ -1,22 +1,29 @@
1
  import os
2
  import gradio as gr
3
- import google.generativeai as genai
4
  from gradio_client import Client, file
5
  from dotenv import load_dotenv
 
 
 
 
6
 
7
  # Load environment variables from .env file
8
  load_dotenv()
9
 
10
- # Retrieve API key from environment variable
11
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
 
13
- # Retrieve system content from environment variable
 
 
 
 
14
  SYSTEM_CONTENT = os.getenv("SYSTEM_CONTENT")
15
 
16
  # Configure Google Gemini API
17
  genai.configure(api_key=GEMINI_API_KEY)
18
 
19
- # Create the model
20
  generation_config = {
21
  "temperature": 0.7,
22
  "top_p": 0.95,
@@ -53,13 +60,6 @@ model = genai.GenerativeModel(
53
  system_instruction=SYSTEM_CONTENT,
54
  )
55
 
56
- # Initialize Gradio client for new TTS API (outside the function)
57
- try:
58
- tts_client = Client("tonyassi/voice-clone")
59
- except ValueError as e:
60
- print(f"Error initializing TTS client: {e}")
61
- tts_client = None
62
-
63
  def generate_response(user_input, chat_history):
64
  """Generates a response based on user input and chat history."""
65
 
@@ -76,18 +76,43 @@ def generate_response(user_input, chat_history):
76
  # Send the entire chat history as the first message
77
  response = chat_session.send_message("\n".join(chat_history))
78
 
79
- if tts_client:
80
- # Use the new Gradio TTS API
81
- tts_result = tts_client.predict(
82
- text=response.text,
83
- audio=file('audio.wav'), # Use local audio file
84
- api_name="/predict"
85
- )
86
- else:
87
- tts_result = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- # Return response and audio, and update chat history
90
- return response.text, tts_result, chat_history
91
 
92
  iface = gr.Interface(
93
  fn=generate_response,
@@ -97,7 +122,7 @@ iface = gr.Interface(
97
  ],
98
  outputs=[
99
  gr.Textbox(label="Response"),
100
- gr.Audio(label="Voice Output", interactive=False, autoplay=True) if tts_client else gr.Textbox(label="Voice Output not available"),
101
  gr.State([]) # State output to update chat history
102
  ],
103
  title="AI Indian Girlfriend",
 
1
  import os
2
  import gradio as gr
 
3
  from gradio_client import Client, file
4
  from dotenv import load_dotenv
5
+ from elevenlabs import VoiceSettings
6
+ from elevenlabs.client import ElevenLabs
7
+ import uuid
8
+ import google.generativeai as genai
9
 
10
  # Load environment variables from .env file
11
  load_dotenv()
12
 
13
+ # Retrieve API key from environment variable for ElevenLabs
14
+ ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
15
 
16
+ # Initialize ElevenLabs client
17
+ client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
18
+
19
+ # Retrieve API key from environment variable for Google Generative AI
20
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
  SYSTEM_CONTENT = os.getenv("SYSTEM_CONTENT")
22
 
23
  # Configure Google Gemini API
24
  genai.configure(api_key=GEMINI_API_KEY)
25
 
26
+ # Create the model for Google Generative AI
27
  generation_config = {
28
  "temperature": 0.7,
29
  "top_p": 0.95,
 
60
  system_instruction=SYSTEM_CONTENT,
61
  )
62
 
 
 
 
 
 
 
 
63
  def generate_response(user_input, chat_history):
64
  """Generates a response based on user input and chat history."""
65
 
 
76
  # Send the entire chat history as the first message
77
  response = chat_session.send_message("\n".join(chat_history))
78
 
79
+ # Convert text to speech and save as file
80
+ save_file_path = text_to_speech_file(response.text)
81
+
82
+ # Return response and audio file path, and update chat history
83
+ return response.text, save_file_path, chat_history
84
+
85
def text_to_speech_file(
    text: str,
    voice_id: str = "pNInz6obpgDQGcFmaJgB",
    model_id: str = "eleven_turbo_v2",
    output_dir: str = "",
) -> str:
    """Convert *text* to speech with the ElevenLabs API and save it as an MP3.

    Args:
        text: The text to synthesize.
        voice_id: ElevenLabs voice identifier. The default is the pre-made
            "Adam" voice.
        model_id: ElevenLabs model identifier. The default is the turbo
            model, chosen for low latency.
        output_dir: Directory in which the audio file is created. The
            default (empty string) writes into the current working
            directory and yields a bare ``<uuid4>.mp3`` relative path.

    Returns:
        The path of the newly written audio file.

    Raises:
        Any exception raised by the ElevenLabs client or by file I/O is
        propagated unchanged; a partially written file is removed first.
    """
    # Request the synthesis; the result is consumed chunk by chunk below.
    audio_chunks = client.text_to_speech.convert(
        voice_id=voice_id,
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id=model_id,
        voice_settings=VoiceSettings(
            stability=0.0,
            similarity_boost=1.0,
            style=0.0,
            use_speaker_boost=True,
        ),
    )

    # A random UUID keeps concurrent requests from overwriting each other.
    save_file_path = os.path.join(output_dir, f"{uuid.uuid4()}.mp3")

    try:
        with open(save_file_path, "wb") as f:
            for chunk in audio_chunks:
                if chunk:
                    f.write(chunk)
    except BaseException:
        # Do not leave a truncated audio file behind when the stream or a
        # write fails midway; the original error is re-raised afterwards.
        try:
            os.remove(save_file_path)
        except OSError:
            # The file was never created or is already gone; nothing to undo.
            pass
        raise

    print(f"{save_file_path}: A new audio file was saved successfully!")

    # Return the path of the saved audio file
    return save_file_path
116
 
117
  iface = gr.Interface(
118
  fn=generate_response,
 
122
  ],
123
  outputs=[
124
  gr.Textbox(label="Response"),
125
+ gr.Audio(label="Voice Output", interactive=False, autoplay=True),
126
  gr.State([]) # State output to update chat history
127
  ],
128
  title="AI Indian Girlfriend",