import io
import os

import gradio as gr
import openai
import pydub
import pydub.playback  # the playback submodule is not loaded by `import pydub`
from decouple import config
from gtts import gTTS

# Both `decouple` and the local module bind the name `config`. The local
# module is imported last (same order as before), so `config.API_KEYS` below
# resolves to the local module, not to decouple's helper.
import config

openai.api_key = config.API_KEYS['openai']

# The model's job or role. This list is the running conversation history and
# is mutated in place by `decipher` on every call.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
]


def _format_transcript(history):
    """Render every non-system message in *history* as ``role: content``."""
    return "".join(
        message['role'] + ": " + message['content'] + "\n\n"
        for message in history
        if message['role'] != 'system'
    )


def _speak(text):
    """Synthesize *text* with gTTS and play it through pydub's playback."""
    # gTTS produces MP3 data; keep it in memory instead of a temp file.
    mp3_buffer = io.BytesIO()
    gTTS(text=text, lang='en').write_to_fp(mp3_buffer)

    # Rewind so pydub decodes from the start of the buffer.
    mp3_buffer.seek(0)
    speech = pydub.AudioSegment.from_file(mp3_buffer, format="mp3")

    # Returns only once `play` itself returns.
    pydub.playback.play(speech)


def decipher(audio):
    """Run one voice chat turn and return the conversation transcript.

    The recording is transcribed with OpenAI's ``whisper-1`` model, the text
    is appended to the module-level ``messages`` history, a reply is requested
    from ``gpt-3.5-turbo``, and the reply is spoken aloud.

    Args:
        audio: Path to the recorded audio file (the Gradio input uses
            ``type="filepath"``), or ``None`` when nothing was recorded.

    Returns:
        The conversation so far, excluding the system prompt, with one
        ``role: content`` entry per message separated by blank lines.
    """
    # No recording: nothing to transcribe, so leave the history untouched
    # rather than failing on `open(None)`.
    if audio is None:
        return _format_transcript(messages)

    # Using OpenAI's speech-to-text model. The context manager guarantees the
    # file handle is released even if the API call raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    messages.append({"role": "user", "content": transcript["text"]})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )

    system_message = response["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": system_message})

    # Convert the reply to audio and play it.
    _speak(system_message)

    return _format_transcript(messages)


# Using Gradio's audio Interface
interface = gr.Interface(
    fn=decipher,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)
interface.launch()