File size: 1,570 Bytes
21d2367
 
 
 
 
9ca7f57
 
21d2367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ca7f57
 
 
 
 
 
 
 
 
 
 
 
 
 
21d2367
 
 
 
 
 
 
 
 
 
 
8b5e86c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import gradio as gr
import openai
from decouple import config
from gtts import gTTS
import os
import pydub
import io
import config 

# NOTE: `config` is the local module brought in by the final `import config`,
# which rebinds the name first imported via `from decouple import config`.
openai.api_key = config.API_KEYS["openai"]

# Conversation history sent to the chat model; it starts with the system
# prompt that sets the model's job or role.
messages = [
    dict(role="system", content="You are a helpful assistant."),
]

def _format_transcript(history):
    """Render every non-system message in *history* as ``role: content`` paragraphs."""
    return "".join(
        f"{entry['role']}: {entry['content']}\n\n"
        for entry in history
        if entry["role"] != "system"
    )


def _speak(text):
    """Synthesize *text* as English speech with gTTS and play it via pydub."""
    # Imported explicitly: a bare ``import pydub`` does not load the
    # ``pydub.playback`` submodule, so ``pydub.playback.play`` can fail with
    # AttributeError.
    from pydub.playback import play

    mp3_buffer = io.BytesIO()
    gTTS(text=text, lang="en").write_to_fp(mp3_buffer)

    # Rewind so pydub decodes from the start of the MP3 data just written.
    mp3_buffer.seek(0)
    play(pydub.AudioSegment.from_file(mp3_buffer, format="mp3"))


# Main method goes here
def decipher(audio):
    """Run one voice-chat turn and return the conversation so far.

    The recording is transcribed with OpenAI's ``whisper-1`` model, the text
    is appended to the module-level ``messages`` history, ``gpt-3.5-turbo``
    produces a reply from that history, and the reply is spoken aloud.

    Args:
        audio: Path to the recorded audio file. A falsy value (``None`` or an
            empty string) means there is nothing to transcribe; the history
            is returned unchanged and no API call is made.

    Returns:
        Every non-system message in the history, each formatted as
        ``"<role>: <content>"`` followed by a blank line.
    """
    # ``messages`` is only mutated in place (never rebound), so no ``global``
    # statement is required.
    if not audio:
        # e.g. the form was submitted without a recording; avoid crashing in open().
        return _format_transcript(messages)

    # Using openAI's speech to text model. The context manager guarantees the
    # file handle is closed even if the request raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    messages.append({"role": "user", "content": transcript["text"]})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )

    reply = response["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": reply})

    # Read the assistant's reply aloud before returning the text transcript.
    _speak(reply)

    return _format_transcript(messages)

# Gradio UI: microphone recordings are handed to `decipher` as a file path,
# and the string it returns is shown as text.
interface = gr.Interface(
    fn=decipher,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)
interface.launch()