File size: 4,075 Bytes
ba5296b
db05c27
 
 
ba5296b
db05c27
 
ba5296b
 
db05c27
 
ba5296b
db05c27
ba5296b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db05c27
 
 
 
 
ba5296b
 
 
db05c27
 
ba5296b
db05c27
ba5296b
db05c27
ba5296b
db05c27
 
ba5296b
db05c27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba5296b
 
 
 
 
db05c27
 
 
ba5296b
 
 
 
 
db05c27
ba5296b
 
 
 
 
 
 
 
db05c27
 
ba5296b
 
 
 
 
 
db05c27
 
ba5296b
 
ff6a7f5
ba5296b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from llama_index import Document, GPTListIndex, GPTSimpleVectorIndex
import gradio as gr
import openai
import os
from pytube import YouTube


def download_yt_video(ytlink):
    """Download the audio track of a YouTube video and save it as ``.mp3``.

    The first audio-only stream reported by pytube is downloaded into the
    current working directory, then the downloaded file is renamed so that
    it carries an ``.mp3`` extension (the file is only renamed, its content
    is not converted).

    Args:
        ytlink: URL of the YouTube video.

    Returns:
        Path of the renamed ``.mp3`` file.

    Raises:
        ValueError: If the video exposes no audio-only stream.
        Exception: Errors raised by pytube or by the filesystem calls are
            propagated unchanged so the caller can report the real cause
            instead of receiving an exception object in place of a path.
    """
    yt = YouTube(ytlink)

    # ``first()`` yields None when the filter matches no stream.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio stream available for {ytlink!r}.")

    out_file = audio_stream.download(output_path="./")

    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'

    # os.replace overwrites a file left over from an earlier run, whereas
    # os.rename fails on Windows when the destination already exists.
    os.replace(out_file, new_file)

    return new_file


def get_transcript(filename):
    """Transcribe an audio file through the Gladia API and save the text.

    The file is uploaded to Gladia's audio-transcription endpoint with the
    language fixed to English. The ``transcription`` field of every entry in
    the response's ``prediction`` list is joined into a single string with
    whitespace runs collapsed to one space. The transcript is also written
    next to the audio file, using the same base name and a ``.txt``
    extension.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The whitespace-normalised transcript.

    Raises:
        OSError: If the audio file cannot be read or the transcript cannot
            be written.
        requests.HTTPError: If the API answers with an error status.
        KeyError: If the response lacks the expected fields.
    """
    import requests
    import json

    headers = {
        'accept': 'application/json',
        # NOTE(review): this API key is committed to the source. Set the
        # GLADIA_API_KEY environment variable to override it; the literal is
        # kept only as a backward-compatible fallback and should be rotated.
        'x-gladia-key': os.environ.get(
            'GLADIA_API_KEY', '70ad5f6e-31e6-4acf-8a15-89c166c4cc9f'),
        # Content-Type is deliberately not set: requests generates the
        # multipart boundary itself when files= is passed.
    }

    # Keep the upload inside the ``with`` so the handle is always closed.
    with open(filename, 'rb') as audio:
        files = {
            'audio': (filename, audio, 'audio/mpeg'),
            'language': (None, 'english'),
            'language_behaviour': (None, 'manual'),
            'output_format': (None, 'json'),
        }

        response = requests.post(
            'https://api.gladia.io/audio/text/audio-transcription/',
            headers=headers, files=files)

    # Surface HTTP failures directly rather than as a confusing KeyError on
    # 'prediction' further down.
    response.raise_for_status()

    data = json.loads(response.text)

    segments = (segment['transcription'] for segment in data['prediction'])
    # Join the segments, then collapse any run of whitespace to one space.
    result = ' '.join(' '.join(segments).split())

    transcript_path = os.path.splitext(filename)[0] + '.txt'
    # Explicit UTF-8 so the write does not depend on the platform default.
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(result)

    return result


def createindex(url, openaikey):
    """Download a video's audio, transcribe it and build the query index.

    The transcript is wrapped in a single ``Document`` and indexed with
    ``GPTListIndex`` (``chunk_size_limit=2500``); the index is saved to
    ``index.json`` in the current working directory. The OpenAI key is
    stored in the ``OPENAI_API_KEY`` environment variable before the index
    is created.

    Args:
        url: YouTube video link.
        openaikey: OpenAI API key.

    Returns:
        A status string: a prompt when an input is missing, a success
        message, or a description of the failure. Exceptions are never
        raised to the caller and the result is always a ``str``.
    """
    # Basic checks, matching the ones performed before querying.
    if not url:
        return "Please enter the YouTube video link."

    if not openaikey:
        return "Please enter openaikey."

    try:
        filename = download_yt_video(url)

        # Tolerate a downloader that reports failure by returning the
        # exception instead of raising it.
        if isinstance(filename, Exception):
            raise filename

        try:
            transcript = get_transcript(filename)
        finally:
            # Remove the downloaded audio even when transcription fails.
            os.remove(filename)

        # Store openai key in environment
        os.environ['OPENAI_API_KEY'] = openaikey

        # Create index
        index = GPTListIndex([Document(transcript)], chunk_size_limit=2500)

        index_filename = "index.json"
        index.save_to_disk(index_filename)

        return "Video processed. Now you can start querying."
    except Exception as e:
        # The result is shown in a text box, so return text, not the object.
        return f"Failed to process video: {e}"


def videoques(query, openaikey):
    """Answer a question using the index stored in ``index.json``.

    The OpenAI key is stored in the ``OPENAI_API_KEY`` environment variable,
    the index is loaded from ``index.json`` in the current working directory
    and queried in ``embedding`` mode with ``similarity_top_k=4``.

    Args:
        query: The question to ask.
        openaikey: OpenAI API key.

    Returns:
        The answer as text, or a message explaining what is missing or what
        went wrong. Exceptions from loading or querying the index are
        reported in the returned string rather than raised.
    """
    # Basic Checks
    if not query:
        return "Please enter your query."

    if not openaikey:
        return "Please enter openaikey."

    index_name = "index.json"

    # The index file only exists once a video has been processed.
    if not os.path.exists(index_name):
        return "Please process a video before querying."

    # Store openai key in environment
    os.environ['OPENAI_API_KEY'] = openaikey

    try:
        index = GPTListIndex.load_from_disk(index_name)

        # Query based on index
        response = index.query(query, mode="embedding", similarity_top_k=4)
    except Exception as e:
        return f"Failed to answer query: {e}"

    # The result is shown in a text box, so hand back plain text.
    return str(response)


def cleartext(query, output):
    """
    Reset a pair of text fields.

    Both arguments are ignored; the returned list holds one empty string
    for each of the two fields.
    """
    blank = ""
    return [blank, blank]


# Two-column UI: the left column processes a video into an index, the right
# column asks questions against that index.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    <h1><center><b>VideoQues</b></center></h1>
    
    """)
    gr.Markdown(
        """
    VideoQues answers your queries on any youtube video.
    
    """)
    with gr.Row():
        with gr.Column():
            url = gr.Textbox(lines=1, label="Enter Youtube Video link.")
            # Mask the key so the secret is not echoed on screen.
            openaikey = gr.Textbox(
                lines=1, label="Enter Your OpenAI key.", type="password")
            submit1_button = gr.Button("Submit")
            ans1_output = gr.Textbox(label="Status.")
            clear1_button = gr.Button("Clear")
        with gr.Column():
            query = gr.Textbox(lines=2, label="Enter Your Query.")
            submit2_button = gr.Button("Submit")
            ans2_output = gr.Textbox(label="Answer.")
            clear2_button = gr.Button("Clear")

    # Submit button for processing the video and reporting the status.
    submit1_button.click(createindex, inputs=[
                         url, openaikey], outputs=[ans1_output])

    # Submit button for submitting query.
    submit2_button.click(videoques, inputs=[
                         query, openaikey], outputs=[ans2_output])

    # Clear button for clearing the video link and status.
    clear1_button.click(cleartext, inputs=[
                        url, ans1_output], outputs=[url, ans1_output])

    # Clear button for clearing query and answer.
    clear2_button.click(cleartext, inputs=[query, ans2_output], outputs=[
                        query, ans2_output])

# Start the app as soon as the module is executed.
demo.launch(debug=True)