File size: 6,787 Bytes
b4b5bdf
e9698e9
e0e448c
e9698e9
 
 
 
3fcc7da
 
04ac399
 
 
 
 
 
 
 
b4b5bdf
51727c4
 
cb35787
 
0b9f9a6
 
e9199c3
 
 
 
 
 
 
0b9f9a6
 
 
e9199c3
 
0b9f9a6
e9199c3
0b9f9a6
 
a5371c1
e9199c3
0b9f9a6
e9698e9
e9199c3
 
 
 
 
 
 
 
1203b67
a61504d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c27275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9698e9
 
 
 
 
9a82f74
e9698e9
 
 
 
69a190d
 
71cffeb
2785052
0b9f9a6
 
 
 
 
 
e0e448c
e9698e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fcc7da
 
e9199c3
3fcc7da
 
 
 
 
 
e0e448c
e9698e9
 
3fcc7da
 
 
 
0b9f9a6
 
 
 
 
 
 
e9698e9
 
 
 
 
 
 
 
 
04ac399
 
 
 
 
 
 
 
 
e9698e9
 
91bd99a
fe6af19
e9698e9
a61504d
 
 
3fcc7da
0b9f9a6
3fcc7da
0b9f9a6
3fcc7da
e0e448c
 
f55d652
e9698e9
 
 
 
0b9f9a6
e9698e9
 
b4b5bdf
e9698e9
4c27275
 
 
 
 
 
 
 
 
 
 
 
 
e9698e9
 
dd89d2a
fe6af19
e9698e9
 
a61504d
e9698e9
 
a61504d
 
e9698e9
a61504d
 
e9698e9
a61504d
e9698e9
4c27275
 
 
cb35787
e941702
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import logging
import os
from typing import Optional

import gradio as gr
import pandas as pd

from buster.completers import Completion

from gradio.themes.utils import (
    colors,
    fonts,
    get_matching_version,
    get_theme_assets,
    sizes,
)

import cfg
from cfg import setup_buster

# Value handed to demo.queue(concurrency_count=...) at the bottom of the file.
# Read from the CONCURRENCY_COUNT environment variable, falling back to 64.
CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))

# Human-readable source names offered in the "Select Sources" dropdown.
# Index-aligned with AVAILABLE_SOURCES: entry i here is the display name of
# entry i there, so the two lists must be kept in the same order.
AVAILABLE_SOURCES_UI = [
    "Gen AI 360: LLMs",
    "Gen AI 360: LangChain",
    "Towards AI Blog",
    "Activeloop Docs",
    "HF Transformers Docs",
    "Wikipedia",
    "OpenAI Docs",
    "LangChain Docs",
]

# Internal source identifiers, index-aligned with AVAILABLE_SOURCES_UI.
# These are the names passed to buster.process_input(..., sources=...).
AVAILABLE_SOURCES = [
    "llm_course",
    "langchain_course",
    "towards_ai",
    "activeloop",
    "hf_transformers",
    "wikipedia",
    "openai",
    "langchain_docs",
]

# Build the shared Buster instance from the project configuration in cfg.
buster = setup_buster(cfg.buster_cfg)

# Raise the httpx logger to WARNING: its lower-level logs are spammy and
# uninformative.
logging.getLogger("httpx").setLevel(logging.WARNING)

# Module logger used by the feedback/email logging helpers below.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


def log_likes(completion: Completion, like_data: gr.LikeData):
    """Store a like/dislike vote together with the completion it refers to.

    Args:
        completion: The completion being rated. It is serialized with
            ``to_json``, leaving out the embedding and similarity columns.
        like_data: Gradio like-event payload; its ``liked`` flag is stored
            under the ``"liked"`` key of the inserted document.

    Feedback logging is best-effort: a failed insert is logged with its
    traceback and never propagated to the UI.
    """
    collection = "liked_data-test"

    completion_json = completion.to_json(
        columns_to_ignore=["embedding", "similarity", "similarity_to_answer"]
    )
    completion_json["liked"] = like_data.liked
    logger.info(f"User reported {like_data.liked=}")

    try:
        cfg.mongo_db[collection].insert_one(completion_json)
    except Exception:
        # Deliberately broad so a database problem cannot break the chat, but
        # no longer a bare `except:` (which also traps KeyboardInterrupt and
        # SystemExit). logger.exception records the traceback.
        logger.exception("Something went wrong logging")
    else:
        logger.info("Stored feedback in collection %r", collection)


def log_emails(email: str) -> str:
    """Store an email address submitted through the updates box.

    Args:
        email: The text entered in the email textbox (Gradio passes the
            component's value, not the component itself).

    Returns:
        An empty string; the handler is wired with the email textbox as its
        output, so this clears the box after submission.

    Logging is best-effort: a failed insert is logged with its traceback and
    never propagated to the UI.
    """
    collection = "email_data-test"

    # NOTE(review): this writes the address to the application log - confirm
    # that is acceptable for your privacy requirements.
    logger.info(f"User reported {email=}")
    email_document = {"email": email}

    try:
        cfg.mongo_db[collection].insert_one(email_document)
    except Exception:
        # Deliberately broad so a database problem cannot break the UI, but
        # no longer a bare `except:` (which also traps KeyboardInterrupt and
        # SystemExit). logger.exception records the traceback.
        logger.exception("Something went wrong logging")
    else:
        logger.info("Stored email in collection %r", collection)

    return ""


def format_sources(matched_documents: pd.DataFrame) -> str:
    """Render the matched documents as a markdown list of sources.

    Documents are ranked by ``similarity_to_answer`` (shown as a percentage),
    de-duplicated by ``title`` keeping the most relevant entry, and their
    internal source names are replaced with the display names from
    ``AVAILABLE_SOURCES_UI``.

    Args:
        matched_documents: Frame with ``source``, ``title``, ``url`` and
            ``similarity_to_answer`` columns. It is not modified.

    Returns:
        The formatted markdown message, or ``""`` when there are no documents.
    """
    if len(matched_documents) == 0:
        return ""

    documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
    document_template: str = "[🔗 {document.source}: {document.title}]({document.url}), relevance: {document.similarity_to_answer:2.1f} %"

    # internal source name -> display name
    source_to_display = dict(zip(AVAILABLE_SOURCES, AVAILABLE_SOURCES_UI))

    # Every step returns a new frame, so the caller's DataFrame is left
    # untouched; scaling in place would multiply by 100 again on a repeat call.
    ranked = (
        matched_documents.assign(
            similarity_to_answer=lambda df: df["similarity_to_answer"] * 100
        )
        .sort_values("similarity_to_answer", ascending=False)
        .drop_duplicates("title", keep="first")
        .assign(source=lambda df: df["source"].replace(source_to_display))
    )

    documents = "\n".join(
        document_template.format(document=document)
        for _, document in ranked.iterrows()
    )
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return documents_answer_template.format(documents=documents, footnote=footnote)


def add_sources(history, completion):
    """Append the formatted sources as an extra bot message when relevant.

    When ``completion.answer_relevant`` is falsy the history is returned
    untouched. Otherwise a ``[None, sources]`` turn is appended in place.
    The same ``history`` object is returned in both cases.
    """
    if not completion.answer_relevant:
        return history

    sources_message = format_sources(completion.matched_documents)
    history.append([None, sources_message])
    return history


def user(user_input, history):
    """Show the user's question in the chat right away.

    Returns an empty string (used to clear the question box) and a new
    history list ending with ``[user_input, None]``; the bot's half of the
    turn is filled in later.
    """
    pending_turn = [user_input, None]
    return "", history + [pending_turn]


def get_empty_source_completion(user_input):
    """Build the completion returned when no source is selected.

    The completion carries the user's input, a fixed message asking for at
    least one source, an empty documents frame, and ``error=False``.
    """
    no_source_message = "You have to select at least one source from the dropdown menu."
    no_documents = pd.DataFrame()

    return Completion(
        user_inputs=user_input,
        answer_text=no_source_message,
        matched_documents=no_documents,
        error=False,
    )


def get_answer(history, sources: Optional[list[str]] = None):
    """Stream the answer to the latest question into the chat history.

    Args:
        history: Chat history as ``[user_message, bot_message]`` pairs. The
            last pair holds the pending question and is updated in place.
        sources: Display names of the selected sources (entries of
            ``AVAILABLE_SOURCES_UI``). ``None`` or an empty list produces a
            message asking the user to select a source.

    Yields:
        ``(history, completion)`` after each streamed token.

    Raises:
        KeyError: If a selected name is not in ``AVAILABLE_SOURCES_UI``.
    """
    user_input = history[-1][0]

    # `not sources` covers both the declared default (None) and an empty
    # selection; `len(None)` would raise TypeError.
    if not sources:
        completion = get_empty_source_completion(user_input)
    else:
        # Translate display names to the internal source identifiers.
        display_ui_to_source = dict(zip(AVAILABLE_SOURCES_UI, AVAILABLE_SOURCES))
        sources_renamed = [display_ui_to_source[disp] for disp in sources]
        completion = buster.process_input(user_input, sources=sources_renamed)

    history[-1][1] = ""

    for token in completion.answer_generator:
        history[-1][1] += token

        yield history, completion


# Theme used by the app: Gradio's Soft theme with blue hues and custom fonts.
# Built once and passed to gr.Blocks, instead of creating an unused default
# Soft theme next to an inline one.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="blue",
    font=fonts.GoogleFont("Source Sans Pro"),
    font_mono=fonts.GoogleFont("IBM Plex Mono"),
)

# UI layout. Components render in creation order, so statement order matters.
with gr.Blocks(theme=theme) as demo:
    with gr.Row():
        gr.Markdown(
            "<h3><center>Towards AI 🤖: A Question-Answering Bot for anything AI-related</center></h3>"
            "<h6><center><i>Powered by Activeloop and 4th Generation Intel® Xeon® Scalable Processors</i></center></h6>"
        )

    # NOTE(review): this state is not wired to any event below; `completion`
    # (created further down) is the state the handlers actually use.
    latest_completion = gr.State()

    source_selection = gr.Dropdown(
        choices=AVAILABLE_SOURCES_UI,
        label="Select Sources",
        value=AVAILABLE_SOURCES_UI,
        multiselect=True,
    )

    chatbot = gr.Chatbot(elem_id="chatbot", show_copy_button=True)

    with gr.Row():
        question = gr.Textbox(
            label="What's your question?",
            placeholder="Ask a question to our AI tutor here...",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary")

    with gr.Row():
        examples = gr.Examples(
            examples=cfg.example_questions,
            inputs=question,
        )
        with gr.Row():
            email = gr.Textbox(
                label="Want to receive updates about our AI tutor?",
                placeholder="Enter your email here...",
                lines=1,
                scale=3,
            )
            submit_email = gr.Button(value="Submit", variant="secondary", scale=0)

    gr.Markdown(
        "This application uses ChatGPT to search the docs for relevant information and answer questions."
        "\n\n### Built on top of the open-source [Buster 🤖](https://www.github.com/jerpint/buster) project. Huge thanks to them."
    )

    # Holds the latest completion so that add_sources and log_likes can use it.
    completion = gr.State()

    # Clicking "Send" and pressing Enter in the question box run the same
    # pipeline: echo the question, stream the answer, then append the sources.
    for trigger in (submit.click, question.submit):
        trigger(user, [question, chatbot], [question, chatbot], queue=False).then(
            get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, completion]
        ).then(add_sources, inputs=[chatbot, completion], outputs=[chatbot])

    chatbot.like(log_likes, completion)

    # log_emails returns "", which clears the email box after submission.
    submit_email.click(log_emails, email, email)
    email.submit(log_emails, email, email)

# NOTE(review): `concurrency_count` is a Gradio 3.x queue() argument; Gradio 4
# replaced it with `default_concurrency_limit` - confirm the pinned version.
demo.queue(concurrency_count=CONCURRENCY_COUNT)
demo.launch(debug=True, share=False)