import logging
import os
from typing import Optional

import gradio as gr
import pandas as pd

from buster.completers import Completion

import cfg
from cfg import setup_buster

buster = setup_buster(cfg.buster_cfg)

# Suppress httpx logs; they are spammy and uninformative.
logging.getLogger("httpx").setLevel(logging.WARNING)

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))

# Display names shown in the UI dropdown; kept in the same order as
# AVAILABLE_SOURCES below so the two lists can be zipped into lookup dicts.
AVAILABLE_SOURCES_UI = [
    "Towards AI",
    "HuggingFace",
    "Wikipedia",
    "Gen AI 360: LangChain",
    "Gen AI 360: LLMs",
]

# Internal source names passed to Buster, in the same order as AVAILABLE_SOURCES_UI.
AVAILABLE_SOURCES = [
    "towards_ai",
    "hf_transformers",
    "wikipedia",
    "langchain_course",
    "llm_course",
]


def log_likes(completion: Completion, like_data: gr.LikeData):
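    """Log a user's like/dislike feedback on a completion to MongoDB."""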
    collection = "liked_data-test"

    # Serialize the completion to a JSON-parsable dict, dropping embedding columns.

    completion_json = completion.to_json(
        columns_to_ignore=["embedding", "similarity", "similarity_to_answer"]
    )
    completion_json["liked"] = like_data.liked
    logger.info(f"User reported {like_data.liked=}")

    try:
        cfg.mongo_db[collection].insert_one(completion_json)
        logger.info("Logged completion to MongoDB.")
    except Exception:
        logger.exception("Something went wrong logging the completion.")


def format_sources(matched_documents: pd.DataFrame) -> str:
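    """Format matched documents into a markdown block listing the sources used."""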
    if len(matched_documents) == 0:
        return ""

    documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
    document_template: str = "[🔗 {document.source}: {document.title}]({document.url}), highest relevance: {document.similarity_to_answer:2.1f} % | # total chunks matched: {document.repetition:d}"

    # Convert similarity scores to percentages for display.
    matched_documents.similarity_to_answer = (
        matched_documents.similarity_to_answer * 100
    )

    # Count how many chunks were matched for each document title.
    matched_documents["repetition"] = matched_documents.groupby("title")[
        "title"
    ].transform("size")

    # Drop duplicate titles, keeping the highest-ranking chunk for each.
    matched_documents = matched_documents.sort_values(
        "similarity_to_answer", ascending=False
    ).drop_duplicates("title", keep="first")

    # Map internal source names back to their display names.
    display_source_to_ui = {
        src: ui for src, ui in zip(AVAILABLE_SOURCES, AVAILABLE_SOURCES_UI)
    }
    matched_documents["source"] = matched_documents["source"].replace(
        display_source_to_ui
    )

    documents = "\n".join(
        [
            document_template.format(document=document)
            for _, document in matched_documents.iterrows()
        ]
    )
    footnote: str = "I'm a bot 🤖 and not always perfect."

    return documents_answer_template.format(documents=documents, footnote=footnote)


def add_sources(history, completion):
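    """Append the formatted sources to the chat history if the answer was relevant."""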
    if completion.answer_relevant:
        formatted_sources = format_sources(completion.matched_documents)
        history.append([None, formatted_sources])

    return history


def user(user_input, history):
    """Adds user's question immediately to the chat."""
    return "", history + [[user_input, None]]


def get_empty_source_completion(user_input):
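    """Return a placeholder completion asking the user to select at least one source."""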
    return Completion(
        user_input=user_input,
        answer_text="You have to select at least one source from the dropdown menu.",
        matched_documents=pd.DataFrame(),
        error=False,
    )


def get_answer(history, sources: Optional[list[str]] = None):
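    """Stream the answer to the latest question, restricted to the selected sources."""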
    user_input = history[-1][0]

    # `sources` may be None (the default) or an empty list; both mean no source selected.
    if not sources:
        completion = get_empty_source_completion(user_input)

    else:
        # Map display names to the internal source names Buster expects.
        display_ui_to_source = {
            ui: src for ui, src in zip(AVAILABLE_SOURCES_UI, AVAILABLE_SOURCES)
        }

        sources_renamed = [display_ui_to_source[disp] for disp in sources]
        completion = buster.process_input(user_input, sources=sources_renamed)

    history[-1][1] = ""

    for token in completion.answer_generator:
        history[-1][1] += token

        yield history, completion


CSS = """
.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto;}
"""
theme = gr.themes.Base()
demo = gr.Blocks(css=CSS, theme=theme)
with demo:
    with gr.Row():
        gr.Markdown(
            "<h3><center>Toward's AI x Buster πŸ€–: A Question-Answering Bot for anything AI-related</center></h3>"
        )

    latest_completion = gr.State()

    source_selection = gr.Dropdown(
        choices=AVAILABLE_SOURCES_UI,
        label="Select Sources",
        value=AVAILABLE_SOURCES_UI,
        multiselect=True,
    )

    chatbot = gr.Chatbot(elem_id="chatbot")

    with gr.Row():
        question = gr.Textbox(
            label="What's your question?",
            placeholder="Ask a question to our AI tutor here...",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary")

    examples = gr.Examples(
        examples=cfg.example_questions,
        inputs=question,
    )

    gr.Markdown(
        "This application uses ChatGPT to search the docs for relevant info and answer questions. "
        "\n\n### Powered by [Buster πŸ€–](www.github.com/jerpint/buster)"
    )

    completion = gr.State()

    # Wire up the Send button and pressing Enter: add the question, stream the answer, then append sources.
    submit.click(user, [question, chatbot], [question, chatbot], queue=False).then(
        get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, completion]
    ).then(add_sources, inputs=[chatbot, completion], outputs=[chatbot])
    question.submit(user, [question, chatbot], [question, chatbot], queue=False).then(
        get_answer, inputs=[chatbot, source_selection], outputs=[chatbot, completion]
    ).then(add_sources, inputs=[chatbot, completion], outputs=[chatbot])

    chatbot.like(log_likes, completion)

demo.queue(concurrency_count=CONCURRENCY_COUNT)
demo.launch(debug=True, share=False)