Spaces:
Running
Running
update buster to pypi version (#8)
Browse files
- cfg.py +3 -5
- embed_documents.py +1 -1
- gradio_app.py +2 -2
- requirements.txt +1 -1
cfg.py
CHANGED
@@ -3,7 +3,7 @@ import os
|
|
3 |
|
4 |
from buster.busterbot import Buster, BusterConfig
|
5 |
from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
|
6 |
-
from buster.formatters.documents import
|
7 |
from buster.formatters.prompts import PromptFormatter
|
8 |
from buster.retriever import DeepLakeRetriever, Retriever
|
9 |
from buster.tokenizers import GPTTokenizer
|
@@ -92,7 +92,7 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
|
|
92 |
},
|
93 |
documents_formatter_cfg={
|
94 |
"max_tokens": 3500,
|
95 |
-
"
|
96 |
},
|
97 |
prompt_formatter_cfg={
|
98 |
"max_tokens": 3500,
|
@@ -103,10 +103,8 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
|
|
103 |
"If the answer is in the documentation, summarize it in a helpful way to the user. "
|
104 |
"If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
|
105 |
"Here is the information you can use: "
|
106 |
-
"<DOCUMENTS> "
|
107 |
),
|
108 |
"text_after_docs": (
|
109 |
-
"<\DOCUMENTS>\n"
|
110 |
"REMEMBER:\n"
|
111 |
"You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
|
112 |
"You are provided information found in the <DOCUMENTS> tag. "
|
@@ -134,7 +132,7 @@ def setup_buster(buster_cfg):
|
|
134 |
tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
|
135 |
document_answerer: DocumentAnswerer = DocumentAnswerer(
|
136 |
completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
|
137 |
-
documents_formatter=
|
138 |
tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
|
139 |
),
|
140 |
prompt_formatter=PromptFormatter(
|
|
|
3 |
|
4 |
from buster.busterbot import Buster, BusterConfig
|
5 |
from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
|
6 |
+
from buster.formatters.documents import DocumentsFormatterJSON
|
7 |
from buster.formatters.prompts import PromptFormatter
|
8 |
from buster.retriever import DeepLakeRetriever, Retriever
|
9 |
from buster.tokenizers import GPTTokenizer
|
|
|
92 |
},
|
93 |
documents_formatter_cfg={
|
94 |
"max_tokens": 3500,
|
95 |
+
"columns": ["content", "source", "title"],
|
96 |
},
|
97 |
prompt_formatter_cfg={
|
98 |
"max_tokens": 3500,
|
|
|
103 |
"If the answer is in the documentation, summarize it in a helpful way to the user. "
|
104 |
"If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
|
105 |
"Here is the information you can use: "
|
|
|
106 |
),
|
107 |
"text_after_docs": (
|
|
|
108 |
"REMEMBER:\n"
|
109 |
"You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
|
110 |
"You are provided information found in the <DOCUMENTS> tag. "
|
|
|
132 |
tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
|
133 |
document_answerer: DocumentAnswerer = DocumentAnswerer(
|
134 |
completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
|
135 |
+
documents_formatter=DocumentsFormatterJSON(
|
136 |
tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
|
137 |
),
|
138 |
prompt_formatter=PromptFormatter(
|
embed_documents.py
CHANGED
@@ -3,7 +3,7 @@ from buster.documents_manager import DeepLakeDocumentsManager
|
|
3 |
|
4 |
if __name__ == "__main__":
|
5 |
vector_store_path = "deeplake_store"
|
6 |
-
chunk_file = "
|
7 |
overwrite = True
|
8 |
|
9 |
df = pd.read_csv(chunk_file)
|
|
|
3 |
|
4 |
if __name__ == "__main__":
|
5 |
vector_store_path = "deeplake_store"
|
6 |
+
chunk_file = "data/wiki_and_tai.csv"
|
7 |
overwrite = True
|
8 |
|
9 |
df = pd.read_csv(chunk_file)
|
gradio_app.py
CHANGED
@@ -40,7 +40,7 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
|
|
40 |
"similarity_to_answer", ascending=False
|
41 |
).drop_duplicates("title", keep="first")
|
42 |
|
43 |
-
documents = "\n".join(
|
44 |
[
|
45 |
document_template.format(document=document)
|
46 |
for _, document in matched_documents.iterrows()
|
@@ -115,4 +115,4 @@ with block:
|
|
115 |
|
116 |
|
117 |
block.queue(concurrency_count=16)
|
118 |
-
block.launch(debug=True, share=False
|
|
|
40 |
"similarity_to_answer", ascending=False
|
41 |
).drop_duplicates("title", keep="first")
|
42 |
|
43 |
+
documents = "\n\n".join(
|
44 |
[
|
45 |
document_template.format(document=document)
|
46 |
for _, document in matched_documents.iterrows()
|
|
|
115 |
|
116 |
|
117 |
block.queue(concurrency_count=16)
|
118 |
+
block.launch(debug=True, share=False)
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
|
2 |
gradio
|
3 |
deeplake
|
|
|
1 |
+
buster-doctalk==1.0.19
|
2 |
gradio
|
3 |
deeplake
|