jerpint committed on
Commit
7710388
1 Parent(s): 69a190d

update buster to pypi version (#8)

Browse files
Files changed (4) hide show
  1. cfg.py +3 -5
  2. embed_documents.py +1 -1
  3. gradio_app.py +2 -2
  4. requirements.txt +1 -1
cfg.py CHANGED
@@ -3,7 +3,7 @@ import os
3
 
4
  from buster.busterbot import Buster, BusterConfig
5
  from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
6
- from buster.formatters.documents import DocumentsFormatter
7
  from buster.formatters.prompts import PromptFormatter
8
  from buster.retriever import DeepLakeRetriever, Retriever
9
  from buster.tokenizers import GPTTokenizer
@@ -92,7 +92,7 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
92
  },
93
  documents_formatter_cfg={
94
  "max_tokens": 3500,
95
- "formatter": "{content}",
96
  },
97
  prompt_formatter_cfg={
98
  "max_tokens": 3500,
@@ -103,10 +103,8 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
103
  "If the answer is in the documentation, summarize it in a helpful way to the user. "
104
  "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
105
  "Here is the information you can use: "
106
- "<DOCUMENTS> "
107
  ),
108
  "text_after_docs": (
109
- "<\DOCUMENTS>\n"
110
  "REMEMBER:\n"
111
  "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
112
  "You are provided information found in the <DOCUMENTS> tag. "
@@ -134,7 +132,7 @@ def setup_buster(buster_cfg):
134
  tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
135
  document_answerer: DocumentAnswerer = DocumentAnswerer(
136
  completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
137
- documents_formatter=DocumentsFormatter(
138
  tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
139
  ),
140
  prompt_formatter=PromptFormatter(
 
3
 
4
  from buster.busterbot import Buster, BusterConfig
5
  from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
6
+ from buster.formatters.documents import DocumentsFormatterJSON
7
  from buster.formatters.prompts import PromptFormatter
8
  from buster.retriever import DeepLakeRetriever, Retriever
9
  from buster.tokenizers import GPTTokenizer
 
92
  },
93
  documents_formatter_cfg={
94
  "max_tokens": 3500,
95
+ "columns": ["content", "source", "title"],
96
  },
97
  prompt_formatter_cfg={
98
  "max_tokens": 3500,
 
103
  "If the answer is in the documentation, summarize it in a helpful way to the user. "
104
  "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
105
  "Here is the information you can use: "
 
106
  ),
107
  "text_after_docs": (
 
108
  "REMEMBER:\n"
109
  "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
110
  "You are provided information found in the <DOCUMENTS> tag. "
 
132
  tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
133
  document_answerer: DocumentAnswerer = DocumentAnswerer(
134
  completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
135
+ documents_formatter=DocumentsFormatterJSON(
136
  tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
137
  ),
138
  prompt_formatter=PromptFormatter(
embed_documents.py CHANGED
@@ -3,7 +3,7 @@ from buster.documents_manager import DeepLakeDocumentsManager
3
 
4
  if __name__ == "__main__":
5
  vector_store_path = "deeplake_store"
6
- chunk_file = "langchain_course.csv"
7
  overwrite = True
8
 
9
  df = pd.read_csv(chunk_file)
 
3
 
4
  if __name__ == "__main__":
5
  vector_store_path = "deeplake_store"
6
+ chunk_file = "data/wiki_and_tai.csv"
7
  overwrite = True
8
 
9
  df = pd.read_csv(chunk_file)
gradio_app.py CHANGED
@@ -40,7 +40,7 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
40
  "similarity_to_answer", ascending=False
41
  ).drop_duplicates("title", keep="first")
42
 
43
- documents = "\n".join(
44
  [
45
  document_template.format(document=document)
46
  for _, document in matched_documents.iterrows()
@@ -115,4 +115,4 @@ with block:
115
 
116
 
117
  block.queue(concurrency_count=16)
118
- block.launch(debug=True, share=False, auth=check_auth)
 
40
  "similarity_to_answer", ascending=False
41
  ).drop_duplicates("title", keep="first")
42
 
43
+ documents = "\n\n".join(
44
  [
45
  document_template.format(document=document)
46
  for _, document in matched_documents.iterrows()
 
115
 
116
 
117
  block.queue(concurrency_count=16)
118
+ block.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- git+https://github.com/jerpint/buster@main
2
  gradio
3
  deeplake
 
1
+ buster-doctalk==1.0.19
2
  gradio
3
  deeplake