jerpint committed on
Commit
37f8a37
1 Parent(s): 6f8159c

fix the path to the hub dataset (#15)

Browse files
Files changed (1) hide show
  1. cfg.py +9 -9
cfg.py CHANGED
@@ -2,13 +2,12 @@ import logging
2
  import os
3
 
4
  from buster.busterbot import Buster, BusterConfig
5
- from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
6
  from buster.formatters.documents import DocumentsFormatterJSON
7
  from buster.formatters.prompts import PromptFormatter
8
  from buster.retriever import DeepLakeRetriever, Retriever
9
  from buster.tokenizers import GPTTokenizer
10
  from buster.validators import QuestionAnswerValidator, Validator
11
- from huggingface_hub import hf_hub_download
12
 
13
  logger = logging.getLogger(__name__)
14
  logging.basicConfig(level=logging.INFO)
@@ -25,6 +24,7 @@ DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
25
  DEEPLAKE_DATASET_PATH = os.getenv(
26
  "DEEPLAKE_DATASET_PATH", f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
27
  )
 
28
 
29
  example_questions = [
30
  "What is the LLama model?",
@@ -63,7 +63,7 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
63
  },
64
  },
65
  retriever_cfg={
66
- "path": f"./{DEEPLAKE_DATASET}",
67
  "top_k": 3,
68
  "thresh": 0.7,
69
  "max_tokens": 2000,
@@ -92,21 +92,21 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
92
  "max_tokens": 3500,
93
  "text_before_docs": (
94
  "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
95
- "You are provided information found in the <DOCUMENTS> tag. "
96
- "Only respond with infomration inside the <DOCUMENTS> tag. DO NOT use additional information, even if you know the answer. "
97
  "If the answer is in the documentation, summarize it in a helpful way to the user. "
98
  "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
99
- "Here is the information you can use: "
100
  ),
101
  "text_after_docs": (
102
  "REMEMBER:\n"
103
  "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
104
- "You are provided information found in the <DOCUMENTS> tag. "
105
  "Here are the rules you must follow:\n"
106
- "* Only respond with infomration inside the <DOCUMENTS> tag. DO NOT providew additional information, even if you know the answer. "
107
  "* If the answer is in the documentation, summarize it in a helpful way to the user. "
108
  "* If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
109
- "* Only summarize the information in the <DOCUMENTS> tag, do not respond otherwise. "
110
  "* Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
111
  "* Do not reference any links, urls or hyperlinks in your answers.\n"
112
  "* Make sure to format your answers in Markdown format, including code block and snippets.\n"
 
2
  import os
3
 
4
  from buster.busterbot import Buster, BusterConfig
5
+ from buster.completers import ChatGPTCompleter, DocumentAnswerer
6
  from buster.formatters.documents import DocumentsFormatterJSON
7
  from buster.formatters.prompts import PromptFormatter
8
  from buster.retriever import DeepLakeRetriever, Retriever
9
  from buster.tokenizers import GPTTokenizer
10
  from buster.validators import QuestionAnswerValidator, Validator
 
11
 
12
  logger = logging.getLogger(__name__)
13
  logging.basicConfig(level=logging.INFO)
 
24
  DEEPLAKE_DATASET_PATH = os.getenv(
25
  "DEEPLAKE_DATASET_PATH", f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
26
  )
27
+ logger.info(f"{DEEPLAKE_DATASET_PATH=}")
28
 
29
  example_questions = [
30
  "What is the LLama model?",
 
63
  },
64
  },
65
  retriever_cfg={
66
+ "path": f"{DEEPLAKE_DATASET_PATH}",
67
  "top_k": 3,
68
  "thresh": 0.7,
69
  "max_tokens": 2000,
 
92
  "max_tokens": 3500,
93
  "text_before_docs": (
94
  "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
95
+ "You are provided information found in the json documentation. "
96
+ "Only respond with information inside the json documentation. DO NOT use additional information, even if you know the answer. "
97
  "If the answer is in the documentation, summarize it in a helpful way to the user. "
98
  "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
99
+ "Here is the information you can use (json documentation): "
100
  ),
101
  "text_after_docs": (
102
  "REMEMBER:\n"
103
  "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
104
+ "You are provided information found in the json documentation. "
105
  "Here are the rules you must follow:\n"
106
+ "* Only respond with information inside the json documentation. DO NOT provide additional information, even if you know the answer. "
107
  "* If the answer is in the documentation, summarize it in a helpful way to the user. "
108
  "* If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
109
+ "* Only summarize the information in the json documentation, do not respond otherwise. "
110
  "* Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
111
  "* Do not reference any links, urls or hyperlinks in your answers.\n"
112
  "* Make sure to format your answers in Markdown format, including code block and snippets.\n"