Spaces:

towardsai-tutors
/

buster

Running

App Files Community

jerpint committed on Oct 7, 2023

Commit

37f8a37

•

1 Parent(s): 6f8159c

fix the path to the hub dataset (#15)

Browse files

Files changed (1) hide show

cfg.py +9 -9

cfg.py CHANGED Viewed

@@ -2,13 +2,12 @@ import logging
 import os
 from buster.busterbot import Buster, BusterConfig
-from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
 from buster.formatters.documents import DocumentsFormatterJSON
 from buster.formatters.prompts import PromptFormatter
 from buster.retriever import DeepLakeRetriever, Retriever
 from buster.tokenizers import GPTTokenizer
 from buster.validators import QuestionAnswerValidator, Validator
-from huggingface_hub import hf_hub_download
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -25,6 +24,7 @@ DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
 DEEPLAKE_DATASET_PATH = os.getenv(
     "DEEPLAKE_DATASET_PATH", f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
 )
 example_questions = [
     "What is the LLama model?",
@@ -63,7 +63,7 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
         },
     },
     retriever_cfg={
-        "path": f"./{DEEPLAKE_DATASET}",
         "top_k": 3,
         "thresh": 0.7,
         "max_tokens": 2000,
@@ -92,21 +92,21 @@ A user will now submit a question. Respond 'true' if it is valid, respond 'false
         "max_tokens": 3500,
         "text_before_docs": (
             "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
-            "You are provided information found in the <DOCUMENTS> tag. "
-            "Only respond with infomration inside the <DOCUMENTS> tag. DO NOT use additional information, even if you know the answer. "
             "If the answer is in the documentation, summarize it in a helpful way to the user. "
             "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
-            "Here is the information you can use: "
         ),
         "text_after_docs": (
             "REMEMBER:\n"
             "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
-            "You are provided information found in the <DOCUMENTS> tag. "
             "Here are the rules you must follow:\n"
-            "* Only respond with infomration inside the <DOCUMENTS> tag. DO NOT providew additional information, even if you know the answer. "
             "* If the answer is in the documentation, summarize it in a helpful way to the user. "
             "* If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
-            "* Only summarize the information in the <DOCUMENTS> tag, do not respond otherwise. "
             "* Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
             "* Do not reference any links, urls or hyperlinks in your answers.\n"
             "* Make sure to format your answers in Markdown format, including code block and snippets.\n"

 import os
 from buster.busterbot import Buster, BusterConfig
+from buster.completers import ChatGPTCompleter, DocumentAnswerer
 from buster.formatters.documents import DocumentsFormatterJSON
 from buster.formatters.prompts import PromptFormatter
 from buster.retriever import DeepLakeRetriever, Retriever
 from buster.tokenizers import GPTTokenizer
 from buster.validators import QuestionAnswerValidator, Validator
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 DEEPLAKE_DATASET_PATH = os.getenv(
     "DEEPLAKE_DATASET_PATH", f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
 )
+logger.info(f"{DEEPLAKE_DATASET_PATH=}")
 example_questions = [
     "What is the LLama model?",
         },
     },
     retriever_cfg={
+        "path": f"{DEEPLAKE_DATASET_PATH}",
         "top_k": 3,
         "thresh": 0.7,
         "max_tokens": 2000,
         "max_tokens": 3500,
         "text_before_docs": (
             "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
+            "You are provided information found in the json documentation. "
+            "Only respond with infomration inside the json documentation. DO NOT use additional information, even if you know the answer. "
             "If the answer is in the documentation, summarize it in a helpful way to the user. "
             "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
+            "Here is the information you can use (json documentation): "
         ),
         "text_after_docs": (
             "REMEMBER:\n"
             "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
+            "You are provided information found in the . "
             "Here are the rules you must follow:\n"
+            "* Only respond with infomration inside the json documentation. DO NOT providew additional information, even if you know the answer. "
             "* If the answer is in the documentation, summarize it in a helpful way to the user. "
             "* If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
+            "* Only summarize the information in the json documentation, do not respond otherwise. "
             "* Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
             "* Do not reference any links, urls or hyperlinks in your answers.\n"
             "* Make sure to format your answers in Markdown format, including code block and snippets.\n"