"""Buster chatbot configuration.

At import time this module downloads ``deeplake_store.zip`` from the Hugging
Face dataset repo named by the ``HF_DATASET`` environment variable, extracts
it into ``deeplake_store``, and defines ``buster_cfg``. ``setup_buster`` turns
a config into a ``Buster`` instance.
"""

import logging
import os

from buster.busterbot import Buster, BusterConfig
from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
from buster.formatters.documents import DocumentsFormatter
from buster.formatters.prompts import PromptFormatter
from buster.retriever import DeepLakeRetriever, Retriever
from buster.tokenizers import GPTTokenizer
from buster.validators import QuestionAnswerValidator, Validator
from huggingface_hub import hf_hub_download

from utils import extract_zip

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# For authentication
USERNAME = os.getenv("BUSTER_USERNAME")
PASSWORD = os.getenv("BUSTER_PASSWORD")

HUB_TOKEN = os.getenv("HUB_TOKEN")
REPO_ID = os.getenv("HF_DATASET")
HUB_DB_FILE = "deeplake_store.zip"

# Fail early with an actionable message instead of handing `None` to the hub client.
if not REPO_ID:
    raise ValueError(
        "The HF_DATASET environment variable must be set to the Hugging Face dataset repo id."
    )

logger.info("Downloading %s from hub...", HUB_DB_FILE)
hf_hub_download(
    repo_id=REPO_ID,
    repo_type="dataset",
    filename=HUB_DB_FILE,
    token=HUB_TOKEN,
    local_dir=".",
)

# The retriever config below reads the store from "./deeplake_store".
extract_zip(zip_file_path=HUB_DB_FILE, output_path="deeplake_store")

example_questions = [
    "What's the best way to get a job in AI?",
    "What is prompt engineering?",
    "What is generative AI?",
]

buster_cfg = BusterConfig(
    validator_cfg={
        "unknown_response_templates": [
            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        ],
        "unknown_threshold": 0.85,
        "embedding_model": "text-embedding-ada-002",
        "use_reranking": True,
        "invalid_question_response": "This question does not seem relevant to my current knowledge.",
        "check_question_prompt": (
            "You are an chatbot answering questions on towardsAI, an artificial intelligence blogs. Users will be asking questions about the blog. Your job is to determine wether or not a question is a valid question to ask, and should be answered. More general questions are not considered valid, even if you might know the response. \n"
            "A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid. For example: Q: How can I setup my own chatbot? true Q: What is the meaning of life? false A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid."
        ),
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
            "stream": False,
            "temperature": 0,
        },
    },
    retriever_cfg={
        "path": "./deeplake_store",
        "top_k": 3,
        "thresh": 0.7,
        "max_tokens": 2000,
        "embedding_model": "text-embedding-ada-002",
    },
    documents_answerer_cfg={
        "no_documents_message": "No blog posts are available for this question.",
    },
    completion_cfg={
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
            "stream": True,
            "temperature": 0,
        },
    },
    tokenizer_cfg={
        "model_name": "gpt-3.5-turbo",
    },
    documents_formatter_cfg={
        "max_tokens": 3500,
        "formatter": "{content}",
    },
    prompt_formatter_cfg={
        "max_tokens": 3500,
        # The retrieved documents are placed between `text_before_docs` and
        # `text_after_docs`, so the former opens the <DOCUMENTS> tag that the
        # latter closes.
        "text_before_docs": (
            "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
            "You are provided information found in the <DOCUMENTS> tag. "
            "Only respond with infomration inside the <DOCUMENTS> tag. DO NOT use additional information, even if you know the answer. "
            "If the answer is in the documentation, summarize it in a helpful way to the user. "
            "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
            "Here is the information you can use: "
            "<DOCUMENTS> "
        ),
        "text_after_docs": (
            # The backslash is escaped so the emitted text stays `<\DOCUMENTS>`
            # without relying on an invalid escape sequence.
            # NOTE(review): this was probably meant to be `</DOCUMENTS>` - confirm
            # before changing the prompt text.
            "<\\DOCUMENTS>\n"
            "REMEMBER:\n"
            "You are a chatbot assistant answering users' questions about towardsAI content, a blog about applied artificial intelligence (AI)."
            "You are provided information found in the <DOCUMENTS> tag. "
            "Here are the rules you must follow:\n"
            "* Only respond with infomration inside the <DOCUMENTS> tag. DO NOT providew additional information, even if you know the answer. "
            "* If the answer is in the documentation, summarize it in a helpful way to the user. "
            "* If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
            "* Only summarize the information in the <DOCUMENTS> tag, do not respond otherwise. "
            "* Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
            "* Do not reference any links, urls or hyperlinks in your answers.\n"
            "* Make sure to format your answers in Markdown format, including code block and snippets.\n"
            "* If you do not know the answer to a question, or if it is completely irrelevant to the library usage, simply reply with:\n"
            "'I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the topics I'm trained on. Is there anything else I can assist you with?'"
            "For example:\n"
            "What is the meaning of life for a qa bot?\n"
            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the topics I'm trained on. Is there anything else I can assist you with?"
            "Now answer the following question:\n"
        ),
    },
)


# Initialize buster with the config defined above (adapt to your needs).
def setup_buster(buster_cfg: BusterConfig) -> Buster:
    """Build a ``Buster`` instance from the sections of ``buster_cfg``.

    Args:
        buster_cfg: Configuration whose ``retriever_cfg``, ``tokenizer_cfg``,
            ``completion_cfg``, ``documents_formatter_cfg``,
            ``prompt_formatter_cfg``, ``documents_answerer_cfg`` and
            ``validator_cfg`` mappings are unpacked as keyword arguments into
            the matching component constructors.

    Returns:
        A ``Buster`` wired with a ``DeepLakeRetriever``, a ``DocumentAnswerer``
        and a ``QuestionAnswerValidator``.
    """
    retriever: Retriever = DeepLakeRetriever(**buster_cfg.retriever_cfg)

    # One tokenizer instance is shared by the documents and prompt formatters.
    tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)

    document_answerer: DocumentAnswerer = DocumentAnswerer(
        completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
        documents_formatter=DocumentsFormatter(
            tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
        ),
        prompt_formatter=PromptFormatter(
            tokenizer=tokenizer, **buster_cfg.prompt_formatter_cfg
        ),
        **buster_cfg.documents_answerer_cfg,
    )

    validator: Validator = QuestionAnswerValidator(**buster_cfg.validator_cfg)

    buster: Buster = Buster(
        retriever=retriever,
        document_answerer=document_answerer,
        validator=validator,
    )
    return buster