Spaces:

towardsai-tutors
/

buster

Running

App Files Files Community

buster / cfg.py

Omar Solano

new response to invalidated question (#21)

b46dbf7 unverified 11 months ago

raw

history blame

No virus

8.15 kB

	import logging
	import os

	from buster.busterbot import Buster, BusterConfig
	from buster.completers import ChatGPTCompleter, DocumentAnswerer
	from buster.formatters.documents import DocumentsFormatterJSON
	from buster.formatters.prompts import PromptFormatter
	from buster.retriever import DeepLakeRetriever, Retriever
	from buster.tokenizers import GPTTokenizer
	from buster.validators import QuestionAnswerValidator, Validator

	from utils import init_mongo_db

	# MongoDB handle, created at import time from the MONGODB_URI environment variable
	# (None is passed through unchanged when the variable is unset).
	MONGODB_URI = os.environ.get("MONGODB_URI")
	mongo_db = init_mongo_db(uri=MONGODB_URI, db_name="towardsai-buster")


	# Module logger; basicConfig requests INFO level on the root logger.
	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)

	# Activeloop credentials: a missing token is only reported with a warning here.
	ACTIVELOOP_TOKEN = os.environ.get("ACTIVELOOP_TOKEN")
	if ACTIVELOOP_TOKEN is None:
	    logger.warning("No activeloop token found, you will not be able to fetch data.")

	# Dataset coordinates, each overridable through the environment.
	DEEPLAKE_DATASET = os.environ.get("DEEPLAKE_DATASET", "dev_vector_store")
	DEEPLAKE_ORG = os.environ.get("DEEPLAKE_ORG", "towards_ai")

	# An explicit DEEPLAKE_DATASET_PATH (e.g. a local dataset) takes precedence;
	# otherwise the hub path is derived from the org and dataset names above.
	DEEPLAKE_DATASET_PATH = os.environ.get("DEEPLAKE_DATASET_PATH")
	if DEEPLAKE_DATASET_PATH is None:
	    DEEPLAKE_DATASET_PATH = f"hub://{DEEPLAKE_ORG}/{DEEPLAKE_DATASET}"
	logger.info(f"{DEEPLAKE_DATASET_PATH=}")

	# Sample questions exposed by this module (their consumer is outside this file).
	example_questions = [
	    "What is the LLama model?",
	    "What is a Large Language Model?",
	    "What is an embedding?",
	]


	# Central configuration for the Buster pipeline. Each keyword below is one
	# section that setup_buster() unpacks into the matching component:
	#   validator_cfg            -> QuestionAnswerValidator
	#   retriever_cfg            -> DeepLakeRetriever
	#   documents_answerer_cfg   -> DocumentAnswerer
	#   completion_cfg           -> ChatGPTCompleter
	#   tokenizer_cfg            -> GPTTokenizer
	#   documents_formatter_cfg  -> DocumentsFormatterJSON
	#   prompt_formatter_cfg     -> PromptFormatter
	#
	# The prompt texts are built from adjacent string literals, which Python joins
	# with NO separator. Every fragment therefore ends with an explicit space or
	# "\n" so that sentences and list items do not run into each other.
	buster_cfg = BusterConfig(
	    validator_cfg={
	        # NOTE(review): this template says "not relevant to the library or its
	        # usage", while the refusal sentence dictated in text_after_docs says
	        # "not relevant to the topics I'm trained on". If the validator compares
	        # answers against these templates, consider aligning the two - confirm.
	        "unknown_response_templates": [
	            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
	        ],
	        "unknown_threshold": 0.85,
	        "embedding_model": "text-embedding-ada-002",
	        "use_reranking": True,
	        # User-facing message; the missing "to" ("relevant to my AI knowledge") was added.
	        "invalid_question_response": "This question does not seem relevant to my AI knowledge. If the question is related to AI, please send us feedback! \n PS: I'm still learning, so I might not know the answer to your question, you can also try without acronyms in your question.",
	        # The prompt ends on an open "Q:" line; presumably the validator appends
	        # the user's question after it - verify against the validator.
	        "check_question_prompt": """You are a chatbot, answering questions about large language models and artificial intelligence.
	Your job is to determine whether user's question is valid or not. Users will not always submit a question either.
	Users will ask all sorts of questions, and some might be tangentially related to artificial intelligence.
	Users will learn to build LLM-powered apps, with LangChain & Deep Lake among other technologies.
	As long as a question is somewhat related to the topic of AI, respond 'true'. If a question is on a different subject or unrelated, respond 'false'.
	Make sure the question is a valid question.

	For example:

	Q: How can I setup my own chatbot?
	true

	Q: What is the meaning of life?
	false

	Q:
	""",
	        "completion_kwargs": {
	            "model": "gpt-3.5-turbo",
	            "stream": False,
	            "temperature": 0,
	        },
	    },
	    retriever_cfg={
	        # DEEPLAKE_DATASET_PATH is already a str, so no f-string wrapper is needed.
	        "path": DEEPLAKE_DATASET_PATH,
	        "top_k": 10,
	        "thresh": 0.55,
	        "max_tokens": 13000,
	        "embedding_model": "text-embedding-ada-002",
	        "exec_option": "compute_engine",
	        "use_tql": True,
	        "deep_memory": True,
	    },
	    documents_answerer_cfg={
	        "no_documents_message": "No blog posts are available for this question.",
	    },
	    completion_cfg={
	        "completion_kwargs": {
	            "model": "gpt-3.5-turbo-16k",
	            "stream": True,
	            "temperature": 0,
	        },
	    },
	    tokenizer_cfg={
	        "model_name": "gpt-3.5-turbo-16k",
	    },
	    documents_formatter_cfg={
	        "max_tokens": 13500,
	        "columns": ["content", "source", "title"],
	    },
	    prompt_formatter_cfg={
	        "max_tokens": 13500,
	        "text_before_docs": (
	            "You are a witty AI teacher, helpfully answering questions from students of an applied artificial intelligence course on Large Language Models (LLMs or llm). Topics covered include training models, fine tuning models, giving memory to LLMs, prompting, hallucinations and bias, vector databases, transformer architectures, embeddings, Langchain, making LLMs interact with tool use, AI agents, reinforcement learning with human feedback. Questions should be understood with this context. "
	            "You are provided information found in the json documentation. "
	            "Only respond with information inside the json documentation. DO NOT use additional information, even if you know the answer. "
	            "If the answer is in the documentation, answer the question (depending on the questions and the variety of relevant information in the json documentation, answer in 5 paragraphs). "
	            "If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge. "
	            "Here is the information you can use (json documentation): "
	        ),
	        "text_after_docs": (
	            "REMEMBER:\n"
	            "You are a witty AI teacher, helpfully answering questions from students of an applied artificial intelligence course on Large Language Models (LLMs or llm). Topics covered include training models, fine tuning models, giving memory to LLMs, prompting, hallucinations and bias, vector databases, transformer architectures, embeddings, Langchain, making LLMs interact with tool use, AI agents, reinforcement learning with human feedback. Questions should be understood with this context. "
	            "You are provided information found in the json documentation. "
	            "Here are the rules you must follow:\n"
	            # One rule per line: every bullet ends with "\n".
	            "* Only respond with information inside the json documentation. DO NOT provide additional information, even if you know the answer.\n"
	            "* If the answer is in the documentation, answer the question (depending on the questions and the variety of relevant information in the json documentation, answer in 5 paragraphs).\n"
	            "* If the documentation does not discuss the topic related to the question, kindly respond that you cannot answer the question because it is not part of your knowledge.\n"
	            "* Only use information summarized from the json documentation, do not respond otherwise.\n"
	            "* Do not refer to the json documentation directly, but use the instructions provided within it to answer questions.\n"
	            "* Do not reference any links, urls or hyperlinks in your answers.\n"
	            "* Make sure to format your answers in Markdown format, including code block and snippets.\n"
	            "* If you do not know the answer to a question, or if it is completely irrelevant to the AI courses, simply reply with:\n"
	            "'I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the topics I'm trained on. Is there anything else I can assist you with?'\n"
	            "For example:\n"
	            "What is the meaning of life for a qa bot?\n"
	            "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the topics I'm trained on. Is there anything else I can assist you with?\n"
	            "Now answer the following question:\n"
	        ),
	    },
	)


	def setup_buster(buster_cfg):
	    """Assemble a ``Buster`` instance from the sections of ``buster_cfg``.

	    Args:
	        buster_cfg: Configuration object exposing ``retriever_cfg``,
	            ``tokenizer_cfg``, ``completion_cfg``, ``documents_formatter_cfg``,
	            ``prompt_formatter_cfg``, ``documents_answerer_cfg`` and
	            ``validator_cfg``; each one is unpacked as keyword arguments into
	            the corresponding component.

	    Returns:
	        A ``Buster`` wired with a ``DeepLakeRetriever``, a ``DocumentAnswerer``
	        and a ``QuestionAnswerValidator``.
	    """
	    deeplake_retriever: Retriever = DeepLakeRetriever(**buster_cfg.retriever_cfg)

	    # A single tokenizer instance is shared by both formatters.
	    gpt_tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)

	    chat_completer = ChatGPTCompleter(**buster_cfg.completion_cfg)
	    docs_formatter = DocumentsFormatterJSON(
	        tokenizer=gpt_tokenizer,
	        **buster_cfg.documents_formatter_cfg,
	    )
	    prompt_builder = PromptFormatter(
	        tokenizer=gpt_tokenizer,
	        **buster_cfg.prompt_formatter_cfg,
	    )
	    answerer: DocumentAnswerer = DocumentAnswerer(
	        completer=chat_completer,
	        documents_formatter=docs_formatter,
	        prompt_formatter=prompt_builder,
	        **buster_cfg.documents_answerer_cfg,
	    )

	    qa_validator: Validator = QuestionAnswerValidator(**buster_cfg.validator_cfg)

	    return Buster(
	        retriever=deeplake_retriever,
	        document_answerer=answerer,
	        validator=qa_validator,
	    )