from dependencies import *


class ChatBot:
    """Retrieval-augmented chatbot for NoQs Digital.

    Construction runs the whole pipeline in order:

    1. ``start_loader``     - load and chunk the source documents.
    2. ``start_embeddings`` - create or connect to the Pinecone index.
    3. ``init_model``       - build the prompt | LLM | parser chain.

    Attributes:
        execute: When true, an existing Pinecone index is deleted and
            rebuilt from the freshly loaded documents.
        docs: Chunked documents produced by ``start_loader``.
        docsearch: Pinecone-backed vector store used for retrieval.
        rag_chain: Runnable chain built by ``init_model``.
    """

    # Web pages ingested in addition to the local text file.
    SOURCE_URLS = [
        'https://noqs.in/faqs/',
        'https://noqs.in/',
        'https://noqs.in/internships/',
    ]
    TEXT_FILE = 'data.txt'

    INDEX_NAME = "noqs-chatbot-with-web-content-dynamic"
    # Must equal the output size of the embedding model; 768 presumably
    # matches the HuggingFaceEmbeddings default model - TODO confirm.
    EMBEDDING_DIMENSION = 768

    def __init__(self, data_change: bool = False) -> None:
        """Build the full pipeline.

        Args:
            data_change: Pass ``True`` when the source data has changed and
                an already existing index must be dropped and re-embedded.
        """
        self.execute = data_change
        self.start_loader()
        self.start_embeddings()
        self.init_model()

    def start_loader(self) -> None:
        """Load the web pages and the local text file, split them into
        chunks and store the combined result in ``self.docs``."""
        load_dotenv()

        # Can also use Web Base Loader here.
        url_loader = UnstructuredURLLoader(urls=list(self.SOURCE_URLS))
        url_data = url_loader.load()

        text_loader = TextLoader(self.TEXT_FILE, encoding='UTF-8')
        text_data = text_loader.load()

        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=600)
        url_docs = text_splitter.split_documents(url_data)
        text_docs = text_splitter.split_documents(text_data)

        self.docs = url_docs + text_docs

    def _create_index(self, pc, index_name: str) -> None:
        """Create the serverless Pinecone index used by the chatbot."""
        pc.create_index(
            name=index_name,
            metric="cosine",
            dimension=self.EMBEDDING_DIMENSION,
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )

    def start_embeddings(self) -> None:
        """Create or connect to the Pinecone index and set ``self.docsearch``.

        * Index exists and ``self.execute`` is false: connect to it as is.
        * Index exists and ``self.execute`` is true: delete it, recreate it
          and embed ``self.docs`` into it.
        * Index does not exist: create it and embed ``self.docs`` into it.

        Every path assigns ``self.docsearch``. Previously the "index does not
        exist" path only bound a local variable, so ``init_model`` failed
        with ``AttributeError`` on the very first run.
        """
        # Uses sentence based embeddings for ingestion and retrieval.
        embeddings = HuggingFaceEmbeddings()

        # Kept here as well so the method also works when called on its own.
        load_dotenv()

        # Initialize Pinecone client
        pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

        index_name = self.INDEX_NAME
        index_exists = index_name in pc.list_indexes().names()

        if index_exists and not self.execute:
            # Reuse the vectors that are already stored; nothing is embedded.
            self.docsearch = langchain_community.vectorstores.Pinecone.from_existing_index(
                embedding=embeddings, index_name=index_name
            )
            return

        if index_exists:
            # Data changed: drop the stale vectors before re-embedding.
            pc.delete_index(index_name)

        self._create_index(pc, index_name)
        self.docsearch = langchain_community.vectorstores.Pinecone.from_documents(
            self.docs, embeddings, index_name=index_name
        )

    def init_model(self) -> None:
        """Build ``self.rag_chain``: retriever + question -> prompt -> LLM -> str.

        Requires ``self.docsearch`` to have been set by ``start_embeddings``.
        """
        # Define the model ID and connect to a model on Groq API
        model_id = "llama3-8b-8192"
        llm = ChatGroq(
            model=model_id,
            temperature=0.5,
            max_tokens=1200
        )

        # Creating prompt response template
        template = """
        You are a knowledgeable assistant for NoQs Digital. The Manager of our company is Mr. Adit Agarwal. Users will ask you questions about our company, and you must use the given context to answer their questions accurately. Follow these guidelines:
        Always base your answers on the provided context. Do not make up information.
        If the context does not contain the answer, simply say, "I don't know based on the provided information."
        Offer detailed and thorough responses, but stay relevant to the user's question.
        Maintain a professional tone in your responses.
        You have to answer like a Customer Care Specialist.
        If someone answer about program A just answer politelly that due to high demand we are currently not offering program A.
        Don't tell user that you are fetching the information from the context

        Context: {context}

        User Question: {question}

        Answer:
        """

        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )

        # Chaining llm and prompt: the retriever fills {context} from the
        # incoming question, which is passed through unchanged as {question}.
        self.rag_chain = (
            {"context": self.docsearch.as_retriever(), "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )