from dependencies import *

class ChatBot:
    """Retrieval-augmented chatbot for NoQs Digital.

    Construction runs three steps in order:

    1. ``start_loader`` fetches the source pages and ``data.txt`` and splits
       them into chunks (stored on ``self.docs``).
    2. ``start_embeddings`` connects to Pinecone and makes ``self.docsearch``
       point at a populated vector store.
    3. ``init_model`` builds ``self.rag_chain`` (retriever -> prompt -> LLM ->
       string output).

    Args:
        data_change: When True and the Pinecone index already exists, the
            index is deleted, recreated and re-populated from ``self.docs``.
            When the index does not exist it is always created and populated,
            regardless of this flag.
    """

    # Name of the Pinecone index backing the retriever.
    INDEX_NAME = "noqs-chatbot-with-web-content-dynamic"
    # Vector size the index is created with.
    # NOTE(review): presumably the output size of the default
    # HuggingFaceEmbeddings model -- confirm if the embedding model changes.
    EMBEDDING_DIMENSION = 768

    def __init__(self, data_change: bool = False) -> None:
        self.execute = data_change
        self.start_loader()
        self.start_embeddings()
        self.init_model()

    def start_loader(self) -> None:
        """Load the web pages and the local text file and split them into chunks.

        Populates ``self.docs`` with the URL chunks followed by the text-file
        chunks.
        """
        load_dotenv()
        # loader = DirectoryLoader('data', glob="*.md")
        urls = [
            'https://noqs.in/faqs/',
            'https://noqs.in/',
            'https://noqs.in/internships/'
        ]

        url_loader = UnstructuredURLLoader(urls=urls)  # can also use Web Base Loader
        url_data = url_loader.load()

        text_loader = TextLoader('data.txt', encoding='UTF-8')
        text_data = text_loader.load()

        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=600)

        url_docs = text_splitter.split_documents(url_data)
        text_docs = text_splitter.split_documents(text_data)
        self.docs = url_docs + text_docs

    @staticmethod
    def _create_index(pc, index_name: str) -> None:
        """Create the serverless cosine index *index_name* on the client *pc*."""
        pc.create_index(
            name=index_name,
            metric="cosine",
            dimension=ChatBot.EMBEDDING_DIMENSION,
            spec=ServerlessSpec(
                cloud="aws",
                region="us-east-1"
            ),
        )

    def start_embeddings(self) -> None:
        """Connect to Pinecone and set ``self.docsearch`` to a usable vector store.

        * Index missing: create it and ingest ``self.docs``.
        * Index present and ``self.execute`` is True: delete it, recreate it
          and ingest ``self.docs``.
        * Index present otherwise: attach to the existing index without
          re-ingesting.

        ``self.docsearch`` is assigned on every path; ``init_model`` depends
        on it.
        """
        # uses sentence based embeddings for ingestion and retrieval
        embeddings = HuggingFaceEmbeddings()

        # Initialize Pinecone client. The environment was already populated by
        # load_dotenv() in start_loader, which __init__ runs first.
        pc = Pinecone(
            api_key=os.environ.get("PINECONE_API_KEY")
        )

        index_name = self.INDEX_NAME
        vector_store = langchain_community.vectorstores.Pinecone

        index_exists = index_name in pc.list_indexes().names()

        # A data change invalidates whatever is stored: drop the index so it
        # is rebuilt from scratch below.
        if index_exists and self.execute:
            pc.delete_index(index_name)
            index_exists = False

        if index_exists:
            self.docsearch = vector_store.from_existing_index(
                embedding=embeddings, index_name=index_name
            )
        else:
            self._create_index(pc, index_name)
            # Embed the documents into the freshly created index.
            self.docsearch = vector_store.from_documents(
                self.docs, embeddings, index_name=index_name
            )

    def init_model(self) -> None:
        """Build ``self.rag_chain``: retriever + prompt + Groq chat model + parser.

        Requires ``self.docsearch`` to have been set by ``start_embeddings``.
        """
        # Define the model ID and connect to a model on Groq API
        model_id = "llama3-8b-8192"
        llm = ChatGroq(
            model=model_id,
            temperature=0.5,
            max_tokens=1200
        )

        # Creating prompt response template
        template = """
            You are a knowledgeable assistant for NoQs Digital.
            The Manager of our company is Mr. Adit Agarwal. 
            Users will ask you questions about our company, and you must use the given context to answer their questions accurately. 
            Follow these guidelines:
            Always base your answers on the provided context. Do not make up information.
            If the context does not contain the answer, simply say, "I don't know based on the provided information."
            Offer detailed and thorough responses, but stay relevant to the user's question.
            Maintain a professional tone in your responses.
            You have to answer like a Customer Care Specialist.
            If someone answer about program A just answer politelly that due to high demand we are currently not offering program A.
            Don't tell user that you are fetching the information from the context

            Context: {context}

            User Question: {question}

            Answer:
        """

        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )

        # Chaining llm and prompt: the incoming question is sent both to the
        # retriever (to fill {context}) and straight through (to fill {question}).
        self.rag_chain = (
            {"context": self.docsearch.as_retriever(), "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )