import streamlit as st
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain_community.llms import HuggingFaceHub

# Llama2
import torch
import transformers
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer
from torch import cuda, bfloat16

import langchain

langchain.verbose = False


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text


def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,      # the character length of each chunk
        chunk_overlap=200,    # the character overlap between chunks
        length_function=len   # length is measured in characters (the Python len() fn.)
    )
    chunks = text_splitter.split_text(text)
    return chunks


def get_vectorstore(text_chunks, selected_embedding):
    """Embed the chunks with the selected model and persist the FAISS index locally."""
    if selected_embedding == 'OpenAI':
        print('OpenAI embedding')
        embeddings = OpenAIEmbeddings()
    elif selected_embedding == 'Instructor-xl':
        print('Instructor-xl embedding')
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    vectorstore.save_local("faiss_index")
    return vectorstore


def load_vectorstore(selected_embedding):
    """Reload the previously saved FAISS index with the matching embedding model."""
    if selected_embedding == 'OpenAI':
        print('OpenAI embedding')
        embeddings = OpenAIEmbeddings()
    elif selected_embedding == 'Instructor-xl':
        print('Instructor-xl embedding')
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    # note: recent langchain_community versions may also require
    # allow_dangerous_deserialization=True when loading a local index
    vectorstore = FAISS.load_local("faiss_index", embeddings)
    return vectorstore


def get_conversation_chain(vectorstore, selected_llm):
    if selected_llm == 'OpenAI':
        print('OpenAI LLM')
        llm = ChatOpenAI()
    elif selected_llm == 'Llama2':
        print('Llama2 LLM')
        model_id = 'meta-llama/Llama-2-7b-chat-hf'
        hf_auth = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        model_config = transformers.AutoConfig.from_pretrained(
            model_id,
            token=hf_auth
        )
        device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

        if 'cuda' in device:
            # set quantization configuration to load the large model with less GPU memory
            # this requires the `bitsandbytes` library
            bnb_config = transformers.BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type='nf4',
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=bfloat16
            )
            model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id,
                trust_remote_code=True,
                config=model_config,
                quantization_config=bnb_config,
                device_map='auto',
                token=hf_auth
            )
        else:
            model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id,
                trust_remote_code=True,
                config=model_config,
                device_map='auto',
                token=hf_auth
            )

        # enable evaluation mode to allow model inference
        model.eval()
        print(f"Model loaded on {device}")

        tokenizer = transformers.AutoTokenizer.from_pretrained(
            model_id,
            token=hf_auth
        )
        pipeline = transformers.pipeline(
            task='text-generation',
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float32,
            return_full_text=True,   # langchain expects the full text
            temperature=0.1,         # 'randomness' of outputs, 0.0 is the min and 1.0 the max
            max_new_tokens=512,      # max number of tokens to generate in the output
            repetition_penalty=1.1   # without this the output begins repeating
        )
        llm = HuggingFacePipeline(pipeline=pipeline)

    # Generic part: wrap the selected LLM in a conversational retrieval chain with memory
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=False
    )
    # print(conversation_chain)
    return conversation_chain


def handle_userinput(user_question):
    print('Question: ' + user_question)
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # chat_history alternates user / assistant messages, so render them with
    # the matching HTML template
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)


def main():
    load_dotenv()
    st.set_page_config(page_title="VerAi", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'",
            accept_multiple_files=True)
        selected_embedding = st.radio("Which Embedding?", ["OpenAI", "Instructor-xl"])
        selected_llm = st.radio("Which LLM?", ["OpenAI", "Llama2"])

        if st.button("Process"):
            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)

                # create vector store
                vectorstore = get_vectorstore(text_chunks, selected_embedding)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore, selected_llm)

        if st.button("Load"):
            with st.spinner("Processing"):
                # load the previously saved vector store
                vectorstore = load_vectorstore(selected_embedding)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore, selected_llm)

    if st.session_state.conversation:
        st.header("VerAi :books:")
        user_question = st.text_input("Stel een vraag hieronder")
        # Example questions (Dutch): "Vertel me iets over Wettelijke uren"
        # (tell me about statutory hours), "wat zijn Overige verloftypes bij kpn"
        # (what are the other leave types at KPN)
        if st.session_state.conversation and user_question:
            handle_userinput(user_question)


if __name__ == '__main__':
    main()
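
# A minimal way to launch the app locally (a sketch, assuming this file is saved as
# app.py, that the required packages -- streamlit, python-dotenv, PyPDF2, langchain,
# langchain-community, langchain-openai, faiss-cpu (or faiss-gpu), torch, transformers,
# bitsandbytes -- are installed, and that an htmlTemplates.py providing css,
# bot_template and user_template sits next to it):
#
#   streamlit run app.py
#
# OPENAI_API_KEY and HUGGINGFACEHUB_API_TOKEN are read from the environment (or a .env
# file via load_dotenv), depending on which embedding/LLM options are selected.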