import streamlit as st
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain_community.llms import HuggingFaceHub

# Llama2
import torch
import transformers
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer
from torch import cuda, bfloat16

import langchain

langchain.verbose = False


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text


def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,      # the character length of each chunk
        chunk_overlap=200,    # the character overlap between chunks
        length_function=len   # length is measured in characters (the Python len() fn.)
    )
    chunks = text_splitter.split_text(text)
    return chunks


def get_vectorstore(text_chunks, selected_embedding):
    """Embed the chunks with the selected model and persist the FAISS index locally."""
    if selected_embedding == 'OpenAI':
        print('OpenAI embedding')
        embeddings = OpenAIEmbeddings()
    elif selected_embedding == 'Instructor-xl':
        print('Instructor-xl embedding')
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    vectorstore.save_local("faiss_index")
    return vectorstore


def load_vectorstore(selected_embedding):
    """Reload the previously saved FAISS index with the matching embedding model."""
    if selected_embedding == 'OpenAI':
        print('OpenAI embedding')
        embeddings = OpenAIEmbeddings()
    elif selected_embedding == 'Instructor-xl':
        print('Instructor-xl embedding')
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    # note: recent langchain_community versions may also require
    # allow_dangerous_deserialization=True when loading a local index
    vectorstore = FAISS.load_local("faiss_index", embeddings)
    return vectorstore


def get_conversation_chain(vectorstore, selected_llm):
    if selected_llm == 'OpenAI':
        print('OpenAI LLM')
        llm = ChatOpenAI()
    elif selected_llm == 'Llama2':
        print('Llama2 LLM')
        model_id = 'meta-llama/Llama-2-7b-chat-hf'
        hf_auth = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        model_config = transformers.AutoConfig.from_pretrained(
            model_id,
            token=hf_auth
        )
        device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

        if 'cuda' in device:
            # set quantization configuration to load the large model with less GPU memory
            # this requires the `bitsandbytes` library
            bnb_config = transformers.BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type='nf4',
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=bfloat16
            )
            model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id,
                trust_remote_code=True,
                config=model_config,
                quantization_config=bnb_config,
                device_map='auto',
                token=hf_auth
            )
        else:
            model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id,
                trust_remote_code=True,
                config=model_config,
                device_map='auto',
                token=hf_auth
            )

        # enable evaluation mode to allow model inference
        model.eval()
        print(f"Model loaded on {device}")

        tokenizer = transformers.AutoTokenizer.from_pretrained(
            model_id,
            token=hf_auth
        )
        pipeline = transformers.pipeline(
            task='text-generation',
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float32,
            return_full_text=True,   # langchain expects the full text
            temperature=0.1,         # 'randomness' of outputs, 0.0 is the min and 1.0 the max
            max_new_tokens=512,      # max number of tokens to generate in the output
            repetition_penalty=1.1   # without this the output begins repeating
        )
        llm = HuggingFacePipeline(pipeline=pipeline)

    # Generic part: wrap the selected LLM in a conversational retrieval chain with memory
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=False
    )
    # print(conversation_chain)
    return conversation_chain


def handle_userinput(user_question):
    print('Question: ' + user_question)
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # chat_history alternates user / assistant messages, so render them with
    # the matching HTML template
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)


def main():
    load_dotenv()
    st.set_page_config(page_title="VerAi", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'",
            accept_multiple_files=True)
        selected_embedding = st.radio("Which Embedding?", ["OpenAI", "Instructor-xl"])
        selected_llm = st.radio("Which LLM?", ["OpenAI", "Llama2"])

        if st.button("Process"):
            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)

                # create vector store
                vectorstore = get_vectorstore(text_chunks, selected_embedding)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore, selected_llm)

        if st.button("Load"):
            with st.spinner("Processing"):
                # load the previously saved vector store
                vectorstore = load_vectorstore(selected_embedding)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore, selected_llm)

    if st.session_state.conversation:
        st.header("VerAi :books:")
        user_question = st.text_input("Stel een vraag hieronder")
        # Example questions (Dutch): "Vertel me iets over Wettelijke uren"
        # (tell me about statutory hours), "wat zijn Overige verloftypes bij kpn"
        # (what are the other leave types at KPN)
        if st.session_state.conversation and user_question:
            handle_userinput(user_question)


if __name__ == '__main__':
    main()
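
# A minimal way to launch the app locally (a sketch, assuming this file is saved as
# app.py, that the required packages -- streamlit, python-dotenv, PyPDF2, langchain,
# langchain-community, langchain-openai, faiss-cpu (or faiss-gpu), torch, transformers,
# bitsandbytes -- are installed, and that an htmlTemplates.py providing css,
# bot_template and user_template sits next to it):
#
#   streamlit run app.py
#
# OPENAI_API_KEY and HUGGINGFACEHUB_API_TOKEN are read from the environment (or a .env
# file via load_dotenv), depending on which embedding/LLM options are selected.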