import streamlit as st
from run_llama import load_models
import variables as vr
from load_documents import load_documents_fn
import torch
from langchain.vectorstores import Chroma
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from chromadb.utils import embedding_functions
# print(f"Is CUDA available: {torch.cuda.is_available()}")
# # True
# print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
def model_memory() -> "PromptTemplate":
    """Build the question-answering prompt used by the retrieval chain.

    Despite its name, this function does not attach any conversation
    memory; it only returns a prompt template.

    Returns:
        A ``PromptTemplate`` with two input variables: ``context`` (the
        retrieved document text) and ``question`` (the user's query).
    """
    # The template must contain every declared input variable. Without the
    # ``{context}`` placeholder the retrieved text never reaches the model.
    # Implicit string concatenation is used so that source indentation
    # cannot leak into the prompt text.
    template = (
        "Use the following pieces of context to answer the question at the end. "
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n"
        "{context}\n"
        "Question: {question}\n"
        "Helpful Answer:"
    )
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
    return prompt
# Sidebar: upload documents, index them, and build the retrieval QA chain.
with st.sidebar:
    st.subheader("Your documents")
    # With accept_multiple_files=True the uploader yields a list, which is
    # empty until the user adds at least one file.
    docs = st.file_uploader(
        "Upload your PDFs here and click on 'Process'",
        accept_multiple_files=True,
        type=["pdf", "docx", "csv", "xlsx", "html"],
    )
    if st.button("Process"):
        with st.spinner("Processing"):
            if not docs:
                # Nothing to index: report it and skip the pipeline instead
                # of continuing with an undefined document list.
                st.error("Error While loading the documents!!!Try Again!!!")
            else:
                loaded_documents = load_documents_fn(docs)

                # The embedding model and the LLM are kept in session state
                # so later clicks on "Process" reuse them.
                # NOTE(review): assumes EMBEDDING_MODEL_NAME, MODEL_ID and
                # MODEL_BASENAME are defined in the `variables` module
                # imported as `vr`; they were undefined bare names before.
                if "EMBEDDINGS" not in st.session_state:
                    st.session_state.EMBEDDINGS = SentenceTransformerEmbeddings(
                        model_name=vr.EMBEDDING_MODEL_NAME
                    )
                if "LLM" not in st.session_state:
                    st.session_state["LLM"] = load_models(
                        model_id=vr.MODEL_ID,
                        model_basename=vr.MODEL_BASENAME,
                    )

                DB = Chroma.from_documents(
                    loaded_documents,
                    st.session_state.EMBEDDINGS,
                    persist_directory="db",
                )
                st.session_state.DB = DB

                # The retriever and the chain are rebuilt on every run so
                # they always point at the vector store created just above.
                RETRIEVER = DB.as_retriever()
                st.session_state.RETRIEVER = RETRIEVER

                prompt = model_memory()
                # NOTE(review): the original call was truncated; llm,
                # chain_type and retriever are reconstructed — confirm.
                # return_source_documents=True is needed because the chat
                # code reads response["source_documents"].
                QA = RetrievalQA.from_chain_type(
                    llm=st.session_state["LLM"],
                    chain_type="stuff",
                    retriever=RETRIEVER,
                    return_source_documents=True,
                    chain_type_kwargs={"prompt": prompt},
                )
                st.session_state["QA"] = QA
                st.success("LLM Initialized !!! You Chat with your documents!!")
# Main page: take a question and run it through the QA chain, if one exists.
st.title('Chat With Your Documents')
prompt = st.text_input('Input your prompt here')
if not docs:
    # The uploader returns an empty list (not None) when no files are
    # selected, so test for emptiness to discard the prompt.
    prompt = ""
elif prompt and "QA" not in st.session_state:
    # The chain only exists after "Process" has run; avoid a KeyError.
    st.warning("Click 'Process' in the sidebar before asking a question.")
    prompt = ""
# If the user hits enter
if prompt:
    # Then pass the prompt to the LLM
    response = st.session_state["QA"](prompt)
    # `docs` is rebound here to the source documents returned by the chain.
    answer, docs = response["result"], response["source_documents"]
    # ...and write it out to the screen