File size: 4,423 Bytes
ec39c22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9408fa0
ec39c22
 
 
 
 
 
 
 
 
 
 
bccf29a
ec39c22
 
 
 
 
 
 
 
bccf29a
 
 
 
 
 
 
 
 
ec39c22
90889bf
ec39c22
 
 
 
 
 
 
 
 
 
 
bccf29a
 
 
 
 
 
 
 
ec39c22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import sys
from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
import torch
from transformers import AutoTokenizer
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.text_splitter import  RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from typing import Callable, Dict, List, Optional, Union
from langchain.vectorstores import Chroma
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_community.llms import llamacpp
import streamlit as st

# Module-level registry mapping a session id to its ChatMessageHistory.
store = {}


@st.cache_resource
def get_session_history(session_id: str):
    """Return the chat history registered for ``session_id``.

    A new, empty ``ChatMessageHistory`` is created and stored in the
    module-level ``store`` the first time a given id is requested; later
    requests return that same stored object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this id: register a fresh history.
        store[session_id] = ChatMessageHistory()
        return store[session_id]



@st.cache_resource
def load_pdf_documents(data_path):
    """Load documents from ``data_path`` using ``PyPDFDirectoryLoader``.

    Returns whatever the loader's ``load()`` produces. If any exception is
    raised while building the loader or loading, the error is printed and
    ``None`` is returned instead of propagating.
    """
    try:
        loaded = PyPDFDirectoryLoader(data_path).load()
    except Exception as e:
        # Best-effort: report the problem and signal failure with None.
        print(f"Error loading documents from {data_path}: {e}")
        loaded = None
    return loaded


@st.cache_data
def load_txt_documents(data_path):
    """Load every ``.txt`` file directly inside ``data_path``.

    Entries of ``os.listdir(data_path)`` whose name ends with ``.txt`` are
    loaded with ``TextLoader`` and their results are concatenated, in
    directory-listing order, into a single list.
    """
    txt_paths = [
        os.path.join(data_path, entry)
        for entry in os.listdir(data_path)
        if entry.endswith('.txt')
    ]
    loaded = []
    for txt_path in txt_paths:
        loaded.extend(TextLoader(txt_path).load())
    return loaded

@st.cache_resource
def split_docs(_documents, chunk_size, chunk_overlap):
    """Split ``_documents`` into token-sized chunks.

    Args:
        _documents: Documents to split; passed to ``split_documents``.
            NOTE(review): the leading underscore presumably tells Streamlit
            not to hash this argument, which would mean the cached result is
            keyed on ``chunk_size``/``chunk_overlap`` only -- confirm against
            the Streamlit caching documentation.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The list of split documents, or ``[]`` if splitting raised (the
        error is printed).
    """
    try:
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            # Raw string keeps the backslash without an invalid-escape warning.
            separators=["\n \n \n", "\n \n", "\n1", r"(?<=\. )", " ", ""],
            # The look-behind separator is a regex; without this flag the
            # splitter would look for it as literal text.
            is_separator_regex=True,
        )
        # Use the actual parameter name; the previous reference to an
        # undefined ``documents`` raised NameError, which the handler below
        # swallowed, so the function always returned [].
        return text_splitter.split_documents(_documents)
    except Exception as e:
        print(f"Error splitting documents: {e}")
        return []  # best-effort: callers receive an empty list on failure

@st.cache_data
def load_uploaded_documents(uploaded_files):
    """Read each uploaded file into a ``{"content", "filename"}`` dict.

    For every item in ``uploaded_files`` the bytes returned by ``read()``
    are decoded as UTF-8 and paired with the item's ``name``. The resulting
    dicts are returned in input order.
    """
    return [
        {"content": upload.read().decode("utf-8"), "filename": upload.name}
        for upload in uploaded_files
    ]

@st.cache_resource
def chroma_db(docs, embeddings):
    """Build a Chroma vector store from ``docs`` using ``embeddings``.

    The store is created with ``Chroma.from_documents`` and the persist
    directory ``docs/chroma/``. If creation raises, the error is printed
    and ``None`` is returned.
    """
    try:
        return Chroma.from_documents(
            documents=docs,
            embedding=embeddings,
            persist_directory="docs/chroma/",
        )
    except Exception as e:
        print(f"Error creating Chroma vector database: {e}")
    # Reached only when creation failed.
    return None

@st.cache_resource
def retriever_from_chroma(vectordb, search_type, k):
    """Return ``vectordb.as_retriever(...)`` configured with ``search_type`` and ``k``.

    ``k`` is passed to the retriever inside ``search_kwargs`` under the
    key ``"k"``.
    """
    search_kwargs = {"k": k}
    return vectordb.as_retriever(search_type=search_type, search_kwargs=search_kwargs)

@st.cache_resource
def history_aware_retriever(llm, retriever, contextualize_q_system_prompt):
    """Create a history-aware retriever from ``llm`` and ``retriever``.

    A chat prompt is assembled from the given system prompt, a
    ``chat_history`` messages placeholder and the human ``{input}`` turn,
    then handed to ``create_history_aware_retriever``. If anything raises,
    the error is printed and ``None`` is returned.
    """
    try:
        rephrase_prompt = ChatPromptTemplate.from_messages([
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])
        return create_history_aware_retriever(llm, retriever, rephrase_prompt)
    except Exception as e:
        print(f"Error creating history-aware retriever: {e}")
        return None



# Deliberately not wrapped in a Streamlit cache decorator: this function
# mutates the chat history on every call, and a cached call would skip both
# the chain invocation and the history update.
def echo(question, history):
    """Answer ``question`` with the RAG chain and record the exchange.

    Args:
        question: The user's input, sent to the chain as ``"input"``.
        history: Not used by this function; kept so the two-argument call
            signature stays unchanged for existing callers.

    Returns:
        The value stored under ``"answer"`` in the chain's result.

    NOTE(review): ``rag_chain`` and ``chat_history`` are read as module-level
    names that are not defined in the visible part of this file -- confirm
    they are provided before this function is called.
    """
    # Imported locally because the module's import block does not bring
    # these message classes into scope.
    from langchain_core.messages import AIMessage, HumanMessage

    ai_message = rag_chain.invoke({"input": question, "chat_history": chat_history})
    answer = ai_message["answer"]
    # Store the reply as an AIMessage so it is not later treated as user
    # input (assumes ``answer`` is a string -- TODO confirm for this chain).
    chat_history.extend([HumanMessage(content=question), AIMessage(content=answer)])
    return answer