Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
import os
|
4 |
import streamlit as st
|
5 |
from transformers import pipeline
|
@@ -8,13 +6,12 @@ from langchain.prompts import PromptTemplate
|
|
8 |
from langchain.chains.question_answering import load_qa_chain
|
9 |
from langchain.vectorstores import Chroma
|
10 |
from langchain.retrievers import mmr_retriever
|
11 |
-
from utills import load_txt_documents , split_docs, chroma_db,
|
12 |
-
|
13 |
|
14 |
# Initialize variables and paths
|
15 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
16 |
data_path = "./data/"
|
17 |
-
model_path = os.path.join(script_dir, '
|
18 |
store = {}
|
19 |
|
20 |
# Set up HuggingFace embeddings
|
@@ -42,22 +39,32 @@ def load_txt_documents(data_path):
|
|
42 |
documents.extend(TextLoader(file_path).load())
|
43 |
return documents
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
50 |
|
51 |
-
docs = split_docs(documents, 450, 20)
|
52 |
|
53 |
|
|
|
|
|
54 |
|
55 |
-
|
|
|
|
|
56 |
|
|
|
57 |
|
58 |
-
|
|
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
|
62 |
|
63 |
@st.cache_resource
|
@@ -75,7 +82,7 @@ def load_llm(model_path):
|
|
75 |
verbose=False,
|
76 |
)
|
77 |
|
78 |
-
llm = load_llm()
|
79 |
|
80 |
contextualize_q_system_prompt = """Given a context, chat history and the latest user question
|
81 |
which maybe reference context in the chat history, formulate a standalone question
|
@@ -138,8 +145,8 @@ def display_documents(docs, on_click=None):
|
|
138 |
if st.button(f"Expand Article {i+1}"):
|
139 |
on_click(i)
|
140 |
|
141 |
-
def
|
142 |
-
"""Main
|
143 |
msgs = st.session_state.get("chat_history", StreamlitChatMessageHistory(key="special_app_key"))
|
144 |
chain_with_history = conversational_rag_chain
|
145 |
|
@@ -164,5 +171,27 @@ def main(conversational_rag_chain):
|
|
164 |
|
165 |
st.session_state["chat_history"] = msgs
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
if __name__ == "__main__":
|
168 |
-
main(
|
|
|
|
|
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
from transformers import pipeline
|
|
|
6 |
from langchain.chains.question_answering import load_qa_chain
|
7 |
from langchain.vectorstores import Chroma
|
8 |
from langchain.retrievers import mmr_retriever
|
9 |
+
from utills import load_txt_documents , split_docs, chroma_db, load_uploaded_documents
|
|
|
10 |
|
11 |
# Initialize variables and paths
|
12 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
13 |
data_path = "./data/"
|
14 |
+
model_path = os.path.join(script_dir, 'qwen2-0_5b-instruct-q4_0.gguf')
|
15 |
store = {}
|
16 |
|
17 |
# Set up HuggingFace embeddings
|
|
|
39 |
documents.extend(TextLoader(file_path).load())
|
40 |
return documents
|
41 |
|
42 |
+
@st.cache_data
def load_uploaded_documents(uploaded_files):
    """Decode uploaded text files into plain dictionaries.

    Args:
        uploaded_files: Iterable of file-like objects exposing ``name`` and
            returning bytes from ``getvalue()`` or ``read()`` (presumably
            Streamlit ``UploadedFile`` objects -- confirm against the caller).

    Returns:
        A list with one ``{"content": str, "filename": str}`` dict per file,
        in the order the files were given.

    Raises:
        UnicodeDecodeError: If a file's bytes are not valid UTF-8.
    """
    documents = []
    for uploaded_file in uploaded_files:
        # getvalue() returns the whole buffer regardless of the current stream
        # position; read() returns b"" for a file that was already consumed,
        # which would silently produce an empty document.
        if hasattr(uploaded_file, "getvalue"):
            raw = uploaded_file.getvalue()
        else:
            raw = uploaded_file.read()
        documents.append(
            {"content": raw.decode("utf-8"), "filename": uploaded_file.name}
        )
    return documents
|
49 |
|
|
|
50 |
|
51 |
|
52 |
+
# NOTE(review): `load_documents` is not defined in the visible part of this
# file; the loader that is visible (and imported from `utills`) is
# `load_txt_documents` -- confirm this name resolves at import time.
documents = load_documents(data_path)
|
53 |
+
docs = split_docs(documents, 450, 20)
|
54 |
|
55 |
+
@st.cache_resource
def create_chroma_db(docs, hf):
    """Build the Chroma vector store for the chunked documents.

    Args:
        docs: Document chunks to index (the call site passes the result of
            ``split_docs``).
        hf: Embedding object used to vectorise the chunks (presumably the
            HuggingFace embeddings set up earlier in the file -- confirm).

    Returns:
        A ``Chroma`` vector store populated with ``docs``.
    """
    # The Chroma constructor's leading positional parameters are
    # (collection_name, embedding_function), so Chroma(docs, hf) would treat
    # the document list as a collection name. from_documents() is the
    # constructor that embeds and indexes a list of documents.
    # NOTE(review): st.cache_resource hashes its arguments; if these objects
    # turn out to be unhashable, Streamlit's convention is a leading
    # underscore on the parameter name -- confirm before renaming anything.
    return Chroma.from_documents(documents=docs, embedding=hf)
|
58 |
|
59 |
+
chroma_db = create_chroma_db(docs, hf)
|
60 |
|
61 |
+
@st.cache_resource
def create_retriever(chroma_db):
    """Create the retriever that sits on top of the given vector store.

    Args:
        chroma_db: Vector store handed to ``mmr_retriever`` as its first
            argument.

    Returns:
        Whatever ``mmr_retriever`` returns for ``(chroma_db, "mmr", 6)``.
    """
    # NOTE(review): `mmr_retriever` is imported from `langchain.retrievers`,
    # which does not appear to document such a helper; the documented route
    # is `vectorstore.as_retriever(search_type="mmr", ...)` -- confirm the
    # import resolves and what the two extra arguments mean.
    retriever_args = ("mmr", 6)
    return mmr_retriever(chroma_db, *retriever_args)
|
64 |
|
65 |
+
retriever = create_retriever(chroma_db)
|
66 |
|
67 |
+
# Set up LlamaCpp model
|
68 |
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
|
69 |
|
70 |
@st.cache_resource
|
|
|
82 |
verbose=False,
|
83 |
)
|
84 |
|
85 |
+
llm = load_llm(model_path)
|
86 |
|
87 |
contextualize_q_system_prompt = """Given a context, chat history and the latest user question
|
88 |
which maybe reference context in the chat history, formulate a standalone question
|
|
|
145 |
if st.button(f"Expand Article {i+1}"):
|
146 |
on_click(i)
|
147 |
|
148 |
+
def main_page(conversational_rag_chain):
|
149 |
+
"""Main page for the Streamlit app."""
|
150 |
msgs = st.session_state.get("chat_history", StreamlitChatMessageHistory(key="special_app_key"))
|
151 |
chain_with_history = conversational_rag_chain
|
152 |
|
|
|
171 |
|
172 |
st.session_state["chat_history"] = msgs
|
173 |
|
174 |
+
def upload_page():
    """Render the page where users upload text files and inspect them.

    Shows a title and a multi-file ``.txt`` uploader; once at least one file
    is present, each file's name and decoded content is written to the page.
    """
    st.title("Upload and Check Documents")

    files = st.file_uploader(
        "Upload Text Files", type=["txt"], accept_multiple_files=True
    )

    # Nothing uploaded yet: only the title and the uploader widget are shown.
    if not files:
        return

    for entry in load_uploaded_documents(files):
        st.write(f"**Filename: {entry['filename']}**")
        st.text(entry['content'])
|
185 |
+
|
186 |
+
def main():
    """Entry point: draw the sidebar navigation and render the chosen page."""
    st.sidebar.title("Navigation")
    selection = st.sidebar.radio("Go to", ["Chatbot", "Upload Documents"])

    # The two options are mutually exclusive, so branch order does not matter.
    if selection == "Upload Documents":
        upload_page()
    elif selection == "Chatbot":
        # `conversational_rag_chain` is not a parameter here; it is expected
        # to be a module-level object defined elsewhere in the file.
        main_page(conversational_rag_chain)
|
195 |
+
|
196 |
if __name__ == "__main__":
|
197 |
+
main()
|