ArturG9 committed on
Commit 8356c3c
1 Parent(s): d0095f5

Update app.py

Files changed (1)
  1. app.py +23 -32
app.py CHANGED
@@ -7,33 +7,30 @@ from langchain_community.llms import llamacpp
 from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain,ConversationalRetrievalChain
+from langchain.chains import create_history_aware_retriever, create_retrieval_chain, ConversationalRetrievalChain
 from langchain.document_loaders import TextLoader
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_community.chat_message_histories.streamlit import StreamlitChatMessageHistory
 from langchain.prompts import PromptTemplate
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
 from langchain.vectorstores import Chroma
-from utills import load_txt_documents , split_docs, load_uploaded_documents,retriever_from_chroma
-from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter
-
+from utils import load_txt_documents, split_docs, load_uploaded_documents, retriever_from_chroma
+from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
-data_path = os.path.join(script_dir, "data/")
+data_path = os.path.join(script_dir, "data/")
 model_path = os.path.join(script_dir, 'qwen2-0_5b-instruct-q4_0.gguf')
 store = {}
 
-
 model_name = "sentence-transformers/all-mpnet-base-v2"
 model_kwargs = {'device': 'cpu'}
 encode_kwargs = {'normalize_embeddings': True}
 
-
 hf = HuggingFaceEmbeddings(
     model_name=model_name,
     model_kwargs=model_kwargs,
     encode_kwargs=encode_kwargs
 )
+
 def get_vectorstore(text_chunks):
     model_name = "sentence-transformers/all-mpnet-base-v2"
     model_kwargs = {'device': 'cpu'}
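
Note: the import fixes above drop a duplicated langchain.chains import and rename the local helper module from utills to utils. That module is not part of this commit, so the sketch below is only a guess at what retriever_from_chroma does, assuming it indexes pre-split documents with Chroma and exposes the store as a retriever; the parameter names and persist directory are assumptions, not the repo's actual code.

from langchain.vectorstores import Chroma

def retriever_from_chroma(docs, embeddings, search_type, k):
    # Hypothetical reconstruction: index the pre-split documents in Chroma,
    # then expose the store as a retriever with the requested search strategy.
    vectorstore = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory="docs/chroma/",
    )
    return vectorstore.as_retriever(search_type=search_type, search_kwargs={"k": k})
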
@@ -42,14 +39,11 @@ def get_vectorstore(text_chunks):
         model_name=model_name,
         model_kwargs=model_kwargs,
         encode_kwargs=encode_kwargs
-    )
+    )
 
-    vectorstore = Chroma.from_documents(texts=text_chunks, embedding=hf,persist_directory="docs/chroma/")
+    vectorstore = Chroma.from_documents(texts=text_chunks, embedding=hf, persist_directory="docs/chroma/")
     return vectorstore
 
-
-
-
 def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
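
Note: the added line above only fixes spacing. Chroma.from_documents has no texts parameter; it expects documents= with Document objects, while get_text_chunks returns plain strings. Chroma.from_texts is the constructor that matches string chunks; a minimal sketch of that variant:

# text_chunks is a list of plain strings, so from_texts is the matching
# constructor; from_documents would require Document objects instead.
vectorstore = Chroma.from_texts(
    texts=text_chunks,
    embedding=hf,
    persist_directory="docs/chroma/",
)
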
@@ -58,7 +52,6 @@ def get_pdf_text(pdf_docs):
         text += page.extract_text()
     return text
 
-
 def get_text_chunks(text):
     text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
         separator="\n",
@@ -69,31 +62,29 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
 
-
-
 def create_conversational_rag_chain(vectorstore):
     retriever = retriever_from_chroma(docs, hf, "mmr", 6)
 
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
-    llm = LlamaCpp(
-        model_path= os.path.join(script_dir, 'qwen2-0_5b-instruct-q4_0.gguf'),
-        n_gpu_layers=1,
-        temperature=0.1,
-        top_p=0.9,
-        n_ctx=22000,
-        max_tokens=200,
-        repeat_penalty=1.7,
-        callback_manager=callback_manager,
-        verbose=False,
-    )
+    llm = llamacpp.LlamaCpp(
+        model_path=os.path.join(script_dir, 'qwen2-0_5b-instruct-q4_0.gguf'),
+        n_gpu_layers=1,
+        temperature=0.1,
+        top_p=0.9,
+        n_ctx=22000,
+        max_tokens=200,
+        repeat_penalty=1.7,
+        callback_manager=callback_manager,
+        verbose=False,
+    )
 
     contextualize_q_system_prompt = """Given a context, chat history and the latest user question
     which maybe reference context in the chat history, formulate a standalone question
     which can be understood without the chat history. Do NOT answer the question,
     just reformulate it if needed and otherwise return it as is."""
 
-    ha_retriever = history_aware_retriever(llm, retriever, contextualize_q_system_prompt)
+    ha_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_system_prompt)
 
     qa_system_prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Be as informative as possible, be polite and formal.\n{context}"""
 
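
Note: renaming the call to create_history_aware_retriever matches the import, but that factory takes a prompt object rather than a bare string. The usual wiring passes a ChatPromptTemplate with a chat_history placeholder and an {input} slot; both classes are already imported at the top of app.py. A sketch under that assumption:

contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),  # filled in when history is attached
    ("human", "{input}"),
])
ha_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
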
@@ -121,13 +112,12 @@ def create_conversational_rag_chain(vectorstore):
     )
 
     return create_rag_chain()
-conversational_rag_chain = create_conversational_rag_chain()
 
 def main(conversational_rag_chain):
     """Main function for the Streamlit app."""
     # Initialize chat history if not already present in session state
     msgs = st.session_state.get("chat_history", StreamlitChatMessageHistory(key="special_app_key"))
-    chain_with_history = conversational_rag_chain
+    chain_with_history = create_conversational_rag_chain()
 
     st.title("Conversational RAG Chatbot")
 
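
Note: with the module-level call removed, main now builds the chain itself, but nothing in this hunk wires in the imported RunnableWithMessageHistory. A plausible sketch of that wrapping, assuming create_conversational_rag_chain returns a create_retrieval_chain-style runnable and the module-level store dict keys histories by session id; the get_session_history helper is an assumption, not code from this repo:

def get_session_history(session_id):
    # Hypothetical helper: one StreamlitChatMessageHistory per session id,
    # cached in the module-level store dict.
    if session_id not in store:
        store[session_id] = StreamlitChatMessageHistory(key=session_id)
    return store[session_id]

chain_with_history = RunnableWithMessageHistory(
    create_conversational_rag_chain(),
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
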
@@ -144,8 +134,9 @@ def main(conversational_rag_chain):
 
     # Display retrieved documents (if any and present in response)
     if "docs" in response and response["documents"]:
-        st.write(f"Expanding document {index+1}...")
-        display_documents(docs, expand_document)  # Pass click function
+        for index, doc in enumerate(response["documents"]):
+            with st.expander(f"Document {index + 1}"):
+                st.write(doc)
 
     # Update chat history in session state
     st.session_state["chat_history"] = msgs
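
Note: the new expander loop still guards on a key it never reads; the condition tests "docs" in response but then indexes response["documents"], so the branch either raises KeyError or never fires. With a create_retrieval_chain-style response the retrieved documents come back under the "context" key; a hedged correction under that assumption:

# create_retrieval_chain puts the retrieved Documents under "context".
if response.get("context"):
    for index, doc in enumerate(response["context"]):
        with st.expander(f"Document {index + 1}"):
            st.write(doc.page_content)
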