ArturG9 committed
Commit ec39c22
1 Parent(s): 467c73a

Update utills.py

Files changed (1)
  1. utills.py +96 -96
utills.py CHANGED
@@ -1,96 +1,96 @@
-import os
-import sys
-from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceEmbeddings
-import torch
-from transformers import AutoTokenizer
-from langchain.retrievers.document_compressors import LLMChainExtractor
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import TextLoader
-from langchain.chains import RetrievalQA, ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
-from langchain_core.runnables.history import RunnableWithMessageHistory
-from langchain_core.chat_history import BaseChatMessageHistory
-from langchain_community.chat_message_histories import ChatMessageHistory
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from typing import Callable, Dict, List, Optional, Union
-from langchain.vectorstores import Chroma
-from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
-from langchain.document_loaders import PyPDFLoader
-from langchain.document_loaders.pdf import PyPDFDirectoryLoader
-from langchain_community.llms import llamacpp
-
-
-store = {}
-
-def get_session_history(session_id: str):
-    if session_id not in store:
-        store[session_id] = ChatMessageHistory()
-    return store[session_id]
-
-
-
-
-def load_documents(data_path):
-    try:
-        document_loader = PyPDFDirectoryLoader(data_path)
-        return document_loader.load()
-    except Exception as e:
-        print(f"Error loading documents from {data_path}: {e}")
-        return None # or handle the error in an appropriate manner
-
-
-
-def split_docs(documents, chunk_size, chunk_overlap):
-    try:
-        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
-            chunk_size=chunk_size, chunk_overlap=chunk_overlap,
-            separators=["\n \n \n", "\n \n", "\n1", "(?<=\. )", " ", ""]
-        )
-        docs = text_splitter.split_documents(documents)
-        return docs
-    except Exception as e:
-        print(f"Error splitting documents: {e}")
-        return [] # or handle the error in an appropriate manner
-
-
-def chroma_db(docs, embeddings):
-    try:
-        vectordb = Chroma.from_documents(
-            documents=docs, embedding=embeddings, persist_directory="docs/chroma/"
-        )
-        return vectordb
-    except Exception as e:
-        print(f"Error creating Chroma vector database: {e}")
-        return None # or handle the error in an appropriate manner
-
-
-def retriever_from_chroma(vectordb, search_type, k):
-    retriever = vectordb.as_retriever(search_type=search_type, search_kwargs={"k": k})
-    return retriever
-
-
-def history_aware_retriever(llm, retriever, contextualize_q_system_prompt):
-    try:
-        contextualize_q_prompt = ChatPromptTemplate.from_messages(
-            [
-                ("system", contextualize_q_system_prompt),
-                MessagesPlaceholder("chat_history"),
-                ("human", "{input}"),
-            ]
-        )
-        history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
-        return history_aware_retriever
-    except Exception as e:
-        print(f"Error creating history-aware retriever: {e}")
-        return None # or handle the error in an appropriate manner
-
-
-
-
-def echo(question, history):
-    ai_message = rag_chain.invoke({"input": question, "chat_history": chat_history})
-    chat_history.extend([HumanMessage(content=question), ai_message["answer"]])
-    return ai_message['answer']
 
+import os
+import sys
+from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+import torch
+from transformers import AutoTokenizer
+from langchain.retrievers.document_compressors import LLMChainExtractor
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import TextLoader
+from langchain.chains import RetrievalQA, ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain_core.chat_history import BaseChatMessageHistory
+from langchain_community.chat_message_histories import ChatMessageHistory
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from typing import Callable, Dict, List, Optional, Union
+from langchain.vectorstores import Chroma
+from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
+from langchain.document_loaders import PyPDFLoader
+from langchain.document_loaders.pdf import PyPDFDirectoryLoader
+from langchain_community.llms import llamacpp
+
+
+store = {}
+@st.cache_resource
+def get_session_history(session_id: str):
+    if session_id not in store:
+        store[session_id] = ChatMessageHistory()
+    return store[session_id]
+
+
+
+@st.cache_resource
+def load_documents(data_path):
+    try:
+        document_loader = PyPDFDirectoryLoader(data_path)
+        return document_loader.load()
+    except Exception as e:
+        print(f"Error loading documents from {data_path}: {e}")
+        return None # or handle the error in an appropriate manner
+
+
+@st.cache_resource
+def split_docs(documents, chunk_size, chunk_overlap):
+    try:
+        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+            chunk_size=chunk_size, chunk_overlap=chunk_overlap,
+            separators=["\n \n \n", "\n \n", "\n1", "(?<=\. )", " ", ""]
+        )
+        docs = text_splitter.split_documents(documents)
+        return docs
+    except Exception as e:
+        print(f"Error splitting documents: {e}")
+        return [] # or handle the error in an appropriate manner
+
+@st.cache_resource
+def chroma_db(docs, embeddings):
+    try:
+        vectordb = Chroma.from_documents(
+            documents=docs, embedding=embeddings, persist_directory="docs/chroma/"
+        )
+        return vectordb
+    except Exception as e:
+        print(f"Error creating Chroma vector database: {e}")
+        return None # or handle the error in an appropriate manner
+
+@st.cache_resource
+def retriever_from_chroma(vectordb, search_type, k):
+    retriever = vectordb.as_retriever(search_type=search_type, search_kwargs={"k": k})
+    return retriever
+
+@st.cache_resource
+def history_aware_retriever(llm, retriever, contextualize_q_system_prompt):
+    try:
+        contextualize_q_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", contextualize_q_system_prompt),
+                MessagesPlaceholder("chat_history"),
+                ("human", "{input}"),
+            ]
+        )
+        history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
+        return history_aware_retriever
+    except Exception as e:
+        print(f"Error creating history-aware retriever: {e}")
+        return None # or handle the error in an appropriate manner
+
+
+
+@st.cache_resource
+def echo(question, history):
+    ai_message = rag_chain.invoke({"input": question, "chat_history": chat_history})
+    chat_history.extend([HumanMessage(content=question), ai_message["answer"]])
+    return ai_message['answer']
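
What changed: each helper in utills.py gains an `@st.cache_resource` decorator. Note that neither version imports Streamlit and the diff adds no `import streamlit as st`, so the decorators presumably depend on that import arriving in another commit; as committed, importing the module would raise `NameError: name 'st' is not defined`. A minimal sketch of the assumed wiring, reusing the session-history helper from the file:

```python
# Sketch under an assumption: `import streamlit as st` must exist somewhere,
# although it does not appear anywhere in this diff.
import streamlit as st
from langchain_community.chat_message_histories import ChatMessageHistory

store = {}

@st.cache_resource
def get_session_history(session_id: str):
    # st.cache_resource keys its cache on the function arguments, so each
    # session_id maps to one ChatMessageHistory that survives Streamlit reruns.
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]
```

Decorating `echo` the same way is more debatable: `st.cache_resource` would replay a cached answer whenever the same `(question, history)` pair recurs instead of re-invoking the chain.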
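
A pre-existing detail both versions share: the `"(?<=\. )"` separator in `split_docs` reads like a regex lookbehind (split after a sentence-ending period), but `RecursiveCharacterTextSplitter` treats separators as literal strings unless `is_separator_regex=True` is set, so that entry will likely never match. A hedged sketch with the flag passed through (chunk numbers are illustrative):

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Sketch only: with is_separator_regex=True every entry is interpreted as a
# regex, letting the lookbehind split after ". "; without the flag, the
# splitter escapes each separator and searches for the literal "(?<=\. )".
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=100,  # illustrative values
    separators=["\n \n \n", "\n \n", "\n1", r"(?<=\. )", " ", ""],
    is_separator_regex=True,
)
```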
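
Also unchanged by this commit: `echo` references `rag_chain`, `chat_history`, and `HumanMessage`, none of which are defined or imported in utills.py, so they must be supplied by the calling app. Below is one plausible assembly of those pieces from the file's own helpers and its existing `create_stuff_documents_chain` / `create_retrieval_chain` imports; the model path, prompts, and chunk parameters are placeholders, not values taken from this repo:

```python
# Hypothetical app-side wiring for echo()'s free variables; every concrete
# value here (paths, prompts, sizes) is an assumption, not from the commit.
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain_core.messages import HumanMessage  # echo() also needs this
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from utills import (chroma_db, history_aware_retriever, load_documents,
                    retriever_from_chroma, split_docs)

llm = LlamaCpp(model_path="models/model.gguf")  # placeholder model path

docs = split_docs(load_documents("data/"), chunk_size=1000, chunk_overlap=100)
vectordb = chroma_db(docs, HuggingFaceEmbeddings())
retriever = history_aware_retriever(
    llm,
    retriever_from_chroma(vectordb, "similarity", k=4),
    "Given the chat history, rewrite the question as a standalone query.",
)

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer using the retrieved context:\n\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])
rag_chain = create_retrieval_chain(
    retriever, create_stuff_documents_chain(llm, qa_prompt)
)

chat_history = []  # the module-level list echo() extends in place
```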