jsaplication committed on
Commit dd0abe0
1 Parent(s): 8c18fd5

Create module_q.py

Files changed (1)
  1. module_q.py +87 -0
module_q.py ADDED
@@ -0,0 +1,87 @@
+ from typing import List
+
+ import langchain
+ from langchain.cache import InMemoryCache
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.document_loaders import TextLoader
+ from langchain.embeddings import LlamaCppEmbeddings
+ from langchain.llms import GPT4All
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores.faiss import FAISS
+
+ # Cache LLM responses in memory so repeated questions are answered from cache.
+ langchain.llm_cache = InMemoryCache()
+
+ # Constants
+ local_path = "./models/gpt4all-converted.bin"   # GPT4All model weights
+ #local_path = "./models/ggml-gpt4all-l13b-snoozy.bin"
+
+ model_path = "./models/ggml-model-q4_0.bin"     # 1st embeddings model
+ #model_path = "./models/ggjt-model.bin"         # 2nd embeddings model
+ text_path = "./docs/acapglobal.txt"
+ index_path = "./acapglobal_index"
+
+ # Functions
+ def initialize_embeddings() -> LlamaCppEmbeddings:
+     return LlamaCppEmbeddings(model_path=model_path)
+
+ def load_documents() -> List:
+     loader = TextLoader(text_path, encoding="utf-8")
+     return loader.load()
+
+ def split_chunks(sources: List) -> List:
+     # Split documents into 512-character chunks with a 32-character overlap.
+     splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
+     return splitter.split_documents(sources)
+
+ def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
+     # Embed every chunk and build a FAISS index, preserving per-chunk metadata.
+     texts = [doc.page_content for doc in chunks]
+     metadatas = [doc.metadata for doc in chunks]
+     return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
+
+
+ # Main execution
+ llm = GPT4All(model=local_path, n_ctx=512, verbose=True, cache=True, embedding=True)
+ print('llm GPT4All set.')
+ embeddings = initialize_embeddings()
+
+ # When the source document changes, rebuild and save the index:
+ #
+ # sources = load_documents()
+ # chunks = split_chunks(sources)
+ # vectorstore = generate_index(chunks, embeddings)
+ # vectorstore.save_local(index_path)
+ #
+ # End of document-refresh block
+
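+ # Illustrative sketch: the commented refresh steps above, wrapped in a single
+ # helper so a rebuild is one call. `refresh_index` is a hypothetical name
+ # added here for clarity, not a name from the committed module.
+ def refresh_index() -> FAISS:
+     sources = load_documents()
+     chunks = split_chunks(sources)
+     vectorstore = generate_index(chunks, embeddings)
+     vectorstore.save_local(index_path)
+     return vectorstore
+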
+
+ chat_history = []
+ index = FAISS.load_local(index_path, embeddings)
+
+ qa = ConversationalRetrievalChain.from_llm(llm, index.as_retriever(), max_tokens_limit=400)
+
+ def search_query_data(s_query):
+     # Answer one query against the index. `chat_history` is never appended to,
+     # so each call is answered without prior conversational context.
+     print("search_query:")
+     print(s_query)
+     retrieval = qa({"question": s_query, "chat_history": chat_history})
+     return retrieval
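
A minimal usage sketch, assuming the FAISS index has already been built at ./acapglobal_index and the model files exist under ./models/; the import name module_q matches the committed file, while the question string is only illustrative:

import module_q

# ConversationalRetrievalChain returns a dict whose "answer" key holds the reply.
result = module_q.search_query_data("What does Acap Global do?")
print(result["answer"])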