PhaedrusFlow committed on
Commit
9308db0
1 Parent(s): 5c1380f

Upload ingest_data.py

Browse files
Files changed (1) hide show
  1. ingest_data.py +23 -0
ingest_data.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Ingestion script: reads a text file, splits it into chunks, embeds the
# chunks into a FAISS vector store, and pickles that store to disk.
import pickle

from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

# Step 1 - read the source text into document objects.
loader = UnstructuredFileLoader("state_of_the_union.txt")
raw_documents = loader.load()

# Step 2 - break the loaded documents into chunks; the splitter is built
# with no arguments, so it runs with its library defaults.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)

# Step 3 - build the FAISS store from the chunks and the embeddings object.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# Step 4 - serialize the store to a binary pickle file.
# NOTE(review): whoever reads this file back must treat it as trusted input,
# since unpickling can execute arbitrary code.
with open("vectorstore.pkl", "wb") as out_file:
    pickle.dump(vectorstore, out_file)