ppsingh committed on
Commit
2cc8ae2
1 Parent(s): d3fc969

Create vectorstore.py

Browse files
Files changed (1) hide show
  1. auditqa/engine/vectorstore.py +45 -0
auditqa/engine/vectorstore.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Create a Qdrant collection and index the sample documents into it.

Running this module:

1. loads environment variables from a ``.env`` file,
2. builds the embedding client selected by ``provider_retrieval_model``
   (``"HF"`` -> Hugging Face Inference API, ``"OAI"`` -> OpenAI),
3. opens a Qdrant client on ``client_path`` and creates ``collection_name``
   with a vector size matching the embedding model,
4. wraps the collection in a LangChain ``Qdrant`` vector store and adds
   ``docs_samp`` to it.

Module-level names exposed: ``provider_retrieval_model``, ``embeddingmodel``,
``HF_Token``, ``client_path``, ``collection_name``, ``qdrantClient``,
``embeddings``, ``dim`` and ``vectorstore``.
"""
import os

from dotenv import load_dotenv
from langchain.embeddings import (  # HuggingFaceEmbeddings kept for callers
    HuggingFaceEmbeddings,
    HuggingFaceInferenceAPIEmbeddings,
    OpenAIEmbeddings,
)
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

provider_retrieval_model = "HF"
embeddingmodel = "BAAI/bge-small-en-v1.5"
load_dotenv()
HF_Token = os.environ.get("HF_TOKEN")


client_path = "./vectorstore"
collection_name = "collection"

# Output sizes of the BGE v1.5 English models (per their model cards). The
# collection's vector size must equal the embedding size, otherwise inserts
# are rejected with a dimension mismatch.
_HF_EMBEDDING_DIMS = {
    "BAAI/bge-small-en-v1.5": 384,
    "BAAI/bge-base-en-v1.5": 768,
    "BAAI/bge-large-en-v1.5": 1024,
}

# Fail early with a clear message instead of a NameError further down when
# the provider switch is set to something this script does not handle.
if provider_retrieval_model not in ("HF", "OAI"):
    raise ValueError(
        f"Unsupported provider_retrieval_model: {provider_retrieval_model!r} "
        "(expected 'HF' or 'OAI')"
    )

qdrantClient = QdrantClient(path=client_path, prefer_grpc=True)

if provider_retrieval_model == "HF":
    embeddings = HuggingFaceInferenceAPIEmbeddings(
        api_key=HF_Token, model_name=embeddingmodel
    )

    # Use the known size for the configured model; for a model that is not
    # in the table, ask the embedding endpoint once and measure the result.
    dim = _HF_EMBEDDING_DIMS.get(embeddingmodel)
    if dim is None:
        dim = len(embeddings.embed_query("dimension probe"))

else:  # provider_retrieval_model == "OAI"
    embeddings = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        openai_api_key=os.getenv("OPENAI_API_KEY"),
    )

    # text-embedding-ada-002 returns 1536-dimensional vectors.
    dim = 1536


# NOTE(review): this presumably fails if the collection already exists under
# `client_path` from an earlier run — confirm whether re-runs must be supported.
qdrantClient.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=dim, distance=Distance.COSINE),
)

vectorstore = Qdrant(
    client=qdrantClient,
    collection_name=collection_name,
    embeddings=embeddings,
)

# NOTE(review): `docs_samp` is not defined anywhere in the visible file; it
# must be provided (e.g. imported from the document-processing step) before
# this line can run — TODO confirm its source.
vectorstore.add_documents(docs_samp)