ppsingh committed on
Commit
1598ceb
1 Parent(s): bc697d2

Update auditqa/doc_process.py

Browse files
Files changed (1) hide show
  1. auditqa/doc_process.py +2 -2
auditqa/doc_process.py CHANGED
@@ -31,7 +31,7 @@ def process_pdf():
31
  # langchain text splitters: https://python.langchain.com/docs/modules/data_connection/document_transformers/
32
  chunk_size = 256
33
  text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
34
- AutoTokenizer.from_pretrained("BAAI/bge-en-icl"),
35
  chunk_size=chunk_size,
36
  chunk_overlap=10,
37
  add_start_index=True,
@@ -78,7 +78,7 @@ def process_pdf():
78
  embeddings = HuggingFaceEmbeddings(
79
  model_kwargs = {'device': device},
80
  encode_kwargs = {'normalize_embeddings': True},
81
- model_name="BAAI/bge-en-icl"
82
  )
83
  # placeholder for collection
84
  qdrant_collections = {}
 
31
  # langchain text splitters: https://python.langchain.com/docs/modules/data_connection/document_transformers/
32
  chunk_size = 256
33
  text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
34
+ AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5"),
35
  chunk_size=chunk_size,
36
  chunk_overlap=10,
37
  add_start_index=True,
 
78
  embeddings = HuggingFaceEmbeddings(
79
  model_kwargs = {'device': device},
80
  encode_kwargs = {'normalize_embeddings': True},
81
+ model_name="BAAI/bge-large-en-v1.5"
82
  )
83
  # placeholder for collection
84
  qdrant_collections = {}