File size: 613 Bytes
b4b5bdf
 
a3a378d
b4b5bdf
 
 
 
 
 
a3a378d
b4b5bdf
a3a378d
 
 
 
 
 
 
b4b5bdf
51727c4
 
 
b4b5bdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import openai
import pandas as pd
from buster.documents_manager import DeepLakeDocumentsManager

from utils import zip_contents


if __name__ == "__main__":
    # Script entry point: load the chunk CSV, adjust its columns, hand the
    # rows to a DeepLakeDocumentsManager, and zip what the manager produced.
    csv_path = "data/output.csv"
    store_dir = "deeplake_store"

    raw = pd.read_csv(csv_path)

    # Pre-processing specific to the latest file provided: the incoming
    # "source" values are preserved under "url", "source" itself becomes a
    # fixed label, and any row with a missing value is dropped afterwards.
    chunks = raw.assign(url=raw["source"], source="towardsai_blog").dropna()

    # NOTE(review): overwrite=True presumably replaces an existing store at
    # store_dir -- confirm against the buster documentation.
    manager = DeepLakeDocumentsManager(store_dir, overwrite=True)
    manager.add(chunks)

    # to_zip() is treated here as returning the path of the archive it wrote.
    zipped_file_path = manager.to_zip()
    print(f"Contents zipped to: {zipped_file_path}")