Spaces:
Running
Running
File size: 613 Bytes
b4b5bdf a3a378d b4b5bdf a3a378d b4b5bdf a3a378d b4b5bdf 51727c4 b4b5bdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import openai
import pandas as pd
from buster.documents_manager import DeepLakeDocumentsManager
from utils import zip_contents
if __name__ == "__main__":
    # Build script: load the chunk CSV, tidy it up, push the rows into the
    # documents manager, and report where the zipped result ended up.
    store_location = "deeplake_store"
    csv_location = "data/output.csv"
    replace_existing = True

    # Pre-processing tailored to the most recent CSV provided: the original
    # "source" values are kept under "url", "source" is then set to a single
    # fixed label for every row, and finally any row that still contains a
    # missing value is dropped.
    chunks = (
        pd.read_csv(csv_location)
        .assign(url=lambda frame: frame["source"])
        .assign(source="towardsai_blog")
        .dropna()
    )

    manager = DeepLakeDocumentsManager(store_location, overwrite=replace_existing)
    manager.add(chunks)

    # The value returned by to_zip() is reported as the archive location.
    archive_path = manager.to_zip()
    print(f"Contents zipped to: {archive_path}")
|