Rajat.bans committed on
Commit
2834406
1 Parent(s): 8a74639

Updated the ads index with ads from Healthline and WebMD URLs, keeping only Health-category ads. Updated the system prompt to remove the relation check.

Browse files
.DS_Store DELETED
Binary file (6.15 kB)
 
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ **/.DS_Store
data/.DS_Store DELETED
Binary file (6.15 kB)
 
data/142_adclick_20May_20Jun_webmd_healthline_Health_dupRemoved0.8_someAdsCampaign.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce108af93e6d9924fd80e20d19e86918995e98310c2528021a14ab4b7edd5ee3
3
+ size 9099622
rag.py CHANGED
@@ -12,8 +12,8 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
12
 
13
  load_dotenv(override=True)
14
  client = OpenAI()
15
- DB_FAISS_PATH = "./vectorstore/db_faiss_ads_1month"
16
- data_file_path = "./data/134_ads_data_1May_1June_perform.tsv"
17
  embedding_model_hf = "BAAI/bge-m3"
18
  qa_model_name = "gpt-3.5-turbo"
19
  default_threshold = 0.95
@@ -86,6 +86,11 @@ Got A Rosemary Bush? Here’re 20 Brilliant & Unusual Ways To Use All That Rosem
86
  }
87
  -----------------------------------------------
88
 
 
 
 
 
 
89
  <Sample INPUT(PAGE_TITLE)>
90
  7 Signs and Symptoms of Magnesium Deficiency
91
 
@@ -101,8 +106,6 @@ Ad 3: Weak or Paralyzed Muscles? - A Common Symptom of Cataplexy. About 70% of P
101
  "options": []
102
  }
103
  ------------------------------------------------
104
-
105
- The ADS_DATA provided to you is as follows:
106
  """
107
 
108
  embeddings_hf = HuggingFaceEmbeddings(model_name=embedding_model_hf)
@@ -253,7 +256,7 @@ db = FAISS.load_local(
253
  DB_FAISS_PATH, embeddings_hf, allow_dangerous_deserialization=True
254
  )
255
  data = pd.read_csv(data_file_path, sep="\t")
256
- data.dropna(axis=0, how="any", inplace=True)
257
  data.drop_duplicates(subset=["ad_title", "ad_desc"], inplace=True)
258
  ad_title_content = list(data["ad_title"].values)
259
  with gr.Blocks() as demo:
 
12
 
13
  load_dotenv(override=True)
14
  client = OpenAI()
15
+ DB_FAISS_PATH = "./vectorstore/db_faiss_ads_20May_20Jun_webmd_healthline_Health_dupRemoved0.8"
16
+ data_file_path = "./data/142_adclick_20May_20Jun_webmd_healthline_Health_dupRemoved0.8_someAdsCampaign.tsv"
17
  embedding_model_hf = "BAAI/bge-m3"
18
  qa_model_name = "gpt-3.5-turbo"
19
  default_threshold = 0.95
 
86
  }
87
  -----------------------------------------------
88
 
89
+ The ADS_DATA provided to you is as follows:
90
+ """
91
+
92
+ old_system_prompt_additional_example = """
93
+ -----------------------------------------------
94
  <Sample INPUT(PAGE_TITLE)>
95
  7 Signs and Symptoms of Magnesium Deficiency
96
 
 
106
  "options": []
107
  }
108
  ------------------------------------------------
 
 
109
  """
110
 
111
  embeddings_hf = HuggingFaceEmbeddings(model_name=embedding_model_hf)
 
256
  DB_FAISS_PATH, embeddings_hf, allow_dangerous_deserialization=True
257
  )
258
  data = pd.read_csv(data_file_path, sep="\t")
259
+ # data.dropna(axis=0, how="any", inplace=True)
260
  data.drop_duplicates(subset=["ad_title", "ad_desc"], inplace=True)
261
  ad_title_content = list(data["ad_title"].values)
262
  with gr.Blocks() as demo:
vectorstore/.DS_Store DELETED
Binary file (6.15 kB)
 
vectorstore/db_faiss_ads_20May_20Jun_webmd_healthline_Health_dupRemoved0.8/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a945f8a29f3870a2a3970936089dc3d7ee0ccba9a7ffaa485c6bb29b3a05387
3
+ size 61648941
vectorstore/db_faiss_ads_20May_20Jun_webmd_healthline_Health_dupRemoved0.8/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:625301b20eaf328fca1d4eab90912e2688c7637cfefa40f32cb195febd53a0ec
3
+ size 6307355