Rajat.bans committed on
Commit
537373b
1 Parent(s): ead6614

Added all changes - responses are better than before

Browse files
Files changed (1) hide show
  1. rag.py +102 -61
rag.py CHANGED
@@ -22,7 +22,9 @@ embeddings_hf = HuggingFaceEmbeddings(model_name=embedding_model_hf)
22
 
23
  class CLUSTERING:
24
  def __init__(self):
25
- self.clustering_algo = 'kmeans-cc' # ['kmeans-cc', 'kmeans-sp', 'spectral_clustering']
 
 
26
 
27
  def cluster_embeddings(self, embeddings, no_of_clusters, no_of_points):
28
  if self.clustering_algo in {"kmeans-cc", "kmeans-sp"}:
@@ -45,10 +47,14 @@ class CLUSTERING:
45
  for i, label in enumerate(labels):
46
  if len(clusters_indices[label]) < no_of_points:
47
  clusters_indices[label].append(i)
48
- if all(len(cluster) == no_of_points for cluster in clusters_indices):
 
 
49
  break
50
  elif self.clustering_algo == "spectral":
51
- spectral_clustering = SpectralClustering(n_clusters=no_of_clusters, affinity='nearest_neighbors', random_state=42)
 
 
52
  labels = spectral_clustering.fit_predict(embeddings)
53
 
54
  clusters_indices = [[] for _ in range(no_of_clusters)]
@@ -62,6 +68,7 @@ class CLUSTERING:
62
  for i in range(no_of_clusters)
63
  ]
64
 
 
65
  class VECTOR_DB:
66
  def __init__(self):
67
  self.DB_FAISS_PATH = "./vectorstore/db_faiss_ads_20May_20Jun_webmd_healthline_Health_dupRemoved0.8"
@@ -89,20 +96,25 @@ class VECTOR_DB:
89
  retreived_documents[i][0].page_content = remove_html_tags(
90
  retreived_documents[i][0].page_content
91
  )
92
- embeddings = np.array(embeddings_hf.embed_documents([doc[0].page_content for doc in retreived_documents]))
 
 
 
 
 
93
 
94
- clustered_indices = CLUSTERING().cluster_embeddings(
95
- embeddings, self.no_of_clusters, self.no_of_ads_in_each_cluster
96
- )
97
- documents_clusters = [
98
- [retreived_documents[ind] for ind in cluster_indices]
99
- for cluster_indices in clustered_indices
100
- ]
101
 
102
- best_value = 1
103
- if len(retreived_documents):
104
  best_value = retreived_documents[0][1]
105
- return documents_clusters, best_value
 
 
106
 
107
  class ADS_RAG:
108
  def __init__(self):
@@ -110,11 +122,11 @@ class ADS_RAG:
110
  self.db = VECTOR_DB()
111
  self.qa_model_name = "gpt-3.5-turbo"
112
  self.relation_check_best_value_thresh = 0.6
113
- self.bestRelationSystemPrompt = """You are an advertising concierge for text ads on websites. Given an INPUT(PAGE_TITLE) and the available ad inventory (ADS_DATA), your task is to determine whether there are some relevant ADS to INPUT are present in ADS_DATA. ADS WHICH DON'T MATCH USER'S INTENT SHOULD BE CONSIDERED IRRELEVANT
114
 
115
  ---------------------------------------
116
 
117
- **Sample INPUT(PAGE_TITLE)***: What Causes Bright-Yellow Urine and Other Changes in Color?
118
 
119
  Expected json output :
120
  {
@@ -123,42 +135,40 @@ Expected json output :
123
  }
124
  ------------------------------------------------
125
 
126
- **Sample INPUT(PAGE_TITLE)**: The Effects of Aging on Skin
127
 
128
  Expected json output :
129
  {
130
- "reasoning" : "Given the user's search for 'The Effects of Aging on Skin,' it is clear that they are seeking information related to skin aging. Therefore, the ads that are relevant to skin effects should be considered. Ads 1 and 2 focus on wrinkle treatment and anti-aging solutions, making them pertinent to the user's intent. Ad 3 targets vitiligo and not general skin aging but it is related to skin effect. So it is also relevant. Ads 4 and 5 are about advanced lung cancer, which do not address the interest in skin. Ads 1 and 2, 3 are most relevant to the user's search. So ADS_DATA is relevant to INPUT TITLE. ",
131
  "classification": 1
132
  }
133
  ---------------------------------------
134
 
135
  The ADS_DATA provided to you is as follows:
 
136
 
137
- """
138
-
139
- self.bestQuestionSystemPrompt = """1. You are an advertising concierge for text ads on websites. Given an INPUT(PAGE_TITLE) and the available ad inventory (ADS_DATA), your task is to form a relevant QUESTION to ask the user visiting the webpage. This question should help identify the user's intent behind visiting the webpage.
140
- 2. From the ADS_DATA clusters, discard all ads that are not related to the INPUT or do not match the user's intent behind visiting the page. Also, remove any ads that are distantly related to the user's intent.
141
- 3. FROM REMAINING ADS in each ads cluster form an OPTION which should be both the answer for the QUESTION and related to ads in this cluster.
142
- 4. Try to generate intelligent creatives for advertising and keep QUESTION within 70 characters and each OPTION with either 4, 5, or 6 words.
143
- 5. Provide your REASONING behind choosing the QUESTION and the OPTIONS. Now provide the QUESTION and the OPTIONS. Along with each OPTION, provide the ads from ADS_DATA that you associated with it.
144
 
145
  ---------------------------------------
146
 
147
- <Sample INPUT(PAGE_TITLE)>
148
  The Effects of Aging on Skin
149
 
150
  <Sample ADS_DATA>
151
- [{"Ad 1": "Forget Retinol, Use This Household Item To Fill In Wrinkles - Celebrities Are Ditching Pricey Facelifts For This.", "Ad 2": "Stop Covering Your Wrinkles with Make Up - Do This Instead."}, {"Ad 3": "Living With Migraines? - Discover A Treatment Option. Learn about a type of prescription migraine treatment called CGRP receptor antagonists. Discover a range of resources that may help people dealing with migraines."}, {"Ad 4": "What is Advanced Skin Cancer? - Find Disease Information Here.Find Facts About Advanced Skin Cancer and a Potential Treatment Option.", "Ad 5": "Learn About Advanced Melanoma - Find Disease Information Here.Find Facts About Advanced Melanoma and a Potential Treatment Option.", "Ad 6": "Treatment For CKD - Reduce Risk Of Progressing CKD. Ask About A Treatment That Can Help Reduce Your Risk Of Kidney Failure.", "Ad 7": "Are You Living With Vitiligo? - For Patients & Caregivers.Discover An FDA-Approved Topical Cream That May Help With Nonsegmental Vitiligo Repigmentation. Learn About A Copay Savings Card For Eligible Patients With Vitiligo."}]
152
 
153
  <Expected json output>
154
  {
155
- "reasoning" : "Among the seven ads in **Sample ADS_DATA**, Ads 3 and 6 are irrelevant to the INPUT, so they should be discarded. Ad 1, 2, 4, 5, and 7 are relevant to INPUT. The question will be formed in a way to connect the PAGE TITLE content with the goals of these five relevant ads, making sure they appeal to both specific and general user interests.",
156
- "question": "Which of the following methods to combat aging skin are you most interested in?",
157
- "options": {"1. Reduce Wrinkles without Makeup.": ["Ad 1: Stop Covering Your Wrinkles with Make Up - Do This Instead."], "2. Retinol Alternatives for Wrinkle Treatment." : ["Ad 2: Forget Retinol, Use This Household Item To Fill In Wrinkles - Celebrities Are Ditching Pricey Facelifts For This."], "3. Information on Skin Diseases": ["Ad 4: What is Advanced Skin Cancer? - Find Disease Information Here.Find Facts About Advanced Skin Cancer and a Potential Treatment Option.", "Ad 5: Learn About Advanced Melanoma - Find Disease Information Here.Find Facts About Advanced Melanoma and a Potential Treatment Option.", "Ad 7: Are You Living With Vitiligo? - For Patients & Caregivers.Discover An FDA-Approved Topical Cream That May Help With Nonsegmental Vitiligo Repigmentation. Learn About A Copay Savings Card For Eligible Patients With Vitiligo."]}
158
  }
159
  -----------------------------------------------
160
 
161
- <Sample INPUT(PAGE_TITLE)>
162
  Got A Rosemary Bush? Here’re 20 Brilliant & Unusual Ways To Use All That Rosemary
163
 
164
  <Sample ADS_DATA>
@@ -173,25 +183,25 @@ Got A Rosemary Bush? Here’re 20 Brilliant & Unusual Ways To Use All That Rosem
173
  -----------------------------------------------
174
 
175
  The ADS_DATA provided to you is as follows:
176
- """
177
 
178
  old_system_prompt_additional_example = """
179
- -----------------------------------------------
180
- <Sample INPUT(PAGE_TITLE)>
181
- 7 Signs and Symptoms of Magnesium Deficiency
182
-
183
- <Sample ADS_DATA>
184
- Ad 1: 4 Warning Signs Of Dementia - Fight Dementia and Memory Loss. 100% Natural Program To Prevent Cognitive Decline. Developed By Dr. Will Mitchell. Read The Reviews-Get a Special Offer. Doctor Recommended. High Quality Standards. 60-Day Refund.
185
- Ad 2: About Hyperkalemia - Learn About The Symptoms. High Potassium Can Be A Serious Condition. Learn More About Hyperkalemia Today.
186
- Ad 3: Weak or Paralyzed Muscles? - A Common Symptom of Cataplexy. About 70% of People With Narcolepsy Are Believed to Have Cataplexy Symptoms. Learn More. Download the Doctor Discussion Guide to Have a Informed Conversation About Your Health.
187
-
188
- <Expected json output>
189
- {
190
- "reasoning" : "Given the input '7 Signs and Symptoms of Magnesium Deficiency,' it is evident that the user is looking for information specifically about magnesium deficiency. Ads 1, 2, and 3 discuss topics such as dementia, hyperkalemia, weak muscles, which are not related to magnesium deficiency in any way. Therefore, all the ads in the ADS_DATA are not suitable for the user's query and will be discarded.",
191
- "question": "No related ads available to form question and options.",
192
- "options": []
193
- }
194
- ------------------------------------------------
195
  """
196
 
197
  def callOpenAiApi(self, messages):
@@ -202,14 +212,15 @@ The ADS_DATA provided to you is as follows:
202
  messages=messages,
203
  temperature=0,
204
  seed=42,
205
- max_tokens=1000,
206
  response_format={"type": "json_object"},
207
  )
208
  tokens_used = response.usage.total_tokens
209
  answer = json.loads(response.choices[0].message.content)
210
  return answer, tokens_used
211
  except Exception as e:
212
- print("Error-: ", e.message)
 
213
  print("Trying Again")
214
 
215
  def getBestQuestionOnTheBasisOfPageInformationAndAdsData(
@@ -253,7 +264,8 @@ The ADS_DATA provided to you is as follows:
253
  )
254
 
255
  if relation_answer["classification"] != 0:
256
- question_answer, tokens_used_question = self.callOpenAiApi([
 
257
  {
258
  "role": "system",
259
  "content": questionSystemPrompt + adsData,
@@ -264,7 +276,8 @@ The ADS_DATA provided to you is as follows:
264
  "role": "user",
265
  "content": page_information + "\nThe JSON response: ",
266
  }
267
- ])
 
268
  return (relation_answer, tokens_used_relation), (
269
  question_answer,
270
  tokens_used_question,
@@ -272,17 +285,22 @@ The ADS_DATA provided to you is as follows:
272
 
273
  def convertDocumentsClustersToStringForApiCall(self, documents_clusters):
274
  key_counter = count(1)
275
- res = json.dumps([
276
- {f"Ad {next(key_counter)}": document[0].page_content for j, document in enumerate(documents_cluster)}
277
- for i, documents_cluster in enumerate(documents_clusters)
278
- ], indent=4)
 
 
 
 
 
279
  return res
280
 
281
  def changeDocumentsToPrintableString(self, documents_clusters):
282
  res = ""
283
  i = 0
284
  for ind, documents_cluster in enumerate(documents_clusters):
285
- res += f"Cluster {ind+1}-:\n"
286
  for document in documents_cluster:
287
  i += 1
288
  res += f"[Ad {i}] Content: {document[0].page_content}\nRevenue: {document[0].metadata['revenue']}\nAd Click Count: {document[0].metadata['ad_click_count']}\nValue: {document[1]}\n"
@@ -300,8 +318,23 @@ The ADS_DATA provided to you is as follows:
300
  res += "\n"
301
  return res
302
 
303
- def logResult(self, curr_relation_prompt, curr_question_prompt, page_information, relation_answer, question_answer):
304
- print("----------------------------------------------------------------------------------------------------------------------------------------------------------------\n", curr_relation_prompt, curr_question_prompt, page_information, json.dumps(relation_answer, indent=4), json.dumps(question_answer, indent=4), "\n----------------------------------------------------------------------------------------------------------------------------------------------------------------\n\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
  def getRagResponse(
307
  self, RelationPrompt, QuestionPrompt, threshold, page_information
@@ -314,7 +347,9 @@ The ADS_DATA provided to you is as follows:
314
  if QuestionPrompt != None or len(QuestionPrompt):
315
  curr_question_prompt = QuestionPrompt
316
 
317
- documents_clusters, best_value = self.db.queryVectorDB(page_information, threshold)
 
 
318
  relation_answer, question_answer = (
319
  self.getBestQuestionOnTheBasisOfPageInformationAndAdsData(
320
  page_information,
@@ -324,7 +359,13 @@ The ADS_DATA provided to you is as follows:
324
  best_value,
325
  )
326
  )
327
- self.logResult(curr_relation_prompt, curr_relation_prompt, page_information, relation_answer, question_answer)
 
 
 
 
 
 
328
 
329
  docs_info = self.changeDocumentsToPrintableString(documents_clusters)
330
  relation_answer_string = self.changeResponseToPrintableString(
 
22
 
23
  class CLUSTERING:
24
  def __init__(self):
25
+ self.clustering_algo = (
26
+ "kmeans-cc" # ['kmeans-cc', 'kmeans-sp', 'spectral_clustering']
27
+ )
28
 
29
  def cluster_embeddings(self, embeddings, no_of_clusters, no_of_points):
30
  if self.clustering_algo in {"kmeans-cc", "kmeans-sp"}:
 
47
  for i, label in enumerate(labels):
48
  if len(clusters_indices[label]) < no_of_points:
49
  clusters_indices[label].append(i)
50
+ if all(
51
+ len(cluster) == no_of_points for cluster in clusters_indices
52
+ ):
53
  break
54
  elif self.clustering_algo == "spectral":
55
+ spectral_clustering = SpectralClustering(
56
+ n_clusters=no_of_clusters, affinity="nearest_neighbors", random_state=42
57
+ )
58
  labels = spectral_clustering.fit_predict(embeddings)
59
 
60
  clusters_indices = [[] for _ in range(no_of_clusters)]
 
68
  for i in range(no_of_clusters)
69
  ]
70
 
71
+
72
  class VECTOR_DB:
73
  def __init__(self):
74
  self.DB_FAISS_PATH = "./vectorstore/db_faiss_ads_20May_20Jun_webmd_healthline_Health_dupRemoved0.8"
 
96
  retreived_documents[i][0].page_content = remove_html_tags(
97
  retreived_documents[i][0].page_content
98
  )
99
+ if len(retreived_documents):
100
+ embeddings = np.array(
101
+ embeddings_hf.embed_documents(
102
+ [doc[0].page_content for doc in retreived_documents]
103
+ )
104
+ )
105
 
106
+ clustered_indices = CLUSTERING().cluster_embeddings(
107
+ embeddings, self.no_of_clusters, self.no_of_ads_in_each_cluster
108
+ )
109
+ documents_clusters = [
110
+ [retreived_documents[ind] for ind in cluster_indices]
111
+ for cluster_indices in clustered_indices
112
+ ]
113
 
 
 
114
  best_value = retreived_documents[0][1]
115
+ return documents_clusters, best_value
116
+ return [], 1
117
+
118
 
119
  class ADS_RAG:
120
  def __init__(self):
 
122
  self.db = VECTOR_DB()
123
  self.qa_model_name = "gpt-3.5-turbo"
124
  self.relation_check_best_value_thresh = 0.6
125
+ self.bestRelationSystemPrompt = """You are an advertising concierge for text ads on websites. Given an INPUT and the available ad inventory (ADS_DATA), your task is to determine whether there are some relevant ADS to INPUT are present in ADS_DATA. ADS WHICH DON'T MATCH USER'S INTENT SHOULD BE CONSIDERED IRRELEVANT
126
 
127
  ---------------------------------------
128
 
129
+ **Sample INPUT***: What Causes Bright-Yellow Urine and Other Changes in Color?
130
 
131
  Expected json output :
132
  {
 
135
  }
136
  ------------------------------------------------
137
 
138
+ **Sample INPUT**: The Effects of Aging on Skin
139
 
140
  Expected json output :
141
  {
142
+ "reasoning" : "Given the user's search for 'The Effects of Aging on Skin,' it is clear that they are seeking information related to skin aging. Therefore, the ads that are relevant to skin effects should be considered. Ads 1 and 2 focus on wrinkle treatment and anti-aging solutions, making them pertinent to the user's intent. Ad 3 targets vitiligo and not general skin aging but it is related to skin effect. So it is also relevant. Ads 4 and 5 are about advanced lung cancer, which do not address the interest in skin. Ads 1 and 2, 3 are most relevant to the user's search. So ADS_DATA is relevant to INPUT. ",
143
  "classification": 1
144
  }
145
  ---------------------------------------
146
 
147
  The ADS_DATA provided to you is as follows:
148
+ """
149
 
150
+ self.bestQuestionSystemPrompt = """1. You are an advertising concierge for text ads on websites. Given an INPUT and the available ad inventory (ADS_DATA), your task is to form a relevant QUESTION to ask the user visiting the webpage. This question should help identify the user's intent behind visiting the webpage and should be highly attractive.
151
+ 2. Now form a highly attractive/lucrative and diverse/mutually exclusive OPTION which should be both the answer for the QUESTION and related to ads in this cluster.
152
+ 3. Try to generate intelligent creatives for advertising and keep QUESTION within 70 characters and either 2, 3 or 4 options with each OPTION within 4 to 6 words.
153
+ 4. Provide your REASONING behind choosing the QUESTION and the OPTIONS. Now provide the QUESTION and the OPTIONS. Along with each OPTION, provide the ads from ADS_DATA that you associated with it.
 
 
 
154
 
155
  ---------------------------------------
156
 
157
+ <Sample INPUT>
158
  The Effects of Aging on Skin
159
 
160
  <Sample ADS_DATA>
161
+ {"Cluster 1 Ads": {"Ad 1": "Forget Retinol, Use This Household Item To Fill In Wrinkles - Celebrities Are Ditching Pricey Facelifts For This."}, "Cluster 2 Ads": {"Ad 2": "Stop Covering Your Wrinkles with Make Up - Do This Instead.", "Ad 3": "Living With Migraines? - Discover A Treatment Option. Learn about a type of prescription migraine treatment called CGRP receptor antagonists. Discover a range of resources that may help people dealing with migraines"}, "Cluster 3 Ads": {"Ad 4": "What is Advanced Skin Cancer? - Find Disease Information Here.Find Facts About Advanced Skin Cancer and a Potential Treatment Option.", "Ad 5": "Learn About Advanced Melanoma - Find Disease Information Here.Find Facts About Advanced Melanoma and a Potential Treatment Option.", "Ad 6": "Treatment For CKD - Reduce Risk Of Progressing CKD. Ask About A Treatment That Can Help Reduce Your Risk Of Kidney Failure", "Ad 7": "Are You Living With Vitiligo? - For Patients & Caregivers.Discover An FDA-Approved Topical Cream That May Help With Nonsegmental Vitiligo Repigmentation. Learn About A Copay Savings Card For Eligible Patients With Vitiligo."}]
162
 
163
  <Expected json output>
164
  {
165
+ "reasoning" : "Among the seven ads in **Sample ADS_DATA**, Ads 3 and 6 are irrelevant to the INPUT, so they should be discarded. Ad 1, 2 closely aligns with the user's intent. Ads 4, 5, and 7 are also relevant to INPUT. The question will be formed in a way to connect the PAGE content with the goals of these five relevant ads, making sure they appeal to both specific and general user interests, with the OPTIONS being the answer for QUESTION(it is ensured that no irrelevant options are formed)",
166
+ "question": "Interested in methods to combat aging skin?",
167
+ "options": {"1. Retinol Alternatives for Wrinkle Treatment." : ["Ad 1: Forget Retinol, Use This Household Item To Fill In Wrinkles - Celebrities Are Ditching Pricey Facelifts For This."], "2. Reduce Wrinkles without Makeup.": ["Ad 2: Stop Covering Your Wrinkles with Make Up - Do This Instead."], "3. Information on Skin Diseases": ["Ad 3: What is Advanced Skin Cancer? - Find Disease Information Here.Find Facts About Advanced Skin Cancer and a Potential Treatment Option.", "Ad 4: Learn About Advanced Melanoma - Find Disease Information Here.Find Facts About Advanced Melanoma and a Potential Treatment Option.", "Ad 5: Are You Living With Vitiligo? - For Patients & Caregivers.Discover An FDA-Approved Topical Cream That May Help With Nonsegmental Vitiligo Repigmentation. Learn About A Copay Savings Card For Eligible Patients With Vitiligo."]}
168
  }
169
  -----------------------------------------------
170
 
171
+ <Sample INPUT>
172
  Got A Rosemary Bush? Here’re 20 Brilliant & Unusual Ways To Use All That Rosemary
173
 
174
  <Sample ADS_DATA>
 
183
  -----------------------------------------------
184
 
185
  The ADS_DATA provided to you is as follows:
186
+ """
187
 
188
  old_system_prompt_additional_example = """
189
+ -----------------------------------------------
190
+ <Sample INPUT>
191
+ 7 Signs and Symptoms of Magnesium Deficiency
192
+
193
+ <Sample ADS_DATA>
194
+ Ad 1: 4 Warning Signs Of Dementia - Fight Dementia and Memory Loss. 100% Natural Program To Prevent Cognitive Decline. Developed By Dr. Will Mitchell. Read The Reviews-Get a Special Offer. Doctor Recommended. High Quality Standards. 60-Day Refund.
195
+ Ad 2: About Hyperkalemia - Learn About The Symptoms. High Potassium Can Be A Serious Condition. Learn More About Hyperkalemia Today.
196
+ Ad 3: Weak or Paralyzed Muscles? - A Common Symptom of Cataplexy. About 70% of People With Narcolepsy Are Believed to Have Cataplexy Symptoms. Learn More. Download the Doctor Discussion Guide to Have a Informed Conversation About Your Health.
197
+
198
+ <Expected json output>
199
+ {
200
+ "reasoning" : "Given the input '7 Signs and Symptoms of Magnesium Deficiency,' it is evident that the user is looking for information specifically about magnesium deficiency. Ads 1, 2, and 3 discuss topics such as dementia, hyperkalemia, weak muscles, which are not related to magnesium deficiency in any way. Therefore, all the ads in the ADS_DATA are not suitable for the user's query and will be discarded.",
201
+ "question": "No related ads available to form question and options.",
202
+ "options": []
203
+ }
204
+ ------------------------------------------------
205
  """
206
 
207
  def callOpenAiApi(self, messages):
 
212
  messages=messages,
213
  temperature=0,
214
  seed=42,
215
+ max_tokens=1200,
216
  response_format={"type": "json_object"},
217
  )
218
  tokens_used = response.usage.total_tokens
219
  answer = json.loads(response.choices[0].message.content)
220
  return answer, tokens_used
221
  except Exception as e:
222
+ print(response.choices[0].message.content)
223
+ print("Error-: ", e)
224
  print("Trying Again")
225
 
226
  def getBestQuestionOnTheBasisOfPageInformationAndAdsData(
 
264
  )
265
 
266
  if relation_answer["classification"] != 0:
267
+ question_answer, tokens_used_question = self.callOpenAiApi(
268
+ [
269
  {
270
  "role": "system",
271
  "content": questionSystemPrompt + adsData,
 
276
  "role": "user",
277
  "content": page_information + "\nThe JSON response: ",
278
  }
279
+ ]
280
+ )
281
  return (relation_answer, tokens_used_relation), (
282
  question_answer,
283
  tokens_used_question,
 
285
 
286
  def convertDocumentsClustersToStringForApiCall(self, documents_clusters):
287
  key_counter = count(1)
288
+ res = json.dumps(
289
+ {
290
+ f"Option {i+1} Ads": {
291
+ f"Ad {next(key_counter)}": document[0].page_content
292
+ for j, document in enumerate(documents_cluster)
293
+ }
294
+ for i, documents_cluster in enumerate(documents_clusters)
295
+ }
296
+ )
297
  return res
298
 
299
  def changeDocumentsToPrintableString(self, documents_clusters):
300
  res = ""
301
  i = 0
302
  for ind, documents_cluster in enumerate(documents_clusters):
303
+ res += f"Option {ind+1} Ads-:\n"
304
  for document in documents_cluster:
305
  i += 1
306
  res += f"[Ad {i}] Content: {document[0].page_content}\nRevenue: {document[0].metadata['revenue']}\nAd Click Count: {document[0].metadata['ad_click_count']}\nValue: {document[1]}\n"
 
318
  res += "\n"
319
  return res
320
 
321
def logResult(
    self,
    curr_relation_prompt,
    curr_question_prompt,
    page_information,
    relation_answer,
    question_answer,
):
    """Print the page information and both model answers between banner lines.

    Args:
        curr_relation_prompt: accepted but not printed.
        curr_question_prompt: accepted but not printed.
        page_information: value printed after the opening banner.
        relation_answer: JSON-serializable object, dumped with ``indent=4``.
        question_answer: JSON-serializable object, dumped with ``indent=4``.
    """
    banner_top = "**************************************************************************************************\n"
    banner_bottom = "\n************************************************************************************************\n\n"
    relation_dump = json.dumps(relation_answer, indent=4)
    question_dump = json.dumps(question_answer, indent=4)
    # print() joins the pieces with single spaces, as the original call did.
    print(banner_top, page_information, relation_dump, question_dump, banner_bottom)
338
 
339
  def getRagResponse(
340
  self, RelationPrompt, QuestionPrompt, threshold, page_information
 
347
  if QuestionPrompt != None or len(QuestionPrompt):
348
  curr_question_prompt = QuestionPrompt
349
 
350
+ documents_clusters, best_value = self.db.queryVectorDB(
351
+ page_information, threshold
352
+ )
353
  relation_answer, question_answer = (
354
  self.getBestQuestionOnTheBasisOfPageInformationAndAdsData(
355
  page_information,
 
359
  best_value,
360
  )
361
  )
362
+ self.logResult(
363
+ curr_relation_prompt,
364
+ curr_relation_prompt,
365
+ page_information,
366
+ relation_answer,
367
+ question_answer,
368
+ )
369
 
370
  docs_info = self.changeDocumentsToPrintableString(documents_clusters)
371
  relation_answer_string = self.changeResponseToPrintableString(