Spaces:

MediaNetAdsRag
/

Ads_Rag

Sleeping

App Files Files Community

Rajat.bans committed on Jun 13

Commit

ee3f636

•

1 Parent(s): bf7f11b

Formatted the code

Browse files

Files changed (1) hide show

rag.py +81 -31

rag.py CHANGED Viewed

@@ -107,39 +107,67 @@ The ADS_DATA provided to you is as follows:
 embeddings_hf = HuggingFaceEmbeddings(model_name=embedding_model_hf)
-def getBestQuestionOnTheBasisOfPageInformationAndAdsData(page_information, adsData, relationSystemPrompt, questionSystemPrompt, bestRetreivedAdValue):
     if adsData == "":
-        return ({"reasoning": "No ads data present", "classification": 0}, 0), ({"reasoning": "", "question": "", "options": []}, 0)
     relation_answer = {"reasoning": "", "classification": 1}
     question_answer = {"reasoning": "", "question": "", "options": []}
     tokens_used_relation = 0
     tokens_used_question = 0
     while True:
         try:
-            if (bestRetreivedAdValue > relation_check_best_value_thresh):
-                system_message = {"role": "system", "content": relationSystemPrompt + adsData}
                 response = client.chat.completions.create(
                     model=qa_model_name,
-                    messages=[system_message] + [{"role": "user", "content": page_information + "\nThe JSON response: "}],
                     temperature=0,
                     seed=42,
                     max_tokens=1000,
-                    response_format={"type": "json_object" }
                 )
                 tokens_used_relation = response.usage.total_tokens
                 relation_answer = json.loads(response.choices[0].message.content)
                 tokens_used_question = 0
-            if(relation_answer['classification'] != 0):
-                system_message = {"role": "system", "content": questionSystemPrompt + adsData}
                 response = client.chat.completions.create(
                     model=qa_model_name,
-                    messages=[system_message] + [{"role": "user", "content": page_information + "\nThe JSON response: "}],
                     temperature=0,
                     seed=42,
                     max_tokens=1000,
-                    response_format={"type": "json_object" }
                 )
                 tokens_used_question = response.usage.total_tokens
                 question_answer = json.loads(response.choices[0].message.content)
@@ -147,15 +175,19 @@ def getBestQuestionOnTheBasisOfPageInformationAndAdsData(page_information, adsDa
         except Exception as e:
             print("Error-: ", e.message)
             print("Trying Again")
-    return (relation_answer, tokens_used_relation), (question_answer, tokens_used_question)
 def changeResponseToPrintableString(response, task):
     if task == "relation":
         return f"Reasoning: {response['reasoning']}\n\nClassification: {response['classification']}\n"
     res = f"Reasoning: {response['reasoning']}\n\nQuestion: {response['question']}\n\nOptions: \n"
-    for option in response['options']:
         res += f"{option}\n"
-        for ad in response['options'][option]:
             res += f"{ad}\n"
         res += "\n"
     return res
@@ -165,25 +197,34 @@ def getRagResponse(RelationPrompt, QuestionPrompt, threshold, page_information):
     curr_relation_prompt = bestRelationSystemPrompt
     if RelationPrompt != None or len(RelationPrompt):
         curr_relation_prompt = RelationPrompt
     curr_question_prompt = bestQuestionSystemPrompt
     if QuestionPrompt != None or len(QuestionPrompt):
         curr_question_prompt = QuestionPrompt
     retreived_documents = [
         doc
-        for doc in db.similarity_search_with_score(page_information, k = number_of_ads_to_fetch_from_db)
         if doc[1] < threshold
     ]
     best_value = 1
     if len(retreived_documents):
         best_value = retreived_documents[0][1]
-    relation_answer, question_answer = getBestQuestionOnTheBasisOfPageInformationAndAdsData(
-        page_information,
-        ".\n".join(["Ad " + str(i+1) + ". " + doc[0].page_content for i, doc in enumerate(retreived_documents)]),
-        curr_relation_prompt,
-        curr_question_prompt,
-        best_value
     )
     print("QUERY:", page_information, relation_answer, question_answer)
     docs_info = "\n\n".join(
@@ -194,8 +235,12 @@ def getRagResponse(RelationPrompt, QuestionPrompt, threshold, page_information):
         ]
     )
     try:
-        relation_answer_string = changeResponseToPrintableString(relation_answer[0], "relation")
-        question_answer_string = changeResponseToPrintableString(question_answer[0], "question")
         full_response = f"**ANSWER**: \n Relation answer:\n {relation_answer_string}\n Question answer:\n {question_answer_string}\n\n**RETREIVED DOCUMENTS**:\n{docs_info}\n\n**TOKENS USED**:\nQuestion api call: {question_answer[1]}\nRelation api call: {relation_answer[1]}"
     except:
         full_response = f"Invalid response received"
@@ -205,9 +250,9 @@ def getRagResponse(RelationPrompt, QuestionPrompt, threshold, page_information):
 db = FAISS.load_local(
     DB_FAISS_PATH, embeddings_hf, allow_dangerous_deserialization=True
 )
-data = pd.read_csv(data_file_path, sep='\t')
-data.dropna(axis=0, how='any', inplace=True)
-data.drop_duplicates(subset = ['ad_title', 'ad_desc'], inplace=True)
 ad_title_content = list(data["ad_title"].values)
 with gr.Blocks() as demo:
     gr.Markdown("# RAG on ads data")
@@ -227,13 +272,15 @@ with gr.Blocks() as demo:
         page_information = gr.Textbox(
             lines=1, placeholder="Enter the page information", label="Page Information"
         )
-        threshold = gr.Number(value = default_threshold, label="Threshold", interactive=True)
     output = gr.Textbox(label="Output")
     submit_btn = gr.Button("Submit")
     submit_btn.click(
         getRagResponse,
-        inputs= [RelationPrompt, QuestionPrompt, threshold, page_information],
         outputs=[output],
     )
     page_information.submit(
@@ -246,7 +293,10 @@ with gr.Blocks() as demo:
     demo.load(
         lambda: "<br>".join(
-            random.sample([str(ad_title) for ad_title in ad_title_content], min(100, len(ad_title_content)))
         ),
         None,
         ad_titles,

 embeddings_hf = HuggingFaceEmbeddings(model_name=embedding_model_hf)
+def getBestQuestionOnTheBasisOfPageInformationAndAdsData(
+    page_information,
+    adsData,
+    relationSystemPrompt,
+    questionSystemPrompt,
+    bestRetreivedAdValue,
+):
     if adsData == "":
+        return ({"reasoning": "No ads data present", "classification": 0}, 0), (
+            {"reasoning": "", "question": "", "options": []},
+            0,
+        )
     relation_answer = {"reasoning": "", "classification": 1}
     question_answer = {"reasoning": "", "question": "", "options": []}
     tokens_used_relation = 0
     tokens_used_question = 0
     while True:
         try:
+            if bestRetreivedAdValue > relation_check_best_value_thresh:
+                system_message = {
+                    "role": "system",
+                    "content": relationSystemPrompt + adsData,
+                }
                 response = client.chat.completions.create(
                     model=qa_model_name,
+                    messages=[system_message]
+                    + [
+                        {
+                            "role": "user",
+                            "content": page_information + "\nThe JSON response: ",
+                        }
+                    ],
                     temperature=0,
                     seed=42,
                     max_tokens=1000,
+                    response_format={"type": "json_object"},
                 )
                 tokens_used_relation = response.usage.total_tokens
                 relation_answer = json.loads(response.choices[0].message.content)
                 tokens_used_question = 0
+            if relation_answer["classification"] != 0:
+                system_message = {
+                    "role": "system",
+                    "content": questionSystemPrompt + adsData,
+                }
                 response = client.chat.completions.create(
                     model=qa_model_name,
+                    messages=[system_message]
+                    + [
+                        {
+                            "role": "user",
+                            "content": page_information + "\nThe JSON response: ",
+                        }
+                    ],
                     temperature=0,
                     seed=42,
                     max_tokens=1000,
+                    response_format={"type": "json_object"},
                 )
                 tokens_used_question = response.usage.total_tokens
                 question_answer = json.loads(response.choices[0].message.content)
         except Exception as e:
             print("Error-: ", e.message)
             print("Trying Again")
+    return (relation_answer, tokens_used_relation), (
+        question_answer,
+        tokens_used_question,
+    )
 def changeResponseToPrintableString(response, task):
     """Render a parsed model response as a human-readable string.

     Args:
         response: Parsed JSON object returned by the model. For the
             "relation" task it must contain the keys ``reasoning`` and
             ``classification``; for any other task it must contain
             ``reasoning``, ``question`` and ``options``.
         task: ``"relation"`` selects the relation layout; every other value
             selects the question layout.

     Returns:
         The formatted text. In the question layout each option is printed
         on its own line, followed by one line per ad listed under that
         option and then a blank line.

     Raises:
         KeyError: If a required key is missing from ``response``.
         TypeError: If ``options`` is a non-empty sequence rather than a
             mapping, because each option is used to index ``options``.
     """
     if task == "relation":
         return (
             f"Reasoning: {response['reasoning']}\n\n"
             f"Classification: {response['classification']}\n"
         )
     # Collect the pieces and join once instead of growing a string with
     # repeated ``+=`` inside nested loops.
     pieces = [
         f"Reasoning: {response['reasoning']}\n\n"
         f"Question: {response['question']}\n\nOptions: \n"
     ]
     # NOTE(review): ``options`` is presumably a mapping of option text to a
     # list of ads -- confirm against the question prompt. The empty list
     # used as the fallback value elsewhere in this file also works here
     # because nothing is iterated.
     options = response["options"]
     for option in options:
         pieces.append(f"{option}\n")
         pieces.extend(f"{ad}\n" for ad in options[option])
         pieces.append("\n")
     return "".join(pieces)
     curr_relation_prompt = bestRelationSystemPrompt
     if RelationPrompt != None or len(RelationPrompt):
         curr_relation_prompt = RelationPrompt
     curr_question_prompt = bestQuestionSystemPrompt
     if QuestionPrompt != None or len(QuestionPrompt):
         curr_question_prompt = QuestionPrompt
     retreived_documents = [
         doc
+        for doc in db.similarity_search_with_score(
+            page_information, k=number_of_ads_to_fetch_from_db
+        )
         if doc[1] < threshold
     ]
     best_value = 1
     if len(retreived_documents):
         best_value = retreived_documents[0][1]
+    relation_answer, question_answer = (
+        getBestQuestionOnTheBasisOfPageInformationAndAdsData(
+            page_information,
+            ".\n".join(
+                [
+                    "Ad " + str(i + 1) + ". " + doc[0].page_content
+                    for i, doc in enumerate(retreived_documents)
+                ]
+            ),
+            curr_relation_prompt,
+            curr_question_prompt,
+            best_value,
+        )
     )
     print("QUERY:", page_information, relation_answer, question_answer)
     docs_info = "\n\n".join(
         ]
     )
     try:
+        relation_answer_string = changeResponseToPrintableString(
+            relation_answer[0], "relation"
+        )
+        question_answer_string = changeResponseToPrintableString(
+            question_answer[0], "question"
+        )
         full_response = f"**ANSWER**: \n Relation answer:\n {relation_answer_string}\n Question answer:\n {question_answer_string}\n\n**RETREIVED DOCUMENTS**:\n{docs_info}\n\n**TOKENS USED**:\nQuestion api call: {question_answer[1]}\nRelation api call: {relation_answer[1]}"
     except:
         full_response = f"Invalid response received"
 db = FAISS.load_local(
     DB_FAISS_PATH, embeddings_hf, allow_dangerous_deserialization=True
 )
+data = pd.read_csv(data_file_path, sep="\t")
+data.dropna(axis=0, how="any", inplace=True)
+data.drop_duplicates(subset=["ad_title", "ad_desc"], inplace=True)
 ad_title_content = list(data["ad_title"].values)
 with gr.Blocks() as demo:
     gr.Markdown("# RAG on ads data")
         page_information = gr.Textbox(
             lines=1, placeholder="Enter the page information", label="Page Information"
         )
+        threshold = gr.Number(
+            value=default_threshold, label="Threshold", interactive=True
+        )
     output = gr.Textbox(label="Output")
     submit_btn = gr.Button("Submit")
     submit_btn.click(
         getRagResponse,
+        inputs=[RelationPrompt, QuestionPrompt, threshold, page_information],
         outputs=[output],
     )
     page_information.submit(
     demo.load(
         lambda: "<br>".join(
+            random.sample(
+                [str(ad_title) for ad_title in ad_title_content],
+                min(100, len(ad_title_content)),
+            )
         ),
         None,
         ad_titles,