tanveeshsingh committed
Commit 36aee66
1 Parent(s): 9ceec3e
Files changed (1)
  1. app.py +3 -26
app.py CHANGED
@@ -2,12 +2,10 @@ import gradio as gr
 from jinja2 import Template
 import openai
 
-# Replace this with your actual Hugging Face inference endpoint
 API_ENDPOINT = "https://txl0ptjvttfogwt9.us-east-1.aws.endpoints.huggingface.cloud/v1/"
 API_KEY = "NA"
 
 def classify_prompt(conv_prefix, response):
-    # Message template for the system prompt
     template_str = """
 Please review the scoring criteria:
 <BEGIN SCORING CRITERIA>
@@ -28,47 +26,26 @@ Now, please output the following as a JSON object:
 {
   "output": <classification score (0 or 1)> (int datatype),
 }"""
-
-    # Render the template with conversation context
     template = Template(template_str)
     rendered_message = template.render(conv_prefix=conv_prefix, response=response)
-
-    # Call OpenAI API (assuming you have a client set up)
     client = openai.OpenAI(
         base_url=API_ENDPOINT,
         api_key=API_KEY
     )
-
-    # Prepare message payload
     messages = [
         {"role": "system", "content": "\nYou will classify a prompt as either safe or unsafe based on predefined safety criteria. Follow these steps:\n\n1. **Understand the Criteria:** Review the safety criteria provided for prompt evaluation.\n2. **Evaluate the Prompt:** Assess the prompt against these criteria.\n3. **Classify the Prompt:** Determine whether the prompt is safe (1) or unsafe (0).\n4. **Provide Rationale:** Explain your classification decision with a brief rationale.\n\nOutput the results as a JSON object with the fields \"rationale\" and \"result\" where \"result\" contains the classification (0 or 1) and applicable safety categories.\n"},
         {"role": "user", "content": rendered_message}
     ]
-
-    # Make a request to the OpenAI API
     completion = client.chat.completions.create(
         model="tgi",
         messages=messages,
         max_tokens=400
     )
-
-    # Get the response content
     output = completion.choices[0].message.content
-
     return output
 
-def process_inputs(conv_prefix_text, response_content):
-    # Process the input conversation prefix as a list of dictionaries
-    conv_prefix = []
-    for line in conv_prefix_text.split("\n"):
-        if ": " in line:
-            role, content = line.split(": ", 1)
-            conv_prefix.append({"role": role.strip(), "content": content.strip()})
-
-    # Process the assistant's response as a dictionary
+def process_inputs(conv_prefix, response_content):
     response = {"role": "assistant", "content": response_content}
-
-    # Call classify_prompt with the structured data
     output = classify_prompt(conv_prefix, response)
     return output
 
@@ -76,12 +53,12 @@ def process_inputs(conv_prefix_text, response_content):
 demo = gr.Interface(
     fn=process_inputs,
     inputs=[
-        gr.Textbox(lines=8, placeholder="Enter conversation prefix (role: content), one per line", label="Conversation Prefix"),
+        gr.JSON(label="Conversation Prefix (Array of Objects)"),
         gr.Textbox(lines=2, placeholder="Enter the assistant's response", label="Assistant Response")
     ],
     outputs="text",
     title="Prompt Safety Classification",
-    description="Classify a conversation prompt's safety by providing a conversation prefix and an assistant's response."
+    description="Classify a conversation prompt's safety by providing a conversation prefix (array of objects) and an assistant's response."
)
 
 demo.launch()
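
For context (not part of the commit): after this change the Space's first input is a gr.JSON field, so the conversation prefix must already be the array of role/content objects that the removed parsing loop used to build from text. A minimal sketch of calling the updated process_inputs directly, with a made-up conversation for illustration:

conv_prefix = [
    {"role": "user", "content": "Can you help me with my account?"},
    {"role": "assistant", "content": "Sure, what do you need?"},
    {"role": "user", "content": "I forgot my password."},
]

# The prefix is now passed straight through to classify_prompt as a list of dicts;
# before this commit it had to be newline-separated "role: content" text.
result = process_inputs(conv_prefix, "You can reset it from the settings page.")
print(result)

Running this end to end still requires the inference endpoint above to be reachable with a valid API key.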