qq8933
/

PPRM-gemma-2-2b-it

Generated from Trainer

Model card Files Files and versions Community

qq8933 committed 5 days ago

Commit

1e55599

•

1 Parent(s): 06d5b88

Update README.md

Files changed (1) hide show

README.md +51 -1

README.md CHANGED Viewed

@@ -105,10 +105,60 @@ async def get_prediction(input_request: InputRequest):
         raise HTTPException(status_code=500, detail=str(e))
 ```
 ```
 uvicorn server:app --host 0.0.0.0 --port $MASTER_PORT --workers 1
 ```
 ## Training procedure

         raise HTTPException(status_code=500, detail=str(e))
 ```
+Run the PPRM server:
 ```
 uvicorn server:app --host 0.0.0.0 --port $MASTER_PORT --workers 1
 ```
+Send a request to the PPRM server:
+```
+# qeustion,answer_1,answer_2 = 'What is the capital of France?', 'Berlin', 'Paris'
+# {'yes_logit': -24.26136016845703, 'no_logit': 19.517587661743164, 'logit_difference': -43.778947830200195}
+# Is answer_1 better than answer_2? yes or no
+# Entry point of the reward model
+def request_prediction(
+    qeustion, answer_1, answer_2, url="http://10.140.24.56:10085/predict"
+):
+    """
+    Sends a POST request to the FastAPI server to get a prediction.
+    Args:
+    - text (str): The input text for the prediction.
+    - url (str): The API endpoint URL. Defaults to 'http://localhost:8000/predict'.
+    Returns:
+    - dict: The response from the API containing prediction results.
+    """
+    headers = {"Content-Type": "application/json"}
+    payload = {
+        "text": json.dumps(
+            {"qeustion": qeustion, "answer_1": answer_1, "answer_2": answer_2}
+        )
+    }
+    response = requests.post(url, json=payload, headers=headers, timeout=TIMEOUT_PRM)
+    response.raise_for_status()  # Raises an HTTPError if the response code was unsuccessful
+    return response.json()  # Return the JSON response as a dictionary
+def cal_reward(question, ans, ans2="I don't know"):
+    if ans2 in DUMMY_ANSWERS:#I don't know
+        return 1
+    if ans in DUMMY_ANSWERS:
+        return 0
+    urls = copy.deepcopy(prm_servers)
+    random.shuffle(urls)
+    for url in urls:
+        try:
+            response = request_prediction(question, ans, ans2, url)
+            return math.exp(response["yes_logit"]) / (
+            math.exp(response["yes_logit"]) + math.exp(response["no_logit"])
+        )
+        except Exception as e:
+            # print(e)
+            continue
+    print(Exception("All prm servers are down"))
+    # get_clients()
+    return cal_reward(question, ans, ans2)
+```
 ## Training procedure