{ "epoch": 0.9230769230769231, "eval_logits/chosen": 13.998739242553711, "eval_logits/rejected": 12.849632263183594, "eval_logps/chosen": -396.9144287109375, "eval_logps/rejected": -395.41522216796875, "eval_loss": 0.6937551498413086, "eval_rewards/accuracies": 0.5357142686843872, "eval_rewards/chosen": -0.00012085953494533896, "eval_rewards/margins": 9.88479132502107e-06, "eval_rewards/rejected": -0.00013074450544081628, "eval_runtime": 89.0222, "eval_samples": 1000, "eval_samples_per_second": 11.233, "eval_steps_per_second": 0.708 }