{ "epoch": 1.0, "eval_logits/chosen": 83.16413879394531, "eval_logits/rejected": 83.84245300292969, "eval_logps/chosen": -392.341552734375, "eval_logps/rejected": -414.519775390625, "eval_loss": 0.40556877851486206, "eval_rewards/accuracies": 0.792553186416626, "eval_rewards/chosen": -0.3995126485824585, "eval_rewards/margins": 3.1725716590881348, "eval_rewards/rejected": -3.5720841884613037, "eval_runtime": 140.3273, "eval_samples": 3000, "eval_samples_per_second": 21.379, "eval_steps_per_second": 0.67, "train_loss": 0.5623811487565961, "train_runtime": 6612.2068, "train_samples": 72994, "train_samples_per_second": 11.039, "train_steps_per_second": 0.086 }