{ "epoch": 1.0, "eval_logits/chosen": 0.4467492401599884, "eval_logits/rejected": 1.781506061553955, "eval_logps/chosen": -380.6148681640625, "eval_logps/rejected": -458.5203552246094, "eval_loss": 0.49494317173957825, "eval_rewards/accuracies": 0.7890625, "eval_rewards/chosen": -1.0109279155731201, "eval_rewards/margins": 0.9840108156204224, "eval_rewards/rejected": -1.9949387311935425, "eval_runtime": 90.1258, "eval_samples": 2000, "eval_samples_per_second": 22.191, "eval_steps_per_second": 0.355, "train_loss": 0.08084109238500875, "train_runtime": 794.8705, "train_samples": 61155, "train_samples_per_second": 76.937, "train_steps_per_second": 0.601 }