{ "epoch": 3.0, "eval_logits/chosen": -14.8499116897583, "eval_logits/rejected": -14.938364028930664, "eval_logps/chosen": -109.97705078125, "eval_logps/rejected": -88.43526458740234, "eval_loss": 0.6909456849098206, "eval_rewards/accuracies": 0.3174603283405304, "eval_rewards/chosen": 0.015893306583166122, "eval_rewards/margins": 0.006259124726057053, "eval_rewards/rejected": 0.00963417999446392, "eval_runtime": 227.6793, "eval_samples": 2000, "eval_samples_per_second": 8.784, "eval_steps_per_second": 0.277, "train_loss": 0.692123620142293, "train_runtime": 31275.9572, "train_samples": 61966, "train_samples_per_second": 5.944, "train_steps_per_second": 0.012 }