{ "epoch": 3.0, "eval_logits/chosen": -1.5187865495681763, "eval_logits/rejected": -1.4330620765686035, "eval_logps/chosen": -1948.4508056640625, "eval_logps/rejected": -2216.220703125, "eval_loss": 1.8388876914978027, "eval_rewards/accuracies": 0.6904761791229248, "eval_rewards/chosen": -16.632295608520508, "eval_rewards/margins": 2.9380877017974854, "eval_rewards/margins_max": 11.99801254272461, "eval_rewards/margins_min": -5.294867038726807, "eval_rewards/margins_std": 7.78206729888916, "eval_rewards/rejected": -19.570384979248047, "eval_runtime": 281.8693, "eval_samples": 2000, "eval_samples_per_second": 7.095, "eval_steps_per_second": 0.224, "train_loss": 0.1103198329137612, "train_runtime": 9245.0119, "train_samples": 5678, "train_samples_per_second": 1.843, "train_steps_per_second": 0.115 }