{ "epoch": 1.0, "eval_dpo_losses": 0.6446115970611572, "eval_logits/chosen": -2.713839530944824, "eval_logits/rejected": -2.6717445850372314, "eval_logps/chosen": -284.5663146972656, "eval_logps/rejected": -271.5415954589844, "eval_loss": 1.616790771484375, "eval_positive_losses": 8.828282356262207, "eval_rewards/accuracies": 0.6499999761581421, "eval_rewards/chosen": 0.0002709717955440283, "eval_rewards/margins": 0.12989762425422668, "eval_rewards/margins_max": 0.6433730721473694, "eval_rewards/margins_min": -0.34936216473579407, "eval_rewards/margins_std": 0.33273470401763916, "eval_rewards/rejected": -0.12962664663791656, "eval_runtime": 428.5272, "eval_samples": 2000, "eval_samples_per_second": 4.667, "eval_steps_per_second": 0.292, "train_loss": 0.5743894765074824, "train_runtime": 4311.1014, "train_samples": 5678, "train_samples_per_second": 1.317, "train_steps_per_second": 0.082 }