{ "epoch": 3.0, "eval_logits/chosen": -1.2746007442474365, "eval_logits/rejected": -1.4412107467651367, "eval_logps/chosen": -271.52374267578125, "eval_logps/rejected": -241.7964630126953, "eval_loss": 0.6550792455673218, "eval_rewards/accuracies": 0.6904761791229248, "eval_rewards/chosen": -0.0634559839963913, "eval_rewards/margins": 0.09778770059347153, "eval_rewards/rejected": -0.16124369204044342, "eval_runtime": 341.5018, "eval_samples": 2000, "eval_samples_per_second": 5.856, "eval_steps_per_second": 0.246, "train_loss": 0.6738453804676539, "train_runtime": 49317.7491, "train_samples": 61966, "train_samples_per_second": 3.769, "train_steps_per_second": 0.039 }