{ "epoch": 1.0, "eval_logits/chosen": 0.1915939450263977, "eval_logits/rejected": 0.22264356911182404, "eval_logps/chosen": -325.6150817871094, "eval_logps/rejected": -337.89727783203125, "eval_loss": 0.04012390971183777, "eval_rewards/accuracies": 0.6017963886260986, "eval_rewards/chosen": -0.05564659833908081, "eval_rewards/margins": 0.033811911940574646, "eval_rewards/rejected": -0.08945851027965546, "eval_runtime": 374.9506, "eval_samples": 2000, "eval_samples_per_second": 5.334, "eval_steps_per_second": 0.445, "train_loss": 0.044965725131654775, "train_runtime": 22244.9719, "train_samples": 61135, "train_samples_per_second": 2.748, "train_steps_per_second": 0.057 }