{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 100, "global_step": 156, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.7403299808502197, "logits/rejected": -2.7255897521972656, "logps/chosen": -195.21282958984375, "logps/rejected": -184.09413146972656, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.125e-06, "logits/chosen": -2.6841495037078857, "logits/rejected": -2.682373523712158, "logps/chosen": -163.6064453125, "logps/rejected": -165.0059356689453, "loss": 0.6929, "rewards/accuracies": 0.4756944477558136, "rewards/chosen": -0.008276204578578472, "rewards/margins": 0.001318673719651997, "rewards/rejected": -0.009594877250492573, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -2.692333698272705, "logits/rejected": -2.665407180786133, "logps/chosen": -176.43678283691406, "logps/rejected": -172.32717895507812, "loss": 0.6935, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.06923351436853409, "rewards/margins": 0.003520409809425473, "rewards/rejected": -0.0727539211511612, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -2.5914735794067383, "logits/rejected": -2.585202932357788, "logps/chosen": -168.06890869140625, "logps/rejected": -169.85023498535156, "loss": 0.6917, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.034951962530612946, "rewards/margins": 0.007884470745921135, "rewards/rejected": -0.04283643513917923, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.646121984004666e-06, "logits/chosen": -2.671989917755127, "logits/rejected": -2.6600332260131836, "logps/chosen": -178.48281860351562, "logps/rejected": -179.00379943847656, "loss": 0.6901, "rewards/accuracies": 0.53125, "rewards/chosen": -0.01323291938751936, "rewards/margins": 0.006769413594156504, "rewards/rejected": -0.020002331584692, "step": 40 }, { "epoch": 0.32, "learning_rate": 4.3069871595684795e-06, "logits/chosen": -2.6603026390075684, "logits/rejected": -2.6338632106781006, "logps/chosen": -188.98532104492188, "logps/rejected": -187.17684936523438, "loss": 0.6888, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.09863803535699844, "rewards/margins": 0.015061549842357635, "rewards/rejected": -0.11369959264993668, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -2.6301541328430176, "logits/rejected": -2.6398401260375977, "logps/chosen": -184.1541290283203, "logps/rejected": -188.9746856689453, "loss": 0.6898, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.16303817927837372, "rewards/margins": 0.013844095170497894, "rewards/rejected": -0.1768822818994522, "step": 60 }, { "epoch": 0.45, "learning_rate": 3.3784370602033572e-06, "logits/chosen": -2.587550401687622, "logits/rejected": -2.550933361053467, "logps/chosen": -184.53257751464844, "logps/rejected": -183.46517944335938, "loss": 0.6902, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.008703355677425861, "rewards/margins": 0.0026464841794222593, "rewards/rejected": -0.011349838227033615, "step": 70 }, { "epoch": 0.51, "learning_rate": 2.835583164544139e-06, "logits/chosen": -2.56795072555542, "logits/rejected": -2.5637052059173584, "logps/chosen": -176.33071899414062, "logps/rejected": -184.88938903808594, "loss": 0.6906, "rewards/accuracies": 0.578125, "rewards/chosen": 0.0037323920987546444, "rewards/margins": 0.009280242025852203, "rewards/rejected": -0.005547848995774984, "step": 80 }, { "epoch": 0.58, "learning_rate": 2.2759017277414165e-06, "logits/chosen": -2.6137020587921143, "logits/rejected": -2.5704140663146973, "logps/chosen": -187.82107543945312, "logps/rejected": -184.28587341308594, "loss": 0.6883, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.08189593255519867, "rewards/margins": 0.0156090734526515, "rewards/rejected": -0.09750500321388245, "step": 90 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -2.6152119636535645, "logits/rejected": -2.640665292739868, "logps/chosen": -176.8973388671875, "logps/rejected": -184.32557678222656, "loss": 0.689, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08713572472333908, "rewards/margins": 0.014782750979065895, "rewards/rejected": -0.10191845893859863, "step": 100 }, { "epoch": 0.64, "eval_logits/chosen": -2.556511878967285, "eval_logits/rejected": -2.465257406234741, "eval_logps/chosen": -305.5672607421875, "eval_logps/rejected": -298.1529541015625, "eval_loss": 0.6753088235855103, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": -0.11736557632684708, "eval_rewards/margins": 0.04772435873746872, "eval_rewards/rejected": -0.1650899201631546, "eval_runtime": 384.6543, "eval_samples_per_second": 5.199, "eval_steps_per_second": 0.65, "step": 100 }, { "epoch": 0.7, "learning_rate": 1.217751806485235e-06, "logits/chosen": -2.62018084526062, "logits/rejected": -2.610757350921631, "logps/chosen": -171.35316467285156, "logps/rejected": -176.16299438476562, "loss": 0.6845, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.05986649543046951, "rewards/margins": 0.020178433507680893, "rewards/rejected": -0.0800449326634407, "step": 110 }, { "epoch": 0.77, "learning_rate": 7.723433775328385e-07, "logits/chosen": -2.548755168914795, "logits/rejected": -2.529273271560669, "logps/chosen": -173.8794403076172, "logps/rejected": -172.54251098632812, "loss": 0.6854, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.06846034526824951, "rewards/margins": 0.019297173246741295, "rewards/rejected": -0.08775752037763596, "step": 120 }, { "epoch": 0.83, "learning_rate": 4.1356686569674344e-07, "logits/chosen": -2.6056151390075684, "logits/rejected": -2.5934414863586426, "logps/chosen": -181.0892791748047, "logps/rejected": -185.13357543945312, "loss": 0.686, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.055159904062747955, "rewards/margins": 0.01887853816151619, "rewards/rejected": -0.07403843104839325, "step": 130 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -2.592637538909912, "logits/rejected": -2.583139657974243, "logps/chosen": -176.57772827148438, "logps/rejected": -182.9418182373047, "loss": 0.6847, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.04002287983894348, "rewards/margins": 0.02553923986852169, "rewards/rejected": -0.06556212902069092, "step": 140 }, { "epoch": 0.96, "learning_rate": 2.262559558016325e-08, "logits/chosen": -2.5700881481170654, "logits/rejected": -2.561204433441162, "logps/chosen": -178.7554473876953, "logps/rejected": -186.81182861328125, "loss": 0.6849, "rewards/accuracies": 0.503125011920929, "rewards/chosen": -0.05323126167058945, "rewards/margins": 0.007863727398216724, "rewards/rejected": -0.0610949881374836, "step": 150 }, { "epoch": 1.0, "step": 156, "total_flos": 0.0, "train_loss": 0.6885996239307599, "train_runtime": 7326.9096, "train_samples_per_second": 2.73, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 156, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }