{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 100, "global_step": 156, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.72961163520813, "logits/rejected": -2.7527058124542236, "logps/chosen": -133.97433471679688, "logps/rejected": -138.8169403076172, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.125e-06, "logits/chosen": -2.7357263565063477, "logits/rejected": -2.7272207736968994, "logps/chosen": -115.04521942138672, "logps/rejected": -114.19779205322266, "loss": 0.6932, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": 0.0004528095596469939, "rewards/margins": -0.000532312027644366, "rewards/rejected": 0.00098512158729136, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -2.721278667449951, "logits/rejected": -2.7116167545318604, "logps/chosen": -111.3840103149414, "logps/rejected": -116.3367691040039, "loss": 0.6927, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.014203068800270557, "rewards/margins": 0.0006986708613112569, "rewards/rejected": -0.014901740476489067, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -2.742332696914673, "logits/rejected": -2.7299842834472656, "logps/chosen": -123.17195129394531, "logps/rejected": -122.3455581665039, "loss": 0.6925, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.09704665839672089, "rewards/margins": 0.00010085676331073046, "rewards/rejected": -0.09714751690626144, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.646121984004666e-06, "logits/chosen": -2.700500965118408, "logits/rejected": -2.673189878463745, "logps/chosen": -116.55732727050781, "logps/rejected": -118.69517517089844, "loss": 0.6924, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -0.08498911559581757, "rewards/margins": 5.4714873840566725e-05, "rewards/rejected": -0.08504383265972137, "step": 40 }, { "epoch": 0.32, "learning_rate": 4.3069871595684795e-06, "logits/chosen": -2.740999698638916, "logits/rejected": -2.711369514465332, "logps/chosen": -111.51325988769531, "logps/rejected": -118.1547622680664, "loss": 0.6904, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.007794947363436222, "rewards/margins": 0.005682565737515688, "rewards/rejected": -0.013477511703968048, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -2.682302236557007, "logits/rejected": -2.660250425338745, "logps/chosen": -119.29142761230469, "logps/rejected": -123.52491760253906, "loss": 0.6896, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05805445462465286, "rewards/margins": 0.008577173575758934, "rewards/rejected": -0.06663163006305695, "step": 60 }, { "epoch": 0.45, "learning_rate": 3.3784370602033572e-06, "logits/chosen": -2.6593658924102783, "logits/rejected": -2.635847568511963, "logps/chosen": -110.70475769042969, "logps/rejected": -111.5876693725586, "loss": 0.6896, "rewards/accuracies": 0.578125, "rewards/chosen": -0.05531386658549309, "rewards/margins": 0.013766427524387836, "rewards/rejected": -0.06908029317855835, "step": 70 }, { "epoch": 0.51, "learning_rate": 2.835583164544139e-06, "logits/chosen": -2.6447205543518066, "logits/rejected": -2.6153109073638916, "logps/chosen": -116.82215881347656, "logps/rejected": -122.38499450683594, "loss": 0.6888, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.044131264090538025, "rewards/margins": 0.011304137296974659, "rewards/rejected": -0.05543540045619011, "step": 80 }, { "epoch": 0.58, "learning_rate": 2.2759017277414165e-06, "logits/chosen": -2.6791300773620605, "logits/rejected": -2.670039653778076, "logps/chosen": -137.33778381347656, "logps/rejected": -134.17361450195312, "loss": 0.6871, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.0856749638915062, "rewards/margins": 0.015940625220537186, "rewards/rejected": -0.10161559283733368, "step": 90 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -2.6628499031066895, "logits/rejected": -2.6339688301086426, "logps/chosen": -124.69636535644531, "logps/rejected": -118.883544921875, "loss": 0.6886, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.03587063401937485, "rewards/margins": 0.011753683909773827, "rewards/rejected": -0.047624316066503525, "step": 100 }, { "epoch": 0.64, "eval_logits/chosen": -2.6271259784698486, "eval_logits/rejected": -2.5384714603424072, "eval_logps/chosen": -288.5429382324219, "eval_logps/rejected": -268.43902587890625, "eval_loss": 0.678156852722168, "eval_rewards/accuracies": 0.6100000143051147, "eval_rewards/chosen": -0.06864660233259201, "eval_rewards/margins": 0.02844993770122528, "eval_rewards/rejected": -0.0970965251326561, "eval_runtime": 383.7648, "eval_samples_per_second": 5.212, "eval_steps_per_second": 0.651, "step": 100 }, { "epoch": 0.7, "learning_rate": 1.217751806485235e-06, "logits/chosen": -2.6874401569366455, "logits/rejected": -2.6627275943756104, "logps/chosen": -120.39668273925781, "logps/rejected": -122.85832214355469, "loss": 0.6876, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.04335067793726921, "rewards/margins": 0.012038113549351692, "rewards/rejected": -0.055388789623975754, "step": 110 }, { "epoch": 0.77, "learning_rate": 7.723433775328385e-07, "logits/chosen": -2.6583964824676514, "logits/rejected": -2.6295745372772217, "logps/chosen": -124.8127670288086, "logps/rejected": -123.85284423828125, "loss": 0.6869, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.060910262167453766, "rewards/margins": 0.01821967028081417, "rewards/rejected": -0.07912993431091309, "step": 120 }, { "epoch": 0.83, "learning_rate": 4.1356686569674344e-07, "logits/chosen": -2.6148152351379395, "logits/rejected": -2.5909037590026855, "logps/chosen": -116.5543212890625, "logps/rejected": -121.72233581542969, "loss": 0.6867, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08463772386312485, "rewards/margins": 0.020932147279381752, "rewards/rejected": -0.10556988418102264, "step": 130 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -2.605799674987793, "logits/rejected": -2.5549862384796143, "logps/chosen": -116.57108306884766, "logps/rejected": -122.12138366699219, "loss": 0.6885, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.05592598766088486, "rewards/margins": 0.015123754739761353, "rewards/rejected": -0.07104974240064621, "step": 140 }, { "epoch": 0.96, "learning_rate": 2.262559558016325e-08, "logits/chosen": -2.643202066421509, "logits/rejected": -2.605377674102783, "logps/chosen": -114.8644027709961, "logps/rejected": -112.22537994384766, "loss": 0.6854, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.0501478835940361, "rewards/margins": 0.02834610641002655, "rewards/rejected": -0.07849399000406265, "step": 150 }, { "epoch": 1.0, "step": 156, "total_flos": 0.0, "train_loss": 0.6890946939969674, "train_runtime": 6305.2604, "train_samples_per_second": 3.172, "train_steps_per_second": 0.025 } ], "logging_steps": 10, "max_steps": 156, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }