|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -2.72961163520813, |
|
"logits/rejected": -2.7527058124542236, |
|
"logps/chosen": -133.97433471679688, |
|
"logps/rejected": -138.8169403076172, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.7357263565063477, |
|
"logits/rejected": -2.7272207736968994, |
|
"logps/chosen": -115.04521942138672, |
|
"logps/rejected": -114.19779205322266, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 0.0004528095596469939, |
|
"rewards/margins": -0.000532312027644366, |
|
"rewards/rejected": 0.00098512158729136, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -2.721278667449951, |
|
"logits/rejected": -2.7116167545318604, |
|
"logps/chosen": -111.3840103149414, |
|
"logps/rejected": -116.3367691040039, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.014203068800270557, |
|
"rewards/margins": 0.0006986708613112569, |
|
"rewards/rejected": -0.014901740476489067, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": -2.742332696914673, |
|
"logits/rejected": -2.7299842834472656, |
|
"logps/chosen": -123.17195129394531, |
|
"logps/rejected": -122.3455581665039, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -0.09704665839672089, |
|
"rewards/margins": 0.00010085676331073046, |
|
"rewards/rejected": -0.09714751690626144, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -2.700500965118408, |
|
"logits/rejected": -2.673189878463745, |
|
"logps/chosen": -116.55732727050781, |
|
"logps/rejected": -118.69517517089844, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -0.08498911559581757, |
|
"rewards/margins": 5.4714873840566725e-05, |
|
"rewards/rejected": -0.08504383265972137, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684795e-06, |
|
"logits/chosen": -2.740999698638916, |
|
"logits/rejected": -2.711369514465332, |
|
"logps/chosen": -111.51325988769531, |
|
"logps/rejected": -118.1547622680664, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.007794947363436222, |
|
"rewards/margins": 0.005682565737515688, |
|
"rewards/rejected": -0.013477511703968048, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -2.682302236557007, |
|
"logits/rejected": -2.660250425338745, |
|
"logps/chosen": -119.29142761230469, |
|
"logps/rejected": -123.52491760253906, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05805445462465286, |
|
"rewards/margins": 0.008577173575758934, |
|
"rewards/rejected": -0.06663163006305695, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3784370602033572e-06, |
|
"logits/chosen": -2.6593658924102783, |
|
"logits/rejected": -2.635847568511963, |
|
"logps/chosen": -110.70475769042969, |
|
"logps/rejected": -111.5876693725586, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.05531386658549309, |
|
"rewards/margins": 0.013766427524387836, |
|
"rewards/rejected": -0.06908029317855835, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -2.6447205543518066, |
|
"logits/rejected": -2.6153109073638916, |
|
"logps/chosen": -116.82215881347656, |
|
"logps/rejected": -122.38499450683594, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.044131264090538025, |
|
"rewards/margins": 0.011304137296974659, |
|
"rewards/rejected": -0.05543540045619011, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414165e-06, |
|
"logits/chosen": -2.6791300773620605, |
|
"logits/rejected": -2.670039653778076, |
|
"logps/chosen": -137.33778381347656, |
|
"logps/rejected": -134.17361450195312, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -0.0856749638915062, |
|
"rewards/margins": 0.015940625220537186, |
|
"rewards/rejected": -0.10161559283733368, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.6628499031066895, |
|
"logits/rejected": -2.6339688301086426, |
|
"logps/chosen": -124.69636535644531, |
|
"logps/rejected": -118.883544921875, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.03587063401937485, |
|
"rewards/margins": 0.011753683909773827, |
|
"rewards/rejected": -0.047624316066503525, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -2.6271259784698486, |
|
"eval_logits/rejected": -2.5384714603424072, |
|
"eval_logps/chosen": -288.5429382324219, |
|
"eval_logps/rejected": -268.43902587890625, |
|
"eval_loss": 0.678156852722168, |
|
"eval_rewards/accuracies": 0.6100000143051147, |
|
"eval_rewards/chosen": -0.06864660233259201, |
|
"eval_rewards/margins": 0.02844993770122528, |
|
"eval_rewards/rejected": -0.0970965251326561, |
|
"eval_runtime": 383.7648, |
|
"eval_samples_per_second": 5.212, |
|
"eval_steps_per_second": 0.651, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.217751806485235e-06, |
|
"logits/chosen": -2.6874401569366455, |
|
"logits/rejected": -2.6627275943756104, |
|
"logps/chosen": -120.39668273925781, |
|
"logps/rejected": -122.85832214355469, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.04335067793726921, |
|
"rewards/margins": 0.012038113549351692, |
|
"rewards/rejected": -0.055388789623975754, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -2.6583964824676514, |
|
"logits/rejected": -2.6295745372772217, |
|
"logps/chosen": -124.8127670288086, |
|
"logps/rejected": -123.85284423828125, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -0.060910262167453766, |
|
"rewards/margins": 0.01821967028081417, |
|
"rewards/rejected": -0.07912993431091309, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674344e-07, |
|
"logits/chosen": -2.6148152351379395, |
|
"logits/rejected": -2.5909037590026855, |
|
"logps/chosen": -116.5543212890625, |
|
"logps/rejected": -121.72233581542969, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08463772386312485, |
|
"rewards/margins": 0.020932147279381752, |
|
"rewards/rejected": -0.10556988418102264, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -2.605799674987793, |
|
"logits/rejected": -2.5549862384796143, |
|
"logps/chosen": -116.57108306884766, |
|
"logps/rejected": -122.12138366699219, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.05592598766088486, |
|
"rewards/margins": 0.015123754739761353, |
|
"rewards/rejected": -0.07104974240064621, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.262559558016325e-08, |
|
"logits/chosen": -2.643202066421509, |
|
"logits/rejected": -2.605377674102783, |
|
"logps/chosen": -114.8644027709961, |
|
"logps/rejected": -112.22537994384766, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.0501478835940361, |
|
"rewards/margins": 0.02834610641002655, |
|
"rewards/rejected": -0.07849399000406265, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6890946939969674, |
|
"train_runtime": 6305.2604, |
|
"train_samples_per_second": 3.172, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|