nash_simple_online_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
4f262c2 verified
raw
history blame contribute delete
No virus
8.94 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 100,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-07,
"logits/chosen": -2.72961163520813,
"logits/rejected": -2.7527058124542236,
"logps/chosen": -133.97433471679688,
"logps/rejected": -138.8169403076172,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.125e-06,
"logits/chosen": -2.7357263565063477,
"logits/rejected": -2.7272207736968994,
"logps/chosen": -115.04521942138672,
"logps/rejected": -114.19779205322266,
"loss": 0.6932,
"rewards/accuracies": 0.3888888955116272,
"rewards/chosen": 0.0004528095596469939,
"rewards/margins": -0.000532312027644366,
"rewards/rejected": 0.00098512158729136,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.989935734988098e-06,
"logits/chosen": -2.721278667449951,
"logits/rejected": -2.7116167545318604,
"logps/chosen": -111.3840103149414,
"logps/rejected": -116.3367691040039,
"loss": 0.6927,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.014203068800270557,
"rewards/margins": 0.0006986708613112569,
"rewards/rejected": -0.014901740476489067,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.8776412907378845e-06,
"logits/chosen": -2.742332696914673,
"logits/rejected": -2.7299842834472656,
"logps/chosen": -123.17195129394531,
"logps/rejected": -122.3455581665039,
"loss": 0.6925,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -0.09704665839672089,
"rewards/margins": 0.00010085676331073046,
"rewards/rejected": -0.09714751690626144,
"step": 30
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -2.700500965118408,
"logits/rejected": -2.673189878463745,
"logps/chosen": -116.55732727050781,
"logps/rejected": -118.69517517089844,
"loss": 0.6924,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -0.08498911559581757,
"rewards/margins": 5.4714873840566725e-05,
"rewards/rejected": -0.08504383265972137,
"step": 40
},
{
"epoch": 0.32,
"learning_rate": 4.3069871595684795e-06,
"logits/chosen": -2.740999698638916,
"logits/rejected": -2.711369514465332,
"logps/chosen": -111.51325988769531,
"logps/rejected": -118.1547622680664,
"loss": 0.6904,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.007794947363436222,
"rewards/margins": 0.005682565737515688,
"rewards/rejected": -0.013477511703968048,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.8772424536302565e-06,
"logits/chosen": -2.682302236557007,
"logits/rejected": -2.660250425338745,
"logps/chosen": -119.29142761230469,
"logps/rejected": -123.52491760253906,
"loss": 0.6896,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.05805445462465286,
"rewards/margins": 0.008577173575758934,
"rewards/rejected": -0.06663163006305695,
"step": 60
},
{
"epoch": 0.45,
"learning_rate": 3.3784370602033572e-06,
"logits/chosen": -2.6593658924102783,
"logits/rejected": -2.635847568511963,
"logps/chosen": -110.70475769042969,
"logps/rejected": -111.5876693725586,
"loss": 0.6896,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.05531386658549309,
"rewards/margins": 0.013766427524387836,
"rewards/rejected": -0.06908029317855835,
"step": 70
},
{
"epoch": 0.51,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -2.6447205543518066,
"logits/rejected": -2.6153109073638916,
"logps/chosen": -116.82215881347656,
"logps/rejected": -122.38499450683594,
"loss": 0.6888,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.044131264090538025,
"rewards/margins": 0.011304137296974659,
"rewards/rejected": -0.05543540045619011,
"step": 80
},
{
"epoch": 0.58,
"learning_rate": 2.2759017277414165e-06,
"logits/chosen": -2.6791300773620605,
"logits/rejected": -2.670039653778076,
"logps/chosen": -137.33778381347656,
"logps/rejected": -134.17361450195312,
"loss": 0.6871,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.0856749638915062,
"rewards/margins": 0.015940625220537186,
"rewards/rejected": -0.10161559283733368,
"step": 90
},
{
"epoch": 0.64,
"learning_rate": 1.7274575140626318e-06,
"logits/chosen": -2.6628499031066895,
"logits/rejected": -2.6339688301086426,
"logps/chosen": -124.69636535644531,
"logps/rejected": -118.883544921875,
"loss": 0.6886,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.03587063401937485,
"rewards/margins": 0.011753683909773827,
"rewards/rejected": -0.047624316066503525,
"step": 100
},
{
"epoch": 0.64,
"eval_logits/chosen": -2.6271259784698486,
"eval_logits/rejected": -2.5384714603424072,
"eval_logps/chosen": -288.5429382324219,
"eval_logps/rejected": -268.43902587890625,
"eval_loss": 0.678156852722168,
"eval_rewards/accuracies": 0.6100000143051147,
"eval_rewards/chosen": -0.06864660233259201,
"eval_rewards/margins": 0.02844993770122528,
"eval_rewards/rejected": -0.0970965251326561,
"eval_runtime": 383.7648,
"eval_samples_per_second": 5.212,
"eval_steps_per_second": 0.651,
"step": 100
},
{
"epoch": 0.7,
"learning_rate": 1.217751806485235e-06,
"logits/chosen": -2.6874401569366455,
"logits/rejected": -2.6627275943756104,
"logps/chosen": -120.39668273925781,
"logps/rejected": -122.85832214355469,
"loss": 0.6876,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.04335067793726921,
"rewards/margins": 0.012038113549351692,
"rewards/rejected": -0.055388789623975754,
"step": 110
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -2.6583964824676514,
"logits/rejected": -2.6295745372772217,
"logps/chosen": -124.8127670288086,
"logps/rejected": -123.85284423828125,
"loss": 0.6869,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -0.060910262167453766,
"rewards/margins": 0.01821967028081417,
"rewards/rejected": -0.07912993431091309,
"step": 120
},
{
"epoch": 0.83,
"learning_rate": 4.1356686569674344e-07,
"logits/chosen": -2.6148152351379395,
"logits/rejected": -2.5909037590026855,
"logps/chosen": -116.5543212890625,
"logps/rejected": -121.72233581542969,
"loss": 0.6867,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.08463772386312485,
"rewards/margins": 0.020932147279381752,
"rewards/rejected": -0.10556988418102264,
"step": 130
},
{
"epoch": 0.9,
"learning_rate": 1.59412823400657e-07,
"logits/chosen": -2.605799674987793,
"logits/rejected": -2.5549862384796143,
"logps/chosen": -116.57108306884766,
"logps/rejected": -122.12138366699219,
"loss": 0.6885,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.05592598766088486,
"rewards/margins": 0.015123754739761353,
"rewards/rejected": -0.07104974240064621,
"step": 140
},
{
"epoch": 0.96,
"learning_rate": 2.262559558016325e-08,
"logits/chosen": -2.643202066421509,
"logits/rejected": -2.605377674102783,
"logps/chosen": -114.8644027709961,
"logps/rejected": -112.22537994384766,
"loss": 0.6854,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.0501478835940361,
"rewards/margins": 0.02834610641002655,
"rewards/rejected": -0.07849399000406265,
"step": 150
},
{
"epoch": 1.0,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.6890946939969674,
"train_runtime": 6305.2604,
"train_samples_per_second": 3.172,
"train_steps_per_second": 0.025
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}