|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.005925925925925926, |
|
"eval_steps": 500, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005925925925925926, |
|
"grad_norm": 10.913461685180664, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -4.444676399230957, |
|
"logits/rejected": -4.0909342765808105, |
|
"logps/chosen": -186.1875, |
|
"logps/rejected": -228.68560791015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0011851851851851852, |
|
"grad_norm": 7.564138412475586, |
|
"learning_rate": 4.849231551964771e-07, |
|
"logits/chosen": -4.530362606048584, |
|
"logits/rejected": -3.981240749359131, |
|
"logps/chosen": -192.2100830078125, |
|
"logps/rejected": -213.73086547851562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0017777777777777779, |
|
"grad_norm": 14.393203735351562, |
|
"learning_rate": 4.415111107797445e-07, |
|
"logits/chosen": -4.539660930633545, |
|
"logits/rejected": -4.32346248626709, |
|
"logps/chosen": -200.69912719726562, |
|
"logps/rejected": -224.77163696289062, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04995880275964737, |
|
"rewards/margins": 0.08375511318445206, |
|
"rewards/rejected": -0.03379631042480469, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0023703703703703703, |
|
"grad_norm": 12.62684440612793, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -3.9760360717773438, |
|
"logits/rejected": -4.4724321365356445, |
|
"logps/chosen": -239.9991455078125, |
|
"logps/rejected": -202.63072204589844, |
|
"loss": 0.7198, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.041876986622810364, |
|
"rewards/margins": -0.052445217967033386, |
|
"rewards/rejected": 0.01056823693215847, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.002962962962962963, |
|
"grad_norm": 6.745312213897705, |
|
"learning_rate": 2.934120444167326e-07, |
|
"logits/chosen": -4.136646270751953, |
|
"logits/rejected": -4.701557159423828, |
|
"logps/chosen": -169.00192260742188, |
|
"logps/rejected": -149.2642059326172, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02975158765912056, |
|
"rewards/margins": 0.023634720593690872, |
|
"rewards/rejected": 0.006116867531090975, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0035555555555555557, |
|
"grad_norm": 13.741002082824707, |
|
"learning_rate": 2.065879555832674e-07, |
|
"logits/chosen": -5.057063579559326, |
|
"logits/rejected": -5.147495746612549, |
|
"logps/chosen": -193.6820526123047, |
|
"logps/rejected": -202.20425415039062, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0083160400390625, |
|
"rewards/margins": 0.004281995818018913, |
|
"rewards/rejected": 0.0040340423583984375, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.004148148148148148, |
|
"grad_norm": 6.920108795166016, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -4.2448506355285645, |
|
"logits/rejected": -3.682173490524292, |
|
"logps/chosen": -176.11734008789062, |
|
"logps/rejected": -222.982666015625, |
|
"loss": 0.7008, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.028499603271484375, |
|
"rewards/margins": -0.015093998052179813, |
|
"rewards/rejected": 0.043593600392341614, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.004740740740740741, |
|
"grad_norm": 10.856575965881348, |
|
"learning_rate": 5.848888922025552e-08, |
|
"logits/chosen": -4.413946151733398, |
|
"logits/rejected": -4.419940948486328, |
|
"logps/chosen": -181.99610900878906, |
|
"logps/rejected": -196.20811462402344, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.02880115620791912, |
|
"rewards/margins": -0.007002831436693668, |
|
"rewards/rejected": -0.021798323839902878, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.005333333333333333, |
|
"grad_norm": 9.559263229370117, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": -4.909343242645264, |
|
"logits/rejected": -4.601991176605225, |
|
"logps/chosen": -154.6862030029297, |
|
"logps/rejected": -179.27401733398438, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.0032157916575670242, |
|
"rewards/margins": 0.0011837054044008255, |
|
"rewards/rejected": 0.0020320871844887733, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.005925925925925926, |
|
"grad_norm": 10.816996574401855, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -3.8037424087524414, |
|
"logits/rejected": -3.683922529220581, |
|
"logps/chosen": -232.978515625, |
|
"logps/rejected": -254.24078369140625, |
|
"loss": 0.7146, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05405044183135033, |
|
"rewards/margins": -0.040559008717536926, |
|
"rewards/rejected": -0.013491439633071423, |
|
"step": 10 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|