{
  "best_metric": 0.9197247706422018,
  "best_model_checkpoint": "electra-distilled-sst\\run-30\\checkpoint-1581",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 2635,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 26.225385665893555,
      "learning_rate": 6.561713559027068e-05,
      "loss": 0.6075,
      "step": 527
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9139908256880734,
      "eval_loss": 0.47316521406173706,
      "eval_runtime": 1.4499,
      "eval_samples_per_second": 601.43,
      "eval_steps_per_second": 4.828,
      "step": 527
    },
    {
      "epoch": 2.0,
      "grad_norm": 13.330994606018066,
      "learning_rate": 5.741499364148685e-05,
      "loss": 0.3214,
      "step": 1054
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8956422018348624,
      "eval_loss": 0.6051578521728516,
      "eval_runtime": 1.4493,
      "eval_samples_per_second": 601.653,
      "eval_steps_per_second": 4.83,
      "step": 1054
    },
    {
      "epoch": 3.0,
      "grad_norm": 10.15234661102295,
      "learning_rate": 4.9212851692703006e-05,
      "loss": 0.2348,
      "step": 1581
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9197247706422018,
      "eval_loss": 0.4945127069950104,
      "eval_runtime": 1.4486,
      "eval_samples_per_second": 601.961,
      "eval_steps_per_second": 4.832,
      "step": 1581
    },
    {
      "epoch": 4.0,
      "grad_norm": Infinity,
      "learning_rate": 4.1026273580634136e-05,
      "loss": 0.1824,
      "step": 2108
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9071100917431193,
      "eval_loss": 0.5778486728668213,
      "eval_runtime": 1.4513,
      "eval_samples_per_second": 600.822,
      "eval_steps_per_second": 4.823,
      "step": 2108
    },
    {
      "epoch": 5.0,
      "grad_norm": 21.25727653503418,
      "learning_rate": 3.28241316318503e-05,
      "loss": 0.1498,
      "step": 2635
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.911697247706422,
      "eval_loss": 0.515268087387085,
      "eval_runtime": 1.4401,
      "eval_samples_per_second": 605.529,
      "eval_steps_per_second": 4.861,
      "step": 2635
    }
  ],
  "logging_steps": 500,
  "max_steps": 4743,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 500,
  "total_flos": 936602651243304.0,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.9134230134636561,
    "learning_rate": 7.381927753905452e-05,
    "num_train_epochs": 9,
    "temperature": 18
  }
}
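
The JSON above follows the standard TrainerState layout that the Hugging Face transformers Trainer writes during a hyperparameter search. Below is a minimal Python sketch for reading this state back, assuming the file is saved locally as trainer_state.json; interpreting alpha and temperature as knowledge-distillation hyperparameters is an assumption suggested by the model name (electra-distilled-sst), not something stated in the file itself.

import json

# Load the trainer state. Python's json module accepts the non-standard
# Infinity value that was logged for grad_norm at epoch 4.
with open("trainer_state.json") as f:
    state = json.load(f)

# Best checkpoint and metric tracked by the Trainer.
print("best checkpoint:", state["best_model_checkpoint"])
print("best eval_accuracy:", state["best_metric"])

# Per-epoch evaluation accuracy from log_history; entries without eval keys
# are the training-loss logs and are skipped.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"epoch {entry['epoch']:.0f}: eval_accuracy={entry['eval_accuracy']:.4f}")

# Hyperparameters sampled for this trial (run-30 in the checkpoint path).
print("trial params:", state["trial_params"])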