{ "best_metric": 0.5835913370480271, "best_model_checkpoint": "output/fine_tuned/t5-base/CoLA/checkpoint-1876", "epoch": 10.0, "eval_steps": 500, "global_step": 2680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.47356653213500977, "eval_matthews_correlation": 0.48848785650378773, "eval_runtime": 4.2201, "eval_samples_per_second": 247.152, "eval_steps_per_second": 31.042, "step": 268 }, { "epoch": 1.8656716417910446, "grad_norm": 5.837749481201172, "learning_rate": 4.067164179104478e-05, "loss": 0.4668, "step": 500 }, { "epoch": 2.0, "eval_loss": 0.498794287443161, "eval_matthews_correlation": 0.5416469931221344, "eval_runtime": 3.8345, "eval_samples_per_second": 272.005, "eval_steps_per_second": 34.164, "step": 536 }, { "epoch": 3.0, "eval_loss": 0.5234289765357971, "eval_matthews_correlation": 0.5495093920893817, "eval_runtime": 3.8772, "eval_samples_per_second": 269.01, "eval_steps_per_second": 33.787, "step": 804 }, { "epoch": 3.7313432835820897, "grad_norm": 3.0416481494903564, "learning_rate": 3.1343283582089554e-05, "loss": 0.293, "step": 1000 }, { "epoch": 4.0, "eval_loss": 0.683355450630188, "eval_matthews_correlation": 0.5434890534893785, "eval_runtime": 3.8435, "eval_samples_per_second": 271.364, "eval_steps_per_second": 34.083, "step": 1072 }, { "epoch": 5.0, "eval_loss": 0.6669164896011353, "eval_matthews_correlation": 0.565579540997454, "eval_runtime": 3.9128, "eval_samples_per_second": 266.561, "eval_steps_per_second": 33.48, "step": 1340 }, { "epoch": 5.597014925373134, "grad_norm": 3.6730122566223145, "learning_rate": 2.201492537313433e-05, "loss": 0.2049, "step": 1500 }, { "epoch": 6.0, "eval_loss": 0.6630592346191406, "eval_matthews_correlation": 0.5807484475986369, "eval_runtime": 3.8474, "eval_samples_per_second": 271.094, "eval_steps_per_second": 34.049, "step": 1608 }, { "epoch": 7.0, "eval_loss": 0.7744859457015991, "eval_matthews_correlation": 0.5835913370480271, "eval_runtime": 2.7082, "eval_samples_per_second": 385.127, "eval_steps_per_second": 48.372, "step": 1876 }, { "epoch": 7.462686567164179, "grad_norm": 5.858553409576416, "learning_rate": 1.2686567164179105e-05, "loss": 0.1511, "step": 2000 }, { "epoch": 8.0, "eval_loss": 0.8352206349372864, "eval_matthews_correlation": 0.5780839016534659, "eval_runtime": 2.3839, "eval_samples_per_second": 437.524, "eval_steps_per_second": 54.953, "step": 2144 }, { "epoch": 9.0, "eval_loss": 0.8335957527160645, "eval_matthews_correlation": 0.567550266689718, "eval_runtime": 2.3912, "eval_samples_per_second": 436.19, "eval_steps_per_second": 54.785, "step": 2412 }, { "epoch": 9.328358208955224, "grad_norm": 2.9813477993011475, "learning_rate": 3.358208955223881e-06, "loss": 0.128, "step": 2500 }, { "epoch": 10.0, "eval_loss": 0.8593345880508423, "eval_matthews_correlation": 0.5624297390215861, "eval_runtime": 2.9658, "eval_samples_per_second": 351.678, "eval_steps_per_second": 44.171, "step": 2680 }, { "epoch": 10.0, "step": 2680, "total_flos": 1.30568834773248e+16, "train_loss": 0.05434396516031294, "train_runtime": 200.6705, "train_samples_per_second": 426.121, "train_steps_per_second": 13.355 } ], "logging_steps": 500, "max_steps": 2680, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.30568834773248e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }