{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.792, "eval_steps": 1, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.128, "grad_norm": 257.985107421875, "learning_rate": 2.5e-05, "loss": 0.9656, "step": 1 }, { "epoch": 0.128, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 1.0653672218322754, "eval_runtime": 103.5505, "eval_samples_per_second": 2.414, "eval_steps_per_second": 0.608, "step": 1 }, { "epoch": 0.256, "grad_norm": 213.24978637695312, "learning_rate": 5e-05, "loss": 0.8665, "step": 2 }, { "epoch": 0.256, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5950920245398773, "eval_f1_m": 0.40244217234013135, "eval_loss": 1.0189599990844727, "eval_runtime": 103.2975, "eval_samples_per_second": 2.42, "eval_steps_per_second": 0.61, "step": 2 }, { "epoch": 0.384, "grad_norm": 296.04522705078125, "learning_rate": 4.5833333333333334e-05, "loss": 1.005, "step": 3 }, { "epoch": 0.384, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.617283950617284, "eval_f1_m": 0.418024549146998, "eval_loss": 1.2820215225219727, "eval_runtime": 103.3719, "eval_samples_per_second": 2.418, "eval_steps_per_second": 0.609, "step": 3 }, { "epoch": 0.512, "grad_norm": 322.7693786621094, "learning_rate": 4.166666666666667e-05, "loss": 1.24, "step": 4 }, { "epoch": 0.512, "eval_exact_match": 0.24489795918367346, "eval_f1_a": 0.6778242677824268, "eval_f1_m": 0.6303927365151853, "eval_loss": 0.6080453991889954, "eval_runtime": 103.3238, "eval_samples_per_second": 2.42, "eval_steps_per_second": 0.61, "step": 4 }, { "epoch": 0.64, "grad_norm": 155.07017517089844, "learning_rate": 3.7500000000000003e-05, "loss": 0.6974, "step": 5 }, { "epoch": 0.64, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.4822695035460994, "eval_f1_m": 0.5118312259968922, "eval_loss": 0.7246580719947815, "eval_runtime": 103.3375, "eval_samples_per_second": 2.419, "eval_steps_per_second": 0.61, "step": 5 }, { "epoch": 0.768, "grad_norm": 119.71410369873047, "learning_rate": 3.3333333333333335e-05, "loss": 0.7023, "step": 6 }, { "epoch": 0.768, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.41538461538461535, "eval_f1_m": 0.48722283951875783, "eval_loss": 0.7980329394340515, "eval_runtime": 103.3608, "eval_samples_per_second": 2.419, "eval_steps_per_second": 0.61, "step": 6 }, { "epoch": 0.896, "grad_norm": 123.64088439941406, "learning_rate": 2.916666666666667e-05, "loss": 0.6932, "step": 7 }, { "epoch": 0.896, "eval_exact_match": 0.24489795918367346, "eval_f1_a": 0.6153846153846154, "eval_f1_m": 0.5911244651040569, "eval_loss": 0.6341376900672913, "eval_runtime": 103.3457, "eval_samples_per_second": 2.419, "eval_steps_per_second": 0.61, "step": 7 }, { "epoch": 1.024, "grad_norm": 125.78742980957031, "learning_rate": 2.5e-05, "loss": 0.7029, "step": 8 }, { "epoch": 1.024, "eval_exact_match": 0.30612244897959184, "eval_f1_a": 0.700507614213198, "eval_f1_m": 0.6928085519922257, "eval_loss": 0.5055590867996216, "eval_runtime": 103.3778, "eval_samples_per_second": 2.418, "eval_steps_per_second": 0.609, "step": 8 }, { "epoch": 1.152, "grad_norm": 13.356744766235352, "learning_rate": 2.0833333333333336e-05, "loss": 0.5001, "step": 9 }, { "epoch": 1.152, "eval_exact_match": 0.3469387755102041, "eval_f1_a": 0.7155172413793104, "eval_f1_m": 0.7132019907530109, "eval_loss": 0.5521179437637329, "eval_runtime": 103.3622, "eval_samples_per_second": 2.419, "eval_steps_per_second": 0.61, "step": 9 }, { "epoch": 1.28, "grad_norm": 47.14638900756836, "learning_rate": 1.6666666666666667e-05, "loss": 0.4912, "step": 10 }, { "epoch": 1.28, "eval_exact_match": 0.30612244897959184, "eval_f1_a": 0.714859437751004, "eval_f1_m": 0.6873750965587699, "eval_loss": 0.6306286454200745, "eval_runtime": 103.33, "eval_samples_per_second": 2.419, "eval_steps_per_second": 0.61, "step": 10 }, { "epoch": 1.408, "grad_norm": 104.10618591308594, "learning_rate": 1.25e-05, "loss": 0.4704, "step": 11 }, { "epoch": 1.408, "eval_exact_match": 0.32653061224489793, "eval_f1_a": 0.7228915662650601, "eval_f1_m": 0.7029241733323365, "eval_loss": 0.6197946667671204, "eval_runtime": 103.535, "eval_samples_per_second": 2.415, "eval_steps_per_second": 0.608, "step": 11 }, { "epoch": 1.536, "grad_norm": 127.36360168457031, "learning_rate": 8.333333333333334e-06, "loss": 0.4825, "step": 12 }, { "epoch": 1.536, "eval_exact_match": 0.32653061224489793, "eval_f1_a": 0.7206477732793523, "eval_f1_m": 0.7113673174897662, "eval_loss": 0.6032267808914185, "eval_runtime": 103.5344, "eval_samples_per_second": 2.415, "eval_steps_per_second": 0.608, "step": 12 }, { "epoch": 1.6640000000000001, "grad_norm": 159.17672729492188, "learning_rate": 4.166666666666667e-06, "loss": 0.4835, "step": 13 }, { "epoch": 1.6640000000000001, "eval_exact_match": 0.32653061224489793, "eval_f1_a": 0.7242798353909464, "eval_f1_m": 0.716348504103606, "eval_loss": 0.5836342573165894, "eval_runtime": 103.4956, "eval_samples_per_second": 2.416, "eval_steps_per_second": 0.609, "step": 13 }, { "epoch": 1.792, "grad_norm": 144.5326690673828, "learning_rate": 0.0, "loss": 0.5307, "step": 14 }, { "epoch": 1.792, "eval_exact_match": 0.3469387755102041, "eval_f1_a": 0.7280334728033472, "eval_f1_m": 0.7118569072650704, "eval_loss": 0.5592248439788818, "eval_runtime": 103.5416, "eval_samples_per_second": 2.414, "eval_steps_per_second": 0.608, "step": 14 }, { "epoch": 1.792, "step": 14, "total_flos": 4.406528391831552e+16, "train_loss": 0.7022471129894257, "train_runtime": 4257.8526, "train_samples_per_second": 0.47, "train_steps_per_second": 0.003 } ], "logging_steps": 1, "max_steps": 14, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 4.406528391831552e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }