{ "best_metric": 0.6162260174751282, "best_model_checkpoint": "flan_t5_summarization/checkpoint-2720", "epoch": 10.0, "global_step": 2720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 19.0, "eval_loss": 1.120025634765625, "eval_rouge1": 9.2565, "eval_rouge2": 1.2805, "eval_rougeL": 9.2358, "eval_rougeLsum": 9.284, "eval_runtime": 4.0606, "eval_samples_per_second": 16.5, "eval_steps_per_second": 1.478, "step": 272 }, { "epoch": 1.84, "learning_rate": 4.0808823529411765e-05, "loss": 1.5343, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 19.0, "eval_loss": 0.8922988772392273, "eval_rouge1": 10.9045, "eval_rouge2": 2.9468, "eval_rougeL": 10.9112, "eval_rougeLsum": 10.8827, "eval_runtime": 4.0719, "eval_samples_per_second": 16.454, "eval_steps_per_second": 1.474, "step": 544 }, { "epoch": 3.0, "eval_gen_len": 19.0, "eval_loss": 0.7884227633476257, "eval_rouge1": 13.637, "eval_rouge2": 4.8447, "eval_rougeL": 13.3594, "eval_rougeLsum": 13.3459, "eval_runtime": 4.0204, "eval_samples_per_second": 16.665, "eval_steps_per_second": 1.492, "step": 816 }, { "epoch": 3.68, "learning_rate": 3.161764705882353e-05, "loss": 0.9949, "step": 1000 }, { "epoch": 4.0, "eval_gen_len": 18.91044776119403, "eval_loss": 0.7256659269332886, "eval_rouge1": 15.2005, "eval_rouge2": 6.3919, "eval_rougeL": 14.781, "eval_rougeLsum": 14.7204, "eval_runtime": 4.0736, "eval_samples_per_second": 16.447, "eval_steps_per_second": 1.473, "step": 1088 }, { "epoch": 5.0, "eval_gen_len": 18.73134328358209, "eval_loss": 0.6852018237113953, "eval_rouge1": 15.1627, "eval_rouge2": 6.3116, "eval_rougeL": 14.7187, "eval_rougeLsum": 14.6983, "eval_runtime": 4.0455, "eval_samples_per_second": 16.561, "eval_steps_per_second": 1.483, "step": 1360 }, { "epoch": 5.51, "learning_rate": 2.2426470588235296e-05, "loss": 0.8504, "step": 1500 }, { "epoch": 6.0, "eval_gen_len": 18.73134328358209, "eval_loss": 0.6580936312675476, "eval_rouge1": 16.0795, "eval_rouge2": 6.9284, "eval_rougeL": 15.3516, "eval_rougeLsum": 15.3571, "eval_runtime": 4.0423, "eval_samples_per_second": 16.575, "eval_steps_per_second": 1.484, "step": 1632 }, { "epoch": 7.0, "eval_gen_len": 18.91044776119403, "eval_loss": 0.6391794681549072, "eval_rouge1": 16.0518, "eval_rouge2": 6.9377, "eval_rougeL": 15.3914, "eval_rougeLsum": 15.3748, "eval_runtime": 4.0453, "eval_samples_per_second": 16.562, "eval_steps_per_second": 1.483, "step": 1904 }, { "epoch": 7.35, "learning_rate": 1.323529411764706e-05, "loss": 0.7841, "step": 2000 }, { "epoch": 8.0, "eval_gen_len": 18.82089552238806, "eval_loss": 0.6258341073989868, "eval_rouge1": 16.1307, "eval_rouge2": 7.6286, "eval_rougeL": 15.7398, "eval_rougeLsum": 15.7627, "eval_runtime": 4.0536, "eval_samples_per_second": 16.529, "eval_steps_per_second": 1.48, "step": 2176 }, { "epoch": 9.0, "eval_gen_len": 18.73134328358209, "eval_loss": 0.6200478672981262, "eval_rouge1": 15.9488, "eval_rouge2": 7.4447, "eval_rougeL": 15.5654, "eval_rougeLsum": 15.583, "eval_runtime": 4.042, "eval_samples_per_second": 16.576, "eval_steps_per_second": 1.484, "step": 2448 }, { "epoch": 9.19, "learning_rate": 4.044117647058824e-06, "loss": 0.7599, "step": 2500 }, { "epoch": 10.0, "eval_gen_len": 18.73134328358209, "eval_loss": 0.6162260174751282, "eval_rouge1": 15.9418, "eval_rouge2": 7.4447, "eval_rougeL": 15.5655, "eval_rougeLsum": 15.5835, "eval_runtime": 4.0403, "eval_samples_per_second": 16.583, "eval_steps_per_second": 1.485, "step": 2720 }, { "epoch": 10.0, "step": 2720, "total_flos": 1177555285370880.0, "train_loss": 0.9648406533633962, "train_runtime": 1843.1414, "train_samples_per_second": 17.693, "train_steps_per_second": 1.476 } ], "max_steps": 2720, "num_train_epochs": 10, "total_flos": 1177555285370880.0, "trial_name": null, "trial_params": null }