{ "best_metric": 0.6521381139755249, "best_model_checkpoint": "/hy-tmp/checkpoints/zhongjing_7-13/checkpoint-6000", "epoch": 2.271221728021198, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 5.944240397524184e-05, "loss": 0.7616, "step": 500 }, { "epoch": 0.19, "eval_loss": 0.7090576887130737, "eval_runtime": 1700.4285, "eval_samples_per_second": 20.932, "eval_steps_per_second": 0.328, "step": 500 }, { "epoch": 0.38, "learning_rate": 5.7731724820150744e-05, "loss": 0.6972, "step": 1000 }, { "epoch": 0.38, "eval_loss": 0.6910683512687683, "eval_runtime": 1700.3181, "eval_samples_per_second": 20.933, "eval_steps_per_second": 0.328, "step": 1000 }, { "epoch": 0.57, "learning_rate": 5.4934584214999246e-05, "loss": 0.6845, "step": 1500 }, { "epoch": 0.57, "eval_loss": 0.6805794835090637, "eval_runtime": 1698.9819, "eval_samples_per_second": 20.95, "eval_steps_per_second": 0.328, "step": 1500 }, { "epoch": 0.76, "learning_rate": 5.116056731749404e-05, "loss": 0.6764, "step": 2000 }, { "epoch": 0.76, "eval_loss": 0.6737232804298401, "eval_runtime": 1702.5596, "eval_samples_per_second": 20.906, "eval_steps_per_second": 0.327, "step": 2000 }, { "epoch": 0.95, "learning_rate": 4.6557530919724635e-05, "loss": 0.6701, "step": 2500 }, { "epoch": 0.95, "eval_loss": 0.6684596538543701, "eval_runtime": 1702.1448, "eval_samples_per_second": 20.911, "eval_steps_per_second": 0.327, "step": 2500 }, { "epoch": 1.14, "learning_rate": 4.131682830461389e-05, "loss": 0.663, "step": 3000 }, { "epoch": 1.14, "eval_loss": 0.6644229292869568, "eval_runtime": 1699.1079, "eval_samples_per_second": 20.948, "eval_steps_per_second": 0.328, "step": 3000 }, { "epoch": 1.32, "learning_rate": 3.5622841602995877e-05, "loss": 0.6594, "step": 3500 }, { "epoch": 1.32, "eval_loss": 0.661072313785553, "eval_runtime": 1696.6207, "eval_samples_per_second": 20.979, "eval_steps_per_second": 0.328, "step": 3500 }, { "epoch": 1.51, "learning_rate": 2.9720460586930557e-05, "loss": 0.6561, "step": 4000 }, { "epoch": 1.51, "eval_loss": 0.6583240032196045, "eval_runtime": 1699.0104, "eval_samples_per_second": 20.949, "eval_steps_per_second": 0.328, "step": 4000 }, { "epoch": 1.7, "learning_rate": 2.381734702787557e-05, "loss": 0.6538, "step": 4500 }, { "epoch": 1.7, "eval_loss": 0.6560451984405518, "eval_runtime": 1701.2915, "eval_samples_per_second": 20.921, "eval_steps_per_second": 0.327, "step": 4500 }, { "epoch": 1.89, "learning_rate": 1.815645475714211e-05, "loss": 0.6525, "step": 5000 }, { "epoch": 1.89, "eval_loss": 0.654275119304657, "eval_runtime": 1698.3722, "eval_samples_per_second": 20.957, "eval_steps_per_second": 0.328, "step": 5000 }, { "epoch": 2.08, "learning_rate": 1.2969355277673462e-05, "loss": 0.6488, "step": 5500 }, { "epoch": 2.08, "eval_loss": 0.6530821919441223, "eval_runtime": 1698.6748, "eval_samples_per_second": 20.953, "eval_steps_per_second": 0.328, "step": 5500 }, { "epoch": 2.27, "learning_rate": 8.438544816619625e-06, "loss": 0.6478, "step": 6000 }, { "epoch": 2.27, "eval_loss": 0.6521381139755249, "eval_runtime": 1697.953, "eval_samples_per_second": 20.962, "eval_steps_per_second": 0.328, "step": 6000 } ], "max_steps": 7923, "num_train_epochs": 3, "total_flos": 9.200694994360966e+19, "trial_name": null, "trial_params": null }