{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6650630231886847, "eval_steps": 500, "global_step": 108, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9998119704485016e-05, "loss": 1.1733, "step": 1 }, { "epoch": 0.03, "learning_rate": 1.9953027957931658e-05, "loss": 1.1282, "step": 5 }, { "epoch": 0.06, "learning_rate": 1.9812553106273848e-05, "loss": 1.1452, "step": 10 }, { "epoch": 0.09, "learning_rate": 1.957989512315489e-05, "loss": 1.1346, "step": 15 }, { "epoch": 0.12, "learning_rate": 1.9257239692688907e-05, "loss": 1.1056, "step": 20 }, { "epoch": 0.15, "learning_rate": 1.8847617971766577e-05, "loss": 1.0978, "step": 25 }, { "epoch": 0.18, "learning_rate": 1.8354878114129368e-05, "loss": 1.067, "step": 30 }, { "epoch": 0.22, "learning_rate": 1.7783649119241603e-05, "loss": 1.0392, "step": 35 }, { "epoch": 0.25, "learning_rate": 1.7139297345578992e-05, "loss": 1.0536, "step": 40 }, { "epoch": 0.28, "learning_rate": 1.6427876096865394e-05, "loss": 1.0547, "step": 45 }, { "epoch": 0.31, "learning_rate": 1.5656068754865388e-05, "loss": 1.0304, "step": 50 }, { "epoch": 0.34, "learning_rate": 1.4831125992966386e-05, "loss": 1.0428, "step": 55 }, { "epoch": 0.37, "learning_rate": 1.396079766039157e-05, "loss": 1.0502, "step": 60 }, { "epoch": 0.4, "learning_rate": 1.3053259976951134e-05, "loss": 1.0294, "step": 65 }, { "epoch": 0.43, "learning_rate": 1.211703872229411e-05, "loss": 1.0313, "step": 70 }, { "epoch": 0.46, "learning_rate": 1.1160929141252303e-05, "loss": 1.0468, "step": 75 }, { "epoch": 0.49, "learning_rate": 1.0193913317718245e-05, "loss": 1.0264, "step": 80 }, { "epoch": 0.52, "learning_rate": 9.225075793280693e-06, "loss": 1.0222, "step": 85 }, { "epoch": 0.55, "learning_rate": 8.263518223330698e-06, "loss": 1.0264, "step": 90 }, { "epoch": 0.59, "learning_rate": 7.3182738723936255e-06, "loss": 1.0263, "step": 95 }, { "epoch": 0.62, "learning_rate": 6.3982227519528986e-06, "loss": 1.051, "step": 100 }, { "epoch": 0.65, "learning_rate": 5.512008197995379e-06, "loss": 1.0313, "step": 105 }, { "epoch": 0.67, "eval_loss": 1.025382399559021, "eval_runtime": 12686.0816, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.911, "step": 108 }, { "epoch": 0.67, "step": 108, "total_flos": 1.227710764417024e+18, "train_loss": 1.0669225134231426, "train_runtime": 50855.7644, "train_samples_per_second": 0.409, "train_steps_per_second": 0.003 } ], "logging_steps": 5, "max_steps": 162, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.227710764417024e+18, "trial_name": null, "trial_params": null }