{ "best_metric": null, "best_model_checkpoint": null, "epoch": 22.0, "eval_steps": 500, "global_step": 2354, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5887850467289719, "eval_loss": 1.2691198587417603, "eval_runtime": 4.8417, "eval_samples_per_second": 44.199, "eval_steps_per_second": 5.577, "step": 107 }, { "epoch": 2.0, "eval_accuracy": 0.6214953271028038, "eval_loss": 1.0963133573532104, "eval_runtime": 5.0021, "eval_samples_per_second": 42.782, "eval_steps_per_second": 5.398, "step": 214 }, { "epoch": 3.0, "eval_accuracy": 0.6308411214953271, "eval_loss": 0.860569179058075, "eval_runtime": 4.6891, "eval_samples_per_second": 45.638, "eval_steps_per_second": 5.758, "step": 321 }, { "epoch": 4.0, "eval_accuracy": 0.7990654205607477, "eval_loss": 0.672334611415863, "eval_runtime": 4.717, "eval_samples_per_second": 45.368, "eval_steps_per_second": 5.724, "step": 428 }, { "epoch": 4.67, "learning_rate": 4.0654205607476636e-05, "loss": 1.1331, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.8457943925233645, "eval_loss": 0.48274144530296326, "eval_runtime": 4.6309, "eval_samples_per_second": 46.212, "eval_steps_per_second": 5.83, "step": 535 }, { "epoch": 6.0, "eval_accuracy": 0.8551401869158879, "eval_loss": 0.35962656140327454, "eval_runtime": 4.7112, "eval_samples_per_second": 45.424, "eval_steps_per_second": 5.731, "step": 642 }, { "epoch": 7.0, "eval_accuracy": 0.8925233644859814, "eval_loss": 0.26292115449905396, "eval_runtime": 5.4422, "eval_samples_per_second": 39.322, "eval_steps_per_second": 4.961, "step": 749 }, { "epoch": 8.0, "eval_accuracy": 0.9158878504672897, "eval_loss": 0.22579917311668396, "eval_runtime": 4.8554, "eval_samples_per_second": 44.074, "eval_steps_per_second": 5.561, "step": 856 }, { "epoch": 9.0, "eval_accuracy": 0.9158878504672897, "eval_loss": 0.19785191118717194, "eval_runtime": 4.9901, "eval_samples_per_second": 42.885, "eval_steps_per_second": 5.411, "step": 963 }, { "epoch": 9.35, "learning_rate": 3.130841121495327e-05, "loss": 0.6031, "step": 1000 }, { "epoch": 10.0, "eval_accuracy": 0.9719626168224299, "eval_loss": 0.16760671138763428, "eval_runtime": 4.8143, "eval_samples_per_second": 44.451, "eval_steps_per_second": 5.608, "step": 1070 }, { "epoch": 11.0, "eval_accuracy": 1.0, "eval_loss": 0.1423913985490799, "eval_runtime": 4.7195, "eval_samples_per_second": 45.343, "eval_steps_per_second": 5.721, "step": 1177 }, { "epoch": 12.0, "eval_accuracy": 0.985981308411215, "eval_loss": 0.12256418913602829, "eval_runtime": 5.1046, "eval_samples_per_second": 41.923, "eval_steps_per_second": 5.289, "step": 1284 }, { "epoch": 13.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.11292136460542679, "eval_runtime": 4.8426, "eval_samples_per_second": 44.191, "eval_steps_per_second": 5.576, "step": 1391 }, { "epoch": 14.0, "eval_accuracy": 0.9906542056074766, "eval_loss": 0.106930673122406, "eval_runtime": 4.9097, "eval_samples_per_second": 43.587, "eval_steps_per_second": 5.499, "step": 1498 }, { "epoch": 14.02, "learning_rate": 2.196261682242991e-05, "loss": 0.4317, "step": 1500 }, { "epoch": 15.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.09224073588848114, "eval_runtime": 4.854, "eval_samples_per_second": 44.087, "eval_steps_per_second": 5.562, "step": 1605 }, { "epoch": 16.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.08621260523796082, "eval_runtime": 4.7695, "eval_samples_per_second": 44.868, "eval_steps_per_second": 5.661, "step": 1712 }, { "epoch": 17.0, "eval_accuracy": 1.0, "eval_loss": 0.07688089460134506, "eval_runtime": 4.8878, "eval_samples_per_second": 43.782, "eval_steps_per_second": 5.524, "step": 1819 }, { "epoch": 18.0, "eval_accuracy": 1.0, "eval_loss": 0.061450209468603134, "eval_runtime": 4.7171, "eval_samples_per_second": 45.367, "eval_steps_per_second": 5.724, "step": 1926 }, { "epoch": 18.69, "learning_rate": 1.2616822429906542e-05, "loss": 0.3584, "step": 2000 }, { "epoch": 19.0, "eval_accuracy": 1.0, "eval_loss": 0.0667119175195694, "eval_runtime": 4.8093, "eval_samples_per_second": 44.498, "eval_steps_per_second": 5.614, "step": 2033 }, { "epoch": 20.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.05547282472252846, "eval_runtime": 4.7518, "eval_samples_per_second": 45.036, "eval_steps_per_second": 5.682, "step": 2140 }, { "epoch": 21.0, "eval_accuracy": 1.0, "eval_loss": 0.054027605801820755, "eval_runtime": 4.7251, "eval_samples_per_second": 45.29, "eval_steps_per_second": 5.714, "step": 2247 }, { "epoch": 22.0, "eval_accuracy": 1.0, "eval_loss": 0.04496881738305092, "eval_runtime": 4.6636, "eval_samples_per_second": 45.887, "eval_steps_per_second": 5.79, "step": 2354 } ], "logging_steps": 500, "max_steps": 2675, "num_train_epochs": 25, "save_steps": 200, "total_flos": 3.9904029741839155e+17, "trial_name": null, "trial_params": null }