{ "best_metric": 0.9010989010989011, "best_model_checkpoint": "ktp-kk-crop/checkpoint-10", "epoch": 25.0, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6153846153846154, "eval_loss": 0.6706601977348328, "eval_runtime": 4.349, "eval_samples_per_second": 20.924, "eval_steps_per_second": 1.38, "step": 2 }, { "epoch": 2.0, "eval_accuracy": 0.6153846153846154, "eval_loss": 0.6923023462295532, "eval_runtime": 3.6322, "eval_samples_per_second": 25.053, "eval_steps_per_second": 1.652, "step": 4 }, { "epoch": 3.0, "eval_accuracy": 0.7032967032967034, "eval_loss": 0.7208701372146606, "eval_runtime": 3.5659, "eval_samples_per_second": 25.52, "eval_steps_per_second": 1.683, "step": 6 }, { "epoch": 4.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 0.4788942337036133, "eval_runtime": 3.617, "eval_samples_per_second": 25.159, "eval_steps_per_second": 1.659, "step": 8 }, { "epoch": 5.0, "eval_accuracy": 0.9010989010989011, "eval_loss": 0.3554236590862274, "eval_runtime": 3.6003, "eval_samples_per_second": 25.275, "eval_steps_per_second": 1.667, "step": 10 }, { "epoch": 6.0, "eval_accuracy": 0.8241758241758241, "eval_loss": 0.8337829113006592, "eval_runtime": 3.6007, "eval_samples_per_second": 25.273, "eval_steps_per_second": 1.666, "step": 12 }, { "epoch": 7.0, "eval_accuracy": 0.7912087912087912, "eval_loss": 1.073412537574768, "eval_runtime": 3.6715, "eval_samples_per_second": 24.786, "eval_steps_per_second": 1.634, "step": 14 }, { "epoch": 7.5, "grad_norm": 38.24232864379883, "learning_rate": 3.888888888888889e-05, "loss": 0.2476, "step": 15 }, { "epoch": 8.0, "eval_accuracy": 0.8241758241758241, "eval_loss": 1.0161423683166504, "eval_runtime": 3.6667, "eval_samples_per_second": 24.818, "eval_steps_per_second": 1.636, "step": 16 }, { "epoch": 9.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.8966610431671143, "eval_runtime": 3.6608, "eval_samples_per_second": 24.858, "eval_steps_per_second": 1.639, "step": 18 }, { "epoch": 10.0, "eval_accuracy": 0.8681318681318682, "eval_loss": 0.8477412462234497, "eval_runtime": 3.4998, "eval_samples_per_second": 26.001, "eval_steps_per_second": 1.714, "step": 20 }, { "epoch": 11.0, "eval_accuracy": 0.8351648351648352, "eval_loss": 1.3332176208496094, "eval_runtime": 3.6008, "eval_samples_per_second": 25.272, "eval_steps_per_second": 1.666, "step": 22 }, { "epoch": 12.0, "eval_accuracy": 0.7252747252747253, "eval_loss": 3.477609395980835, "eval_runtime": 3.6298, "eval_samples_per_second": 25.071, "eval_steps_per_second": 1.653, "step": 24 }, { "epoch": 13.0, "eval_accuracy": 0.6373626373626373, "eval_loss": 6.0097527503967285, "eval_runtime": 3.6304, "eval_samples_per_second": 25.066, "eval_steps_per_second": 1.653, "step": 26 }, { "epoch": 14.0, "eval_accuracy": 0.7252747252747253, "eval_loss": 3.8649513721466064, "eval_runtime": 3.5214, "eval_samples_per_second": 25.842, "eval_steps_per_second": 1.704, "step": 28 }, { "epoch": 15.0, "grad_norm": 0.0022278681863099337, "learning_rate": 2.2222222222222223e-05, "loss": 0.0277, "step": 30 }, { "epoch": 15.0, "eval_accuracy": 0.8131868131868132, "eval_loss": 2.2409703731536865, "eval_runtime": 3.6708, "eval_samples_per_second": 24.79, "eval_steps_per_second": 1.635, "step": 30 }, { "epoch": 16.0, "eval_accuracy": 0.8131868131868132, "eval_loss": 1.45852792263031, "eval_runtime": 3.7348, "eval_samples_per_second": 24.365, "eval_steps_per_second": 1.607, "step": 32 }, { "epoch": 17.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 1.4789769649505615, "eval_runtime": 3.7258, "eval_samples_per_second": 24.424, "eval_steps_per_second": 1.61, "step": 34 }, { "epoch": 18.0, "eval_accuracy": 0.8241758241758241, "eval_loss": 1.8427152633666992, "eval_runtime": 3.6118, "eval_samples_per_second": 25.195, "eval_steps_per_second": 1.661, "step": 36 }, { "epoch": 19.0, "eval_accuracy": 0.8241758241758241, "eval_loss": 2.0873501300811768, "eval_runtime": 3.6114, "eval_samples_per_second": 25.198, "eval_steps_per_second": 1.661, "step": 38 }, { "epoch": 20.0, "eval_accuracy": 0.8131868131868132, "eval_loss": 2.245572090148926, "eval_runtime": 3.651, "eval_samples_per_second": 24.924, "eval_steps_per_second": 1.643, "step": 40 }, { "epoch": 21.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 2.366342782974243, "eval_runtime": 3.6311, "eval_samples_per_second": 25.061, "eval_steps_per_second": 1.652, "step": 42 }, { "epoch": 22.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 2.4484336376190186, "eval_runtime": 3.6882, "eval_samples_per_second": 24.673, "eval_steps_per_second": 1.627, "step": 44 }, { "epoch": 22.5, "grad_norm": 0.0005057471571490169, "learning_rate": 5.555555555555556e-06, "loss": 0.0001, "step": 45 }, { "epoch": 23.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 2.4973790645599365, "eval_runtime": 3.6432, "eval_samples_per_second": 24.978, "eval_steps_per_second": 1.647, "step": 46 }, { "epoch": 24.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 2.5220789909362793, "eval_runtime": 3.6924, "eval_samples_per_second": 24.645, "eval_steps_per_second": 1.625, "step": 48 }, { "epoch": 25.0, "eval_accuracy": 0.8021978021978022, "eval_loss": 2.530649185180664, "eval_runtime": 3.7584, "eval_samples_per_second": 24.212, "eval_steps_per_second": 1.596, "step": 50 }, { "epoch": 25.0, "step": 50, "total_flos": 2.427840666897408e+17, "train_loss": 0.08263867741210561, "train_runtime": 466.1075, "train_samples_per_second": 6.597, "train_steps_per_second": 0.107 } ], "logging_steps": 15, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.427840666897408e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }