{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.952830188679245, "eval_steps": 10, "global_step": 210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.999720254525684e-05, "loss": 1.458, "step": 1 }, { "epoch": 0.12, "learning_rate": 4.9930094929529506e-05, "loss": 1.4236, "step": 5 }, { "epoch": 0.24, "learning_rate": 4.972077065562821e-05, "loss": 1.3366, "step": 10 }, { "epoch": 0.24, "eval_loss": 1.278298020362854, "eval_runtime": 24.9289, "eval_samples_per_second": 5.175, "eval_steps_per_second": 1.725, "step": 10 }, { "epoch": 0.35, "learning_rate": 4.937319780454559e-05, "loss": 1.2828, "step": 15 }, { "epoch": 0.47, "learning_rate": 4.888932014465352e-05, "loss": 1.2563, "step": 20 }, { "epoch": 0.47, "eval_loss": 1.2320669889450073, "eval_runtime": 24.9408, "eval_samples_per_second": 5.172, "eval_steps_per_second": 1.724, "step": 20 }, { "epoch": 0.59, "learning_rate": 4.827184371610511e-05, "loss": 1.2308, "step": 25 }, { "epoch": 0.71, "learning_rate": 4.752422169756048e-05, "loss": 1.2289, "step": 30 }, { "epoch": 0.71, "eval_loss": 1.2012428045272827, "eval_runtime": 24.9502, "eval_samples_per_second": 5.17, "eval_steps_per_second": 1.723, "step": 30 }, { "epoch": 0.83, "learning_rate": 4.665063509461097e-05, "loss": 1.2051, "step": 35 }, { "epoch": 0.94, "learning_rate": 4.5655969357899874e-05, "loss": 1.1837, "step": 40 }, { "epoch": 0.94, "eval_loss": 1.1688246726989746, "eval_runtime": 24.9605, "eval_samples_per_second": 5.168, "eval_steps_per_second": 1.723, "step": 40 }, { "epoch": 1.06, "learning_rate": 4.454578706170075e-05, "loss": 1.1831, "step": 45 }, { "epoch": 1.18, "learning_rate": 4.332629679574566e-05, "loss": 1.1534, "step": 50 }, { "epoch": 1.18, "eval_loss": 1.1306308507919312, "eval_runtime": 24.9566, "eval_samples_per_second": 5.169, "eval_steps_per_second": 1.723, "step": 50 }, { "epoch": 1.3, "learning_rate": 4.2004318444272985e-05, "loss": 1.1413, "step": 55 }, { "epoch": 1.42, "learning_rate": 4.058724504646834e-05, "loss": 1.1254, "step": 60 }, { "epoch": 1.42, "eval_loss": 1.1036521196365356, "eval_runtime": 24.9587, "eval_samples_per_second": 5.169, "eval_steps_per_second": 1.723, "step": 60 }, { "epoch": 1.53, "learning_rate": 3.908300145159055e-05, "loss": 1.1167, "step": 65 }, { "epoch": 1.65, "learning_rate": 3.7500000000000003e-05, "loss": 1.1011, "step": 70 }, { "epoch": 1.65, "eval_loss": 1.0882474184036255, "eval_runtime": 24.8955, "eval_samples_per_second": 5.182, "eval_steps_per_second": 1.727, "step": 70 }, { "epoch": 1.77, "learning_rate": 3.5847093477938956e-05, "loss": 1.0891, "step": 75 }, { "epoch": 1.89, "learning_rate": 3.413352560915988e-05, "loss": 1.0825, "step": 80 }, { "epoch": 1.89, "eval_loss": 1.074791669845581, "eval_runtime": 24.8917, "eval_samples_per_second": 5.182, "eval_steps_per_second": 1.727, "step": 80 }, { "epoch": 2.0, "learning_rate": 3.2368879360272606e-05, "loss": 1.0778, "step": 85 }, { "epoch": 2.12, "learning_rate": 3.056302334890786e-05, "loss": 1.0876, "step": 90 }, { "epoch": 2.12, "eval_loss": 1.063535451889038, "eval_runtime": 24.8907, "eval_samples_per_second": 5.183, "eval_steps_per_second": 1.728, "step": 90 }, { "epoch": 2.24, "learning_rate": 2.872605665440436e-05, "loss": 1.0835, "step": 95 }, { "epoch": 2.36, "learning_rate": 2.686825233966061e-05, "loss": 1.0716, "step": 100 }, { "epoch": 2.36, "eval_loss": 1.0540255308151245, "eval_runtime": 24.8912, "eval_samples_per_second": 5.183, "eval_steps_per_second": 1.728, "step": 100 }, { "epoch": 2.48, "learning_rate": 2.5e-05, "loss": 1.0767, "step": 105 }, { "epoch": 2.59, "learning_rate": 2.3131747660339394e-05, "loss": 1.0517, "step": 110 }, { "epoch": 2.59, "eval_loss": 1.0458558797836304, "eval_runtime": 24.9528, "eval_samples_per_second": 5.17, "eval_steps_per_second": 1.723, "step": 110 }, { "epoch": 2.71, "learning_rate": 2.1273943345595637e-05, "loss": 1.0564, "step": 115 }, { "epoch": 2.83, "learning_rate": 1.9436976651092144e-05, "loss": 1.0289, "step": 120 }, { "epoch": 2.83, "eval_loss": 1.0388729572296143, "eval_runtime": 24.9226, "eval_samples_per_second": 5.176, "eval_steps_per_second": 1.725, "step": 120 }, { "epoch": 2.95, "learning_rate": 1.7631120639727393e-05, "loss": 1.0318, "step": 125 }, { "epoch": 3.07, "learning_rate": 1.5866474390840125e-05, "loss": 1.0564, "step": 130 }, { "epoch": 3.07, "eval_loss": 1.0331891775131226, "eval_runtime": 24.9512, "eval_samples_per_second": 5.17, "eval_steps_per_second": 1.723, "step": 130 }, { "epoch": 3.18, "learning_rate": 1.4152906522061048e-05, "loss": 1.0499, "step": 135 }, { "epoch": 3.3, "learning_rate": 1.2500000000000006e-05, "loss": 1.034, "step": 140 }, { "epoch": 3.3, "eval_loss": 1.0287576913833618, "eval_runtime": 24.9512, "eval_samples_per_second": 5.17, "eval_steps_per_second": 1.723, "step": 140 }, { "epoch": 3.42, "learning_rate": 1.0916998548409449e-05, "loss": 1.0387, "step": 145 }, { "epoch": 3.54, "learning_rate": 9.412754953531663e-06, "loss": 1.0337, "step": 150 }, { "epoch": 3.54, "eval_loss": 1.0253461599349976, "eval_runtime": 24.8842, "eval_samples_per_second": 5.184, "eval_steps_per_second": 1.728, "step": 150 }, { "epoch": 3.66, "learning_rate": 7.99568155572701e-06, "loss": 1.0375, "step": 155 }, { "epoch": 3.77, "learning_rate": 6.673703204254347e-06, "loss": 1.033, "step": 160 }, { "epoch": 3.77, "eval_loss": 1.0230636596679688, "eval_runtime": 24.9446, "eval_samples_per_second": 5.171, "eval_steps_per_second": 1.724, "step": 160 }, { "epoch": 3.89, "learning_rate": 5.454212938299255e-06, "loss": 1.0292, "step": 165 }, { "epoch": 4.01, "learning_rate": 4.344030642100133e-06, "loss": 1.0312, "step": 170 }, { "epoch": 4.01, "eval_loss": 1.021319031715393, "eval_runtime": 24.9503, "eval_samples_per_second": 5.17, "eval_steps_per_second": 1.723, "step": 170 }, { "epoch": 4.13, "learning_rate": 3.3493649053890326e-06, "loss": 1.0343, "step": 175 }, { "epoch": 4.25, "learning_rate": 2.475778302439524e-06, "loss": 1.0207, "step": 180 }, { "epoch": 4.25, "eval_loss": 1.0203514099121094, "eval_runtime": 24.9557, "eval_samples_per_second": 5.169, "eval_steps_per_second": 1.723, "step": 180 }, { "epoch": 4.36, "learning_rate": 1.7281562838948966e-06, "loss": 1.0413, "step": 185 }, { "epoch": 4.48, "learning_rate": 1.1106798553464804e-06, "loss": 1.0271, "step": 190 }, { "epoch": 4.48, "eval_loss": 1.019814133644104, "eval_runtime": 24.9781, "eval_samples_per_second": 5.165, "eval_steps_per_second": 1.722, "step": 190 }, { "epoch": 4.6, "learning_rate": 6.268021954544096e-07, "loss": 1.0233, "step": 195 }, { "epoch": 4.72, "learning_rate": 2.7922934437178695e-07, "loss": 1.0351, "step": 200 }, { "epoch": 4.72, "eval_loss": 1.0197088718414307, "eval_runtime": 24.9625, "eval_samples_per_second": 5.168, "eval_steps_per_second": 1.723, "step": 200 }, { "epoch": 4.83, "learning_rate": 6.990507047049676e-08, "loss": 1.015, "step": 205 }, { "epoch": 4.95, "learning_rate": 0.0, "loss": 1.0339, "step": 210 }, { "epoch": 4.95, "eval_loss": 1.0196079015731812, "eval_runtime": 24.9619, "eval_samples_per_second": 5.168, "eval_steps_per_second": 1.723, "step": 210 }, { "epoch": 4.95, "step": 210, "total_flos": 3.221775454641848e+17, "train_loss": 1.1020877304531278, "train_runtime": 16139.1818, "train_samples_per_second": 1.575, "train_steps_per_second": 0.013 } ], "logging_steps": 5, "max_steps": 210, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 20, "total_flos": 3.221775454641848e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }