{ "best_metric": 1.4044392108917236, "best_model_checkpoint": "outputs_llama-2/checkpoint-120", "epoch": 0.18511376783648284, "eval_steps": 40, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 3.4868, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 3.7961, "step": 2 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 3.9117, "step": 3 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 3.5057, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 3.2378, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 3.0761, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 3.3794, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 2.728, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 2.5244, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 2.47, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.00019968652037617558, "loss": 2.5237, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.0001993730407523511, "loss": 2.3919, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.00019905956112852667, "loss": 1.9547, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.0001987460815047022, "loss": 1.8513, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.00019843260188087775, "loss": 1.6401, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.0001981191222570533, "loss": 1.7872, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.00019780564263322884, "loss": 1.8782, "step": 17 }, { "epoch": 0.03, "learning_rate": 0.0001974921630094044, "loss": 1.8139, "step": 18 }, { "epoch": 0.03, "learning_rate": 0.00019717868338557995, "loss": 1.5255, "step": 19 }, { "epoch": 0.03, "learning_rate": 0.0001968652037617555, "loss": 1.326, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00019655172413793104, "loss": 1.7972, "step": 21 }, { "epoch": 0.03, "learning_rate": 0.0001962382445141066, "loss": 1.4295, "step": 22 }, { "epoch": 0.04, "learning_rate": 0.00019592476489028212, "loss": 1.6369, "step": 23 }, { "epoch": 0.04, "learning_rate": 0.0001956112852664577, "loss": 1.7473, "step": 24 }, { "epoch": 0.04, "learning_rate": 0.00019529780564263324, "loss": 1.6524, "step": 25 }, { "epoch": 0.04, "learning_rate": 0.00019498432601880878, "loss": 1.5889, "step": 26 }, { "epoch": 0.04, "learning_rate": 0.00019467084639498435, "loss": 1.3206, "step": 27 }, { "epoch": 0.04, "learning_rate": 0.00019435736677115987, "loss": 1.9595, "step": 28 }, { "epoch": 0.04, "learning_rate": 0.00019404388714733544, "loss": 1.5356, "step": 29 }, { "epoch": 0.05, "learning_rate": 0.00019373040752351098, "loss": 1.932, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.00019341692789968652, "loss": 1.3679, "step": 31 }, { "epoch": 0.05, "learning_rate": 0.0001931034482758621, "loss": 1.5176, "step": 32 }, { "epoch": 0.05, "learning_rate": 0.0001927899686520376, "loss": 1.774, "step": 33 }, { "epoch": 0.05, "learning_rate": 0.00019247648902821318, "loss": 1.4211, "step": 34 }, { "epoch": 0.05, "learning_rate": 0.00019216300940438872, "loss": 2.0095, "step": 35 }, { "epoch": 0.06, "learning_rate": 0.00019184952978056427, "loss": 1.1123, "step": 36 }, { "epoch": 0.06, "learning_rate": 0.0001915360501567398, "loss": 1.5061, "step": 37 }, { "epoch": 0.06, "learning_rate": 0.00019122257053291538, "loss": 1.8059, "step": 38 }, { "epoch": 0.06, "learning_rate": 0.00019090909090909092, "loss": 1.361, "step": 39 }, { "epoch": 0.06, "learning_rate": 0.00019059561128526647, "loss": 1.6624, "step": 40 }, { "epoch": 0.06, "eval_loss": 1.4508017301559448, "eval_runtime": 866.2645, "eval_samples_per_second": 1.16, "eval_steps_per_second": 1.16, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.000190282131661442, "loss": 1.0086, "step": 41 }, { "epoch": 0.06, "learning_rate": 0.00018996865203761755, "loss": 1.8394, "step": 42 }, { "epoch": 0.07, "learning_rate": 0.00018965517241379312, "loss": 1.595, "step": 43 }, { "epoch": 0.07, "learning_rate": 0.00018934169278996866, "loss": 1.4083, "step": 44 }, { "epoch": 0.07, "learning_rate": 0.0001890282131661442, "loss": 1.6845, "step": 45 }, { "epoch": 0.07, "learning_rate": 0.00018871473354231978, "loss": 1.2298, "step": 46 }, { "epoch": 0.07, "learning_rate": 0.0001884012539184953, "loss": 1.0909, "step": 47 }, { "epoch": 0.07, "learning_rate": 0.00018808777429467086, "loss": 1.1942, "step": 48 }, { "epoch": 0.08, "learning_rate": 0.0001877742946708464, "loss": 1.7951, "step": 49 }, { "epoch": 0.08, "learning_rate": 0.00018746081504702195, "loss": 1.5837, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.00018714733542319752, "loss": 1.1171, "step": 51 }, { "epoch": 0.08, "learning_rate": 0.00018683385579937304, "loss": 1.5556, "step": 52 }, { "epoch": 0.08, "learning_rate": 0.0001865203761755486, "loss": 1.6377, "step": 53 }, { "epoch": 0.08, "learning_rate": 0.00018620689655172415, "loss": 1.7227, "step": 54 }, { "epoch": 0.08, "learning_rate": 0.0001858934169278997, "loss": 1.6148, "step": 55 }, { "epoch": 0.09, "learning_rate": 0.00018557993730407524, "loss": 1.1987, "step": 56 }, { "epoch": 0.09, "learning_rate": 0.0001852664576802508, "loss": 0.8116, "step": 57 }, { "epoch": 0.09, "learning_rate": 0.00018495297805642635, "loss": 1.627, "step": 58 }, { "epoch": 0.09, "learning_rate": 0.0001846394984326019, "loss": 1.3519, "step": 59 }, { "epoch": 0.09, "learning_rate": 0.00018432601880877744, "loss": 1.1224, "step": 60 }, { "epoch": 0.09, "learning_rate": 0.00018401253918495298, "loss": 1.4279, "step": 61 }, { "epoch": 0.1, "learning_rate": 0.00018369905956112855, "loss": 1.3011, "step": 62 }, { "epoch": 0.1, "learning_rate": 0.00018338557993730406, "loss": 1.654, "step": 63 }, { "epoch": 0.1, "learning_rate": 0.00018307210031347963, "loss": 0.8621, "step": 64 }, { "epoch": 0.1, "learning_rate": 0.00018275862068965518, "loss": 1.3778, "step": 65 }, { "epoch": 0.1, "learning_rate": 0.00018244514106583072, "loss": 1.7181, "step": 66 }, { "epoch": 0.1, "learning_rate": 0.0001821316614420063, "loss": 1.603, "step": 67 }, { "epoch": 0.1, "learning_rate": 0.00018181818181818183, "loss": 1.3475, "step": 68 }, { "epoch": 0.11, "learning_rate": 0.00018150470219435738, "loss": 1.7242, "step": 69 }, { "epoch": 0.11, "learning_rate": 0.00018119122257053292, "loss": 1.58, "step": 70 }, { "epoch": 0.11, "learning_rate": 0.00018087774294670846, "loss": 1.4371, "step": 71 }, { "epoch": 0.11, "learning_rate": 0.00018056426332288403, "loss": 1.3795, "step": 72 }, { "epoch": 0.11, "learning_rate": 0.00018025078369905958, "loss": 1.1421, "step": 73 }, { "epoch": 0.11, "learning_rate": 0.00017993730407523512, "loss": 1.1617, "step": 74 }, { "epoch": 0.12, "learning_rate": 0.00017962382445141066, "loss": 1.4031, "step": 75 }, { "epoch": 0.12, "learning_rate": 0.0001793103448275862, "loss": 2.0192, "step": 76 }, { "epoch": 0.12, "learning_rate": 0.00017899686520376175, "loss": 1.4762, "step": 77 }, { "epoch": 0.12, "learning_rate": 0.00017868338557993732, "loss": 1.4992, "step": 78 }, { "epoch": 0.12, "learning_rate": 0.00017836990595611286, "loss": 1.5983, "step": 79 }, { "epoch": 0.12, "learning_rate": 0.0001780564263322884, "loss": 1.3888, "step": 80 }, { "epoch": 0.12, "eval_loss": 1.416973352432251, "eval_runtime": 866.3924, "eval_samples_per_second": 1.16, "eval_steps_per_second": 1.16, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.00017774294670846398, "loss": 1.0799, "step": 81 }, { "epoch": 0.13, "learning_rate": 0.0001774294670846395, "loss": 1.3961, "step": 82 }, { "epoch": 0.13, "learning_rate": 0.00017711598746081506, "loss": 1.5792, "step": 83 }, { "epoch": 0.13, "learning_rate": 0.0001768025078369906, "loss": 1.6384, "step": 84 }, { "epoch": 0.13, "learning_rate": 0.00017648902821316615, "loss": 1.3299, "step": 85 }, { "epoch": 0.13, "learning_rate": 0.00017617554858934172, "loss": 1.7483, "step": 86 }, { "epoch": 0.13, "learning_rate": 0.00017586206896551723, "loss": 1.7161, "step": 87 }, { "epoch": 0.14, "learning_rate": 0.0001755485893416928, "loss": 1.3523, "step": 88 }, { "epoch": 0.14, "learning_rate": 0.00017523510971786835, "loss": 1.5451, "step": 89 }, { "epoch": 0.14, "learning_rate": 0.0001749216300940439, "loss": 1.4589, "step": 90 }, { "epoch": 0.14, "learning_rate": 0.00017460815047021943, "loss": 1.4352, "step": 91 }, { "epoch": 0.14, "learning_rate": 0.000174294670846395, "loss": 1.5711, "step": 92 }, { "epoch": 0.14, "learning_rate": 0.00017398119122257055, "loss": 1.3834, "step": 93 }, { "epoch": 0.15, "learning_rate": 0.0001736677115987461, "loss": 1.3734, "step": 94 }, { "epoch": 0.15, "learning_rate": 0.00017335423197492163, "loss": 1.5402, "step": 95 }, { "epoch": 0.15, "learning_rate": 0.00017304075235109718, "loss": 1.5848, "step": 96 }, { "epoch": 0.15, "learning_rate": 0.00017272727272727275, "loss": 1.3129, "step": 97 }, { "epoch": 0.15, "learning_rate": 0.00017241379310344826, "loss": 1.3945, "step": 98 }, { "epoch": 0.15, "learning_rate": 0.00017210031347962383, "loss": 1.79, "step": 99 }, { "epoch": 0.15, "learning_rate": 0.0001717868338557994, "loss": 1.0874, "step": 100 }, { "epoch": 0.16, "learning_rate": 0.00017147335423197492, "loss": 1.617, "step": 101 }, { "epoch": 0.16, "learning_rate": 0.0001711598746081505, "loss": 1.259, "step": 102 }, { "epoch": 0.16, "learning_rate": 0.00017084639498432603, "loss": 1.577, "step": 103 }, { "epoch": 0.16, "learning_rate": 0.00017053291536050158, "loss": 1.3163, "step": 104 }, { "epoch": 0.16, "learning_rate": 0.00017021943573667712, "loss": 1.3077, "step": 105 }, { "epoch": 0.16, "learning_rate": 0.00016990595611285266, "loss": 1.2611, "step": 106 }, { "epoch": 0.17, "learning_rate": 0.00016959247648902823, "loss": 1.8003, "step": 107 }, { "epoch": 0.17, "learning_rate": 0.00016927899686520377, "loss": 1.3783, "step": 108 }, { "epoch": 0.17, "learning_rate": 0.00016896551724137932, "loss": 1.3896, "step": 109 }, { "epoch": 0.17, "learning_rate": 0.00016865203761755486, "loss": 1.4663, "step": 110 }, { "epoch": 0.17, "learning_rate": 0.0001683385579937304, "loss": 0.7607, "step": 111 }, { "epoch": 0.17, "learning_rate": 0.00016802507836990597, "loss": 0.9899, "step": 112 }, { "epoch": 0.17, "learning_rate": 0.00016771159874608152, "loss": 1.8002, "step": 113 }, { "epoch": 0.18, "learning_rate": 0.00016739811912225706, "loss": 1.5776, "step": 114 }, { "epoch": 0.18, "learning_rate": 0.0001670846394984326, "loss": 1.551, "step": 115 }, { "epoch": 0.18, "learning_rate": 0.00016677115987460817, "loss": 1.4058, "step": 116 }, { "epoch": 0.18, "learning_rate": 0.0001664576802507837, "loss": 1.0475, "step": 117 }, { "epoch": 0.18, "learning_rate": 0.00016614420062695926, "loss": 1.7153, "step": 118 }, { "epoch": 0.18, "learning_rate": 0.0001658307210031348, "loss": 1.6289, "step": 119 }, { "epoch": 0.19, "learning_rate": 0.00016551724137931035, "loss": 1.2282, "step": 120 }, { "epoch": 0.19, "eval_loss": 1.4044392108917236, "eval_runtime": 866.8231, "eval_samples_per_second": 1.159, "eval_steps_per_second": 1.159, "step": 120 } ], "logging_steps": 1, "max_steps": 648, "num_train_epochs": 1, "save_steps": 40, "total_flos": 5831985971159040.0, "trial_name": null, "trial_params": null }