{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0588235294117645, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00019922390686882426, "loss": 1.5465, "step": 1 }, { "epoch": 0.06, "learning_rate": 0.00019842381085730289, "loss": 1.4494, "step": 2 }, { "epoch": 0.09, "learning_rate": 0.00019762371484578152, "loss": 1.2437, "step": 3 }, { "epoch": 0.12, "learning_rate": 0.00019682361883426012, "loss": 1.3019, "step": 4 }, { "epoch": 0.15, "learning_rate": 0.00019602352282273875, "loss": 1.4664, "step": 5 }, { "epoch": 0.18, "learning_rate": 0.00019522342681121735, "loss": 1.1706, "step": 6 }, { "epoch": 0.21, "learning_rate": 0.00019442333079969598, "loss": 1.2089, "step": 7 }, { "epoch": 0.24, "learning_rate": 0.00019362323478817458, "loss": 1.0329, "step": 8 }, { "epoch": 0.26, "learning_rate": 0.0001928231387766532, "loss": 1.3136, "step": 9 }, { "epoch": 0.29, "learning_rate": 0.00019202304276513184, "loss": 1.0903, "step": 10 }, { "epoch": 0.32, "learning_rate": 0.00019122294675361044, "loss": 1.089, "step": 11 }, { "epoch": 0.35, "learning_rate": 0.00019042285074208904, "loss": 1.2918, "step": 12 }, { "epoch": 0.38, "learning_rate": 0.00018962275473056767, "loss": 1.0208, "step": 13 }, { "epoch": 0.41, "learning_rate": 0.0001888226587190463, "loss": 1.2414, "step": 14 }, { "epoch": 0.44, "learning_rate": 0.00018802256270752493, "loss": 0.8309, "step": 15 }, { "epoch": 0.47, "learning_rate": 0.00018722246669600353, "loss": 1.0701, "step": 16 }, { "epoch": 0.5, "learning_rate": 0.00018642237068448213, "loss": 1.103, "step": 17 }, { "epoch": 0.53, "learning_rate": 0.00018562227467296076, "loss": 1.1536, "step": 18 }, { "epoch": 0.56, "learning_rate": 0.0001848221786614394, "loss": 0.9786, "step": 19 }, { "epoch": 0.59, "learning_rate": 0.000184022082649918, "loss": 1.0474, "step": 20 }, { "epoch": 0.62, "learning_rate": 0.00018322198663839662, "loss": 0.9066, "step": 21 }, { "epoch": 0.65, "learning_rate": 0.00018242189062687522, "loss": 1.0756, "step": 22 }, { "epoch": 0.68, "learning_rate": 0.00018162179461535385, "loss": 0.9649, "step": 23 }, { "epoch": 0.71, "learning_rate": 0.00018082169860383248, "loss": 1.0698, "step": 24 }, { "epoch": 0.74, "learning_rate": 0.00018002160259231108, "loss": 1.0971, "step": 25 }, { "epoch": 0.76, "learning_rate": 0.0001792215065807897, "loss": 1.0154, "step": 26 }, { "epoch": 0.79, "learning_rate": 0.00017842141056926834, "loss": 1.2113, "step": 27 }, { "epoch": 0.82, "learning_rate": 0.00017762131455774694, "loss": 1.0433, "step": 28 }, { "epoch": 0.85, "learning_rate": 0.00017682121854622554, "loss": 0.9149, "step": 29 }, { "epoch": 0.88, "learning_rate": 0.00017602112253470417, "loss": 1.294, "step": 30 }, { "epoch": 0.91, "learning_rate": 0.0001752210265231828, "loss": 0.8756, "step": 31 }, { "epoch": 0.94, "learning_rate": 0.00017442093051166143, "loss": 0.9514, "step": 32 }, { "epoch": 0.97, "learning_rate": 0.00017362083450014, "loss": 0.7657, "step": 33 }, { "epoch": 1.0, "learning_rate": 0.00017282073848861863, "loss": 0.9848, "step": 34 }, { "epoch": 1.03, "learning_rate": 0.00017202064247709726, "loss": 0.9737, "step": 35 }, { "epoch": 1.06, "learning_rate": 0.0001712205464655759, "loss": 0.6804, "step": 36 }, { "epoch": 1.09, "learning_rate": 0.0001704204504540545, "loss": 1.0733, "step": 37 }, { "epoch": 1.12, "learning_rate": 0.0001696203544425331, "loss": 0.984, "step": 38 }, { "epoch": 1.15, "learning_rate": 0.00016882025843101172, "loss": 0.7753, "step": 39 }, { "epoch": 1.18, "learning_rate": 0.00016802016241949035, "loss": 0.9925, "step": 40 }, { "epoch": 1.21, "learning_rate": 0.00016722006640796896, "loss": 0.4777, "step": 41 }, { "epoch": 1.24, "learning_rate": 0.00016641997039644758, "loss": 0.8519, "step": 42 }, { "epoch": 1.26, "learning_rate": 0.00016561987438492621, "loss": 0.8004, "step": 43 }, { "epoch": 1.29, "learning_rate": 0.00016481977837340482, "loss": 0.8533, "step": 44 }, { "epoch": 1.32, "learning_rate": 0.00016401968236188344, "loss": 0.5997, "step": 45 }, { "epoch": 1.35, "learning_rate": 0.00016321958635036205, "loss": 0.7882, "step": 46 }, { "epoch": 1.38, "learning_rate": 0.00016241949033884068, "loss": 1.0804, "step": 47 }, { "epoch": 1.41, "learning_rate": 0.0001616193943273193, "loss": 0.4132, "step": 48 }, { "epoch": 1.44, "learning_rate": 0.0001608192983157979, "loss": 0.9914, "step": 49 }, { "epoch": 1.47, "learning_rate": 0.0001600192023042765, "loss": 0.3964, "step": 50 }, { "epoch": 1.5, "learning_rate": 0.00015921910629275514, "loss": 0.5703, "step": 51 }, { "epoch": 1.53, "learning_rate": 0.00015841901028123377, "loss": 0.7238, "step": 52 }, { "epoch": 1.56, "learning_rate": 0.00015761891426971237, "loss": 0.6712, "step": 53 }, { "epoch": 1.59, "learning_rate": 0.00015681881825819097, "loss": 0.6738, "step": 54 }, { "epoch": 1.62, "learning_rate": 0.0001560187222466696, "loss": 0.6217, "step": 55 }, { "epoch": 1.65, "learning_rate": 0.00015521862623514823, "loss": 0.5953, "step": 56 }, { "epoch": 1.68, "learning_rate": 0.00015441853022362686, "loss": 0.8303, "step": 57 }, { "epoch": 1.71, "learning_rate": 0.00015361843421210546, "loss": 0.6253, "step": 58 }, { "epoch": 1.74, "learning_rate": 0.0001528183382005841, "loss": 0.3754, "step": 59 }, { "epoch": 1.76, "learning_rate": 0.0001520182421890627, "loss": 0.6664, "step": 60 }, { "epoch": 1.79, "learning_rate": 0.00015121814617754132, "loss": 0.6756, "step": 61 }, { "epoch": 1.82, "learning_rate": 0.00015041805016601992, "loss": 0.8601, "step": 62 }, { "epoch": 1.85, "learning_rate": 0.00014961795415449855, "loss": 0.6088, "step": 63 }, { "epoch": 1.88, "learning_rate": 0.00014881785814297718, "loss": 0.5837, "step": 64 }, { "epoch": 1.91, "learning_rate": 0.00014801776213145578, "loss": 0.5813, "step": 65 }, { "epoch": 1.94, "learning_rate": 0.00014721766611993438, "loss": 0.672, "step": 66 }, { "epoch": 1.97, "learning_rate": 0.000146417570108413, "loss": 0.9818, "step": 67 }, { "epoch": 2.0, "learning_rate": 0.00014561747409689164, "loss": 0.7044, "step": 68 }, { "epoch": 2.03, "learning_rate": 0.00014481737808537027, "loss": 0.4525, "step": 69 }, { "epoch": 2.06, "learning_rate": 0.00014401728207384887, "loss": 0.3649, "step": 70 }, { "epoch": 2.09, "learning_rate": 0.00014321718606232747, "loss": 0.4089, "step": 71 }, { "epoch": 2.12, "learning_rate": 0.0001424170900508061, "loss": 0.4102, "step": 72 }, { "epoch": 2.15, "learning_rate": 0.00014161699403928473, "loss": 0.2403, "step": 73 }, { "epoch": 2.18, "learning_rate": 0.00014081689802776333, "loss": 0.3113, "step": 74 }, { "epoch": 2.21, "learning_rate": 0.00014001680201624196, "loss": 0.5503, "step": 75 }, { "epoch": 2.24, "learning_rate": 0.00013921670600472056, "loss": 0.6021, "step": 76 }, { "epoch": 2.26, "learning_rate": 0.0001384166099931992, "loss": 0.5271, "step": 77 }, { "epoch": 2.29, "learning_rate": 0.00013761651398167782, "loss": 0.3207, "step": 78 }, { "epoch": 2.32, "learning_rate": 0.00013681641797015642, "loss": 0.4899, "step": 79 }, { "epoch": 2.35, "learning_rate": 0.00013601632195863505, "loss": 0.3858, "step": 80 }, { "epoch": 2.38, "learning_rate": 0.00013521622594711365, "loss": 0.4361, "step": 81 }, { "epoch": 2.41, "learning_rate": 0.00013441612993559228, "loss": 0.3672, "step": 82 }, { "epoch": 2.44, "learning_rate": 0.00013361603392407088, "loss": 0.5378, "step": 83 }, { "epoch": 2.47, "learning_rate": 0.00013281593791254951, "loss": 0.3577, "step": 84 }, { "epoch": 2.5, "learning_rate": 0.00013201584190102814, "loss": 0.2658, "step": 85 }, { "epoch": 2.53, "learning_rate": 0.00013121574588950674, "loss": 0.2015, "step": 86 }, { "epoch": 2.56, "learning_rate": 0.00013041564987798535, "loss": 0.4888, "step": 87 }, { "epoch": 2.59, "learning_rate": 0.00012961555386646398, "loss": 0.3641, "step": 88 }, { "epoch": 2.62, "learning_rate": 0.0001288154578549426, "loss": 0.3613, "step": 89 }, { "epoch": 2.65, "learning_rate": 0.00012801536184342123, "loss": 0.3576, "step": 90 }, { "epoch": 2.68, "learning_rate": 0.00012721526583189984, "loss": 0.1833, "step": 91 }, { "epoch": 2.71, "learning_rate": 0.00012641516982037844, "loss": 0.1528, "step": 92 }, { "epoch": 2.74, "learning_rate": 0.00012561507380885707, "loss": 0.5194, "step": 93 }, { "epoch": 2.76, "learning_rate": 0.0001248149777973357, "loss": 0.543, "step": 94 }, { "epoch": 2.79, "learning_rate": 0.0001240148817858143, "loss": 0.2746, "step": 95 }, { "epoch": 2.82, "learning_rate": 0.00012321478577429293, "loss": 0.1863, "step": 96 }, { "epoch": 2.85, "learning_rate": 0.00012241468976277153, "loss": 0.4064, "step": 97 }, { "epoch": 2.88, "learning_rate": 0.00012161459375125014, "loss": 0.4429, "step": 98 }, { "epoch": 2.91, "learning_rate": 0.00012081449773972877, "loss": 0.32, "step": 99 }, { "epoch": 2.94, "learning_rate": 0.00012001440172820739, "loss": 0.266, "step": 100 }, { "epoch": 2.97, "learning_rate": 0.00011921430571668602, "loss": 0.4243, "step": 101 }, { "epoch": 3.0, "learning_rate": 0.00011841420970516463, "loss": 0.381, "step": 102 }, { "epoch": 3.03, "learning_rate": 0.00011761411369364323, "loss": 0.1964, "step": 103 }, { "epoch": 3.06, "learning_rate": 0.00011681401768212186, "loss": 0.2786, "step": 104 }, { "epoch": 3.09, "learning_rate": 0.00011601392167060048, "loss": 0.2606, "step": 105 }, { "epoch": 3.12, "learning_rate": 0.0001152138256590791, "loss": 0.1124, "step": 106 }, { "epoch": 3.15, "learning_rate": 0.00011441372964755772, "loss": 0.242, "step": 107 }, { "epoch": 3.18, "learning_rate": 0.00011361363363603632, "loss": 0.1387, "step": 108 }, { "epoch": 3.21, "learning_rate": 0.00011281353762451494, "loss": 0.1772, "step": 109 }, { "epoch": 3.24, "learning_rate": 0.00011201344161299357, "loss": 0.1568, "step": 110 }, { "epoch": 3.26, "learning_rate": 0.00011121334560147218, "loss": 0.2883, "step": 111 }, { "epoch": 3.29, "learning_rate": 0.00011041324958995081, "loss": 0.2013, "step": 112 }, { "epoch": 3.32, "learning_rate": 0.00010961315357842943, "loss": 0.2457, "step": 113 }, { "epoch": 3.35, "learning_rate": 0.00010881305756690803, "loss": 0.1284, "step": 114 }, { "epoch": 3.38, "learning_rate": 0.00010801296155538665, "loss": 0.1901, "step": 115 }, { "epoch": 3.41, "learning_rate": 0.00010721286554386528, "loss": 0.1334, "step": 116 }, { "epoch": 3.44, "learning_rate": 0.00010641276953234389, "loss": 0.1461, "step": 117 }, { "epoch": 3.47, "learning_rate": 0.00010561267352082252, "loss": 0.2129, "step": 118 }, { "epoch": 3.5, "learning_rate": 0.00010481257750930111, "loss": 0.0939, "step": 119 }, { "epoch": 3.53, "learning_rate": 0.00010401248149777974, "loss": 0.1209, "step": 120 }, { "epoch": 3.56, "learning_rate": 0.00010321238548625835, "loss": 0.1331, "step": 121 }, { "epoch": 3.59, "learning_rate": 0.00010241228947473698, "loss": 0.0941, "step": 122 }, { "epoch": 3.62, "learning_rate": 0.0001016121934632156, "loss": 0.2107, "step": 123 }, { "epoch": 3.65, "learning_rate": 0.0001008120974516942, "loss": 0.2211, "step": 124 }, { "epoch": 3.68, "learning_rate": 0.00010001200144017281, "loss": 0.145, "step": 125 }, { "epoch": 3.71, "learning_rate": 9.921190542865144e-05, "loss": 0.1024, "step": 126 }, { "epoch": 3.74, "learning_rate": 9.841180941713006e-05, "loss": 0.0663, "step": 127 }, { "epoch": 3.76, "learning_rate": 9.761171340560867e-05, "loss": 0.1352, "step": 128 }, { "epoch": 3.79, "learning_rate": 9.681161739408729e-05, "loss": 0.2258, "step": 129 }, { "epoch": 3.82, "learning_rate": 9.601152138256592e-05, "loss": 0.1926, "step": 130 }, { "epoch": 3.85, "learning_rate": 9.521142537104452e-05, "loss": 0.1149, "step": 131 }, { "epoch": 3.88, "learning_rate": 9.441132935952315e-05, "loss": 0.1301, "step": 132 }, { "epoch": 3.91, "learning_rate": 9.361123334800176e-05, "loss": 0.0552, "step": 133 }, { "epoch": 3.94, "learning_rate": 9.281113733648038e-05, "loss": 0.1837, "step": 134 }, { "epoch": 3.97, "learning_rate": 9.2011041324959e-05, "loss": 0.1085, "step": 135 }, { "epoch": 4.0, "learning_rate": 9.121094531343761e-05, "loss": 0.2303, "step": 136 }, { "epoch": 4.03, "learning_rate": 9.041084930191624e-05, "loss": 0.0498, "step": 137 }, { "epoch": 4.06, "learning_rate": 8.961075329039486e-05, "loss": 0.0452, "step": 138 }, { "epoch": 4.09, "learning_rate": 8.881065727887347e-05, "loss": 0.1161, "step": 139 }, { "epoch": 4.12, "learning_rate": 8.801056126735209e-05, "loss": 0.0441, "step": 140 }, { "epoch": 4.15, "learning_rate": 8.721046525583072e-05, "loss": 0.0704, "step": 141 }, { "epoch": 4.18, "learning_rate": 8.641036924430932e-05, "loss": 0.1103, "step": 142 }, { "epoch": 4.21, "learning_rate": 8.561027323278795e-05, "loss": 0.0313, "step": 143 }, { "epoch": 4.24, "learning_rate": 8.481017722126655e-05, "loss": 0.0494, "step": 144 }, { "epoch": 4.26, "learning_rate": 8.401008120974518e-05, "loss": 0.0805, "step": 145 }, { "epoch": 4.29, "learning_rate": 8.320998519822379e-05, "loss": 0.0512, "step": 146 }, { "epoch": 4.32, "learning_rate": 8.240988918670241e-05, "loss": 0.0442, "step": 147 }, { "epoch": 4.35, "learning_rate": 8.160979317518102e-05, "loss": 0.0944, "step": 148 }, { "epoch": 4.38, "learning_rate": 8.080969716365965e-05, "loss": 0.0621, "step": 149 }, { "epoch": 4.41, "learning_rate": 8.000960115213825e-05, "loss": 0.1191, "step": 150 }, { "epoch": 4.44, "learning_rate": 7.920950514061688e-05, "loss": 0.1518, "step": 151 }, { "epoch": 4.47, "learning_rate": 7.840940912909548e-05, "loss": 0.1002, "step": 152 }, { "epoch": 4.5, "learning_rate": 7.760931311757411e-05, "loss": 0.0803, "step": 153 }, { "epoch": 4.53, "learning_rate": 7.680921710605273e-05, "loss": 0.0582, "step": 154 }, { "epoch": 4.56, "learning_rate": 7.600912109453134e-05, "loss": 0.0745, "step": 155 }, { "epoch": 4.59, "learning_rate": 7.520902508300996e-05, "loss": 0.0844, "step": 156 }, { "epoch": 4.62, "learning_rate": 7.440892907148859e-05, "loss": 0.056, "step": 157 }, { "epoch": 4.65, "learning_rate": 7.360883305996719e-05, "loss": 0.1091, "step": 158 }, { "epoch": 4.68, "learning_rate": 7.280873704844582e-05, "loss": 0.0561, "step": 159 }, { "epoch": 4.71, "learning_rate": 7.200864103692444e-05, "loss": 0.0815, "step": 160 }, { "epoch": 4.74, "learning_rate": 7.120854502540305e-05, "loss": 0.0704, "step": 161 }, { "epoch": 4.76, "learning_rate": 7.040844901388167e-05, "loss": 0.0954, "step": 162 }, { "epoch": 4.79, "learning_rate": 6.960835300236028e-05, "loss": 0.0915, "step": 163 }, { "epoch": 4.82, "learning_rate": 6.880825699083891e-05, "loss": 0.0539, "step": 164 }, { "epoch": 4.85, "learning_rate": 6.800816097931753e-05, "loss": 0.0694, "step": 165 }, { "epoch": 4.88, "learning_rate": 6.720806496779614e-05, "loss": 0.0763, "step": 166 }, { "epoch": 4.91, "learning_rate": 6.640796895627476e-05, "loss": 0.0388, "step": 167 }, { "epoch": 4.94, "learning_rate": 6.560787294475337e-05, "loss": 0.1189, "step": 168 }, { "epoch": 4.97, "learning_rate": 6.480777693323199e-05, "loss": 0.0441, "step": 169 }, { "epoch": 5.0, "learning_rate": 6.400768092171062e-05, "loss": 0.0801, "step": 170 }, { "epoch": 5.03, "learning_rate": 6.320758491018922e-05, "loss": 0.027, "step": 171 }, { "epoch": 5.06, "learning_rate": 6.240748889866785e-05, "loss": 0.0346, "step": 172 }, { "epoch": 5.09, "learning_rate": 6.160739288714646e-05, "loss": 0.0385, "step": 173 }, { "epoch": 5.12, "learning_rate": 6.080729687562507e-05, "loss": 0.0575, "step": 174 }, { "epoch": 5.15, "learning_rate": 6.0007200864103694e-05, "loss": 0.042, "step": 175 }, { "epoch": 5.18, "learning_rate": 5.9207104852582316e-05, "loss": 0.0521, "step": 176 }, { "epoch": 5.21, "learning_rate": 5.840700884106093e-05, "loss": 0.054, "step": 177 }, { "epoch": 5.24, "learning_rate": 5.760691282953955e-05, "loss": 0.0503, "step": 178 }, { "epoch": 5.26, "learning_rate": 5.680681681801816e-05, "loss": 0.0285, "step": 179 }, { "epoch": 5.29, "learning_rate": 5.6006720806496785e-05, "loss": 0.0281, "step": 180 }, { "epoch": 5.32, "learning_rate": 5.520662479497541e-05, "loss": 0.0402, "step": 181 }, { "epoch": 5.35, "learning_rate": 5.4406528783454015e-05, "loss": 0.04, "step": 182 }, { "epoch": 5.38, "learning_rate": 5.360643277193264e-05, "loss": 0.044, "step": 183 }, { "epoch": 5.41, "learning_rate": 5.280633676041126e-05, "loss": 0.0264, "step": 184 }, { "epoch": 5.44, "learning_rate": 5.200624074888987e-05, "loss": 0.0379, "step": 185 }, { "epoch": 5.47, "learning_rate": 5.120614473736849e-05, "loss": 0.037, "step": 186 }, { "epoch": 5.5, "learning_rate": 5.04060487258471e-05, "loss": 0.0422, "step": 187 }, { "epoch": 5.53, "learning_rate": 4.960595271432572e-05, "loss": 0.0313, "step": 188 }, { "epoch": 5.56, "learning_rate": 4.880585670280434e-05, "loss": 0.0241, "step": 189 }, { "epoch": 5.59, "learning_rate": 4.800576069128296e-05, "loss": 0.0546, "step": 190 }, { "epoch": 5.62, "learning_rate": 4.7205664679761575e-05, "loss": 0.0305, "step": 191 }, { "epoch": 5.65, "learning_rate": 4.640556866824019e-05, "loss": 0.0364, "step": 192 }, { "epoch": 5.68, "learning_rate": 4.5605472656718805e-05, "loss": 0.0329, "step": 193 }, { "epoch": 5.71, "learning_rate": 4.480537664519743e-05, "loss": 0.0453, "step": 194 }, { "epoch": 5.74, "learning_rate": 4.400528063367604e-05, "loss": 0.0386, "step": 195 }, { "epoch": 5.76, "learning_rate": 4.320518462215466e-05, "loss": 0.0401, "step": 196 }, { "epoch": 5.79, "learning_rate": 4.2405088610633274e-05, "loss": 0.0527, "step": 197 }, { "epoch": 5.82, "learning_rate": 4.1604992599111896e-05, "loss": 0.0342, "step": 198 }, { "epoch": 5.85, "learning_rate": 4.080489658759051e-05, "loss": 0.0336, "step": 199 }, { "epoch": 5.88, "learning_rate": 4.000480057606913e-05, "loss": 0.034, "step": 200 }, { "epoch": 5.91, "learning_rate": 3.920470456454774e-05, "loss": 0.0316, "step": 201 }, { "epoch": 5.94, "learning_rate": 3.8404608553026365e-05, "loss": 0.0537, "step": 202 }, { "epoch": 5.97, "learning_rate": 3.760451254150498e-05, "loss": 0.0348, "step": 203 }, { "epoch": 6.0, "learning_rate": 3.6804416529983595e-05, "loss": 0.0525, "step": 204 }, { "epoch": 6.03, "learning_rate": 3.600432051846222e-05, "loss": 0.0264, "step": 205 }, { "epoch": 6.06, "learning_rate": 3.520422450694083e-05, "loss": 0.0216, "step": 206 }, { "epoch": 6.09, "learning_rate": 3.4404128495419455e-05, "loss": 0.0232, "step": 207 }, { "epoch": 6.12, "learning_rate": 3.360403248389807e-05, "loss": 0.0342, "step": 208 }, { "epoch": 6.15, "learning_rate": 3.2803936472376686e-05, "loss": 0.0196, "step": 209 }, { "epoch": 6.18, "learning_rate": 3.200384046085531e-05, "loss": 0.0292, "step": 210 }, { "epoch": 6.21, "learning_rate": 3.1203744449333924e-05, "loss": 0.02, "step": 211 }, { "epoch": 6.24, "learning_rate": 3.0403648437812536e-05, "loss": 0.0243, "step": 212 }, { "epoch": 6.26, "learning_rate": 2.9603552426291158e-05, "loss": 0.0223, "step": 213 }, { "epoch": 6.29, "learning_rate": 2.8803456414769773e-05, "loss": 0.017, "step": 214 }, { "epoch": 6.32, "learning_rate": 2.8003360403248392e-05, "loss": 0.0201, "step": 215 }, { "epoch": 6.35, "learning_rate": 2.7203264391727008e-05, "loss": 0.0368, "step": 216 }, { "epoch": 6.38, "learning_rate": 2.640316838020563e-05, "loss": 0.0228, "step": 217 }, { "epoch": 6.41, "learning_rate": 2.5603072368684245e-05, "loss": 0.0241, "step": 218 }, { "epoch": 6.44, "learning_rate": 2.480297635716286e-05, "loss": 0.025, "step": 219 }, { "epoch": 6.47, "learning_rate": 2.400288034564148e-05, "loss": 0.0261, "step": 220 }, { "epoch": 6.5, "learning_rate": 2.3202784334120095e-05, "loss": 0.026, "step": 221 }, { "epoch": 6.53, "learning_rate": 2.2402688322598714e-05, "loss": 0.0242, "step": 222 }, { "epoch": 6.56, "learning_rate": 2.160259231107733e-05, "loss": 0.029, "step": 223 }, { "epoch": 6.59, "learning_rate": 2.0802496299555948e-05, "loss": 0.0305, "step": 224 }, { "epoch": 6.62, "learning_rate": 2.0002400288034563e-05, "loss": 0.0263, "step": 225 }, { "epoch": 6.65, "learning_rate": 1.9202304276513182e-05, "loss": 0.021, "step": 226 }, { "epoch": 6.68, "learning_rate": 1.8402208264991798e-05, "loss": 0.0295, "step": 227 }, { "epoch": 6.71, "learning_rate": 1.7602112253470417e-05, "loss": 0.0276, "step": 228 }, { "epoch": 6.74, "learning_rate": 1.6802016241949035e-05, "loss": 0.023, "step": 229 }, { "epoch": 6.76, "learning_rate": 1.6001920230427654e-05, "loss": 0.0251, "step": 230 }, { "epoch": 6.79, "learning_rate": 1.5201824218906268e-05, "loss": 0.0415, "step": 231 }, { "epoch": 6.82, "learning_rate": 1.4401728207384887e-05, "loss": 0.0292, "step": 232 }, { "epoch": 6.85, "learning_rate": 1.3601632195863504e-05, "loss": 0.0268, "step": 233 }, { "epoch": 6.88, "learning_rate": 1.2801536184342123e-05, "loss": 0.0265, "step": 234 }, { "epoch": 6.91, "learning_rate": 1.200144017282074e-05, "loss": 0.0208, "step": 235 }, { "epoch": 6.94, "learning_rate": 1.1201344161299357e-05, "loss": 0.0231, "step": 236 }, { "epoch": 6.97, "learning_rate": 1.0401248149777974e-05, "loss": 0.022, "step": 237 }, { "epoch": 7.0, "learning_rate": 9.601152138256591e-06, "loss": 0.0242, "step": 238 }, { "epoch": 7.03, "learning_rate": 8.801056126735208e-06, "loss": 0.0155, "step": 239 }, { "epoch": 7.06, "learning_rate": 8.000960115213827e-06, "loss": 0.0183, "step": 240 } ], "logging_steps": 1, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 20, "total_flos": 1.5323280428875776e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }