{ "best_metric": 0.11630760878324509, "best_model_checkpoint": "autotrain-ai-image-detect-20240716-0057/checkpoint-8114", "epoch": 2.0, "eval_steps": 500, "global_step": 8114, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006162188809465122, "grad_norm": 32.40150833129883, "learning_rate": 6.162188809465123e-08, "loss": 0.8702, "step": 25 }, { "epoch": 0.012324377618930245, "grad_norm": 20.778217315673828, "learning_rate": 1.2324377618930246e-07, "loss": 0.8375, "step": 50 }, { "epoch": 0.018486566428395367, "grad_norm": 30.263259887695312, "learning_rate": 1.848656642839537e-07, "loss": 0.8264, "step": 75 }, { "epoch": 0.02464875523786049, "grad_norm": 27.025936126708984, "learning_rate": 2.4648755237860493e-07, "loss": 0.7418, "step": 100 }, { "epoch": 0.03081094404732561, "grad_norm": 30.425628662109375, "learning_rate": 3.081094404732561e-07, "loss": 0.7933, "step": 125 }, { "epoch": 0.036973132856790734, "grad_norm": 27.200397491455078, "learning_rate": 3.697313285679074e-07, "loss": 0.7154, "step": 150 }, { "epoch": 0.04313532166625585, "grad_norm": 28.001712799072266, "learning_rate": 4.3135321666255857e-07, "loss": 0.765, "step": 175 }, { "epoch": 0.04929751047572098, "grad_norm": 25.52901268005371, "learning_rate": 4.929751047572099e-07, "loss": 0.7294, "step": 200 }, { "epoch": 0.0554596992851861, "grad_norm": 24.595458984375, "learning_rate": 5.54596992851861e-07, "loss": 0.6705, "step": 225 }, { "epoch": 0.06162188809465122, "grad_norm": 21.848743438720703, "learning_rate": 6.162188809465122e-07, "loss": 0.66, "step": 250 }, { "epoch": 0.06778407690411634, "grad_norm": 19.161487579345703, "learning_rate": 6.778407690411634e-07, "loss": 0.5849, "step": 275 }, { "epoch": 0.07394626571358147, "grad_norm": 24.0733642578125, "learning_rate": 7.394626571358148e-07, "loss": 0.6295, "step": 300 }, { "epoch": 0.08010845452304659, "grad_norm": 18.740398406982422, "learning_rate": 8.010845452304659e-07, "loss": 0.6113, "step": 325 }, { "epoch": 0.0862706433325117, "grad_norm": 14.259610176086426, "learning_rate": 8.627064333251171e-07, "loss": 0.5723, "step": 350 }, { "epoch": 0.09243283214197683, "grad_norm": 85.3804702758789, "learning_rate": 9.243283214197684e-07, "loss": 0.4889, "step": 375 }, { "epoch": 0.09859502095144196, "grad_norm": 27.59977912902832, "learning_rate": 9.859502095144197e-07, "loss": 0.4906, "step": 400 }, { "epoch": 0.10475720976090708, "grad_norm": 19.361953735351562, "learning_rate": 1.047572097609071e-06, "loss": 0.4567, "step": 425 }, { "epoch": 0.1109193985703722, "grad_norm": 22.093198776245117, "learning_rate": 1.109193985703722e-06, "loss": 0.4161, "step": 450 }, { "epoch": 0.11708158737983732, "grad_norm": 21.007787704467773, "learning_rate": 1.1708158737983732e-06, "loss": 0.465, "step": 475 }, { "epoch": 0.12324377618930243, "grad_norm": 58.968013763427734, "learning_rate": 1.2324377618930244e-06, "loss": 0.3818, "step": 500 }, { "epoch": 0.12940596499876755, "grad_norm": 12.235271453857422, "learning_rate": 1.2940596499876757e-06, "loss": 0.4083, "step": 525 }, { "epoch": 0.13556815380823267, "grad_norm": 52.92424774169922, "learning_rate": 1.3556815380823269e-06, "loss": 0.3776, "step": 550 }, { "epoch": 0.14173034261769782, "grad_norm": 19.378677368164062, "learning_rate": 1.4173034261769783e-06, "loss": 0.3566, "step": 575 }, { "epoch": 0.14789253142716294, "grad_norm": 25.241214752197266, "learning_rate": 1.4789253142716296e-06, "loss": 0.412, "step": 600 }, { "epoch": 0.15405472023662806, "grad_norm": 34.709651947021484, "learning_rate": 1.5405472023662808e-06, "loss": 0.3039, "step": 625 }, { "epoch": 0.16021690904609318, "grad_norm": 106.13933563232422, "learning_rate": 1.6021690904609318e-06, "loss": 0.3414, "step": 650 }, { "epoch": 0.1663790978555583, "grad_norm": 8.852789878845215, "learning_rate": 1.663790978555583e-06, "loss": 0.3152, "step": 675 }, { "epoch": 0.1725412866650234, "grad_norm": 10.936814308166504, "learning_rate": 1.7254128666502343e-06, "loss": 0.3753, "step": 700 }, { "epoch": 0.17870347547448853, "grad_norm": 19.37809181213379, "learning_rate": 1.7870347547448855e-06, "loss": 0.3364, "step": 725 }, { "epoch": 0.18486566428395365, "grad_norm": 31.515443801879883, "learning_rate": 1.8486566428395367e-06, "loss": 0.3178, "step": 750 }, { "epoch": 0.19102785309341877, "grad_norm": 39.89761734008789, "learning_rate": 1.910278530934188e-06, "loss": 0.3016, "step": 775 }, { "epoch": 0.19719004190288392, "grad_norm": 28.02248764038086, "learning_rate": 1.9719004190288394e-06, "loss": 0.3193, "step": 800 }, { "epoch": 0.20335223071234904, "grad_norm": 15.309660911560059, "learning_rate": 2.0335223071234904e-06, "loss": 0.3139, "step": 825 }, { "epoch": 0.20951441952181415, "grad_norm": 26.285245895385742, "learning_rate": 2.095144195218142e-06, "loss": 0.3525, "step": 850 }, { "epoch": 0.21567660833127927, "grad_norm": 19.554412841796875, "learning_rate": 2.156766083312793e-06, "loss": 0.2952, "step": 875 }, { "epoch": 0.2218387971407444, "grad_norm": 8.018360137939453, "learning_rate": 2.218387971407444e-06, "loss": 0.3224, "step": 900 }, { "epoch": 0.2280009859502095, "grad_norm": 35.61044692993164, "learning_rate": 2.2800098595020954e-06, "loss": 0.3077, "step": 925 }, { "epoch": 0.23416317475967463, "grad_norm": 25.721057891845703, "learning_rate": 2.3416317475967464e-06, "loss": 0.2584, "step": 950 }, { "epoch": 0.24032536356913975, "grad_norm": 21.368654251098633, "learning_rate": 2.403253635691398e-06, "loss": 0.2761, "step": 975 }, { "epoch": 0.24648755237860487, "grad_norm": 14.94520092010498, "learning_rate": 2.464875523786049e-06, "loss": 0.2588, "step": 1000 }, { "epoch": 0.25264974118807, "grad_norm": 41.90576934814453, "learning_rate": 2.5264974118807003e-06, "loss": 0.2586, "step": 1025 }, { "epoch": 0.2588119299975351, "grad_norm": 38.49464797973633, "learning_rate": 2.5881192999753513e-06, "loss": 0.2383, "step": 1050 }, { "epoch": 0.2649741188070002, "grad_norm": 24.9031982421875, "learning_rate": 2.6497411880700023e-06, "loss": 0.255, "step": 1075 }, { "epoch": 0.27113630761646534, "grad_norm": 33.10504913330078, "learning_rate": 2.7113630761646538e-06, "loss": 0.2343, "step": 1100 }, { "epoch": 0.27729849642593046, "grad_norm": 13.623468399047852, "learning_rate": 2.772984964259305e-06, "loss": 0.2265, "step": 1125 }, { "epoch": 0.28346068523539564, "grad_norm": 54.64838409423828, "learning_rate": 2.8346068523539567e-06, "loss": 0.2254, "step": 1150 }, { "epoch": 0.28962287404486076, "grad_norm": 18.711090087890625, "learning_rate": 2.8962287404486077e-06, "loss": 0.2393, "step": 1175 }, { "epoch": 0.2957850628543259, "grad_norm": 43.24985122680664, "learning_rate": 2.957850628543259e-06, "loss": 0.2382, "step": 1200 }, { "epoch": 0.301947251663791, "grad_norm": 6.365708827972412, "learning_rate": 3.01947251663791e-06, "loss": 0.2467, "step": 1225 }, { "epoch": 0.3081094404732561, "grad_norm": 41.95709991455078, "learning_rate": 3.0810944047325616e-06, "loss": 0.218, "step": 1250 }, { "epoch": 0.31427162928272123, "grad_norm": 30.292692184448242, "learning_rate": 3.1427162928272126e-06, "loss": 0.3069, "step": 1275 }, { "epoch": 0.32043381809218635, "grad_norm": 21.788148880004883, "learning_rate": 3.2043381809218636e-06, "loss": 0.201, "step": 1300 }, { "epoch": 0.32659600690165147, "grad_norm": 39.77275848388672, "learning_rate": 3.265960069016515e-06, "loss": 0.2879, "step": 1325 }, { "epoch": 0.3327581957111166, "grad_norm": 52.750396728515625, "learning_rate": 3.327581957111166e-06, "loss": 0.2154, "step": 1350 }, { "epoch": 0.3389203845205817, "grad_norm": 27.913963317871094, "learning_rate": 3.3892038452058175e-06, "loss": 0.2396, "step": 1375 }, { "epoch": 0.3450825733300468, "grad_norm": 10.700597763061523, "learning_rate": 3.4508257333004686e-06, "loss": 0.2684, "step": 1400 }, { "epoch": 0.35124476213951195, "grad_norm": 28.48025131225586, "learning_rate": 3.5124476213951196e-06, "loss": 0.2199, "step": 1425 }, { "epoch": 0.35740695094897706, "grad_norm": 34.27480697631836, "learning_rate": 3.574069509489771e-06, "loss": 0.2376, "step": 1450 }, { "epoch": 0.3635691397584422, "grad_norm": 31.14768409729004, "learning_rate": 3.635691397584422e-06, "loss": 0.2313, "step": 1475 }, { "epoch": 0.3697313285679073, "grad_norm": 29.21658706665039, "learning_rate": 3.6973132856790735e-06, "loss": 0.2655, "step": 1500 }, { "epoch": 0.3758935173773724, "grad_norm": 12.605408668518066, "learning_rate": 3.7589351737737245e-06, "loss": 0.3038, "step": 1525 }, { "epoch": 0.38205570618683754, "grad_norm": 20.78334617614746, "learning_rate": 3.820557061868376e-06, "loss": 0.2358, "step": 1550 }, { "epoch": 0.38821789499630266, "grad_norm": 7.5883097648620605, "learning_rate": 3.882178949963027e-06, "loss": 0.1509, "step": 1575 }, { "epoch": 0.39438008380576783, "grad_norm": 88.7538833618164, "learning_rate": 3.943800838057679e-06, "loss": 0.2073, "step": 1600 }, { "epoch": 0.40054227261523295, "grad_norm": 1.4600569009780884, "learning_rate": 4.00542272615233e-06, "loss": 0.2457, "step": 1625 }, { "epoch": 0.40670446142469807, "grad_norm": 13.259570121765137, "learning_rate": 4.067044614246981e-06, "loss": 0.2405, "step": 1650 }, { "epoch": 0.4128666502341632, "grad_norm": 59.721561431884766, "learning_rate": 4.128666502341632e-06, "loss": 0.3026, "step": 1675 }, { "epoch": 0.4190288390436283, "grad_norm": 3.4791340827941895, "learning_rate": 4.190288390436284e-06, "loss": 0.2756, "step": 1700 }, { "epoch": 0.42519102785309343, "grad_norm": 30.421899795532227, "learning_rate": 4.251910278530935e-06, "loss": 0.2085, "step": 1725 }, { "epoch": 0.43135321666255855, "grad_norm": 41.64067840576172, "learning_rate": 4.313532166625586e-06, "loss": 0.1712, "step": 1750 }, { "epoch": 0.43751540547202367, "grad_norm": 55.73050308227539, "learning_rate": 4.375154054720237e-06, "loss": 0.2047, "step": 1775 }, { "epoch": 0.4436775942814888, "grad_norm": 27.624855041503906, "learning_rate": 4.436775942814888e-06, "loss": 0.2631, "step": 1800 }, { "epoch": 0.4498397830909539, "grad_norm": 2.5751349925994873, "learning_rate": 4.49839783090954e-06, "loss": 0.3508, "step": 1825 }, { "epoch": 0.456001971900419, "grad_norm": 59.512813568115234, "learning_rate": 4.560019719004191e-06, "loss": 0.2798, "step": 1850 }, { "epoch": 0.46216416070988414, "grad_norm": 38.6485710144043, "learning_rate": 4.621641607098842e-06, "loss": 0.2426, "step": 1875 }, { "epoch": 0.46832634951934926, "grad_norm": 35.3476676940918, "learning_rate": 4.683263495193493e-06, "loss": 0.2559, "step": 1900 }, { "epoch": 0.4744885383288144, "grad_norm": 9.039581298828125, "learning_rate": 4.744885383288144e-06, "loss": 0.2461, "step": 1925 }, { "epoch": 0.4806507271382795, "grad_norm": 3.9812674522399902, "learning_rate": 4.806507271382796e-06, "loss": 0.2293, "step": 1950 }, { "epoch": 0.4868129159477446, "grad_norm": 39.7280387878418, "learning_rate": 4.868129159477447e-06, "loss": 0.2519, "step": 1975 }, { "epoch": 0.49297510475720974, "grad_norm": 6.103663444519043, "learning_rate": 4.929751047572098e-06, "loss": 0.2443, "step": 2000 }, { "epoch": 0.49913729356667486, "grad_norm": 29.946483612060547, "learning_rate": 4.991372935666749e-06, "loss": 0.2087, "step": 2025 }, { "epoch": 0.50529948237614, "grad_norm": 13.760396003723145, "learning_rate": 5.0529948237614006e-06, "loss": 0.2201, "step": 2050 }, { "epoch": 0.5114616711856051, "grad_norm": 20.78110122680664, "learning_rate": 5.114616711856052e-06, "loss": 0.2413, "step": 2075 }, { "epoch": 0.5176238599950702, "grad_norm": 54.68733596801758, "learning_rate": 5.176238599950703e-06, "loss": 0.2934, "step": 2100 }, { "epoch": 0.5237860488045354, "grad_norm": 0.6074588894844055, "learning_rate": 5.2378604880453545e-06, "loss": 0.1745, "step": 2125 }, { "epoch": 0.5299482376140005, "grad_norm": 15.065305709838867, "learning_rate": 5.299482376140005e-06, "loss": 0.3041, "step": 2150 }, { "epoch": 0.5361104264234656, "grad_norm": 29.989559173583984, "learning_rate": 5.3611042642346565e-06, "loss": 0.2272, "step": 2175 }, { "epoch": 0.5422726152329307, "grad_norm": 31.3291072845459, "learning_rate": 5.4227261523293075e-06, "loss": 0.3574, "step": 2200 }, { "epoch": 0.5484348040423959, "grad_norm": 3.495990753173828, "learning_rate": 5.484348040423959e-06, "loss": 0.1716, "step": 2225 }, { "epoch": 0.5545969928518609, "grad_norm": 4.088655471801758, "learning_rate": 5.54596992851861e-06, "loss": 0.2254, "step": 2250 }, { "epoch": 0.5607591816613261, "grad_norm": 32.07845687866211, "learning_rate": 5.6075918166132614e-06, "loss": 0.3001, "step": 2275 }, { "epoch": 0.5669213704707913, "grad_norm": 5.552308082580566, "learning_rate": 5.669213704707913e-06, "loss": 0.1481, "step": 2300 }, { "epoch": 0.5730835592802563, "grad_norm": 25.674997329711914, "learning_rate": 5.7308355928025635e-06, "loss": 0.2426, "step": 2325 }, { "epoch": 0.5792457480897215, "grad_norm": 27.04370880126953, "learning_rate": 5.792457480897215e-06, "loss": 0.1934, "step": 2350 }, { "epoch": 0.5854079368991866, "grad_norm": 30.078052520751953, "learning_rate": 5.854079368991866e-06, "loss": 0.3716, "step": 2375 }, { "epoch": 0.5915701257086518, "grad_norm": 41.699615478515625, "learning_rate": 5.915701257086518e-06, "loss": 0.208, "step": 2400 }, { "epoch": 0.5977323145181168, "grad_norm": 16.997156143188477, "learning_rate": 5.977323145181168e-06, "loss": 0.157, "step": 2425 }, { "epoch": 0.603894503327582, "grad_norm": 8.579419136047363, "learning_rate": 6.03894503327582e-06, "loss": 0.1638, "step": 2450 }, { "epoch": 0.610056692137047, "grad_norm": 40.762413024902344, "learning_rate": 6.100566921370471e-06, "loss": 0.1574, "step": 2475 }, { "epoch": 0.6162188809465122, "grad_norm": 28.267074584960938, "learning_rate": 6.162188809465123e-06, "loss": 0.2966, "step": 2500 }, { "epoch": 0.6223810697559773, "grad_norm": 177.52345275878906, "learning_rate": 6.223810697559773e-06, "loss": 0.2214, "step": 2525 }, { "epoch": 0.6285432585654425, "grad_norm": 3.633638381958008, "learning_rate": 6.285432585654425e-06, "loss": 0.1454, "step": 2550 }, { "epoch": 0.6347054473749075, "grad_norm": 57.287357330322266, "learning_rate": 6.347054473749075e-06, "loss": 0.1831, "step": 2575 }, { "epoch": 0.6408676361843727, "grad_norm": 14.65641975402832, "learning_rate": 6.408676361843727e-06, "loss": 0.1906, "step": 2600 }, { "epoch": 0.6470298249938378, "grad_norm": 4.212377071380615, "learning_rate": 6.470298249938378e-06, "loss": 0.268, "step": 2625 }, { "epoch": 0.6531920138033029, "grad_norm": 5.885193824768066, "learning_rate": 6.53192013803303e-06, "loss": 0.4373, "step": 2650 }, { "epoch": 0.659354202612768, "grad_norm": 21.820556640625, "learning_rate": 6.59354202612768e-06, "loss": 0.2096, "step": 2675 }, { "epoch": 0.6655163914222332, "grad_norm": 0.1347610503435135, "learning_rate": 6.655163914222332e-06, "loss": 0.2188, "step": 2700 }, { "epoch": 0.6716785802316984, "grad_norm": 53.36775588989258, "learning_rate": 6.716785802316984e-06, "loss": 0.2012, "step": 2725 }, { "epoch": 0.6778407690411634, "grad_norm": 0.11368793249130249, "learning_rate": 6.778407690411635e-06, "loss": 0.1993, "step": 2750 }, { "epoch": 0.6840029578506286, "grad_norm": 0.6453331708908081, "learning_rate": 6.840029578506286e-06, "loss": 0.2713, "step": 2775 }, { "epoch": 0.6901651466600937, "grad_norm": 44.39537811279297, "learning_rate": 6.901651466600937e-06, "loss": 0.2821, "step": 2800 }, { "epoch": 0.6963273354695588, "grad_norm": 0.5684586763381958, "learning_rate": 6.963273354695589e-06, "loss": 0.2122, "step": 2825 }, { "epoch": 0.7024895242790239, "grad_norm": 21.488910675048828, "learning_rate": 7.024895242790239e-06, "loss": 0.2246, "step": 2850 }, { "epoch": 0.7086517130884891, "grad_norm": 28.14980697631836, "learning_rate": 7.086517130884891e-06, "loss": 0.3159, "step": 2875 }, { "epoch": 0.7148139018979541, "grad_norm": 22.465578079223633, "learning_rate": 7.148139018979542e-06, "loss": 0.2319, "step": 2900 }, { "epoch": 0.7209760907074193, "grad_norm": 2.2480404376983643, "learning_rate": 7.209760907074194e-06, "loss": 0.1872, "step": 2925 }, { "epoch": 0.7271382795168844, "grad_norm": 44.47653579711914, "learning_rate": 7.271382795168844e-06, "loss": 0.1988, "step": 2950 }, { "epoch": 0.7333004683263495, "grad_norm": 24.97824478149414, "learning_rate": 7.333004683263496e-06, "loss": 0.1546, "step": 2975 }, { "epoch": 0.7394626571358146, "grad_norm": 19.667158126831055, "learning_rate": 7.394626571358147e-06, "loss": 0.215, "step": 3000 }, { "epoch": 0.7456248459452798, "grad_norm": 1.9553178548812866, "learning_rate": 7.456248459452799e-06, "loss": 0.294, "step": 3025 }, { "epoch": 0.7517870347547448, "grad_norm": 16.141925811767578, "learning_rate": 7.517870347547449e-06, "loss": 0.1873, "step": 3050 }, { "epoch": 0.75794922356421, "grad_norm": 53.507423400878906, "learning_rate": 7.579492235642101e-06, "loss": 0.3383, "step": 3075 }, { "epoch": 0.7641114123736751, "grad_norm": 53.805782318115234, "learning_rate": 7.641114123736752e-06, "loss": 0.283, "step": 3100 }, { "epoch": 0.7702736011831403, "grad_norm": 2.2809536457061768, "learning_rate": 7.702736011831404e-06, "loss": 0.1787, "step": 3125 }, { "epoch": 0.7764357899926053, "grad_norm": 3.1836905479431152, "learning_rate": 7.764357899926054e-06, "loss": 0.1389, "step": 3150 }, { "epoch": 0.7825979788020705, "grad_norm": 37.81892776489258, "learning_rate": 7.825979788020706e-06, "loss": 0.3883, "step": 3175 }, { "epoch": 0.7887601676115357, "grad_norm": 27.96651840209961, "learning_rate": 7.887601676115358e-06, "loss": 0.1151, "step": 3200 }, { "epoch": 0.7949223564210007, "grad_norm": 28.917064666748047, "learning_rate": 7.949223564210008e-06, "loss": 0.2317, "step": 3225 }, { "epoch": 0.8010845452304659, "grad_norm": 31.006044387817383, "learning_rate": 8.01084545230466e-06, "loss": 0.163, "step": 3250 }, { "epoch": 0.807246734039931, "grad_norm": 31.802854537963867, "learning_rate": 8.07246734039931e-06, "loss": 0.2732, "step": 3275 }, { "epoch": 0.8134089228493961, "grad_norm": 15.189701080322266, "learning_rate": 8.134089228493962e-06, "loss": 0.1193, "step": 3300 }, { "epoch": 0.8195711116588612, "grad_norm": 28.935728073120117, "learning_rate": 8.195711116588612e-06, "loss": 0.2183, "step": 3325 }, { "epoch": 0.8257333004683264, "grad_norm": 102.93382263183594, "learning_rate": 8.257333004683264e-06, "loss": 0.2896, "step": 3350 }, { "epoch": 0.8318954892777914, "grad_norm": 13.587028503417969, "learning_rate": 8.318954892777916e-06, "loss": 0.2637, "step": 3375 }, { "epoch": 0.8380576780872566, "grad_norm": 54.688629150390625, "learning_rate": 8.380576780872568e-06, "loss": 0.3006, "step": 3400 }, { "epoch": 0.8442198668967217, "grad_norm": 34.89143371582031, "learning_rate": 8.442198668967218e-06, "loss": 0.2472, "step": 3425 }, { "epoch": 0.8503820557061869, "grad_norm": 6.273120880126953, "learning_rate": 8.50382055706187e-06, "loss": 0.2268, "step": 3450 }, { "epoch": 0.8565442445156519, "grad_norm": 7.3593974113464355, "learning_rate": 8.56544244515652e-06, "loss": 0.1922, "step": 3475 }, { "epoch": 0.8627064333251171, "grad_norm": 58.28133010864258, "learning_rate": 8.627064333251172e-06, "loss": 0.2065, "step": 3500 }, { "epoch": 0.8688686221345822, "grad_norm": 55.01313781738281, "learning_rate": 8.688686221345822e-06, "loss": 0.2455, "step": 3525 }, { "epoch": 0.8750308109440473, "grad_norm": 4.61732292175293, "learning_rate": 8.750308109440474e-06, "loss": 0.2488, "step": 3550 }, { "epoch": 0.8811929997535124, "grad_norm": 31.931983947753906, "learning_rate": 8.811929997535125e-06, "loss": 0.1889, "step": 3575 }, { "epoch": 0.8873551885629776, "grad_norm": 2.225792407989502, "learning_rate": 8.873551885629776e-06, "loss": 0.1902, "step": 3600 }, { "epoch": 0.8935173773724427, "grad_norm": 57.339359283447266, "learning_rate": 8.935173773724428e-06, "loss": 0.2177, "step": 3625 }, { "epoch": 0.8996795661819078, "grad_norm": 22.156328201293945, "learning_rate": 8.99679566181908e-06, "loss": 0.1992, "step": 3650 }, { "epoch": 0.905841754991373, "grad_norm": 20.95226287841797, "learning_rate": 9.058417549913731e-06, "loss": 0.1756, "step": 3675 }, { "epoch": 0.912003943800838, "grad_norm": 0.2742937505245209, "learning_rate": 9.120039438008381e-06, "loss": 0.1651, "step": 3700 }, { "epoch": 0.9181661326103032, "grad_norm": 1.6180331707000732, "learning_rate": 9.181661326103033e-06, "loss": 0.2556, "step": 3725 }, { "epoch": 0.9243283214197683, "grad_norm": 42.67359924316406, "learning_rate": 9.243283214197683e-06, "loss": 0.2887, "step": 3750 }, { "epoch": 0.9304905102292335, "grad_norm": 14.541348457336426, "learning_rate": 9.304905102292335e-06, "loss": 0.2773, "step": 3775 }, { "epoch": 0.9366526990386985, "grad_norm": 12.932967185974121, "learning_rate": 9.366526990386986e-06, "loss": 0.1989, "step": 3800 }, { "epoch": 0.9428148878481637, "grad_norm": 40.763427734375, "learning_rate": 9.428148878481637e-06, "loss": 0.235, "step": 3825 }, { "epoch": 0.9489770766576288, "grad_norm": 1.5332688093185425, "learning_rate": 9.489770766576288e-06, "loss": 0.2553, "step": 3850 }, { "epoch": 0.9551392654670939, "grad_norm": 6.034736156463623, "learning_rate": 9.55139265467094e-06, "loss": 0.2093, "step": 3875 }, { "epoch": 0.961301454276559, "grad_norm": 1.0317189693450928, "learning_rate": 9.613014542765591e-06, "loss": 0.1837, "step": 3900 }, { "epoch": 0.9674636430860242, "grad_norm": 73.78177642822266, "learning_rate": 9.674636430860243e-06, "loss": 0.1555, "step": 3925 }, { "epoch": 0.9736258318954892, "grad_norm": 10.327101707458496, "learning_rate": 9.736258318954893e-06, "loss": 0.2798, "step": 3950 }, { "epoch": 0.9797880207049544, "grad_norm": 10.050827980041504, "learning_rate": 9.797880207049545e-06, "loss": 0.2097, "step": 3975 }, { "epoch": 0.9859502095144195, "grad_norm": 0.25628790259361267, "learning_rate": 9.859502095144195e-06, "loss": 0.1993, "step": 4000 }, { "epoch": 0.9921123983238846, "grad_norm": 25.14055824279785, "learning_rate": 9.921123983238847e-06, "loss": 0.1683, "step": 4025 }, { "epoch": 0.9982745871333497, "grad_norm": 1.2002718448638916, "learning_rate": 9.982745871333497e-06, "loss": 0.2258, "step": 4050 }, { "epoch": 1.0, "eval_accuracy": 0.9549234863606121, "eval_auc": 0.9877450477910369, "eval_f1": 0.9647089464774059, "eval_loss": 0.15075403451919556, "eval_precision": 0.9497435897435897, "eval_recall": 0.9801534797565493, "eval_runtime": 983.9028, "eval_samples_per_second": 6.11, "eval_steps_per_second": 0.255, "step": 4057 }, { "epoch": 1.004436775942815, "grad_norm": 2.7878289222717285, "learning_rate": 1.0044367759428151e-05, "loss": 0.1451, "step": 4075 }, { "epoch": 1.01059896475228, "grad_norm": 37.771907806396484, "learning_rate": 1.0105989647522801e-05, "loss": 0.1963, "step": 4100 }, { "epoch": 1.0167611535617451, "grad_norm": 5.818455696105957, "learning_rate": 1.0167611535617451e-05, "loss": 0.2483, "step": 4125 }, { "epoch": 1.0229233423712103, "grad_norm": 40.85443878173828, "learning_rate": 1.0229233423712103e-05, "loss": 0.1992, "step": 4150 }, { "epoch": 1.0290855311806755, "grad_norm": 22.063749313354492, "learning_rate": 1.0290855311806755e-05, "loss": 0.2059, "step": 4175 }, { "epoch": 1.0352477199901404, "grad_norm": 42.34719467163086, "learning_rate": 1.0352477199901405e-05, "loss": 0.1812, "step": 4200 }, { "epoch": 1.0414099087996056, "grad_norm": 45.726593017578125, "learning_rate": 1.0414099087996057e-05, "loss": 0.1657, "step": 4225 }, { "epoch": 1.0475720976090708, "grad_norm": 0.11054063588380814, "learning_rate": 1.0475720976090709e-05, "loss": 0.1678, "step": 4250 }, { "epoch": 1.053734286418536, "grad_norm": 63.21506881713867, "learning_rate": 1.053734286418536e-05, "loss": 0.1972, "step": 4275 }, { "epoch": 1.059896475228001, "grad_norm": 23.111759185791016, "learning_rate": 1.059896475228001e-05, "loss": 0.3117, "step": 4300 }, { "epoch": 1.066058664037466, "grad_norm": 41.99272155761719, "learning_rate": 1.0660586640374661e-05, "loss": 0.2708, "step": 4325 }, { "epoch": 1.0722208528469312, "grad_norm": 6.2015299797058105, "learning_rate": 1.0722208528469313e-05, "loss": 0.2081, "step": 4350 }, { "epoch": 1.0783830416563964, "grad_norm": 55.75407791137695, "learning_rate": 1.0783830416563965e-05, "loss": 0.1766, "step": 4375 }, { "epoch": 1.0845452304658614, "grad_norm": 3.844930648803711, "learning_rate": 1.0845452304658615e-05, "loss": 0.1841, "step": 4400 }, { "epoch": 1.0907074192753266, "grad_norm": 32.91348648071289, "learning_rate": 1.0907074192753267e-05, "loss": 0.1732, "step": 4425 }, { "epoch": 1.0968696080847917, "grad_norm": 30.539539337158203, "learning_rate": 1.0968696080847919e-05, "loss": 0.1839, "step": 4450 }, { "epoch": 1.103031796894257, "grad_norm": 1.007257342338562, "learning_rate": 1.103031796894257e-05, "loss": 0.2441, "step": 4475 }, { "epoch": 1.109193985703722, "grad_norm": 15.061387062072754, "learning_rate": 1.109193985703722e-05, "loss": 0.2322, "step": 4500 }, { "epoch": 1.115356174513187, "grad_norm": 15.29909610748291, "learning_rate": 1.1153561745131871e-05, "loss": 0.13, "step": 4525 }, { "epoch": 1.1215183633226522, "grad_norm": 35.97024154663086, "learning_rate": 1.1215183633226523e-05, "loss": 0.2061, "step": 4550 }, { "epoch": 1.1276805521321174, "grad_norm": 20.934701919555664, "learning_rate": 1.1276805521321175e-05, "loss": 0.1069, "step": 4575 }, { "epoch": 1.1338427409415823, "grad_norm": 20.788047790527344, "learning_rate": 1.1338427409415827e-05, "loss": 0.2287, "step": 4600 }, { "epoch": 1.1400049297510475, "grad_norm": 0.3708113431930542, "learning_rate": 1.1400049297510475e-05, "loss": 0.1654, "step": 4625 }, { "epoch": 1.1461671185605127, "grad_norm": 47.05365753173828, "learning_rate": 1.1461671185605127e-05, "loss": 0.2401, "step": 4650 }, { "epoch": 1.1523293073699779, "grad_norm": 14.986821174621582, "learning_rate": 1.1523293073699779e-05, "loss": 0.2637, "step": 4675 }, { "epoch": 1.158491496179443, "grad_norm": 3.1243083477020264, "learning_rate": 1.158491496179443e-05, "loss": 0.1247, "step": 4700 }, { "epoch": 1.164653684988908, "grad_norm": 36.42319107055664, "learning_rate": 1.1646536849889081e-05, "loss": 0.2268, "step": 4725 }, { "epoch": 1.1708158737983732, "grad_norm": 36.01278305053711, "learning_rate": 1.1708158737983733e-05, "loss": 0.2738, "step": 4750 }, { "epoch": 1.1769780626078383, "grad_norm": 33.12614059448242, "learning_rate": 1.1769780626078385e-05, "loss": 0.131, "step": 4775 }, { "epoch": 1.1831402514173035, "grad_norm": 9.680112838745117, "learning_rate": 1.1831402514173036e-05, "loss": 0.2766, "step": 4800 }, { "epoch": 1.1893024402267685, "grad_norm": 33.452796936035156, "learning_rate": 1.1893024402267685e-05, "loss": 0.2038, "step": 4825 }, { "epoch": 1.1954646290362336, "grad_norm": 7.234158992767334, "learning_rate": 1.1954646290362337e-05, "loss": 0.1347, "step": 4850 }, { "epoch": 1.2016268178456988, "grad_norm": 0.05366240069270134, "learning_rate": 1.2016268178456989e-05, "loss": 0.186, "step": 4875 }, { "epoch": 1.207789006655164, "grad_norm": 5.159832000732422, "learning_rate": 1.207789006655164e-05, "loss": 0.1855, "step": 4900 }, { "epoch": 1.2139511954646292, "grad_norm": 23.99695587158203, "learning_rate": 1.213951195464629e-05, "loss": 0.2342, "step": 4925 }, { "epoch": 1.220113384274094, "grad_norm": 44.656227111816406, "learning_rate": 1.2201133842740943e-05, "loss": 0.2285, "step": 4950 }, { "epoch": 1.2262755730835593, "grad_norm": 29.628063201904297, "learning_rate": 1.2262755730835594e-05, "loss": 0.172, "step": 4975 }, { "epoch": 1.2324377618930245, "grad_norm": 34.27619171142578, "learning_rate": 1.2324377618930246e-05, "loss": 0.1384, "step": 5000 }, { "epoch": 1.2385999507024894, "grad_norm": 11.067872047424316, "learning_rate": 1.2385999507024897e-05, "loss": 0.1456, "step": 5025 }, { "epoch": 1.2447621395119546, "grad_norm": 37.17292404174805, "learning_rate": 1.2447621395119547e-05, "loss": 0.1672, "step": 5050 }, { "epoch": 1.2509243283214198, "grad_norm": 39.933570861816406, "learning_rate": 1.2509243283214199e-05, "loss": 0.3413, "step": 5075 }, { "epoch": 1.257086517130885, "grad_norm": 5.871866703033447, "learning_rate": 1.257086517130885e-05, "loss": 0.1816, "step": 5100 }, { "epoch": 1.26324870594035, "grad_norm": 54.147586822509766, "learning_rate": 1.2632487059403502e-05, "loss": 0.3126, "step": 5125 }, { "epoch": 1.269410894749815, "grad_norm": 2.5215351581573486, "learning_rate": 1.269410894749815e-05, "loss": 0.1627, "step": 5150 }, { "epoch": 1.2755730835592802, "grad_norm": 4.390874862670898, "learning_rate": 1.2755730835592803e-05, "loss": 0.1663, "step": 5175 }, { "epoch": 1.2817352723687454, "grad_norm": 31.431177139282227, "learning_rate": 1.2817352723687454e-05, "loss": 0.1623, "step": 5200 }, { "epoch": 1.2878974611782106, "grad_norm": 1.1008161306381226, "learning_rate": 1.2878974611782106e-05, "loss": 0.2136, "step": 5225 }, { "epoch": 1.2940596499876755, "grad_norm": 4.677889347076416, "learning_rate": 1.2940596499876757e-05, "loss": 0.1372, "step": 5250 }, { "epoch": 1.3002218387971407, "grad_norm": 37.059410095214844, "learning_rate": 1.3002218387971408e-05, "loss": 0.2209, "step": 5275 }, { "epoch": 1.3063840276066059, "grad_norm": 46.167945861816406, "learning_rate": 1.306384027606606e-05, "loss": 0.2268, "step": 5300 }, { "epoch": 1.312546216416071, "grad_norm": 37.71957015991211, "learning_rate": 1.3125462164160712e-05, "loss": 0.3368, "step": 5325 }, { "epoch": 1.3187084052255362, "grad_norm": 27.41573143005371, "learning_rate": 1.318708405225536e-05, "loss": 0.2033, "step": 5350 }, { "epoch": 1.3248705940350012, "grad_norm": 4.437679767608643, "learning_rate": 1.3248705940350012e-05, "loss": 0.131, "step": 5375 }, { "epoch": 1.3310327828444664, "grad_norm": 45.747215270996094, "learning_rate": 1.3310327828444664e-05, "loss": 0.1056, "step": 5400 }, { "epoch": 1.3371949716539315, "grad_norm": 33.86547088623047, "learning_rate": 1.3371949716539316e-05, "loss": 0.1412, "step": 5425 }, { "epoch": 1.3433571604633965, "grad_norm": 26.170652389526367, "learning_rate": 1.3433571604633968e-05, "loss": 0.1896, "step": 5450 }, { "epoch": 1.3495193492728617, "grad_norm": 10.616788864135742, "learning_rate": 1.3495193492728618e-05, "loss": 0.1377, "step": 5475 }, { "epoch": 1.3556815380823268, "grad_norm": 64.72732543945312, "learning_rate": 1.355681538082327e-05, "loss": 0.2713, "step": 5500 }, { "epoch": 1.361843726891792, "grad_norm": 18.686676025390625, "learning_rate": 1.3618437268917922e-05, "loss": 0.1511, "step": 5525 }, { "epoch": 1.3680059157012572, "grad_norm": 3.992441415786743, "learning_rate": 1.3680059157012572e-05, "loss": 0.1855, "step": 5550 }, { "epoch": 1.3741681045107221, "grad_norm": 24.43255615234375, "learning_rate": 1.3741681045107222e-05, "loss": 0.2355, "step": 5575 }, { "epoch": 1.3803302933201873, "grad_norm": 28.365781784057617, "learning_rate": 1.3803302933201874e-05, "loss": 0.2185, "step": 5600 }, { "epoch": 1.3864924821296525, "grad_norm": 0.04500554874539375, "learning_rate": 1.3864924821296526e-05, "loss": 0.1762, "step": 5625 }, { "epoch": 1.3926546709391177, "grad_norm": 26.34282875061035, "learning_rate": 1.3926546709391178e-05, "loss": 0.2105, "step": 5650 }, { "epoch": 1.3988168597485826, "grad_norm": 1.0606616735458374, "learning_rate": 1.3988168597485826e-05, "loss": 0.2464, "step": 5675 }, { "epoch": 1.4049790485580478, "grad_norm": 2.248135805130005, "learning_rate": 1.4049790485580478e-05, "loss": 0.156, "step": 5700 }, { "epoch": 1.411141237367513, "grad_norm": 28.349903106689453, "learning_rate": 1.411141237367513e-05, "loss": 0.2439, "step": 5725 }, { "epoch": 1.4173034261769781, "grad_norm": 27.21099853515625, "learning_rate": 1.4173034261769782e-05, "loss": 0.294, "step": 5750 }, { "epoch": 1.4234656149864433, "grad_norm": 36.59619140625, "learning_rate": 1.4234656149864432e-05, "loss": 0.2127, "step": 5775 }, { "epoch": 1.4296278037959083, "grad_norm": 0.7626227736473083, "learning_rate": 1.4296278037959084e-05, "loss": 0.2177, "step": 5800 }, { "epoch": 1.4357899926053734, "grad_norm": 0.035361841320991516, "learning_rate": 1.4357899926053736e-05, "loss": 0.1618, "step": 5825 }, { "epoch": 1.4419521814148386, "grad_norm": 0.6795966625213623, "learning_rate": 1.4419521814148388e-05, "loss": 0.1789, "step": 5850 }, { "epoch": 1.4481143702243036, "grad_norm": 9.954318046569824, "learning_rate": 1.448114370224304e-05, "loss": 0.2372, "step": 5875 }, { "epoch": 1.4542765590337687, "grad_norm": 13.679413795471191, "learning_rate": 1.4542765590337688e-05, "loss": 0.2227, "step": 5900 }, { "epoch": 1.460438747843234, "grad_norm": 2.538703680038452, "learning_rate": 1.460438747843234e-05, "loss": 0.1769, "step": 5925 }, { "epoch": 1.466600936652699, "grad_norm": 0.4689609110355377, "learning_rate": 1.4666009366526992e-05, "loss": 0.1778, "step": 5950 }, { "epoch": 1.4727631254621643, "grad_norm": 23.670265197753906, "learning_rate": 1.4727631254621644e-05, "loss": 0.2311, "step": 5975 }, { "epoch": 1.4789253142716292, "grad_norm": 0.7815973162651062, "learning_rate": 1.4789253142716294e-05, "loss": 0.231, "step": 6000 }, { "epoch": 1.4850875030810944, "grad_norm": 24.239229202270508, "learning_rate": 1.4850875030810946e-05, "loss": 0.1953, "step": 6025 }, { "epoch": 1.4912496918905596, "grad_norm": 5.256735324859619, "learning_rate": 1.4912496918905598e-05, "loss": 0.1957, "step": 6050 }, { "epoch": 1.4974118807000245, "grad_norm": 41.954315185546875, "learning_rate": 1.4974118807000248e-05, "loss": 0.1501, "step": 6075 }, { "epoch": 1.5035740695094897, "grad_norm": 52.346153259277344, "learning_rate": 1.5035740695094898e-05, "loss": 0.2342, "step": 6100 }, { "epoch": 1.5097362583189549, "grad_norm": 19.089778900146484, "learning_rate": 1.509736258318955e-05, "loss": 0.2156, "step": 6125 }, { "epoch": 1.51589844712842, "grad_norm": 0.36536940932273865, "learning_rate": 1.5158984471284202e-05, "loss": 0.1642, "step": 6150 }, { "epoch": 1.5220606359378852, "grad_norm": 3.5076828002929688, "learning_rate": 1.5220606359378854e-05, "loss": 0.1871, "step": 6175 }, { "epoch": 1.5282228247473504, "grad_norm": 1.7674132585525513, "learning_rate": 1.5282228247473504e-05, "loss": 0.1761, "step": 6200 }, { "epoch": 1.5343850135568153, "grad_norm": 6.745855808258057, "learning_rate": 1.5343850135568156e-05, "loss": 0.2078, "step": 6225 }, { "epoch": 1.5405472023662805, "grad_norm": 27.79926872253418, "learning_rate": 1.5405472023662807e-05, "loss": 0.248, "step": 6250 }, { "epoch": 1.5467093911757455, "grad_norm": 30.88478660583496, "learning_rate": 1.546709391175746e-05, "loss": 0.318, "step": 6275 }, { "epoch": 1.5528715799852106, "grad_norm": 0.03306032344698906, "learning_rate": 1.5528715799852108e-05, "loss": 0.1467, "step": 6300 }, { "epoch": 1.5590337687946758, "grad_norm": 32.26327896118164, "learning_rate": 1.559033768794676e-05, "loss": 0.2073, "step": 6325 }, { "epoch": 1.565195957604141, "grad_norm": 1.14533531665802, "learning_rate": 1.565195957604141e-05, "loss": 0.189, "step": 6350 }, { "epoch": 1.5713581464136062, "grad_norm": 36.83344650268555, "learning_rate": 1.5713581464136063e-05, "loss": 0.2339, "step": 6375 }, { "epoch": 1.5775203352230713, "grad_norm": 1.0186835527420044, "learning_rate": 1.5775203352230715e-05, "loss": 0.1843, "step": 6400 }, { "epoch": 1.5836825240325365, "grad_norm": 27.14068031311035, "learning_rate": 1.5836825240325364e-05, "loss": 0.2046, "step": 6425 }, { "epoch": 1.5898447128420015, "grad_norm": 25.2268123626709, "learning_rate": 1.5898447128420016e-05, "loss": 0.1091, "step": 6450 }, { "epoch": 1.5960069016514666, "grad_norm": 1.3055094480514526, "learning_rate": 1.5960069016514668e-05, "loss": 0.2874, "step": 6475 }, { "epoch": 1.6021690904609316, "grad_norm": 0.2848321795463562, "learning_rate": 1.602169090460932e-05, "loss": 0.1428, "step": 6500 }, { "epoch": 1.6083312792703968, "grad_norm": 22.48772430419922, "learning_rate": 1.6083312792703968e-05, "loss": 0.2076, "step": 6525 }, { "epoch": 1.614493468079862, "grad_norm": 2.2502341270446777, "learning_rate": 1.614493468079862e-05, "loss": 0.1597, "step": 6550 }, { "epoch": 1.6206556568893271, "grad_norm": 5.433531284332275, "learning_rate": 1.620655656889327e-05, "loss": 0.2692, "step": 6575 }, { "epoch": 1.6268178456987923, "grad_norm": 7.885830879211426, "learning_rate": 1.6268178456987923e-05, "loss": 0.2, "step": 6600 }, { "epoch": 1.6329800345082575, "grad_norm": 0.7249619960784912, "learning_rate": 1.6329800345082575e-05, "loss": 0.1045, "step": 6625 }, { "epoch": 1.6391422233177224, "grad_norm": 0.8871287107467651, "learning_rate": 1.6391422233177224e-05, "loss": 0.1717, "step": 6650 }, { "epoch": 1.6453044121271876, "grad_norm": 44.935760498046875, "learning_rate": 1.6453044121271876e-05, "loss": 0.154, "step": 6675 }, { "epoch": 1.6514666009366525, "grad_norm": 3.8901209831237793, "learning_rate": 1.6514666009366528e-05, "loss": 0.2326, "step": 6700 }, { "epoch": 1.6576287897461177, "grad_norm": 40.206512451171875, "learning_rate": 1.657628789746118e-05, "loss": 0.1931, "step": 6725 }, { "epoch": 1.663790978555583, "grad_norm": 37.017234802246094, "learning_rate": 1.663790978555583e-05, "loss": 0.2989, "step": 6750 }, { "epoch": 1.669953167365048, "grad_norm": 31.788745880126953, "learning_rate": 1.6699531673650483e-05, "loss": 0.2178, "step": 6775 }, { "epoch": 1.6761153561745132, "grad_norm": 56.15694046020508, "learning_rate": 1.6761153561745135e-05, "loss": 0.3655, "step": 6800 }, { "epoch": 1.6822775449839784, "grad_norm": 73.77818298339844, "learning_rate": 1.6822775449839787e-05, "loss": 0.148, "step": 6825 }, { "epoch": 1.6884397337934436, "grad_norm": 75.71527862548828, "learning_rate": 1.6884397337934435e-05, "loss": 0.1407, "step": 6850 }, { "epoch": 1.6946019226029085, "grad_norm": 11.59433650970459, "learning_rate": 1.6946019226029087e-05, "loss": 0.1668, "step": 6875 }, { "epoch": 1.7007641114123737, "grad_norm": 28.444887161254883, "learning_rate": 1.700764111412374e-05, "loss": 0.2934, "step": 6900 }, { "epoch": 1.7069263002218387, "grad_norm": 34.28888702392578, "learning_rate": 1.706926300221839e-05, "loss": 0.1816, "step": 6925 }, { "epoch": 1.7130884890313038, "grad_norm": 67.61705780029297, "learning_rate": 1.713088489031304e-05, "loss": 0.1957, "step": 6950 }, { "epoch": 1.719250677840769, "grad_norm": 16.545900344848633, "learning_rate": 1.719250677840769e-05, "loss": 0.17, "step": 6975 }, { "epoch": 1.7254128666502342, "grad_norm": 31.625978469848633, "learning_rate": 1.7254128666502343e-05, "loss": 0.1732, "step": 7000 }, { "epoch": 1.7315750554596994, "grad_norm": 0.031005702912807465, "learning_rate": 1.7315750554596995e-05, "loss": 0.1023, "step": 7025 }, { "epoch": 1.7377372442691645, "grad_norm": 2.187574625015259, "learning_rate": 1.7377372442691644e-05, "loss": 0.241, "step": 7050 }, { "epoch": 1.7438994330786295, "grad_norm": 20.48172950744629, "learning_rate": 1.7438994330786295e-05, "loss": 0.194, "step": 7075 }, { "epoch": 1.7500616218880947, "grad_norm": 27.909399032592773, "learning_rate": 1.7500616218880947e-05, "loss": 0.1929, "step": 7100 }, { "epoch": 1.7562238106975596, "grad_norm": 46.39067840576172, "learning_rate": 1.75622381069756e-05, "loss": 0.2909, "step": 7125 }, { "epoch": 1.7623859995070248, "grad_norm": 58.90078353881836, "learning_rate": 1.762385999507025e-05, "loss": 0.188, "step": 7150 }, { "epoch": 1.76854818831649, "grad_norm": 35.54610061645508, "learning_rate": 1.76854818831649e-05, "loss": 0.1947, "step": 7175 }, { "epoch": 1.7747103771259551, "grad_norm": 1.9310208559036255, "learning_rate": 1.774710377125955e-05, "loss": 0.0414, "step": 7200 }, { "epoch": 1.7808725659354203, "grad_norm": 37.13237380981445, "learning_rate": 1.7808725659354203e-05, "loss": 0.2563, "step": 7225 }, { "epoch": 1.7870347547448855, "grad_norm": 26.06975746154785, "learning_rate": 1.7870347547448855e-05, "loss": 0.1927, "step": 7250 }, { "epoch": 1.7931969435543507, "grad_norm": 34.53322219848633, "learning_rate": 1.7931969435543507e-05, "loss": 0.1908, "step": 7275 }, { "epoch": 1.7993591323638156, "grad_norm": 0.15388120710849762, "learning_rate": 1.799359132363816e-05, "loss": 0.1858, "step": 7300 }, { "epoch": 1.8055213211732808, "grad_norm": 8.441184043884277, "learning_rate": 1.805521321173281e-05, "loss": 0.2472, "step": 7325 }, { "epoch": 1.8116835099827457, "grad_norm": 0.6277154088020325, "learning_rate": 1.8116835099827463e-05, "loss": 0.1193, "step": 7350 }, { "epoch": 1.817845698792211, "grad_norm": 2.0958328247070312, "learning_rate": 1.817845698792211e-05, "loss": 0.1793, "step": 7375 }, { "epoch": 1.824007887601676, "grad_norm": 0.14230966567993164, "learning_rate": 1.8240078876016763e-05, "loss": 0.1448, "step": 7400 }, { "epoch": 1.8301700764111413, "grad_norm": 2.377575397491455, "learning_rate": 1.8301700764111415e-05, "loss": 0.3275, "step": 7425 }, { "epoch": 1.8363322652206064, "grad_norm": 24.860849380493164, "learning_rate": 1.8363322652206067e-05, "loss": 0.1597, "step": 7450 }, { "epoch": 1.8424944540300716, "grad_norm": 3.0974838733673096, "learning_rate": 1.8424944540300715e-05, "loss": 0.1728, "step": 7475 }, { "epoch": 1.8486566428395366, "grad_norm": 8.12142562866211, "learning_rate": 1.8486566428395367e-05, "loss": 0.1942, "step": 7500 }, { "epoch": 1.8548188316490017, "grad_norm": 15.322603225708008, "learning_rate": 1.854818831649002e-05, "loss": 0.235, "step": 7525 }, { "epoch": 1.8609810204584667, "grad_norm": 0.6635432839393616, "learning_rate": 1.860981020458467e-05, "loss": 0.0823, "step": 7550 }, { "epoch": 1.8671432092679319, "grad_norm": 16.350248336791992, "learning_rate": 1.867143209267932e-05, "loss": 0.2041, "step": 7575 }, { "epoch": 1.873305398077397, "grad_norm": 10.031082153320312, "learning_rate": 1.873305398077397e-05, "loss": 0.2076, "step": 7600 }, { "epoch": 1.8794675868868622, "grad_norm": 32.381011962890625, "learning_rate": 1.8794675868868623e-05, "loss": 0.171, "step": 7625 }, { "epoch": 1.8856297756963274, "grad_norm": 49.66177749633789, "learning_rate": 1.8856297756963275e-05, "loss": 0.1971, "step": 7650 }, { "epoch": 1.8917919645057926, "grad_norm": 23.314538955688477, "learning_rate": 1.8917919645057927e-05, "loss": 0.173, "step": 7675 }, { "epoch": 1.8979541533152577, "grad_norm": 19.56601333618164, "learning_rate": 1.8979541533152575e-05, "loss": 0.2566, "step": 7700 }, { "epoch": 1.9041163421247227, "grad_norm": 17.303730010986328, "learning_rate": 1.9041163421247227e-05, "loss": 0.1466, "step": 7725 }, { "epoch": 1.9102785309341879, "grad_norm": 2.6724612712860107, "learning_rate": 1.910278530934188e-05, "loss": 0.2922, "step": 7750 }, { "epoch": 1.9164407197436528, "grad_norm": 26.80803108215332, "learning_rate": 1.916440719743653e-05, "loss": 0.2612, "step": 7775 }, { "epoch": 1.922602908553118, "grad_norm": 24.797290802001953, "learning_rate": 1.9226029085531183e-05, "loss": 0.2675, "step": 7800 }, { "epoch": 1.9287650973625832, "grad_norm": 0.6875481009483337, "learning_rate": 1.9287650973625834e-05, "loss": 0.1585, "step": 7825 }, { "epoch": 1.9349272861720483, "grad_norm": 15.256152153015137, "learning_rate": 1.9349272861720486e-05, "loss": 0.1762, "step": 7850 }, { "epoch": 1.9410894749815135, "grad_norm": 0.29509520530700684, "learning_rate": 1.9410894749815138e-05, "loss": 0.1418, "step": 7875 }, { "epoch": 1.9472516637909787, "grad_norm": 5.533542633056641, "learning_rate": 1.9472516637909787e-05, "loss": 0.1703, "step": 7900 }, { "epoch": 1.9534138526004436, "grad_norm": 0.09616789221763611, "learning_rate": 1.953413852600444e-05, "loss": 0.1377, "step": 7925 }, { "epoch": 1.9595760414099088, "grad_norm": 0.3136278986930847, "learning_rate": 1.959576041409909e-05, "loss": 0.2255, "step": 7950 }, { "epoch": 1.9657382302193738, "grad_norm": 7.052049160003662, "learning_rate": 1.9657382302193742e-05, "loss": 0.0848, "step": 7975 }, { "epoch": 1.971900419028839, "grad_norm": 0.18716296553611755, "learning_rate": 1.971900419028839e-05, "loss": 0.299, "step": 8000 }, { "epoch": 1.9780626078383041, "grad_norm": 0.731046736240387, "learning_rate": 1.9780626078383043e-05, "loss": 0.1371, "step": 8025 }, { "epoch": 1.9842247966477693, "grad_norm": 27.993221282958984, "learning_rate": 1.9842247966477694e-05, "loss": 0.2037, "step": 8050 }, { "epoch": 1.9903869854572345, "grad_norm": 10.041696548461914, "learning_rate": 1.9903869854572346e-05, "loss": 0.1971, "step": 8075 }, { "epoch": 1.9965491742666996, "grad_norm": 10.864248275756836, "learning_rate": 1.9965491742666995e-05, "loss": 0.2439, "step": 8100 }, { "epoch": 2.0, "eval_accuracy": 0.9629075182967398, "eval_auc": 0.9915818046960203, "eval_f1": 0.970191150915653, "eval_loss": 0.11630760878324509, "eval_precision": 0.9802809292274446, "eval_recall": 0.9603069595130987, "eval_runtime": 968.3715, "eval_samples_per_second": 6.208, "eval_steps_per_second": 0.259, "step": 8114 } ], "logging_steps": 25, "max_steps": 81140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.241813146148746e+19, "train_batch_size": 12, "trial_name": null, "trial_params": null }