[ { "loss": 3.0203263759613037, "learning_rate": 1.2499999999999999e-06, "epoch": 0.000423953540653791, "total_flos": 990705981560736, "step": 32 }, { "loss": 2.020397424697876, "learning_rate": 2.4999999999999998e-06, "epoch": 0.000847907081307582, "total_flos": 2003002561008000, "step": 64 }, { "loss": 1.762237548828125, "learning_rate": 3.75e-06, "epoch": 0.0012718606219613731, "total_flos": 2977620168584736, "step": 96 }, { "loss": 1.6904010772705078, "learning_rate": 4.9999999999999996e-06, "epoch": 0.001695814162615164, "total_flos": 3948923571120768, "step": 128 }, { "loss": 1.6540651321411133, "learning_rate": 6.25e-06, "epoch": 0.002119767703268955, "total_flos": 4946933674470240, "step": 160 }, { "loss": 1.5807685852050781, "learning_rate": 7.5e-06, "epoch": 0.0025437212439227462, "total_flos": 5942562698470080, "step": 192 }, { "loss": 1.5555791854858398, "learning_rate": 8.750000000000001e-06, "epoch": 0.002967674784576537, "total_flos": 6949276612144896, "step": 224 }, { "loss": 1.5452680587768555, "learning_rate": 9.999999999999999e-06, "epoch": 0.003391628325230328, "total_flos": 7981764100942080, "step": 256 }, { "loss": 1.5814008712768555, "learning_rate": 1.125e-05, "epoch": 0.003815581865884119, "total_flos": 8983603237299744, "step": 288 }, { "loss": 1.5622310638427734, "learning_rate": 1.25e-05, "epoch": 0.00423953540653791, "total_flos": 9978685256584128, "step": 320 }, { "loss": 1.495096206665039, "learning_rate": 1.375e-05, "epoch": 0.004663488947191701, "total_flos": 11002163255950272, "step": 352 }, { "loss": 1.5011672973632812, "learning_rate": 1.5e-05, "epoch": 0.0050874424878454925, "total_flos": 12010244681413728, "step": 384 }, { "loss": 1.4999103546142578, "learning_rate": 1.625e-05, "epoch": 0.005511396028499283, "total_flos": 13029604057039968, "step": 416 }, { "loss": 1.5394706726074219, "learning_rate": 1.7500000000000002e-05, "epoch": 0.005935349569153074, "total_flos": 13963534166811168, "step": 448 }, { "loss": 1.4823856353759766, "learning_rate": 1.8750000000000002e-05, "epoch": 0.006359303109806865, "total_flos": 14926053317151936, "step": 480 }, { "loss": 1.4906253814697266, "learning_rate": 1.9999999999999998e-05, "epoch": 0.006783256650460656, "total_flos": 15952298516843328, "step": 512 }, { "loss": 1.5079078674316406, "learning_rate": 2.125e-05, "epoch": 0.0072072101911144475, "total_flos": 16962455342550720, "step": 544 }, { "loss": 1.5193252563476562, "learning_rate": 2.25e-05, "epoch": 0.007631163731768238, "total_flos": 17960578064518080, "step": 576 }, { "loss": 1.5012016296386719, "learning_rate": 2.3749999999999998e-05, "epoch": 0.00805511727242203, "total_flos": 18926105740793856, "step": 608 }, { "loss": 1.4594497680664062, "learning_rate": 2.5e-05, "epoch": 0.00847907081307582, "total_flos": 19916329071135072, "step": 640 }, { "loss": 1.4904594421386719, "learning_rate": 2.625e-05, "epoch": 0.00890302435372961, "total_flos": 20917444230663456, "step": 672 }, { "loss": 1.4794464111328125, "learning_rate": 2.75e-05, "epoch": 0.009326977894383403, "total_flos": 21867478802792640, "step": 704 }, { "loss": 1.4694061279296875, "learning_rate": 2.875e-05, "epoch": 0.009750931435037193, "total_flos": 22879871912483808, "step": 736 }, { "loss": 1.4405136108398438, "learning_rate": 3e-05, "epoch": 0.010174884975690985, "total_flos": 23877978546077184, "step": 768 }, { "loss": 1.4932212829589844, "learning_rate": 2.9987150486541474e-05, "epoch": 0.010598838516344775, "total_flos": 24892640116500096, "step": 800 }, { "loss": 1.4540672302246094, "learning_rate": 2.9974300973082948e-05, "epoch": 0.011022792056998566, "total_flos": 25923261353915136, "step": 832 }, { "loss": 1.4735946655273438, "learning_rate": 2.9961451459624418e-05, "epoch": 0.011446745597652358, "total_flos": 26916107089215744, "step": 864 }, { "loss": 1.4594268798828125, "learning_rate": 2.994860194616589e-05, "epoch": 0.011870699138306148, "total_flos": 27932071817931360, "step": 896 }, { "loss": 1.4671287536621094, "learning_rate": 2.9935752432707365e-05, "epoch": 0.01229465267895994, "total_flos": 28936501182500448, "step": 928 }, { "loss": 1.4598159790039062, "learning_rate": 2.9922902919248842e-05, "epoch": 0.01271860621961373, "total_flos": 29906597956987680, "step": 960 }, { "loss": 1.4460601806640625, "learning_rate": 2.9910053405790315e-05, "epoch": 0.01314255976026752, "total_flos": 30888777100336896, "step": 992 }, { "loss": 1.4822959899902344, "learning_rate": 2.9897203892331785e-05, "epoch": 0.013566513300921313, "total_flos": 31885725371003424, "step": 1024 }, { "loss": 1.4511146545410156, "learning_rate": 2.988435437887326e-05, "epoch": 0.013990466841575103, "total_flos": 32861597871750912, "step": 1056 }, { "loss": 1.4542427062988281, "learning_rate": 2.9871504865414732e-05, "epoch": 0.014414420382228895, "total_flos": 33821462440384320, "step": 1088 }, { "loss": 1.446197509765625, "learning_rate": 2.9858655351956206e-05, "epoch": 0.014838373922882685, "total_flos": 34811122677636096, "step": 1120 }, { "loss": 1.4471473693847656, "learning_rate": 2.984580583849768e-05, "epoch": 0.015262327463536476, "total_flos": 35819558047327200, "step": 1152 }, { "loss": 1.4424285888671875, "learning_rate": 2.9832956325039153e-05, "epoch": 0.015686281004190266, "total_flos": 36799838762546304, "step": 1184 }, { "loss": 1.4445915222167969, "learning_rate": 2.9820106811580623e-05, "epoch": 0.01611023454484406, "total_flos": 37785895204025664, "step": 1216 }, { "loss": 1.4910545349121094, "learning_rate": 2.9807257298122096e-05, "epoch": 0.01653418808549785, "total_flos": 38737876469406912, "step": 1248 }, { "loss": 1.415313720703125, "learning_rate": 2.979440778466357e-05, "epoch": 0.01695814162615164, "total_flos": 39740600466333696, "step": 1280 }, { "loss": 1.4487762451171875, "learning_rate": 2.9781558271205043e-05, "epoch": 0.01738209516680543, "total_flos": 40731966071227776, "step": 1312 }, { "loss": 1.4305191040039062, "learning_rate": 2.976870875774652e-05, "epoch": 0.01780604870745922, "total_flos": 41709141730267968, "step": 1344 }, { "loss": 1.4301834106445312, "learning_rate": 2.975585924428799e-05, "epoch": 0.018230002248113015, "total_flos": 42764185119390720, "step": 1376 }, { "loss": 1.4452896118164062, "learning_rate": 2.9743009730829464e-05, "epoch": 0.018653955788766805, "total_flos": 43781066885423424, "step": 1408 }, { "loss": 1.4198989868164062, "learning_rate": 2.9730160217370937e-05, "epoch": 0.019077909329420596, "total_flos": 44764436568447456, "step": 1440 }, { "loss": 1.453399658203125, "learning_rate": 2.971731070391241e-05, "epoch": 0.019501862870074386, "total_flos": 45767289272366112, "step": 1472 }, { "loss": 1.433349609375, "learning_rate": 2.9704461190453884e-05, "epoch": 0.019925816410728176, "total_flos": 46760746365878112, "step": 1504 }, { "loss": 1.429931640625, "learning_rate": 2.9691611676995354e-05, "epoch": 0.02034976995138197, "total_flos": 47730328312397856, "step": 1536 }, { "loss": 1.4683990478515625, "learning_rate": 2.9678762163536828e-05, "epoch": 0.02077372349203576, "total_flos": 48726230838755424, "step": 1568 }, { "loss": 1.4522628784179688, "learning_rate": 2.96659126500783e-05, "epoch": 0.02119767703268955, "total_flos": 49701829837145184, "step": 1600 }, { "loss": 1.4383697509765625, "learning_rate": 2.9653063136619775e-05, "epoch": 0.02162163057334334, "total_flos": 50713981621226592, "step": 1632 }, { "loss": 1.4316024780273438, "learning_rate": 2.964021362316125e-05, "epoch": 0.02204558411399713, "total_flos": 51679090999778784, "step": 1664 }, { "loss": 1.4064102172851562, "learning_rate": 2.9627364109702722e-05, "epoch": 0.022469537654650925, "total_flos": 52684533931908864, "step": 1696 }, { "loss": 1.4237823486328125, "learning_rate": 2.9614514596244195e-05, "epoch": 0.022893491195304715, "total_flos": 53673502369079328, "step": 1728 }, { "loss": 1.4352951049804688, "learning_rate": 2.960166508278567e-05, "epoch": 0.023317444735958506, "total_flos": 54669404895436896, "step": 1760 }, { "loss": 1.440277099609375, "learning_rate": 2.9588815569327142e-05, "epoch": 0.023741398276612296, "total_flos": 55664036440249728, "step": 1792 }, { "loss": 1.4329681396484375, "learning_rate": 2.9575966055868616e-05, "epoch": 0.024165351817266086, "total_flos": 56692131802949280, "step": 1824 }, { "loss": 1.420745849609375, "learning_rate": 2.956311654241009e-05, "epoch": 0.02458930535791988, "total_flos": 57677946918818880, "step": 1856 }, { "loss": 1.4187164306640625, "learning_rate": 2.955026702895156e-05, "epoch": 0.02501325889857367, "total_flos": 58638535464281568, "step": 1888 }, { "loss": 1.41278076171875, "learning_rate": 2.9537417515493033e-05, "epoch": 0.02543721243922746, "total_flos": 59619733216817760, "step": 1920 }, { "loss": 1.402587890625, "learning_rate": 2.9524568002034506e-05, "epoch": 0.02586116597988125, "total_flos": 60588221153906592, "step": 1952 }, { "loss": 1.447418212890625, "learning_rate": 2.951171848857598e-05, "epoch": 0.02628511952053504, "total_flos": 61590912974085408, "step": 1984 }, { "loss": 1.431427001953125, "learning_rate": 2.9498868975117453e-05, "epoch": 0.026709073061188835, "total_flos": 62583050820930720, "step": 2016 }, { "loss": 1.4207534790039062, "learning_rate": 2.9486019461658923e-05, "epoch": 0.027133026601842625, "total_flos": 63534147225742848, "step": 2048 }, { "loss": 1.4499435424804688, "learning_rate": 2.9473169948200397e-05, "epoch": 0.027556980142496416, "total_flos": 64477553387790624, "step": 2080 }, { "loss": 1.4471054077148438, "learning_rate": 2.9460320434741874e-05, "epoch": 0.027980933683150206, "total_flos": 65414492023496832, "step": 2112 }, { "loss": 1.3978729248046875, "learning_rate": 2.9447470921283347e-05, "epoch": 0.028404887223803996, "total_flos": 66391265473187424, "step": 2144 }, { "loss": 1.410736083984375, "learning_rate": 2.943462140782482e-05, "epoch": 0.02882884076445779, "total_flos": 67387216264666944, "step": 2176 }, { "loss": 1.3975296020507812, "learning_rate": 2.9421771894366294e-05, "epoch": 0.02925279430511158, "total_flos": 68388991047528672, "step": 2208 }, { "loss": 1.4229736328125, "learning_rate": 2.9408922380907764e-05, "epoch": 0.02967674784576537, "total_flos": 69376962005512128, "step": 2240 }, { "loss": 1.401824951171875, "learning_rate": 2.9396072867449238e-05, "epoch": 0.03010070138641916, "total_flos": 70369099852357440, "step": 2272 }, { "loss": 1.413055419921875, "learning_rate": 2.938322335399071e-05, "epoch": 0.03052465492707295, "total_flos": 71387638720910496, "step": 2304 }, { "loss": 1.4020462036132812, "learning_rate": 2.9370373840532185e-05, "epoch": 0.030948608467726745, "total_flos": 72367549403527968, "step": 2336 }, { "loss": 1.4092025756835938, "learning_rate": 2.935752432707366e-05, "epoch": 0.03137256200838053, "total_flos": 73386281332568832, "step": 2368 }, { "loss": 1.4037322998046875, "learning_rate": 2.934467481361513e-05, "epoch": 0.03179651554903432, "total_flos": 74370085401690432, "step": 2400 }, { "loss": 1.4108963012695312, "learning_rate": 2.9331825300156602e-05, "epoch": 0.03222046908968812, "total_flos": 75356994526990944, "step": 2432 }, { "loss": 1.4197235107421875, "learning_rate": 2.931897578669808e-05, "epoch": 0.03264442263034191, "total_flos": 76319079291234144, "step": 2464 }, { "loss": 1.3584136962890625, "learning_rate": 2.9306126273239552e-05, "epoch": 0.0330683761709957, "total_flos": 77333161680193632, "step": 2496 }, { "loss": 1.4328460693359375, "learning_rate": 2.9293276759781026e-05, "epoch": 0.03349232971164949, "total_flos": 78297740142404352, "step": 2528 }, { "loss": 1.4023361206054688, "learning_rate": 2.9280427246322496e-05, "epoch": 0.03391628325230328, "total_flos": 79282606044208896, "step": 2560 }, { "loss": 1.4247970581054688, "learning_rate": 2.926757773286397e-05, "epoch": 0.03434023679295707, "total_flos": 80251640986013184, "step": 2592 }, { "loss": 1.4202117919921875, "learning_rate": 2.9254728219405443e-05, "epoch": 0.03476419033361086, "total_flos": 81237069980907168, "step": 2624 }, { "loss": 1.377685546875, "learning_rate": 2.9241878705946916e-05, "epoch": 0.03518814387426465, "total_flos": 82202485038565056, "step": 2656 }, { "loss": 1.3993911743164062, "learning_rate": 2.922902919248839e-05, "epoch": 0.03561209741491844, "total_flos": 83181462595491456, "step": 2688 }, { "loss": 1.4008941650390625, "learning_rate": 2.9216179679029863e-05, "epoch": 0.03603605095557223, "total_flos": 84172136400304224, "step": 2720 }, { "loss": 1.3793716430664062, "learning_rate": 2.9203330165571333e-05, "epoch": 0.03646000449622603, "total_flos": 85160509567637280, "step": 2752 }, { "loss": 1.3916702270507812, "learning_rate": 2.9190480652112807e-05, "epoch": 0.03688395803687982, "total_flos": 86156154680011104, "step": 2784 }, { "loss": 1.364410400390625, "learning_rate": 2.917763113865428e-05, "epoch": 0.03730791157753361, "total_flos": 87169320031653504, "step": 2816 }, { "loss": 1.4059906005859375, "learning_rate": 2.9164781625195757e-05, "epoch": 0.0377318651181874, "total_flos": 88154185933458048, "step": 2848 }, { "loss": 1.4061279296875, "learning_rate": 2.915193211173723e-05, "epoch": 0.03815581865884119, "total_flos": 89132809546156800, "step": 2880 }, { "loss": 1.3895263671875, "learning_rate": 2.91390825982787e-05, "epoch": 0.03857977219949498, "total_flos": 90088169370074688, "step": 2912 }, { "loss": 1.4268646240234375, "learning_rate": 2.9126233084820174e-05, "epoch": 0.03900372574014877, "total_flos": 91032026006594016, "step": 2944 }, { "loss": 1.380584716796875, "learning_rate": 2.9113383571361648e-05, "epoch": 0.03942767928080256, "total_flos": 92010569177422848, "step": 2976 }, { "loss": 1.3945770263671875, "learning_rate": 2.910053405790312e-05, "epoch": 0.03985163282145635, "total_flos": 93006729117764160, "step": 3008 }, { "loss": 1.3672027587890625, "learning_rate": 2.9087684544444595e-05, "epoch": 0.04027558636211014, "total_flos": 94023562618674912, "step": 3040 }, { "loss": 1.376129150390625, "learning_rate": 2.9074835030986065e-05, "epoch": 0.04069953990276394, "total_flos": 95038658575195392, "step": 3072 }, { "loss": 1.3983612060546875, "learning_rate": 2.906198551752754e-05, "epoch": 0.04112349344341773, "total_flos": 96035381608626144, "step": 3104 }, { "loss": 1.3933563232421875, "learning_rate": 2.9049136004069012e-05, "epoch": 0.04154744698407152, "total_flos": 97025653204089312, "step": 3136 }, { "loss": 1.3975982666015625, "learning_rate": 2.9036286490610485e-05, "epoch": 0.04197140052472531, "total_flos": 98008588501015776, "step": 3168 }, { "loss": 1.3760986328125, "learning_rate": 2.902343697715196e-05, "epoch": 0.0423953540653791, "total_flos": 99018648796479264, "step": 3200 }, { "loss": 1.408416748046875, "learning_rate": 2.9010587463693436e-05, "epoch": 0.04281930760603289, "total_flos": 100024300877471136, "step": 3232 }, { "loss": 1.386016845703125, "learning_rate": 2.8997737950234906e-05, "epoch": 0.04324326114668668, "total_flos": 101033975051959008, "step": 3264 }, { "loss": 1.39971923828125, "learning_rate": 2.898488843677638e-05, "epoch": 0.04366721468734047, "total_flos": 102008319157178016, "step": 3296 }, { "loss": 1.3941650390625, "learning_rate": 2.8972038923317853e-05, "epoch": 0.04409116822799426, "total_flos": 102988615960771104, "step": 3328 }, { "loss": 1.3987884521484375, "learning_rate": 2.8959189409859326e-05, "epoch": 0.04451512176864805, "total_flos": 103947853082819136, "step": 3360 }, { "loss": 1.383575439453125, "learning_rate": 2.89463398964008e-05, "epoch": 0.04493907530930185, "total_flos": 104942516804379936, "step": 3392 }, { "loss": 1.3425445556640625, "learning_rate": 2.893349038294227e-05, "epoch": 0.04536302884995564, "total_flos": 105945996954883968, "step": 3424 }, { "loss": 1.372650146484375, "learning_rate": 2.8920640869483744e-05, "epoch": 0.04578698239060943, "total_flos": 106952083421973408, "step": 3456 }, { "loss": 1.40899658203125, "learning_rate": 2.8907791356025217e-05, "epoch": 0.04621093593126322, "total_flos": 107916677972558112, "step": 3488 }, { "loss": 1.3840179443359375, "learning_rate": 2.889494184256669e-05, "epoch": 0.04663488947191701, "total_flos": 108891408198752736, "step": 3520 }, { "loss": 1.3770599365234375, "learning_rate": 2.8882092329108164e-05, "epoch": 0.0470588430125708, "total_flos": 109875003119012544, "step": 3552 }, { "loss": 1.3908843994140625, "learning_rate": 2.8869242815649634e-05, "epoch": 0.04748279655322459, "total_flos": 110901200053581984, "step": 3584 }, { "loss": 1.3883209228515625, "learning_rate": 2.885639330219111e-05, "epoch": 0.04790675009387838, "total_flos": 111912048679370688, "step": 3616 }, { "loss": 1.3914947509765625, "learning_rate": 2.8843543788732584e-05, "epoch": 0.04833070363453217, "total_flos": 112904846149549344, "step": 3648 }, { "loss": 1.388641357421875, "learning_rate": 2.8830694275274058e-05, "epoch": 0.04875465717518596, "total_flos": 113886043902085536, "step": 3680 }, { "loss": 1.378143310546875, "learning_rate": 2.881784476181553e-05, "epoch": 0.04917861071583976, "total_flos": 114835402762507392, "step": 3712 }, { "loss": 1.3719329833984375, "learning_rate": 2.8804995248357005e-05, "epoch": 0.04960256425649355, "total_flos": 115807735820978400, "step": 3744 }, { "loss": 1.4041290283203125, "learning_rate": 2.8792145734898475e-05, "epoch": 0.05002651779714734, "total_flos": 116799037072376544, "step": 3776 }, { "loss": 1.3658447265625, "learning_rate": 2.877929622143995e-05, "epoch": 0.05045047133780113, "total_flos": 117781682778571296, "step": 3808 }, { "loss": 1.3819122314453125, "learning_rate": 2.8766446707981422e-05, "epoch": 0.05087442487845492, "total_flos": 118749253678343040, "step": 3840 }, { "loss": 1.3914947509765625, "learning_rate": 2.8753597194522896e-05, "epoch": 0.05129837841910871, "total_flos": 119765861942018016, "step": 3872 }, { "loss": 1.3516387939453125, "learning_rate": 2.874074768106437e-05, "epoch": 0.0517223319597625, "total_flos": 120755490002521824, "step": 3904 }, { "loss": 1.3935394287109375, "learning_rate": 2.872789816760584e-05, "epoch": 0.05214628550041629, "total_flos": 121703449174407072, "step": 3936 }, { "loss": 1.37445068359375, "learning_rate": 2.8715048654147313e-05, "epoch": 0.05257023904107008, "total_flos": 122718577307675520, "step": 3968 }, { "loss": 1.3611907958984375, "learning_rate": 2.870219914068879e-05, "epoch": 0.05299419258172387, "total_flos": 123724760305008864, "step": 4000 }, { "loss": 1.3553466796875, "learning_rate": 2.8689349627230263e-05, "epoch": 0.05341814612237767, "total_flos": 124704027452666976, "step": 4032 }, { "loss": 1.3773651123046875, "learning_rate": 2.8676500113771736e-05, "epoch": 0.05384209966303146, "total_flos": 125696953629837504, "step": 4064 }, { "loss": 1.3795928955078125, "learning_rate": 2.8663650600313207e-05, "epoch": 0.05426605320368525, "total_flos": 126686340364731552, "step": 4096 }, { "loss": 1.3794097900390625, "learning_rate": 2.865080108685468e-05, "epoch": 0.05469000674433904, "total_flos": 127695628418243808, "step": 4128 }, { "loss": 1.3780059814453125, "learning_rate": 2.8637951573396154e-05, "epoch": 0.05511396028499283, "total_flos": 128714006403057024, "step": 4160 }, { "loss": 1.340087890625, "learning_rate": 2.8625102059937627e-05, "epoch": 0.05553791382564662, "total_flos": 129700175463154272, "step": 4192 }, { "loss": 1.3676910400390625, "learning_rate": 2.86122525464791e-05, "epoch": 0.05596186736630041, "total_flos": 130674246066015552, "step": 4224 }, { "loss": 1.3448333740234375, "learning_rate": 2.8599403033020574e-05, "epoch": 0.0563858209069542, "total_flos": 131689309845788064, "step": 4256 }, { "loss": 1.3341064453125, "learning_rate": 2.8586553519562044e-05, "epoch": 0.05680977444760799, "total_flos": 132720735501902304, "step": 4288 }, { "loss": 1.3566131591796875, "learning_rate": 2.8573704006103518e-05, "epoch": 0.05723372798826178, "total_flos": 133717732037690784, "step": 4320 }, { "loss": 1.3648681640625, "learning_rate": 2.8560854492644995e-05, "epoch": 0.05765768152891558, "total_flos": 134683983690795840, "step": 4352 }, { "loss": 1.3654327392578125, "learning_rate": 2.8548004979186468e-05, "epoch": 0.05808163506956937, "total_flos": 135654804442112352, "step": 4384 }, { "loss": 1.379547119140625, "learning_rate": 2.853515546572794e-05, "epoch": 0.05850558861022316, "total_flos": 136643370669933216, "step": 4416 }, { "loss": 1.383087158203125, "learning_rate": 2.852230595226941e-05, "epoch": 0.05892954215087695, "total_flos": 137610507183607392, "step": 4448 }, { "loss": 1.386688232421875, "learning_rate": 2.8509456438810885e-05, "epoch": 0.05935349569153074, "total_flos": 138594230810859072, "step": 4480 }, { "loss": 1.38397216796875, "learning_rate": 2.849660692535236e-05, "epoch": 0.05977744923218453, "total_flos": 139601250403639584, "step": 4512 }, { "loss": 1.3764190673828125, "learning_rate": 2.8483757411893832e-05, "epoch": 0.06020140277283832, "total_flos": 140563898260972224, "step": 4544 }, { "loss": 1.3710784912109375, "learning_rate": 2.8470907898435306e-05, "epoch": 0.06062535631349211, "total_flos": 141547847125459680, "step": 4576 }, { "loss": 1.357147216796875, "learning_rate": 2.8458058384976776e-05, "epoch": 0.0610493098541459, "total_flos": 142498396525556352, "step": 4608 }, { "loss": 1.3641815185546875, "learning_rate": 2.844520887151825e-05, "epoch": 0.06147326339479969, "total_flos": 143518898175735456, "step": 4640 }, { "loss": 1.4013214111328125, "learning_rate": 2.8432359358059723e-05, "epoch": 0.06189721693545349, "total_flos": 144527880550142016, "step": 4672 }, { "loss": 1.393890380859375, "learning_rate": 2.8419509844601196e-05, "epoch": 0.06232117047610728, "total_flos": 145492185509995008, "step": 4704 }, { "loss": 1.377593994140625, "learning_rate": 2.8406660331142673e-05, "epoch": 0.06274512401676106, "total_flos": 146471758336758816, "step": 4736 }, { "loss": 1.401336669921875, "learning_rate": 2.8393810817684147e-05, "epoch": 0.06316907755741485, "total_flos": 147461547281002464, "step": 4768 }, { "loss": 1.3686370849609375, "learning_rate": 2.8380961304225617e-05, "epoch": 0.06359303109806864, "total_flos": 148436325772319040, "step": 4800 }, { "loss": 1.344573974609375, "learning_rate": 2.836811179076709e-05, "epoch": 0.06401698463872245, "total_flos": 149368277012090208, "step": 4832 }, { "loss": 1.357635498046875, "learning_rate": 2.8355262277308564e-05, "epoch": 0.06444093817937624, "total_flos": 150375891874708128, "step": 4864 }, { "loss": 1.3975372314453125, "learning_rate": 2.8342412763850037e-05, "epoch": 0.06486489172003003, "total_flos": 151351458696349920, "step": 4896 }, { "loss": 1.3832550048828125, "learning_rate": 2.832956325039151e-05, "epoch": 0.06528884526068382, "total_flos": 152328795239129952, "step": 4928 }, { "loss": 1.374267578125, "learning_rate": 2.831671373693298e-05, "epoch": 0.06571279880133761, "total_flos": 153301900539552192, "step": 4960 }, { "loss": 1.3809051513671875, "learning_rate": 2.8303864223474454e-05, "epoch": 0.0661367523419914, "total_flos": 154319377575422304, "step": 4992 }, { "loss": 1.3642425537109375, "learning_rate": 2.8291014710015928e-05, "epoch": 0.06656070588264519, "total_flos": 155268318138120576, "step": 5024 }, { "loss": 1.367034912109375, "learning_rate": 2.82781651965574e-05, "epoch": 0.06698465942329898, "total_flos": 156287001802039488, "step": 5056 }, { "loss": 1.38592529296875, "learning_rate": 2.8265315683098875e-05, "epoch": 0.06740861296395277, "total_flos": 157285381937990592, "step": 5088 }, { "loss": 1.3745269775390625, "learning_rate": 2.8252466169640348e-05, "epoch": 0.06783256650460656, "total_flos": 158286416655649056, "step": 5120 }, { "loss": 1.386444091796875, "learning_rate": 2.823961665618182e-05, "epoch": 0.06825652004526035, "total_flos": 159238928837371776, "step": 5152 }, { "loss": 1.3608551025390625, "learning_rate": 2.8226767142723295e-05, "epoch": 0.06868047358591414, "total_flos": 160275454508038944, "step": 5184 }, { "loss": 1.3735504150390625, "learning_rate": 2.821391762926477e-05, "epoch": 0.06910442712656793, "total_flos": 161238182807241504, "step": 5216 }, { "loss": 1.412322998046875, "learning_rate": 2.8201068115806242e-05, "epoch": 0.06952838066722172, "total_flos": 162201731613517248, "step": 5248 }, { "loss": 1.3405609130859375, "learning_rate": 2.8188218602347716e-05, "epoch": 0.06995233420787551, "total_flos": 163214752169793792, "step": 5280 }, { "loss": 1.3396453857421875, "learning_rate": 2.8175369088889186e-05, "epoch": 0.0703762877485293, "total_flos": 164207887495826112, "step": 5312 }, { "loss": 1.3667449951171875, "learning_rate": 2.816251957543066e-05, "epoch": 0.0708002412891831, "total_flos": 165168588659906688, "step": 5344 }, { "loss": 1.38519287109375, "learning_rate": 2.8149670061972133e-05, "epoch": 0.07122419482983688, "total_flos": 166146681356263968, "step": 5376 }, { "loss": 1.356414794921875, "learning_rate": 2.8136820548513606e-05, "epoch": 0.07164814837049067, "total_flos": 167152446055873728, "step": 5408 }, { "loss": 1.3887481689453125, "learning_rate": 2.812397103505508e-05, "epoch": 0.07207210191114446, "total_flos": 168135316999304256, "step": 5440 }, { "loss": 1.3737030029296875, "learning_rate": 2.811112152159655e-05, "epoch": 0.07249605545179827, "total_flos": 169128243176474784, "step": 5472 }, { "loss": 1.3735809326171875, "learning_rate": 2.8098272008138027e-05, "epoch": 0.07292000899245206, "total_flos": 170164077047060640, "step": 5504 }, { "loss": 1.344573974609375, "learning_rate": 2.80854224946795e-05, "epoch": 0.07334396253310585, "total_flos": 171178207701142080, "step": 5536 }, { "loss": 1.3365478515625, "learning_rate": 2.8072572981220974e-05, "epoch": 0.07376791607375964, "total_flos": 172191244345792608, "step": 5568 }, { "loss": 1.3626556396484375, "learning_rate": 2.8059723467762447e-05, "epoch": 0.07419186961441343, "total_flos": 173190348458572992, "step": 5600 }, { "loss": 1.3649444580078125, "learning_rate": 2.8046873954303917e-05, "epoch": 0.07461582315506722, "total_flos": 174193185074117664, "step": 5632 }, { "loss": 1.362579345703125, "learning_rate": 2.803402444084539e-05, "epoch": 0.07503977669572101, "total_flos": 175174736770881504, "step": 5664 }, { "loss": 1.337982177734375, "learning_rate": 2.8021174927386864e-05, "epoch": 0.0754637302363748, "total_flos": 176211455502036480, "step": 5696 }, { "loss": 1.348114013671875, "learning_rate": 2.8008325413928338e-05, "epoch": 0.07588768377702859, "total_flos": 177194953892052384, "step": 5728 }, { "loss": 1.3402252197265625, "learning_rate": 2.799547590046981e-05, "epoch": 0.07631163731768238, "total_flos": 178155606791011008, "step": 5760 }, { "loss": 1.32940673828125, "learning_rate": 2.7982626387011285e-05, "epoch": 0.07673559085833617, "total_flos": 179170590128913600, "step": 5792 }, { "loss": 1.35211181640625, "learning_rate": 2.7969776873552755e-05, "epoch": 0.07715954439898996, "total_flos": 180203560268930304, "step": 5824 }, { "loss": 1.367584228515625, "learning_rate": 2.795692736009423e-05, "epoch": 0.07758349793964375, "total_flos": 181223563179515904, "step": 5856 }, { "loss": 1.366485595703125, "learning_rate": 2.7944077846635705e-05, "epoch": 0.07800745148029754, "total_flos": 182188415144084352, "step": 5888 }, { "loss": 1.3546142578125, "learning_rate": 2.793122833317718e-05, "epoch": 0.07843140502095133, "total_flos": 183160088579222016, "step": 5920 }, { "loss": 1.379974365234375, "learning_rate": 2.7918378819718652e-05, "epoch": 0.07885535856160512, "total_flos": 184153513495986048, "step": 5952 }, { "loss": 1.373870849609375, "learning_rate": 2.7905529306260122e-05, "epoch": 0.07927931210225891, "total_flos": 185132072755188864, "step": 5984 }, { "loss": 1.339263916015625, "learning_rate": 2.7892679792801596e-05, "epoch": 0.0797032656429127, "total_flos": 186103118743741152, "step": 6016 }, { "loss": 1.37225341796875, "learning_rate": 2.787983027934307e-05, "epoch": 0.0801272191835665, "total_flos": 187078379886277248, "step": 6048 }, { "loss": 1.35443115234375, "learning_rate": 2.7866980765884543e-05, "epoch": 0.08055117272422029, "total_flos": 188131219168164192, "step": 6080 }, { "loss": 1.328765869140625, "learning_rate": 2.7854131252426016e-05, "epoch": 0.08097512626487409, "total_flos": 189155678558343360, "step": 6112 }, { "loss": 1.379425048828125, "learning_rate": 2.7841281738967486e-05, "epoch": 0.08139907980552788, "total_flos": 190145210088603264, "step": 6144 }, { "loss": 1.368896484375, "learning_rate": 2.782843222550896e-05, "epoch": 0.08182303334618167, "total_flos": 191124927710732928, "step": 6176 }, { "loss": 1.362579345703125, "learning_rate": 2.7815582712050433e-05, "epoch": 0.08224698688683546, "total_flos": 192114314445626976, "step": 6208 }, { "loss": 1.357513427734375, "learning_rate": 2.780273319859191e-05, "epoch": 0.08267094042748925, "total_flos": 193079616884666976, "step": 6240 }, { "loss": 1.352996826171875, "learning_rate": 2.7789883685133384e-05, "epoch": 0.08309489396814304, "total_flos": 194053478338666464, "step": 6272 }, { "loss": 1.366851806640625, "learning_rate": 2.7777034171674857e-05, "epoch": 0.08351884750879683, "total_flos": 195015707898275520, "step": 6304 }, { "loss": 1.34796142578125, "learning_rate": 2.7764184658216327e-05, "epoch": 0.08394280104945062, "total_flos": 195999946353494688, "step": 6336 }, { "loss": 1.35308837890625, "learning_rate": 2.77513351447578e-05, "epoch": 0.08436675459010441, "total_flos": 197010328416437856, "step": 6368 }, { "loss": 1.369110107421875, "learning_rate": 2.7738485631299274e-05, "epoch": 0.0847907081307582, "total_flos": 197991397461982176, "step": 6400 }, { "loss": 1.332977294921875, "learning_rate": 2.7725636117840748e-05, "epoch": 0.08521466167141199, "total_flos": 199013845805413344, "step": 6432 }, { "loss": 1.36224365234375, "learning_rate": 2.771278660438222e-05, "epoch": 0.08563861521206578, "total_flos": 200027896017624864, "step": 6464 }, { "loss": 1.3404541015625, "learning_rate": 2.769993709092369e-05, "epoch": 0.08606256875271957, "total_flos": 201028158493332096, "step": 6496 }, { "loss": 1.374176025390625, "learning_rate": 2.7687087577465165e-05, "epoch": 0.08648652229337336, "total_flos": 201974894948794560, "step": 6528 }, { "loss": 1.356292724609375, "learning_rate": 2.7674238064006638e-05, "epoch": 0.08691047583402715, "total_flos": 202901392229785152, "step": 6560 }, { "loss": 1.33587646484375, "learning_rate": 2.7661388550548112e-05, "epoch": 0.08733442937468094, "total_flos": 203908427910939648, "step": 6592 }, { "loss": 1.369842529296875, "learning_rate": 2.764853903708959e-05, "epoch": 0.08775838291533473, "total_flos": 204891266677622208, "step": 6624 }, { "loss": 1.342376708984375, "learning_rate": 2.763568952363106e-05, "epoch": 0.08818233645598852, "total_flos": 205858837577393952, "step": 6656 }, { "loss": 1.354400634765625, "learning_rate": 2.7622840010172532e-05, "epoch": 0.08860628999664232, "total_flos": 206837091157491072, "step": 6688 }, { "loss": 1.35028076171875, "learning_rate": 2.7609990496714006e-05, "epoch": 0.0890302435372961, "total_flos": 207794108083929312, "step": 6720 }, { "loss": 1.348846435546875, "learning_rate": 2.759714098325548e-05, "epoch": 0.08945419707794991, "total_flos": 208808126119392864, "step": 6752 }, { "loss": 1.337310791015625, "learning_rate": 2.7584291469796953e-05, "epoch": 0.0898781506186037, "total_flos": 209800666175587776, "step": 6784 }, { "loss": 1.39056396484375, "learning_rate": 2.7571441956338426e-05, "epoch": 0.09030210415925749, "total_flos": 210748673612594976, "step": 6816 }, { "loss": 1.37469482421875, "learning_rate": 2.7558592442879896e-05, "epoch": 0.09072605769991128, "total_flos": 211688459890496352, "step": 6848 }, { "loss": 1.33056640625, "learning_rate": 2.754574292942137e-05, "epoch": 0.09115001124056507, "total_flos": 212695736897260608, "step": 6880 }, { "loss": 1.344512939453125, "learning_rate": 2.7532893415962843e-05, "epoch": 0.09157396478121886, "total_flos": 213712538221423392, "step": 6912 }, { "loss": 1.363067626953125, "learning_rate": 2.7520043902504317e-05, "epoch": 0.09199791832187265, "total_flos": 214676778827780448, "step": 6944 }, { "loss": 1.326385498046875, "learning_rate": 2.750719438904579e-05, "epoch": 0.09242187186252644, "total_flos": 215691713900561088, "step": 6976 }, { "loss": 1.33026123046875, "learning_rate": 2.7494344875587264e-05, "epoch": 0.09284582540318023, "total_flos": 216671206285454976, "step": 7008 }, { "loss": 1.35394287109375, "learning_rate": 2.7481495362128737e-05, "epoch": 0.09326977894383402, "total_flos": 217632856663600608, "step": 7040 }, { "loss": 1.349884033203125, "learning_rate": 2.746864584867021e-05, "epoch": 0.09369373248448781, "total_flos": 218613973974266880, "step": 7072 }, { "loss": 1.34283447265625, "learning_rate": 2.7455796335211684e-05, "epoch": 0.0941176860251416, "total_flos": 219583234153306944, "step": 7104 }, { "loss": 1.333587646484375, "learning_rate": 2.7442946821753158e-05, "epoch": 0.0945416395657954, "total_flos": 220572379562591232, "step": 7136 }, { "loss": 1.365020751953125, "learning_rate": 2.7430097308294628e-05, "epoch": 0.09496559310644918, "total_flos": 221547093700411872, "step": 7168 }, { "loss": 1.33099365234375, "learning_rate": 2.74172477948361e-05, "epoch": 0.09538954664710297, "total_flos": 222548064064574400, "step": 7200 }, { "loss": 1.351654052734375, "learning_rate": 2.7404398281377575e-05, "epoch": 0.09581350018775676, "total_flos": 223554375768899616, "step": 7232 }, { "loss": 1.3623046875, "learning_rate": 2.739154876791905e-05, "epoch": 0.09623745372841055, "total_flos": 224564822185338720, "step": 7264 }, { "loss": 1.35028076171875, "learning_rate": 2.7378699254460522e-05, "epoch": 0.09666140726906434, "total_flos": 225530961219825888, "step": 7296 }, { "loss": 1.35443115234375, "learning_rate": 2.7365849741001995e-05, "epoch": 0.09708536080971814, "total_flos": 226516422391467840, "step": 7328 }, { "loss": 1.338836669921875, "learning_rate": 2.735300022754347e-05, "epoch": 0.09750931435037193, "total_flos": 227484347235467232, "step": 7360 }, { "loss": 1.323333740234375, "learning_rate": 2.7340150714084942e-05, "epoch": 0.09793326789102573, "total_flos": 228474908421662112, "step": 7392 }, { "loss": 1.377960205078125, "learning_rate": 2.7327301200626416e-05, "epoch": 0.09835722143167952, "total_flos": 229473272469239232, "step": 7424 }, { "loss": 1.359619140625, "learning_rate": 2.731445168716789e-05, "epoch": 0.09878117497233331, "total_flos": 230478522340881504, "step": 7456 }, { "loss": 1.337646484375, "learning_rate": 2.7301602173709363e-05, "epoch": 0.0992051285129871, "total_flos": 231499281405044352, "step": 7488 }, { "loss": 1.3497314453125, "learning_rate": 2.7288752660250833e-05, "epoch": 0.09962908205364089, "total_flos": 232492593703190496, "step": 7520 }, { "loss": 1.330474853515625, "learning_rate": 2.7275903146792306e-05, "epoch": 0.10005303559429468, "total_flos": 233505871673450784, "step": 7552 }, { "loss": 1.34307861328125, "learning_rate": 2.726305363333378e-05, "epoch": 0.10047698913494847, "total_flos": 234490850193873216, "step": 7584 }, { "loss": 1.346435546875, "learning_rate": 2.7250204119875253e-05, "epoch": 0.10090094267560226, "total_flos": 235498545498361056, "step": 7616 }, { "loss": 1.33050537109375, "learning_rate": 2.7237354606416727e-05, "epoch": 0.10132489621625605, "total_flos": 236522779651304448, "step": 7648 }, { "loss": 1.34619140625, "learning_rate": 2.7224505092958197e-05, "epoch": 0.10174884975690984, "total_flos": 237499054361401536, "step": 7680 }, { "loss": 1.328765869140625, "learning_rate": 2.721165557949967e-05, "epoch": 0.10217280329756363, "total_flos": 238483759379466240, "step": 7712 }, { "loss": 1.3486328125, "learning_rate": 2.7198806066041147e-05, "epoch": 0.10259675683821742, "total_flos": 239471199421108224, "step": 7744 }, { "loss": 1.319610595703125, "learning_rate": 2.718595655258262e-05, "epoch": 0.10302071037887121, "total_flos": 240452606322506208, "step": 7776 }, { "loss": 1.324188232421875, "learning_rate": 2.7173107039124094e-05, "epoch": 0.103444663919525, "total_flos": 241488938932685568, "step": 7808 }, { "loss": 1.359954833984375, "learning_rate": 2.7160257525665564e-05, "epoch": 0.1038686174601788, "total_flos": 242445618003270144, "step": 7840 }, { "loss": 1.379180908203125, "learning_rate": 2.7147408012207038e-05, "epoch": 0.10429257100083258, "total_flos": 243369846823528992, "step": 7872 }, { "loss": 1.38116455078125, "learning_rate": 2.713455849874851e-05, "epoch": 0.10471652454148637, "total_flos": 244327201605820896, "step": 7904 }, { "loss": 1.3616943359375, "learning_rate": 2.7121708985289985e-05, "epoch": 0.10514047808214017, "total_flos": 245270189470145088, "step": 7936 }, { "loss": 1.35992431640625, "learning_rate": 2.710885947183146e-05, "epoch": 0.10556443162279396, "total_flos": 246279204021299616, "step": 7968 }, { "loss": 1.35003662109375, "learning_rate": 2.7096009958372932e-05, "epoch": 0.10598838516344775, "total_flos": 247212989335704960, "step": 8000 }, { "loss": 1.353546142578125, "learning_rate": 2.7083160444914402e-05, "epoch": 0.10641233870410155, "total_flos": 248188170036371136, "step": 8032 }, { "loss": 1.361053466796875, "learning_rate": 2.7070310931455875e-05, "epoch": 0.10683629224475534, "total_flos": 249181804101996960, "step": 8064 }, { "loss": 1.321502685546875, "learning_rate": 2.705746141799735e-05, "epoch": 0.10726024578540913, "total_flos": 250179395907622848, "step": 8096 }, { "loss": 1.318878173828125, "learning_rate": 2.7044611904538826e-05, "epoch": 0.10768419932606292, "total_flos": 251151101519508480, "step": 8128 }, { "loss": 1.347076416015625, "learning_rate": 2.70317623910803e-05, "epoch": 0.10810815286671671, "total_flos": 252128438062288512, "step": 8160 }, { "loss": 1.35040283203125, "learning_rate": 2.701891287762177e-05, "epoch": 0.1085321064073705, "total_flos": 253123085695475328, "step": 8192 }, { "loss": 1.358306884765625, "learning_rate": 2.7006063364163243e-05, "epoch": 0.10895605994802429, "total_flos": 254083255943214432, "step": 8224 }, { "loss": 1.326629638671875, "learning_rate": 2.6993213850704716e-05, "epoch": 0.10938001348867808, "total_flos": 255045308530709664, "step": 8256 }, { "loss": 1.37933349609375, "learning_rate": 2.698036433724619e-05, "epoch": 0.10980396702933187, "total_flos": 256019620459180704, "step": 8288 }, { "loss": 1.32904052734375, "learning_rate": 2.6967514823787663e-05, "epoch": 0.11022792056998566, "total_flos": 256992790113098880, "step": 8320 }, { "loss": 1.357269287109375, "learning_rate": 2.6954665310329133e-05, "epoch": 0.11065187411063945, "total_flos": 257968485641732544, "step": 8352 }, { "loss": 1.36065673828125, "learning_rate": 2.6941815796870607e-05, "epoch": 0.11107582765129324, "total_flos": 258980540895570048, "step": 8384 }, { "loss": 1.349578857421875, "learning_rate": 2.692896628341208e-05, "epoch": 0.11149978119194703, "total_flos": 259929690607130112, "step": 8416 }, { "loss": 1.36883544921875, "learning_rate": 2.6916116769953554e-05, "epoch": 0.11192373473260082, "total_flos": 260944657856658720, "step": 8448 }, { "loss": 1.363739013671875, "learning_rate": 2.6903267256495027e-05, "epoch": 0.11234768827325461, "total_flos": 261936087815048736, "step": 8480 }, { "loss": 1.34521484375, "learning_rate": 2.6890417743036504e-05, "epoch": 0.1127716418139084, "total_flos": 262901535049454592, "step": 8512 }, { "loss": 1.3756103515625, "learning_rate": 2.6877568229577974e-05, "epoch": 0.1131955953545622, "total_flos": 263872677568250784, "step": 8544 }, { "loss": 1.324249267578125, "learning_rate": 2.6864718716119448e-05, "epoch": 0.11361954889521599, "total_flos": 264857221702575648, "step": 8576 }, { "loss": 1.321197509765625, "learning_rate": 2.685186920266092e-05, "epoch": 0.11404350243586978, "total_flos": 265841926720640352, "step": 8608 }, { "loss": 1.345306396484375, "learning_rate": 2.6839019689202395e-05, "epoch": 0.11446745597652357, "total_flos": 266797061307322464, "step": 8640 }, { "loss": 1.336456298828125, "learning_rate": 2.682617017574387e-05, "epoch": 0.11489140951717737, "total_flos": 267822502088314656, "step": 8672 }, { "loss": 1.34283447265625, "learning_rate": 2.681332066228534e-05, "epoch": 0.11531536305783116, "total_flos": 268862277610526592, "step": 8704 }, { "loss": 1.310272216796875, "learning_rate": 2.6800471148826812e-05, "epoch": 0.11573931659848495, "total_flos": 269887010503063488, "step": 8736 }, { "loss": 1.312225341796875, "learning_rate": 2.6787621635368285e-05, "epoch": 0.11616327013913874, "total_flos": 270857638193892192, "step": 8768 }, { "loss": 1.3388671875, "learning_rate": 2.677477212190976e-05, "epoch": 0.11658722367979253, "total_flos": 271853588985371712, "step": 8800 }, { "loss": 1.340301513671875, "learning_rate": 2.6761922608451232e-05, "epoch": 0.11701117722044632, "total_flos": 272828222681322432, "step": 8832 }, { "loss": 1.337890625, "learning_rate": 2.6749073094992706e-05, "epoch": 0.11743513076110011, "total_flos": 273799831762964160, "step": 8864 }, { "loss": 1.320404052734375, "learning_rate": 2.673622358153418e-05, "epoch": 0.1178590843017539, "total_flos": 274784938990378464, "step": 8896 }, { "loss": 1.32666015625, "learning_rate": 2.6723374068075653e-05, "epoch": 0.11828303784240769, "total_flos": 275783608717061280, "step": 8928 }, { "loss": 1.35205078125, "learning_rate": 2.6710524554617126e-05, "epoch": 0.11870699138306148, "total_flos": 276778047201386304, "step": 8960 }, { "loss": 1.320831298828125, "learning_rate": 2.66976750411586e-05, "epoch": 0.11913094492371527, "total_flos": 277762510893841248, "step": 8992 }, { "loss": 1.32183837890625, "learning_rate": 2.6684825527700073e-05, "epoch": 0.11955489846436906, "total_flos": 278711580163531392, "step": 9024 }, { "loss": 1.32244873046875, "learning_rate": 2.6671976014241544e-05, "epoch": 0.11997885200502285, "total_flos": 279693292744035072, "step": 9056 }, { "loss": 1.329986572265625, "learning_rate": 2.6659126500783017e-05, "epoch": 0.12040280554567664, "total_flos": 280654701796570944, "step": 9088 }, { "loss": 1.343902587890625, "learning_rate": 2.664627698732449e-05, "epoch": 0.12082675908633043, "total_flos": 281620213384472736, "step": 9120 }, { "loss": 1.306549072265625, "learning_rate": 2.6633427473865964e-05, "epoch": 0.12125071262698423, "total_flos": 282564311346601824, "step": 9152 }, { "loss": 1.366241455078125, "learning_rate": 2.6620577960407437e-05, "epoch": 0.12167466616763802, "total_flos": 283564605999057024, "step": 9184 }, { "loss": 1.3428955078125, "learning_rate": 2.6607728446948908e-05, "epoch": 0.1220986197082918, "total_flos": 284508575254194240, "step": 9216 }, { "loss": 1.304779052734375, "learning_rate": 2.6594878933490384e-05, "epoch": 0.1225225732489456, "total_flos": 285483434187380736, "step": 9248 }, { "loss": 1.350250244140625, "learning_rate": 2.6582029420031858e-05, "epoch": 0.12294652678959939, "total_flos": 286443121783900320, "step": 9280 }, { "loss": 1.339569091796875, "learning_rate": 2.656917990657333e-05, "epoch": 0.12337048033025319, "total_flos": 287474772677250336, "step": 9312 }, { "loss": 1.326873779296875, "learning_rate": 2.6556330393114805e-05, "epoch": 0.12379443387090698, "total_flos": 288457643620680864, "step": 9344 }, { "loss": 1.36041259765625, "learning_rate": 2.6543480879656275e-05, "epoch": 0.12421838741156077, "total_flos": 289413920481915840, "step": 9376 }, { "loss": 1.36199951171875, "learning_rate": 2.653063136619775e-05, "epoch": 0.12464234095221456, "total_flos": 290388184145264928, "step": 9408 }, { "loss": 1.34527587890625, "learning_rate": 2.6517781852739222e-05, "epoch": 0.12506629449286835, "total_flos": 291374111879752416, "step": 9440 }, { "loss": 1.3428955078125, "learning_rate": 2.6504932339280696e-05, "epoch": 0.12549024803352213, "total_flos": 292360683149199264, "step": 9472 }, { "loss": 1.327667236328125, "learning_rate": 2.649208282582217e-05, "epoch": 0.12591420157417593, "total_flos": 293365884755719584, "step": 9504 }, { "loss": 1.34375, "learning_rate": 2.6479233312363642e-05, "epoch": 0.1263381551148297, "total_flos": 294366292026792672, "step": 9536 }, { "loss": 1.306304931640625, "learning_rate": 2.6466383798905113e-05, "epoch": 0.1267621086554835, "total_flos": 295326993190873248, "step": 9568 }, { "loss": 1.34747314453125, "learning_rate": 2.6453534285446586e-05, "epoch": 0.1271860621961373, "total_flos": 296299068835360512, "step": 9600 }, { "loss": 1.343658447265625, "learning_rate": 2.6440684771988063e-05, "epoch": 0.1276100157367911, "total_flos": 297310786233344352, "step": 9632 }, { "loss": 1.32623291015625, "learning_rate": 2.6427835258529536e-05, "epoch": 0.1280339692774449, "total_flos": 298284776394335712, "step": 9664 }, { "loss": 1.3409423828125, "learning_rate": 2.641498574507101e-05, "epoch": 0.12845792281809867, "total_flos": 299274098775733824, "step": 9696 }, { "loss": 1.297637939453125, "learning_rate": 2.640213623161248e-05, "epoch": 0.12888187635875248, "total_flos": 300281424047620032, "step": 9728 }, { "loss": 1.329864501953125, "learning_rate": 2.6389286718153954e-05, "epoch": 0.12930582989940625, "total_flos": 301295908645929120, "step": 9760 }, { "loss": 1.316162109375, "learning_rate": 2.6376437204695427e-05, "epoch": 0.12972978344006006, "total_flos": 302333206558547520, "step": 9792 }, { "loss": 1.31231689453125, "learning_rate": 2.63635876912369e-05, "epoch": 0.13015373698071384, "total_flos": 303320469628075680, "step": 9824 }, { "loss": 1.31451416015625, "learning_rate": 2.6350738177778374e-05, "epoch": 0.13057769052136764, "total_flos": 304352474465653344, "step": 9856 }, { "loss": 1.346282958984375, "learning_rate": 2.6337888664319844e-05, "epoch": 0.13100164406202142, "total_flos": 305326738129002432, "step": 9888 }, { "loss": 1.332366943359375, "learning_rate": 2.6325039150861318e-05, "epoch": 0.13142559760267522, "total_flos": 306360577041214272, "step": 9920 }, { "loss": 1.32513427734375, "learning_rate": 2.631218963740279e-05, "epoch": 0.131849551143329, "total_flos": 307373066681149344, "step": 9952 }, { "loss": 1.33160400390625, "learning_rate": 2.6299340123944265e-05, "epoch": 0.1322735046839828, "total_flos": 308341039790270688, "step": 9984 }, { "loss": 1.354827880859375, "learning_rate": 2.628649061048574e-05, "epoch": 0.13269745822463658, "total_flos": 309325374775733760, "step": 10016 }, { "loss": 1.344329833984375, "learning_rate": 2.6273641097027215e-05, "epoch": 0.13312141176529038, "total_flos": 310297096475993376, "step": 10048 }, { "loss": 1.356201171875, "learning_rate": 2.6260791583568685e-05, "epoch": 0.13354536530594416, "total_flos": 311286804978367104, "step": 10080 }, { "loss": 1.310394287109375, "learning_rate": 2.624794207011016e-05, "epoch": 0.13396931884659796, "total_flos": 312309575089277952, "step": 10112 }, { "loss": 1.343963623046875, "learning_rate": 2.6235092556651632e-05, "epoch": 0.13439327238725174, "total_flos": 313286557687830336, "step": 10144 }, { "loss": 1.32635498046875, "learning_rate": 2.6222243043193106e-05, "epoch": 0.13481722592790554, "total_flos": 314278132441586208, "step": 10176 }, { "loss": 1.3350830078125, "learning_rate": 2.620939352973458e-05, "epoch": 0.13524117946855932, "total_flos": 315256579082171136, "step": 10208 }, { "loss": 1.294464111328125, "learning_rate": 2.619654401627605e-05, "epoch": 0.13566513300921312, "total_flos": 316262858609748384, "step": 10240 }, { "eval_loss": 1.2091291969686746, "epoch": 0.13566513300921312, "total_flos": 316262858609748384, "step": 10240 }, { "loss": 1.34130859375, "learning_rate": 2.6183694502817523e-05, "epoch": 0.13608908654986693, "total_flos": 317270714797976064, "step": 10272 }, { "loss": 1.302093505859375, "learning_rate": 2.6170844989358996e-05, "epoch": 0.1365130400905207, "total_flos": 318254728015959456, "step": 10304 }, { "loss": 1.3238525390625, "learning_rate": 2.615799547590047e-05, "epoch": 0.1369369936311745, "total_flos": 319226063595243456, "step": 10336 }, { "loss": 1.352783203125, "learning_rate": 2.6145145962441943e-05, "epoch": 0.13736094717182828, "total_flos": 320206344310462560, "step": 10368 }, { "loss": 1.32818603515625, "learning_rate": 2.6132296448983417e-05, "epoch": 0.1377849007124821, "total_flos": 321199254399259104, "step": 10400 }, { "loss": 1.32623291015625, "learning_rate": 2.611944693552489e-05, "epoch": 0.13820885425313587, "total_flos": 322202123191551744, "step": 10432 }, { "loss": 1.34637451171875, "learning_rate": 2.6106597422066364e-05, "epoch": 0.13863280779378967, "total_flos": 323187616539941664, "step": 10464 }, { "loss": 1.33306884765625, "learning_rate": 2.6093747908607837e-05, "epoch": 0.13905676133444345, "total_flos": 324150039160038528, "step": 10496 }, { "loss": 1.3023681640625, "learning_rate": 2.608089839514931e-05, "epoch": 0.13948071487509725, "total_flos": 325133070987208896, "step": 10528 }, { "loss": 1.320220947265625, "learning_rate": 2.6068048881690784e-05, "epoch": 0.13990466841575103, "total_flos": 326147619939013920, "step": 10560 }, { "loss": 1.34393310546875, "learning_rate": 2.6055199368232254e-05, "epoch": 0.14032862195640483, "total_flos": 327131697510493248, "step": 10592 }, { "loss": 1.34661865234375, "learning_rate": 2.6042349854773728e-05, "epoch": 0.1407525754970586, "total_flos": 328127873539208544, "step": 10624 }, { "loss": 1.340240478515625, "learning_rate": 2.60295003413152e-05, "epoch": 0.1411765290377124, "total_flos": 329129825294184096, "step": 10656 }, { "loss": 1.340179443359375, "learning_rate": 2.6016650827856675e-05, "epoch": 0.1416004825783662, "total_flos": 330073070572492032, "step": 10688 }, { "loss": 1.3336181640625, "learning_rate": 2.6003801314398148e-05, "epoch": 0.14202443611902, "total_flos": 331076534634622080, "step": 10720 }, { "loss": 1.3123779296875, "learning_rate": 2.599095180093962e-05, "epoch": 0.14244838965967377, "total_flos": 332069010337321056, "step": 10752 }, { "loss": 1.313873291015625, "learning_rate": 2.5978102287481095e-05, "epoch": 0.14287234320032757, "total_flos": 333027684366279648, "step": 10784 }, { "loss": 1.301727294921875, "learning_rate": 2.596525277402257e-05, "epoch": 0.14329629674098135, "total_flos": 334071353274995712, "step": 10816 }, { "loss": 1.322906494140625, "learning_rate": 2.5952403260564042e-05, "epoch": 0.14372025028163515, "total_flos": 335008742385173472, "step": 10848 }, { "loss": 1.30328369140625, "learning_rate": 2.5939553747105516e-05, "epoch": 0.14414420382228893, "total_flos": 336028407439905408, "step": 10880 }, { "loss": 1.344940185546875, "learning_rate": 2.5926704233646986e-05, "epoch": 0.14456815736294273, "total_flos": 336982592812522464, "step": 10912 }, { "loss": 1.326690673828125, "learning_rate": 2.591385472018846e-05, "epoch": 0.14499211090359654, "total_flos": 337964144509286304, "step": 10944 }, { "loss": 1.3560791015625, "learning_rate": 2.5901005206729933e-05, "epoch": 0.14541606444425031, "total_flos": 338963007296456928, "step": 10976 }, { "loss": 1.299835205078125, "learning_rate": 2.5888155693271406e-05, "epoch": 0.14584001798490412, "total_flos": 339983525035010016, "step": 11008 }, { "loss": 1.3394775390625, "learning_rate": 2.587530617981288e-05, "epoch": 0.1462639715255579, "total_flos": 340975067612017920, "step": 11040 }, { "loss": 1.294036865234375, "learning_rate": 2.5862456666354353e-05, "epoch": 0.1466879250662117, "total_flos": 341965290942359136, "step": 11072 }, { "loss": 1.29693603515625, "learning_rate": 2.5849607152895823e-05, "epoch": 0.14711187860686548, "total_flos": 342961933533919968, "step": 11104 }, { "loss": 1.3585205078125, "learning_rate": 2.58367576394373e-05, "epoch": 0.14753583214751928, "total_flos": 343942391221252896, "step": 11136 }, { "loss": 1.302642822265625, "learning_rate": 2.5823908125978774e-05, "epoch": 0.14795978568817306, "total_flos": 344934110770374624, "step": 11168 }, { "loss": 1.31976318359375, "learning_rate": 2.5811058612520247e-05, "epoch": 0.14838373922882686, "total_flos": 345950574238683744, "step": 11200 }, { "loss": 1.3154296875, "learning_rate": 2.579820909906172e-05, "epoch": 0.14880769276948064, "total_flos": 346906738481300832, "step": 11232 }, { "loss": 1.30841064453125, "learning_rate": 2.578535958560319e-05, "epoch": 0.14923164631013444, "total_flos": 347900549519040480, "step": 11264 }, { "loss": 1.301177978515625, "learning_rate": 2.5772510072144664e-05, "epoch": 0.14965559985078822, "total_flos": 348885045388243392, "step": 11296 }, { "loss": 1.312042236328125, "learning_rate": 2.5759660558686138e-05, "epoch": 0.15007955339144202, "total_flos": 349904501294113536, "step": 11328 }, { "loss": 1.315826416015625, "learning_rate": 2.574681104522761e-05, "epoch": 0.1505035069320958, "total_flos": 350913564110390016, "step": 11360 }, { "loss": 1.32830810546875, "learning_rate": 2.5733961531769085e-05, "epoch": 0.1509274604727496, "total_flos": 351898912663414080, "step": 11392 }, { "loss": 1.308197021484375, "learning_rate": 2.5721112018310555e-05, "epoch": 0.15135141401340338, "total_flos": 352890744831153696, "step": 11424 }, { "loss": 1.32366943359375, "learning_rate": 2.5708262504852028e-05, "epoch": 0.15177536755405718, "total_flos": 353881836933690048, "step": 11456 }, { "loss": 1.31439208984375, "learning_rate": 2.5695412991393502e-05, "epoch": 0.15219932109471096, "total_flos": 354871223668584096, "step": 11488 }, { "loss": 1.328155517578125, "learning_rate": 2.568256347793498e-05, "epoch": 0.15262327463536476, "total_flos": 355826808729737760, "step": 11520 }, { "loss": 1.344573974609375, "learning_rate": 2.5669713964476452e-05, "epoch": 0.15304722817601857, "total_flos": 356797726011298176, "step": 11552 }, { "loss": 1.295196533203125, "learning_rate": 2.5656864451017926e-05, "epoch": 0.15347118171667234, "total_flos": 357835780077493824, "step": 11584 }, { "loss": 1.3265380859375, "learning_rate": 2.5644014937559396e-05, "epoch": 0.15389513525732615, "total_flos": 358797526985883360, "step": 11616 }, { "loss": 1.31097412109375, "learning_rate": 2.563116542410087e-05, "epoch": 0.15431908879797993, "total_flos": 359740949236305120, "step": 11648 }, { "loss": 1.286163330078125, "learning_rate": 2.5618315910642343e-05, "epoch": 0.15474304233863373, "total_flos": 360736626525426912, "step": 11680 }, { "loss": 1.3349609375, "learning_rate": 2.5605466397183816e-05, "epoch": 0.1551669958792875, "total_flos": 361703698685605152, "step": 11712 }, { "loss": 1.29718017578125, "learning_rate": 2.559261688372529e-05, "epoch": 0.1555909494199413, "total_flos": 362698137169930176, "step": 11744 }, { "loss": 1.254852294921875, "learning_rate": 2.557976737026676e-05, "epoch": 0.1560149029605951, "total_flos": 363751040805313056, "step": 11776 }, { "loss": 1.319488525390625, "learning_rate": 2.5566917856808233e-05, "epoch": 0.1564388565012489, "total_flos": 364715329676792064, "step": 11808 }, { "loss": 1.316558837890625, "learning_rate": 2.5554068343349707e-05, "epoch": 0.15686281004190267, "total_flos": 365782793290630464, "step": 11840 }, { "loss": 1.319091796875, "learning_rate": 2.554121882989118e-05, "epoch": 0.15728676358255647, "total_flos": 366749945892678624, "step": 11872 }, { "loss": 1.29241943359375, "learning_rate": 2.5528369316432657e-05, "epoch": 0.15771071712321025, "total_flos": 367745269237572768, "step": 11904 }, { "loss": 1.31561279296875, "learning_rate": 2.5515519802974127e-05, "epoch": 0.15813467066386405, "total_flos": 368737197935556288, "step": 11936 }, { "loss": 1.30963134765625, "learning_rate": 2.55026702895156e-05, "epoch": 0.15855862420451783, "total_flos": 369735658513377312, "step": 11968 }, { "loss": 1.33843994140625, "learning_rate": 2.5489820776057074e-05, "epoch": 0.15898257774517163, "total_flos": 370713606414368736, "step": 12000 }, { "loss": 1.31561279296875, "learning_rate": 2.5476971262598548e-05, "epoch": 0.1594065312858254, "total_flos": 371708688433653120, "step": 12032 }, { "loss": 1.28985595703125, "learning_rate": 2.546412174914002e-05, "epoch": 0.1598304848264792, "total_flos": 372715209286840128, "step": 12064 }, { "loss": 1.32415771484375, "learning_rate": 2.5451272235681495e-05, "epoch": 0.160254438367133, "total_flos": 373683085865717568, "step": 12096 }, { "loss": 1.3189697265625, "learning_rate": 2.5438422722222965e-05, "epoch": 0.1606783919077868, "total_flos": 374679422778172704, "step": 12128 }, { "loss": 1.3165283203125, "learning_rate": 2.5425573208764438e-05, "epoch": 0.16110234544844057, "total_flos": 375661215800546304, "step": 12160 }, { "loss": 1.32806396484375, "learning_rate": 2.5412723695305912e-05, "epoch": 0.16152629898909437, "total_flos": 376643411032269504, "step": 12192 }, { "loss": 1.31719970703125, "learning_rate": 2.5399874181847385e-05, "epoch": 0.16195025252974818, "total_flos": 377652007285700448, "step": 12224 }, { "loss": 1.34228515625, "learning_rate": 2.538702466838886e-05, "epoch": 0.16237420607040196, "total_flos": 378636905364252960, "step": 12256 }, { "loss": 1.3104248046875, "learning_rate": 2.5374175154930332e-05, "epoch": 0.16279815961105576, "total_flos": 379629445420447872, "step": 12288 }, { "loss": 1.31219482421875, "learning_rate": 2.5361325641471806e-05, "epoch": 0.16322211315170954, "total_flos": 380630882347455936, "step": 12320 }, { "loss": 1.316650390625, "learning_rate": 2.534847612801328e-05, "epoch": 0.16364606669236334, "total_flos": 381612659281455552, "step": 12352 }, { "loss": 1.2742919921875, "learning_rate": 2.5335626614554753e-05, "epoch": 0.16407002023301712, "total_flos": 382593326117650272, "step": 12384 }, { "loss": 1.307373046875, "learning_rate": 2.5322777101096226e-05, "epoch": 0.16449397377367092, "total_flos": 383575666144739328, "step": 12416 }, { "loss": 1.294921875, "learning_rate": 2.5309927587637696e-05, "epoch": 0.1649179273143247, "total_flos": 384519603223128576, "step": 12448 }, { "loss": 1.30548095703125, "learning_rate": 2.529707807417917e-05, "epoch": 0.1653418808549785, "total_flos": 385546540222901280, "step": 12480 }, { "loss": 1.3155517578125, "learning_rate": 2.5284228560720643e-05, "epoch": 0.16576583439563228, "total_flos": 386508673252266432, "step": 12512 }, { "loss": 1.3160400390625, "learning_rate": 2.5271379047262117e-05, "epoch": 0.16618978793628608, "total_flos": 387501856843420704, "step": 12544 }, { "loss": 1.32562255859375, "learning_rate": 2.525852953380359e-05, "epoch": 0.16661374147693986, "total_flos": 388455221708964576, "step": 12576 }, { "loss": 1.2899169921875, "learning_rate": 2.5245680020345064e-05, "epoch": 0.16703769501759366, "total_flos": 389462193036623136, "step": 12608 }, { "loss": 1.30108642578125, "learning_rate": 2.5232830506886537e-05, "epoch": 0.16746164855824744, "total_flos": 390471416736639456, "step": 12640 }, { "loss": 1.31597900390625, "learning_rate": 2.521998099342801e-05, "epoch": 0.16788560209890124, "total_flos": 391480173873810240, "step": 12672 }, { "loss": 1.317626953125, "learning_rate": 2.5207131479969484e-05, "epoch": 0.16830955563955502, "total_flos": 392454180123175584, "step": 12704 }, { "loss": 1.32073974609375, "learning_rate": 2.5194281966510958e-05, "epoch": 0.16873350918020882, "total_flos": 393438836876118336, "step": 12736 }, { "loss": 1.31317138671875, "learning_rate": 2.518143245305243e-05, "epoch": 0.1691574627208626, "total_flos": 394408515352881984, "step": 12768 }, { "loss": 1.31591796875, "learning_rate": 2.51685829395939e-05, "epoch": 0.1695814162615164, "total_flos": 395380977118344864, "step": 12800 }, { "loss": 1.28399658203125, "learning_rate": 2.5155733426135375e-05, "epoch": 0.1700053698021702, "total_flos": 396364202006003040, "step": 12832 }, { "loss": 1.32427978515625, "learning_rate": 2.514288391267685e-05, "epoch": 0.17042932334282399, "total_flos": 397330405393986144, "step": 12864 }, { "loss": 1.3135986328125, "learning_rate": 2.5130034399218322e-05, "epoch": 0.1708532768834778, "total_flos": 398320805696441184, "step": 12896 }, { "loss": 1.335205078125, "learning_rate": 2.5117184885759795e-05, "epoch": 0.17127723042413157, "total_flos": 399282150395481120, "step": 12928 }, { "loss": 1.302001953125, "learning_rate": 2.5104335372301265e-05, "epoch": 0.17170118396478537, "total_flos": 400315747982083200, "step": 12960 }, { "loss": 1.34381103515625, "learning_rate": 2.509148585884274e-05, "epoch": 0.17212513750543915, "total_flos": 401280503416407744, "step": 12992 }, { "loss": 1.31683349609375, "learning_rate": 2.5078636345384216e-05, "epoch": 0.17254909104609295, "total_flos": 402297192121952640, "step": 13024 }, { "loss": 1.3106689453125, "learning_rate": 2.506578683192569e-05, "epoch": 0.17297304458674673, "total_flos": 403292949852944352, "step": 13056 }, { "loss": 1.2984619140625, "learning_rate": 2.5052937318467163e-05, "epoch": 0.17339699812740053, "total_flos": 404284154574098592, "step": 13088 }, { "loss": 1.31488037109375, "learning_rate": 2.5040087805008636e-05, "epoch": 0.1738209516680543, "total_flos": 405283548277610688, "step": 13120 }, { "loss": 1.28424072265625, "learning_rate": 2.5027238291550106e-05, "epoch": 0.1742449052087081, "total_flos": 406270071281935584, "step": 13152 }, { "loss": 1.336669921875, "learning_rate": 2.501438877809158e-05, "epoch": 0.1746688587493619, "total_flos": 407275192446585984, "step": 13184 }, { "loss": 1.32025146484375, "learning_rate": 2.5001539264633053e-05, "epoch": 0.1750928122900157, "total_flos": 408252142868390400, "step": 13216 }, { "loss": 1.33636474609375, "learning_rate": 2.4988689751174527e-05, "epoch": 0.17551676583066947, "total_flos": 409243395854666592, "step": 13248 }, { "loss": 1.31878662109375, "learning_rate": 2.4975840237716e-05, "epoch": 0.17594071937132327, "total_flos": 410231222017284192, "step": 13280 }, { "loss": 1.30828857421875, "learning_rate": 2.496299072425747e-05, "epoch": 0.17636467291197705, "total_flos": 411229216032259680, "step": 13312 }, { "loss": 1.33349609375, "learning_rate": 2.4950141210798944e-05, "epoch": 0.17678862645263085, "total_flos": 412207952263576320, "step": 13344 }, { "loss": 1.28082275390625, "learning_rate": 2.4937291697340417e-05, "epoch": 0.17721257999328463, "total_flos": 413268932262699168, "step": 13376 }, { "loss": 1.29656982421875, "learning_rate": 2.4924442183881894e-05, "epoch": 0.17763653353393843, "total_flos": 414261617114259936, "step": 13408 }, { "loss": 1.29986572265625, "learning_rate": 2.4911592670423368e-05, "epoch": 0.1780604870745922, "total_flos": 415254462849560544, "step": 13440 }, { "loss": 1.30181884765625, "learning_rate": 2.4898743156964838e-05, "epoch": 0.17848444061524602, "total_flos": 416195182253152992, "step": 13472 }, { "loss": 1.30633544921875, "learning_rate": 2.488589364350631e-05, "epoch": 0.17890839415589982, "total_flos": 417155835152111616, "step": 13504 }, { "loss": 1.31561279296875, "learning_rate": 2.4873044130047785e-05, "epoch": 0.1793323476965536, "total_flos": 418114316120582400, "step": 13536 }, { "loss": 1.314208984375, "learning_rate": 2.486019461658926e-05, "epoch": 0.1797563012372074, "total_flos": 419097364036126752, "step": 13568 }, { "loss": 1.30621337890625, "learning_rate": 2.4847345103130732e-05, "epoch": 0.18018025477786118, "total_flos": 420067895196711552, "step": 13600 }, { "loss": 1.3363037109375, "learning_rate": 2.4834495589672205e-05, "epoch": 0.18060420831851498, "total_flos": 421086691479248352, "step": 13632 }, { "loss": 1.27862548828125, "learning_rate": 2.4821646076213675e-05, "epoch": 0.18102816185916876, "total_flos": 422039171484223104, "step": 13664 }, { "loss": 1.3297119140625, "learning_rate": 2.480879656275515e-05, "epoch": 0.18145211539982256, "total_flos": 423025388809442304, "step": 13696 }, { "loss": 1.30023193359375, "learning_rate": 2.4795947049296622e-05, "epoch": 0.18187606894047634, "total_flos": 424036848793442400, "step": 13728 }, { "loss": 1.2943115234375, "learning_rate": 2.4783097535838096e-05, "epoch": 0.18230002248113014, "total_flos": 425035325459637408, "step": 13760 }, { "loss": 1.3037109375, "learning_rate": 2.4770248022379573e-05, "epoch": 0.18272397602178392, "total_flos": 426023827333962336, "step": 13792 }, { "loss": 1.31494140625, "learning_rate": 2.4757398508921043e-05, "epoch": 0.18314792956243772, "total_flos": 427005700798205856, "step": 13824 }, { "loss": 1.3104248046875, "learning_rate": 2.4744548995462516e-05, "epoch": 0.1835718831030915, "total_flos": 428004595762124448, "step": 13856 }, { "loss": 1.317626953125, "learning_rate": 2.473169948200399e-05, "epoch": 0.1839958366437453, "total_flos": 429014060787750528, "step": 13888 }, { "loss": 1.30413818359375, "learning_rate": 2.4718849968545463e-05, "epoch": 0.18441979018439908, "total_flos": 429971238597928608, "step": 13920 }, { "loss": 1.28814697265625, "learning_rate": 2.4706000455086937e-05, "epoch": 0.18484374372505288, "total_flos": 430998513453554976, "step": 13952 }, { "loss": 1.28607177734375, "learning_rate": 2.4693150941628407e-05, "epoch": 0.18526769726570666, "total_flos": 431998019775684960, "step": 13984 }, { "loss": 1.28863525390625, "learning_rate": 2.468030142816988e-05, "epoch": 0.18569165080636046, "total_flos": 432989996738790432, "step": 14016 }, { "loss": 1.30523681640625, "learning_rate": 2.4667451914711354e-05, "epoch": 0.18611560434701424, "total_flos": 434018317338725760, "step": 14048 }, { "loss": 1.3182373046875, "learning_rate": 2.4654602401252827e-05, "epoch": 0.18653955788766804, "total_flos": 434994720755814720, "step": 14080 }, { "loss": 1.30859375, "learning_rate": 2.46417528877943e-05, "epoch": 0.18696351142832185, "total_flos": 435989754509977152, "step": 14112 }, { "loss": 1.26690673828125, "learning_rate": 2.4628903374335778e-05, "epoch": 0.18738746496897563, "total_flos": 436988955153001440, "step": 14144 }, { "loss": 1.31915283203125, "learning_rate": 2.4616053860877248e-05, "epoch": 0.18781141850962943, "total_flos": 437976346929521472, "step": 14176 }, { "loss": 1.277587890625, "learning_rate": 2.460320434741872e-05, "epoch": 0.1882353720502832, "total_flos": 439009188362546304, "step": 14208 }, { "loss": 1.30413818359375, "learning_rate": 2.4590354833960195e-05, "epoch": 0.188659325590937, "total_flos": 439956230497114464, "step": 14240 }, { "loss": 1.29669189453125, "learning_rate": 2.457750532050167e-05, "epoch": 0.1890832791315908, "total_flos": 440898494384609376, "step": 14272 }, { "loss": 1.32135009765625, "learning_rate": 2.4564655807043142e-05, "epoch": 0.1895072326722446, "total_flos": 441850749152348352, "step": 14304 }, { "loss": 1.3421630859375, "learning_rate": 2.4551806293584612e-05, "epoch": 0.18993118621289837, "total_flos": 442820620689599808, "step": 14336 }, { "loss": 1.29339599609375, "learning_rate": 2.4538956780126085e-05, "epoch": 0.19035513975355217, "total_flos": 443863935654088224, "step": 14368 }, { "loss": 1.311767578125, "learning_rate": 2.452610726666756e-05, "epoch": 0.19077909329420595, "total_flos": 444874430335649280, "step": 14400 }, { "loss": 1.296142578125, "learning_rate": 2.4513257753209032e-05, "epoch": 0.19120304683485975, "total_flos": 445871716462169472, "step": 14432 }, { "loss": 1.3167724609375, "learning_rate": 2.4500408239750506e-05, "epoch": 0.19162700037551353, "total_flos": 446851965000640608, "step": 14464 }, { "loss": 1.2747802734375, "learning_rate": 2.4487558726291976e-05, "epoch": 0.19205095391616733, "total_flos": 447833758023014208, "step": 14496 }, { "loss": 1.32379150390625, "learning_rate": 2.4474709212833453e-05, "epoch": 0.1924749074568211, "total_flos": 448825718897745696, "step": 14528 }, { "loss": 1.28106689453125, "learning_rate": 2.4461859699374926e-05, "epoch": 0.1928988609974749, "total_flos": 449783427624265248, "step": 14560 }, { "loss": 1.2999267578125, "learning_rate": 2.44490101859164e-05, "epoch": 0.1933228145381287, "total_flos": 450809045377371264, "step": 14592 }, { "loss": 1.27435302734375, "learning_rate": 2.4436160672457873e-05, "epoch": 0.1937467680787825, "total_flos": 451812638146493184, "step": 14624 }, { "loss": 1.28973388671875, "learning_rate": 2.4423311158999347e-05, "epoch": 0.19417072161943627, "total_flos": 452851850575615680, "step": 14656 }, { "loss": 1.285888671875, "learning_rate": 2.4410461645540817e-05, "epoch": 0.19459467516009007, "total_flos": 453848975818396032, "step": 14688 }, { "loss": 1.30999755859375, "learning_rate": 2.439761213208229e-05, "epoch": 0.19501862870074385, "total_flos": 454865294491339296, "step": 14720 }, { "loss": 1.29351806640625, "learning_rate": 2.4384762618623764e-05, "epoch": 0.19544258224139766, "total_flos": 455877237126558912, "step": 14752 }, { "loss": 1.2960205078125, "learning_rate": 2.4371913105165237e-05, "epoch": 0.19586653578205146, "total_flos": 456857662637143872, "step": 14784 }, { "loss": 1.29730224609375, "learning_rate": 2.435906359170671e-05, "epoch": 0.19629048932270524, "total_flos": 457851344967891648, "step": 14816 }, { "loss": 1.3160400390625, "learning_rate": 2.434621407824818e-05, "epoch": 0.19671444286335904, "total_flos": 458849178099127296, "step": 14848 }, { "loss": 1.2982177734375, "learning_rate": 2.4333364564789655e-05, "epoch": 0.19713839640401282, "total_flos": 459860364580769664, "step": 14880 }, { "loss": 1.287109375, "learning_rate": 2.432051505133113e-05, "epoch": 0.19756234994466662, "total_flos": 460868027708509536, "step": 14912 }, { "loss": 1.28997802734375, "learning_rate": 2.4307665537872605e-05, "epoch": 0.1979863034853204, "total_flos": 461831045598443808, "step": 14944 }, { "loss": 1.3118896484375, "learning_rate": 2.429481602441408e-05, "epoch": 0.1984102570259742, "total_flos": 462764814824475168, "step": 14976 }, { "loss": 1.28741455078125, "learning_rate": 2.428196651095555e-05, "epoch": 0.19883421056662798, "total_flos": 463796835750426816, "step": 15008 }, { "loss": 1.29742431640625, "learning_rate": 2.4269116997497022e-05, "epoch": 0.19925816410728178, "total_flos": 464787461290117632, "step": 15040 }, { "loss": 1.30596923828125, "learning_rate": 2.4256267484038495e-05, "epoch": 0.19968211764793556, "total_flos": 465752039752328352, "step": 15072 }, { "loss": 1.282470703125, "learning_rate": 2.424341797057997e-05, "epoch": 0.20010607118858936, "total_flos": 466736197765677600, "step": 15104 }, { "loss": 1.31597900390625, "learning_rate": 2.4230568457121442e-05, "epoch": 0.20053002472924314, "total_flos": 467701371497725728, "step": 15136 }, { "loss": 1.279541015625, "learning_rate": 2.4217718943662916e-05, "epoch": 0.20095397826989694, "total_flos": 468713458928311200, "step": 15168 }, { "loss": 1.30596923828125, "learning_rate": 2.4204869430204386e-05, "epoch": 0.20137793181055072, "total_flos": 469681432037432544, "step": 15200 }, { "loss": 1.3104248046875, "learning_rate": 2.419201991674586e-05, "epoch": 0.20180188535120452, "total_flos": 470658672049968672, "step": 15232 }, { "loss": 1.283203125, "learning_rate": 2.4179170403287333e-05, "epoch": 0.2022258388918583, "total_flos": 471666898270797984, "step": 15264 }, { "loss": 1.30633544921875, "learning_rate": 2.416632088982881e-05, "epoch": 0.2026497924325121, "total_flos": 472616353661463744, "step": 15296 }, { "loss": 1.3267822265625, "learning_rate": 2.4153471376370283e-05, "epoch": 0.20307374597316588, "total_flos": 473554885046194368, "step": 15328 }, { "loss": 1.31903076171875, "learning_rate": 2.4140621862911754e-05, "epoch": 0.20349769951381969, "total_flos": 474535487528893152, "step": 15360 }, { "loss": 1.30224609375, "learning_rate": 2.4127772349453227e-05, "epoch": 0.2039216530544735, "total_flos": 475495287744030624, "step": 15392 }, { "loss": 1.29998779296875, "learning_rate": 2.41149228359947e-05, "epoch": 0.20434560659512727, "total_flos": 476484722744046624, "step": 15424 }, { "loss": 1.306396484375, "learning_rate": 2.4102073322536174e-05, "epoch": 0.20476956013578107, "total_flos": 477450700894793952, "step": 15456 }, { "loss": 1.31109619140625, "learning_rate": 2.4089223809077647e-05, "epoch": 0.20519351367643485, "total_flos": 478449692388956448, "step": 15488 }, { "loss": 1.29925537109375, "learning_rate": 2.4076374295619118e-05, "epoch": 0.20561746721708865, "total_flos": 479427688555069824, "step": 15520 }, { "loss": 1.3070068359375, "learning_rate": 2.406352478216059e-05, "epoch": 0.20604142075774243, "total_flos": 480368054014434624, "step": 15552 }, { "loss": 1.3017578125, "learning_rate": 2.4050675268702065e-05, "epoch": 0.20646537429839623, "total_flos": 481325006587376928, "step": 15584 }, { "loss": 1.3134765625, "learning_rate": 2.4037825755243538e-05, "epoch": 0.20688932783905, "total_flos": 482302552279018752, "step": 15616 }, { "loss": 1.31292724609375, "learning_rate": 2.4024976241785015e-05, "epoch": 0.2073132813797038, "total_flos": 483313610053669248, "step": 15648 }, { "loss": 1.29290771484375, "learning_rate": 2.4012126728326485e-05, "epoch": 0.2077372349203576, "total_flos": 484315417013278944, "step": 15680 }, { "loss": 1.28466796875, "learning_rate": 2.399927721486796e-05, "epoch": 0.2081611884610114, "total_flos": 485285256373782432, "step": 15712 }, { "loss": 1.3101806640625, "learning_rate": 2.3986427701409432e-05, "epoch": 0.20858514200166517, "total_flos": 486284408751684768, "step": 15744 }, { "loss": 1.3211669921875, "learning_rate": 2.3973578187950906e-05, "epoch": 0.20900909554231897, "total_flos": 487223792820236544, "step": 15776 }, { "loss": 1.2574462890625, "learning_rate": 2.396072867449238e-05, "epoch": 0.20943304908297275, "total_flos": 488236330725293568, "step": 15808 }, { "loss": 1.290771484375, "learning_rate": 2.3947879161033852e-05, "epoch": 0.20985700262362655, "total_flos": 489225862255553472, "step": 15840 }, { "loss": 1.30029296875, "learning_rate": 2.3935029647575323e-05, "epoch": 0.21028095616428033, "total_flos": 490188365317520256, "step": 15872 }, { "loss": 1.30938720703125, "learning_rate": 2.3922180134116796e-05, "epoch": 0.21070490970493413, "total_flos": 491198232552495936, "step": 15904 }, { "loss": 1.2786865234375, "learning_rate": 2.390933062065827e-05, "epoch": 0.2111288632455879, "total_flos": 492214132927715616, "step": 15936 }, { "loss": 1.29925537109375, "learning_rate": 2.3896481107199743e-05, "epoch": 0.21155281678624172, "total_flos": 493244866783748544, "step": 15968 }, { "loss": 1.30157470703125, "learning_rate": 2.3883631593741217e-05, "epoch": 0.2119767703268955, "total_flos": 494249891418155040, "step": 16000 }, { "loss": 1.28961181640625, "learning_rate": 2.387078208028269e-05, "epoch": 0.2124007238675493, "total_flos": 495255849178252608, "step": 16032 }, { "loss": 1.27813720703125, "learning_rate": 2.3857932566824164e-05, "epoch": 0.2128246774082031, "total_flos": 496259972863716000, "step": 16064 }, { "loss": 1.306396484375, "learning_rate": 2.3845083053365637e-05, "epoch": 0.21324863094885688, "total_flos": 497205615309747552, "step": 16096 }, { "loss": 1.27362060546875, "learning_rate": 2.383223353990711e-05, "epoch": 0.21367258448951068, "total_flos": 498205250338869408, "step": 16128 }, { "loss": 1.26947021484375, "learning_rate": 2.3819384026448584e-05, "epoch": 0.21409653803016446, "total_flos": 499205738051812416, "step": 16160 }, { "loss": 1.28302001953125, "learning_rate": 2.3806534512990054e-05, "epoch": 0.21452049157081826, "total_flos": 500187901106787648, "step": 16192 }, { "loss": 1.29180908203125, "learning_rate": 2.3793684999531528e-05, "epoch": 0.21494444511147204, "total_flos": 501219375028023840, "step": 16224 }, { "loss": 1.28485107421875, "learning_rate": 2.3780835486073e-05, "epoch": 0.21536839865212584, "total_flos": 502184886615925632, "step": 16256 }, { "loss": 1.3216552734375, "learning_rate": 2.3767985972614475e-05, "epoch": 0.21579235219277962, "total_flos": 503172487541307456, "step": 16288 }, { "loss": 1.28765869140625, "learning_rate": 2.3755136459155948e-05, "epoch": 0.21621630573343342, "total_flos": 504114831870672288, "step": 16320 }, { "loss": 1.25604248046875, "learning_rate": 2.374228694569742e-05, "epoch": 0.2166402592740872, "total_flos": 505106824922151744, "step": 16352 }, { "loss": 1.31829833984375, "learning_rate": 2.372943743223889e-05, "epoch": 0.217064212814741, "total_flos": 506044358827695360, "step": 16384 }, { "loss": 1.28814697265625, "learning_rate": 2.371658791878037e-05, "epoch": 0.21748816635539478, "total_flos": 507053968648687296, "step": 16416 }, { "loss": 1.28387451171875, "learning_rate": 2.3703738405321842e-05, "epoch": 0.21791211989604858, "total_flos": 508032173963662464, "step": 16448 }, { "loss": 1.27642822265625, "learning_rate": 2.3690888891863316e-05, "epoch": 0.21833607343670236, "total_flos": 508973649520832160, "step": 16480 }, { "loss": 1.31304931640625, "learning_rate": 2.367803937840479e-05, "epoch": 0.21876002697735616, "total_flos": 509957807534181408, "step": 16512 }, { "loss": 1.30462646484375, "learning_rate": 2.366518986494626e-05, "epoch": 0.21918398051800994, "total_flos": 510968076978506688, "step": 16544 }, { "loss": 1.28765869140625, "learning_rate": 2.3652340351487733e-05, "epoch": 0.21960793405866375, "total_flos": 511961855839498368, "step": 16576 }, { "loss": 1.27349853515625, "learning_rate": 2.3639490838029206e-05, "epoch": 0.22003188759931752, "total_flos": 512953575388620096, "step": 16608 }, { "loss": 1.2945556640625, "learning_rate": 2.362664132457068e-05, "epoch": 0.22045584113997133, "total_flos": 513945906295953216, "step": 16640 }, { "loss": 1.2889404296875, "learning_rate": 2.3613791811112153e-05, "epoch": 0.22087979468062513, "total_flos": 514951542288571104, "step": 16672 }, { "loss": 1.27392578125, "learning_rate": 2.3600942297653623e-05, "epoch": 0.2213037482212789, "total_flos": 515974312399481952, "step": 16704 }, { "loss": 1.26470947265625, "learning_rate": 2.3588092784195097e-05, "epoch": 0.2217277017619327, "total_flos": 516980447131693344, "step": 16736 }, { "loss": 1.28167724609375, "learning_rate": 2.357524327073657e-05, "epoch": 0.2221516553025865, "total_flos": 518009121675856320, "step": 16768 }, { "loss": 1.2872314453125, "learning_rate": 2.3562393757278047e-05, "epoch": 0.2225756088432403, "total_flos": 518968600123514112, "step": 16800 }, { "loss": 1.27508544921875, "learning_rate": 2.354954424381952e-05, "epoch": 0.22299956238389407, "total_flos": 519976761990847488, "step": 16832 }, { "loss": 1.3150634765625, "learning_rate": 2.3536694730360994e-05, "epoch": 0.22342351592454787, "total_flos": 520960968269318688, "step": 16864 }, { "loss": 1.27899169921875, "learning_rate": 2.3523845216902464e-05, "epoch": 0.22384746946520165, "total_flos": 522018617975026848, "step": 16896 }, { "loss": 1.29693603515625, "learning_rate": 2.3510995703443938e-05, "epoch": 0.22427142300585545, "total_flos": 522962120667318528, "step": 16928 }, { "loss": 1.29986572265625, "learning_rate": 2.349814618998541e-05, "epoch": 0.22469537654650923, "total_flos": 523972406200017792, "step": 16960 }, { "loss": 1.26983642578125, "learning_rate": 2.3485296676526885e-05, "epoch": 0.22511933008716303, "total_flos": 524984847574830912, "step": 16992 }, { "loss": 1.2994384765625, "learning_rate": 2.3472447163068358e-05, "epoch": 0.2255432836278168, "total_flos": 525948058525252992, "step": 17024 }, { "loss": 1.2835693359375, "learning_rate": 2.3459597649609828e-05, "epoch": 0.2259672371684706, "total_flos": 526958907151041696, "step": 17056 }, { "loss": 1.3056640625, "learning_rate": 2.3446748136151302e-05, "epoch": 0.2263911907091244, "total_flos": 527968050409188096, "step": 17088 }, { "loss": 1.30126953125, "learning_rate": 2.3433898622692775e-05, "epoch": 0.2268151442497782, "total_flos": 528960863967740736, "step": 17120 }, { "loss": 1.26678466796875, "learning_rate": 2.342104910923425e-05, "epoch": 0.22723909779043197, "total_flos": 529913649651821184, "step": 17152 }, { "loss": 1.29595947265625, "learning_rate": 2.3408199595775726e-05, "epoch": 0.22766305133108578, "total_flos": 530936693265089760, "step": 17184 }, { "loss": 1.2796630859375, "learning_rate": 2.3395350082317196e-05, "epoch": 0.22808700487173955, "total_flos": 531946383527951616, "step": 17216 }, { "loss": 1.31231689453125, "learning_rate": 2.338250056885867e-05, "epoch": 0.22851095841239336, "total_flos": 532975347662846304, "step": 17248 }, { "loss": 1.2845458984375, "learning_rate": 2.3369651055400143e-05, "epoch": 0.22893491195304713, "total_flos": 533963334709203744, "step": 17280 }, { "loss": 1.3172607421875, "learning_rate": 2.3356801541941616e-05, "epoch": 0.22935886549370094, "total_flos": 534945224261821248, "step": 17312 }, { "loss": 1.2847900390625, "learning_rate": 2.334395202848309e-05, "epoch": 0.22978281903435474, "total_flos": 535971228135902880, "step": 17344 }, { "loss": 1.2901611328125, "learning_rate": 2.3331102515024563e-05, "epoch": 0.23020677257500852, "total_flos": 536970686192910912, "step": 17376 }, { "loss": 1.29278564453125, "learning_rate": 2.3318253001566033e-05, "epoch": 0.23063072611566232, "total_flos": 537971399143089696, "step": 17408 }, { "loss": 1.2916259765625, "learning_rate": 2.3305403488107507e-05, "epoch": 0.2310546796563161, "total_flos": 538967655613674912, "step": 17440 }, { "loss": 1.28338623046875, "learning_rate": 2.329255397464898e-05, "epoch": 0.2314786331969699, "total_flos": 539928839428975008, "step": 17472 }, { "loss": 1.282470703125, "learning_rate": 2.3279704461190454e-05, "epoch": 0.23190258673762368, "total_flos": 540915458963543808, "step": 17504 }, { "loss": 1.28363037109375, "learning_rate": 2.326685494773193e-05, "epoch": 0.23232654027827748, "total_flos": 541899053883803616, "step": 17536 }, { "loss": 1.27911376953125, "learning_rate": 2.32540054342734e-05, "epoch": 0.23275049381893126, "total_flos": 542943446769348960, "step": 17568 }, { "loss": 1.3072509765625, "learning_rate": 2.3241155920814874e-05, "epoch": 0.23317444735958506, "total_flos": 543949436706194496, "step": 17600 }, { "loss": 1.32867431640625, "learning_rate": 2.3228306407356348e-05, "epoch": 0.23359840090023884, "total_flos": 544904925237104256, "step": 17632 }, { "loss": 1.2962646484375, "learning_rate": 2.321545689389782e-05, "epoch": 0.23402235444089264, "total_flos": 545922675775332096, "step": 17664 }, { "loss": 1.26953125, "learning_rate": 2.3202607380439295e-05, "epoch": 0.23444630798154642, "total_flos": 546873675649900320, "step": 17696 }, { "loss": 1.28265380859375, "learning_rate": 2.3189757866980765e-05, "epoch": 0.23487026152220022, "total_flos": 547857624514387776, "step": 17728 }, { "loss": 1.26153564453125, "learning_rate": 2.3176908353522238e-05, "epoch": 0.235294215062854, "total_flos": 548848507468062336, "step": 17760 }, { "loss": 1.29913330078125, "learning_rate": 2.3164058840063712e-05, "epoch": 0.2357181686035078, "total_flos": 549823237694256960, "step": 17792 }, { "loss": 1.30792236328125, "learning_rate": 2.3151209326605185e-05, "epoch": 0.23614212214416158, "total_flos": 550782330020939136, "step": 17824 }, { "loss": 1.3017578125, "learning_rate": 2.313835981314666e-05, "epoch": 0.23656607568481539, "total_flos": 551801335452337728, "step": 17856 }, { "loss": 1.265380859375, "learning_rate": 2.3125510299688132e-05, "epoch": 0.23699002922546916, "total_flos": 552833806852760928, "step": 17888 }, { "loss": 1.28216552734375, "learning_rate": 2.3112660786229606e-05, "epoch": 0.23741398276612297, "total_flos": 553864218941314176, "step": 17920 }, { "loss": 1.28021240234375, "learning_rate": 2.309981127277108e-05, "epoch": 0.23783793630677677, "total_flos": 554840059265313696, "step": 17952 }, { "loss": 1.28509521484375, "learning_rate": 2.3086961759312553e-05, "epoch": 0.23826188984743055, "total_flos": 555822946297118208, "step": 17984 }, { "loss": 1.3031005859375, "learning_rate": 2.3074112245854026e-05, "epoch": 0.23868584338808435, "total_flos": 556782569540141856, "step": 18016 }, { "loss": 1.271240234375, "learning_rate": 2.30612627323955e-05, "epoch": 0.23910979692873813, "total_flos": 557789605221296352, "step": 18048 }, { "loss": 1.314453125, "learning_rate": 2.304841321893697e-05, "epoch": 0.23953375046939193, "total_flos": 558778091007247296, "step": 18080 }, { "loss": 1.2823486328125, "learning_rate": 2.3035563705478443e-05, "epoch": 0.2399577040100457, "total_flos": 559758902638807872, "step": 18112 }, { "loss": 1.28179931640625, "learning_rate": 2.3022714192019917e-05, "epoch": 0.2403816575506995, "total_flos": 560792805904515648, "step": 18144 }, { "loss": 1.3070068359375, "learning_rate": 2.300986467856139e-05, "epoch": 0.2408056110913533, "total_flos": 561801048213718944, "step": 18176 }, { "loss": 1.2728271484375, "learning_rate": 2.2997015165102864e-05, "epoch": 0.2412295646320071, "total_flos": 562812701258206848, "step": 18208 }, { "loss": 1.29901123046875, "learning_rate": 2.2984165651644334e-05, "epoch": 0.24165351817266087, "total_flos": 563785887000499008, "step": 18240 }, { "loss": 1.31170654296875, "learning_rate": 2.2971316138185807e-05, "epoch": 0.24207747171331467, "total_flos": 564756498602953728, "step": 18272 }, { "loss": 1.26641845703125, "learning_rate": 2.2958466624727284e-05, "epoch": 0.24250142525396845, "total_flos": 565762585070043168, "step": 18304 }, { "loss": 1.29119873046875, "learning_rate": 2.2945617111268758e-05, "epoch": 0.24292537879462225, "total_flos": 566761737447945504, "step": 18336 }, { "loss": 1.2784423828125, "learning_rate": 2.293276759781023e-05, "epoch": 0.24334933233527603, "total_flos": 567779922372270912, "step": 18368 }, { "loss": 1.307373046875, "learning_rate": 2.2919918084351705e-05, "epoch": 0.24377328587592983, "total_flos": 568728444637245600, "step": 18400 }, { "loss": 1.3076171875, "learning_rate": 2.2907068570893175e-05, "epoch": 0.2441972394165836, "total_flos": 569703721868155680, "step": 18432 }, { "loss": 1.27911376953125, "learning_rate": 2.2894219057434648e-05, "epoch": 0.24462119295723742, "total_flos": 570709373949147552, "step": 18464 }, { "loss": 1.30059814453125, "learning_rate": 2.2881369543976122e-05, "epoch": 0.2450451464978912, "total_flos": 571722281886806208, "step": 18496 }, { "loss": 1.27032470703125, "learning_rate": 2.2868520030517595e-05, "epoch": 0.245469100038545, "total_flos": 572694019675439808, "step": 18528 }, { "loss": 1.26513671875, "learning_rate": 2.285567051705907e-05, "epoch": 0.24589305357919877, "total_flos": 573675877051309344, "step": 18560 }, { "loss": 1.281005859375, "learning_rate": 2.284282100360054e-05, "epoch": 0.24631700711985258, "total_flos": 574667516158561152, "step": 18592 }, { "loss": 1.28875732421875, "learning_rate": 2.2829971490142012e-05, "epoch": 0.24674096066050638, "total_flos": 575673329123292864, "step": 18624 }, { "loss": 1.2750244140625, "learning_rate": 2.2817121976683486e-05, "epoch": 0.24716491420116016, "total_flos": 576666866658674784, "step": 18656 }, { "loss": 1.2835693359375, "learning_rate": 2.2804272463224963e-05, "epoch": 0.24758886774181396, "total_flos": 577663509250235616, "step": 18688 }, { "loss": 1.27203369140625, "learning_rate": 2.2791422949766436e-05, "epoch": 0.24801282128246774, "total_flos": 578629471312608960, "step": 18720 }, { "loss": 1.2886962890625, "learning_rate": 2.2778573436307906e-05, "epoch": 0.24843677482312154, "total_flos": 579644583357503424, "step": 18752 }, { "loss": 1.27392578125, "learning_rate": 2.276572392284938e-05, "epoch": 0.24886072836377532, "total_flos": 580662703928332896, "step": 18784 }, { "loss": 1.28656005859375, "learning_rate": 2.2752874409390853e-05, "epoch": 0.24928468190442912, "total_flos": 581603085476071680, "step": 18816 }, { "loss": 1.26849365234375, "learning_rate": 2.2740024895932327e-05, "epoch": 0.2497086354450829, "total_flos": 582561292942184736, "step": 18848 }, { "loss": 1.27020263671875, "learning_rate": 2.27271753824738e-05, "epoch": 0.2501325889857367, "total_flos": 583576179749843424, "step": 18880 }, { "loss": 1.26690673828125, "learning_rate": 2.2714325869015274e-05, "epoch": 0.2505565425263905, "total_flos": 584551714394737248, "step": 18912 }, { "loss": 1.3138427734375, "learning_rate": 2.2701476355556744e-05, "epoch": 0.25098049606704426, "total_flos": 585552282549550176, "step": 18944 }, { "loss": 1.33203125, "learning_rate": 2.2688626842098217e-05, "epoch": 0.2514044496076981, "total_flos": 586516426625663328, "step": 18976 }, { "loss": 1.29974365234375, "learning_rate": 2.267577732863969e-05, "epoch": 0.25182840314835186, "total_flos": 587525360734947936, "step": 19008 }, { "loss": 1.29583740234375, "learning_rate": 2.2662927815181168e-05, "epoch": 0.25225235668900564, "total_flos": 588528358234232448, "step": 19040 }, { "loss": 1.27783203125, "learning_rate": 2.265007830172264e-05, "epoch": 0.2526763102296594, "total_flos": 589488480216849600, "step": 19072 }, { "loss": 1.27490234375, "learning_rate": 2.263722878826411e-05, "epoch": 0.25310026377031325, "total_flos": 590524297999061472, "step": 19104 }, { "loss": 1.28741455078125, "learning_rate": 2.2624379274805585e-05, "epoch": 0.253524217310967, "total_flos": 591500878388264256, "step": 19136 }, { "loss": 1.27239990234375, "learning_rate": 2.261152976134706e-05, "epoch": 0.2539481708516208, "total_flos": 592515813461044896, "step": 19168 }, { "loss": 1.29547119140625, "learning_rate": 2.2598680247888532e-05, "epoch": 0.2543721243922746, "total_flos": 593486215914637824, "step": 19200 }, { "loss": 1.293212890625, "learning_rate": 2.2585830734430005e-05, "epoch": 0.2547960779329284, "total_flos": 594478595087092896, "step": 19232 }, { "loss": 1.25732421875, "learning_rate": 2.2572981220971475e-05, "epoch": 0.2552200314735822, "total_flos": 595512948827272224, "step": 19264 }, { "loss": 1.288818359375, "learning_rate": 2.256013170751295e-05, "epoch": 0.25564398501423596, "total_flos": 596496833338263744, "step": 19296 }, { "loss": 1.3033447265625, "learning_rate": 2.2547282194054422e-05, "epoch": 0.2560679385548898, "total_flos": 597470598262019328, "step": 19328 }, { "loss": 1.2994384765625, "learning_rate": 2.2534432680595896e-05, "epoch": 0.25649189209554357, "total_flos": 598447049944230240, "step": 19360 }, { "loss": 1.23681640625, "learning_rate": 2.252158316713737e-05, "epoch": 0.25691584563619735, "total_flos": 599453458178799360, "step": 19392 }, { "loss": 1.27545166015625, "learning_rate": 2.2508733653678846e-05, "epoch": 0.2573397991768511, "total_flos": 600499524255239040, "step": 19424 }, { "loss": 1.2779541015625, "learning_rate": 2.2495884140220316e-05, "epoch": 0.25776375271750496, "total_flos": 601472436495173472, "step": 19456 }, { "loss": 1.27691650390625, "learning_rate": 2.248303462676179e-05, "epoch": 0.25818770625815873, "total_flos": 602462788532506560, "step": 19488 }, { "loss": 1.296630859375, "learning_rate": 2.2470185113303263e-05, "epoch": 0.2586116597988125, "total_flos": 603444710261872032, "step": 19520 }, { "loss": 1.26397705078125, "learning_rate": 2.2457335599844737e-05, "epoch": 0.2590356133394663, "total_flos": 604406795026115232, "step": 19552 }, { "loss": 1.27105712890625, "learning_rate": 2.244448608638621e-05, "epoch": 0.2594595668801201, "total_flos": 605398241072879232, "step": 19584 }, { "loss": 1.304443359375, "learning_rate": 2.243163657292768e-05, "epoch": 0.2598835204207739, "total_flos": 606343561751431104, "step": 19616 }, { "loss": 1.31561279296875, "learning_rate": 2.2418787059469154e-05, "epoch": 0.26030747396142767, "total_flos": 607320994824455040, "step": 19648 }, { "loss": 1.27886962890625, "learning_rate": 2.2405937546010627e-05, "epoch": 0.26073142750208145, "total_flos": 608347754852113920, "step": 19680 }, { "loss": 1.31060791015625, "learning_rate": 2.23930880325521e-05, "epoch": 0.2611553810427353, "total_flos": 609325107483267936, "step": 19712 }, { "loss": 1.28106689453125, "learning_rate": 2.2380238519093574e-05, "epoch": 0.26157933458338906, "total_flos": 610295123815885248, "step": 19744 }, { "loss": 1.27911376953125, "learning_rate": 2.2367389005635044e-05, "epoch": 0.26200328812404283, "total_flos": 611292844328503008, "step": 19776 }, { "loss": 1.26446533203125, "learning_rate": 2.235453949217652e-05, "epoch": 0.2624272416646966, "total_flos": 612282456300632832, "step": 19808 }, { "loss": 1.27880859375, "learning_rate": 2.2341689978717995e-05, "epoch": 0.26285119520535044, "total_flos": 613274079319510656, "step": 19840 }, { "loss": 1.2960205078125, "learning_rate": 2.232884046525947e-05, "epoch": 0.2632751487460042, "total_flos": 614233895623022112, "step": 19872 }, { "loss": 1.28729248046875, "learning_rate": 2.2315990951800942e-05, "epoch": 0.263699102286658, "total_flos": 615194210666127072, "step": 19904 }, { "loss": 1.28118896484375, "learning_rate": 2.2303141438342415e-05, "epoch": 0.2641230558273118, "total_flos": 616186589838582144, "step": 19936 }, { "loss": 1.29193115234375, "learning_rate": 2.2290291924883885e-05, "epoch": 0.2645470093679656, "total_flos": 617160676529817408, "step": 19968 }, { "loss": 1.26654052734375, "learning_rate": 2.227744241142536e-05, "epoch": 0.2649709629086194, "total_flos": 618129872355361536, "step": 20000 }, { "loss": 1.27825927734375, "learning_rate": 2.2264592897966832e-05, "epoch": 0.26539491644927315, "total_flos": 619126177091068704, "step": 20032 }, { "loss": 1.2628173828125, "learning_rate": 2.2251743384508306e-05, "epoch": 0.265818869989927, "total_flos": 620148158871654336, "step": 20064 }, { "loss": 1.25830078125, "learning_rate": 2.223889387104978e-05, "epoch": 0.26624282353058076, "total_flos": 621189028403297184, "step": 20096 }, { "loss": 1.2913818359375, "learning_rate": 2.222604435759125e-05, "epoch": 0.26666677707123454, "total_flos": 622168488611443104, "step": 20128 }, { "loss": 1.28985595703125, "learning_rate": 2.2213194844132723e-05, "epoch": 0.2670907306118883, "total_flos": 623149010652271968, "step": 20160 }, { "loss": 1.3016357421875, "learning_rate": 2.22003453306742e-05, "epoch": 0.26751468415254215, "total_flos": 624152587333019904, "step": 20192 }, { "loss": 1.28729248046875, "learning_rate": 2.2187495817215673e-05, "epoch": 0.2679386376931959, "total_flos": 625140284788645632, "step": 20224 }, { "loss": 1.2872314453125, "learning_rate": 2.2174646303757147e-05, "epoch": 0.2683625912338497, "total_flos": 626104782808986432, "step": 20256 }, { "loss": 1.28485107421875, "learning_rate": 2.2161796790298617e-05, "epoch": 0.2687865447745035, "total_flos": 627111818490140928, "step": 20288 }, { "loss": 1.28790283203125, "learning_rate": 2.214894727684009e-05, "epoch": 0.2692104983151573, "total_flos": 628082623153083456, "step": 20320 }, { "loss": 1.2774658203125, "learning_rate": 2.2136097763381564e-05, "epoch": 0.2696344518558111, "total_flos": 629051111090172288, "step": 20352 }, { "loss": 1.290771484375, "learning_rate": 2.2123248249923037e-05, "epoch": 0.27005840539646486, "total_flos": 630032308842708480, "step": 20384 }, { "loss": 1.25970458984375, "learning_rate": 2.211039873646451e-05, "epoch": 0.27048235893711864, "total_flos": 631045715519960640, "step": 20416 }, { "loss": 1.25927734375, "learning_rate": 2.2097549223005984e-05, "epoch": 0.27090631247777247, "total_flos": 632067697300546272, "step": 20448 }, { "loss": 1.271484375, "learning_rate": 2.2084699709547455e-05, "epoch": 0.27133026601842625, "total_flos": 633042234466253088, "step": 20480 }, { "eval_loss": 1.134141172003746, "epoch": 0.27133026601842625, "total_flos": 633042234466253088, "step": 20480 }, { "loss": 1.27313232421875, "learning_rate": 2.2071850196088928e-05, "epoch": 0.27175421955908, "total_flos": 634021598144155104, "step": 20512 }, { "loss": 1.3248291015625, "learning_rate": 2.2059000682630405e-05, "epoch": 0.27217817309973386, "total_flos": 634956541821487296, "step": 20544 }, { "loss": 1.2603759765625, "learning_rate": 2.204615116917188e-05, "epoch": 0.27260212664038763, "total_flos": 635934087513129120, "step": 20576 }, { "loss": 1.25567626953125, "learning_rate": 2.2033301655713352e-05, "epoch": 0.2730260801810414, "total_flos": 636935379644771328, "step": 20608 }, { "loss": 1.2843017578125, "learning_rate": 2.2020452142254822e-05, "epoch": 0.2734500337216952, "total_flos": 637874795890071072, "step": 20640 }, { "loss": 1.259521484375, "learning_rate": 2.2007602628796295e-05, "epoch": 0.273873987262349, "total_flos": 638885660604233760, "step": 20672 }, { "loss": 1.266357421875, "learning_rate": 2.199475311533777e-05, "epoch": 0.2742979408030028, "total_flos": 639873390236607456, "step": 20704 }, { "loss": 1.24664306640625, "learning_rate": 2.1981903601879242e-05, "epoch": 0.27472189434365657, "total_flos": 640872188670282144, "step": 20736 }, { "loss": 1.2713623046875, "learning_rate": 2.1969054088420716e-05, "epoch": 0.27514584788431035, "total_flos": 641900203591111776, "step": 20768 }, { "loss": 1.27197265625, "learning_rate": 2.1956204574962186e-05, "epoch": 0.2755698014249642, "total_flos": 642905549992997952, "step": 20800 }, { "loss": 1.3167724609375, "learning_rate": 2.194335506150366e-05, "epoch": 0.27599375496561795, "total_flos": 643869710157485088, "step": 20832 }, { "loss": 1.280517578125, "learning_rate": 2.1930505548045133e-05, "epoch": 0.27641770850627173, "total_flos": 644866931930509344, "step": 20864 }, { "loss": 1.277587890625, "learning_rate": 2.1917656034586607e-05, "epoch": 0.2768416620469255, "total_flos": 645904760759469216, "step": 20896 }, { "loss": 1.30633544921875, "learning_rate": 2.1904806521128083e-05, "epoch": 0.27726561558757934, "total_flos": 646892506480216896, "step": 20928 }, { "loss": 1.2850341796875, "learning_rate": 2.1891957007669557e-05, "epoch": 0.2776895691282331, "total_flos": 647909484776493504, "step": 20960 }, { "loss": 1.246337890625, "learning_rate": 2.1879107494211027e-05, "epoch": 0.2781135226688869, "total_flos": 648901735241956704, "step": 20992 }, { "loss": 1.26080322265625, "learning_rate": 2.18662579807525e-05, "epoch": 0.27853747620954067, "total_flos": 649904153559777792, "step": 21024 }, { "loss": 1.3001708984375, "learning_rate": 2.1853408467293974e-05, "epoch": 0.2789614297501945, "total_flos": 650853560685321600, "step": 21056 }, { "loss": 1.2822265625, "learning_rate": 2.1840558953835447e-05, "epoch": 0.2793853832908483, "total_flos": 651831331614199200, "step": 21088 }, { "loss": 1.2686767578125, "learning_rate": 2.182770944037692e-05, "epoch": 0.27980933683150205, "total_flos": 652897122037143264, "step": 21120 }, { "loss": 1.28118896484375, "learning_rate": 2.181485992691839e-05, "epoch": 0.28023329037215583, "total_flos": 653907922397810016, "step": 21152 }, { "loss": 1.24957275390625, "learning_rate": 2.1802010413459865e-05, "epoch": 0.28065724391280966, "total_flos": 654905675087175744, "step": 21184 }, { "loss": 1.27056884765625, "learning_rate": 2.1789160900001338e-05, "epoch": 0.28108119745346344, "total_flos": 655883269043939520, "step": 21216 }, { "loss": 1.25592041015625, "learning_rate": 2.177631138654281e-05, "epoch": 0.2815051509941172, "total_flos": 656840125086637920, "step": 21248 }, { "loss": 1.27642822265625, "learning_rate": 2.1763461873084285e-05, "epoch": 0.28192910453477105, "total_flos": 657846211553727360, "step": 21280 }, { "loss": 1.27703857421875, "learning_rate": 2.175061235962576e-05, "epoch": 0.2823530580754248, "total_flos": 658823435477889504, "step": 21312 }, { "loss": 1.2554931640625, "learning_rate": 2.1737762846167232e-05, "epoch": 0.2827770116160786, "total_flos": 659814704552539680, "step": 21344 }, { "loss": 1.29345703125, "learning_rate": 2.1724913332708705e-05, "epoch": 0.2832009651567324, "total_flos": 660788903862392832, "step": 21376 }, { "loss": 1.26690673828125, "learning_rate": 2.171206381925018e-05, "epoch": 0.2836249186973862, "total_flos": 661746580412164416, "step": 21408 }, { "loss": 1.2899169921875, "learning_rate": 2.1699214305791652e-05, "epoch": 0.28404887223804, "total_flos": 662700524459171712, "step": 21440 }, { "loss": 1.25213623046875, "learning_rate": 2.1686364792333126e-05, "epoch": 0.28447282577869376, "total_flos": 663743726805042240, "step": 21472 }, { "loss": 1.2615966796875, "learning_rate": 2.1673515278874596e-05, "epoch": 0.28489677931934754, "total_flos": 664719776277903552, "step": 21504 }, { "loss": 1.287841796875, "learning_rate": 2.166066576541607e-05, "epoch": 0.28532073286000137, "total_flos": 665660704830357792, "step": 21536 }, { "loss": 1.277099609375, "learning_rate": 2.1647816251957543e-05, "epoch": 0.28574468640065515, "total_flos": 666683957592488160, "step": 21568 }, { "loss": 1.28997802734375, "learning_rate": 2.1634966738499017e-05, "epoch": 0.2861686399413089, "total_flos": 667681034570146560, "step": 21600 }, { "loss": 1.26068115234375, "learning_rate": 2.162211722504049e-05, "epoch": 0.2865925934819627, "total_flos": 668658950294390016, "step": 21632 }, { "loss": 1.27569580078125, "learning_rate": 2.160926771158196e-05, "epoch": 0.28701654702261653, "total_flos": 669657636109446816, "step": 21664 }, { "loss": 1.26239013671875, "learning_rate": 2.1596418198123437e-05, "epoch": 0.2874405005632703, "total_flos": 670656321924503616, "step": 21696 }, { "loss": 1.262451171875, "learning_rate": 2.158356868466491e-05, "epoch": 0.2878644541039241, "total_flos": 671658418474845024, "step": 21728 }, { "loss": 1.32574462890625, "learning_rate": 2.1570719171206384e-05, "epoch": 0.28828840764457786, "total_flos": 672656364224698560, "step": 21760 }, { "loss": 1.27935791015625, "learning_rate": 2.1557869657747857e-05, "epoch": 0.2887123611852317, "total_flos": 673621682752112544, "step": 21792 }, { "loss": 1.2518310546875, "learning_rate": 2.1545020144289328e-05, "epoch": 0.28913631472588547, "total_flos": 674622653116275072, "step": 21824 }, { "loss": 1.271728515625, "learning_rate": 2.15321706308308e-05, "epoch": 0.28956026826653924, "total_flos": 675589789629949248, "step": 21856 }, { "loss": 1.24676513671875, "learning_rate": 2.1519321117372275e-05, "epoch": 0.2899842218071931, "total_flos": 676646393591348448, "step": 21888 }, { "loss": 1.26788330078125, "learning_rate": 2.1506471603913748e-05, "epoch": 0.29040817534784685, "total_flos": 677618774914941408, "step": 21920 }, { "loss": 1.28326416015625, "learning_rate": 2.149362209045522e-05, "epoch": 0.29083212888850063, "total_flos": 678605716216989888, "step": 21952 }, { "loss": 1.27398681640625, "learning_rate": 2.1480772576996695e-05, "epoch": 0.2912560824291544, "total_flos": 679560689919932160, "step": 21984 }, { "loss": 1.263427734375, "learning_rate": 2.1467923063538165e-05, "epoch": 0.29168003596980824, "total_flos": 680567548628972832, "step": 22016 }, { "loss": 1.24554443359375, "learning_rate": 2.145507355007964e-05, "epoch": 0.292103989510462, "total_flos": 681555600028826208, "step": 22048 }, { "loss": 1.26971435546875, "learning_rate": 2.1442224036621116e-05, "epoch": 0.2925279430511158, "total_flos": 682529268422337888, "step": 22080 }, { "loss": 1.26116943359375, "learning_rate": 2.142937452316259e-05, "epoch": 0.29295189659176957, "total_flos": 683504674360239840, "step": 22112 }, { "loss": 1.279541015625, "learning_rate": 2.1416525009704063e-05, "epoch": 0.2933758501324234, "total_flos": 684496361732613600, "step": 22144 }, { "loss": 1.28521728515625, "learning_rate": 2.1403675496245533e-05, "epoch": 0.2937998036730772, "total_flos": 685510975037914560, "step": 22176 }, { "loss": 1.27117919921875, "learning_rate": 2.1390825982787006e-05, "epoch": 0.29422375721373095, "total_flos": 686474813434922016, "step": 22208 }, { "loss": 1.2662353515625, "learning_rate": 2.137797646932848e-05, "epoch": 0.29464771075438473, "total_flos": 687439263190140864, "step": 22240 }, { "loss": 1.29541015625, "learning_rate": 2.1365126955869953e-05, "epoch": 0.29507166429503856, "total_flos": 688422777668530752, "step": 22272 }, { "loss": 1.2677001953125, "learning_rate": 2.1352277442411427e-05, "epoch": 0.29549561783569234, "total_flos": 689425389046839648, "step": 22304 }, { "loss": 1.30572509765625, "learning_rate": 2.1339427928952897e-05, "epoch": 0.2959195713763461, "total_flos": 690391399374334944, "step": 22336 }, { "loss": 1.2750244140625, "learning_rate": 2.132657841549437e-05, "epoch": 0.2963435249169999, "total_flos": 691333985029309536, "step": 22368 }, { "loss": 1.23980712890625, "learning_rate": 2.1313728902035844e-05, "epoch": 0.2967674784576537, "total_flos": 692319832321927104, "step": 22400 }, { "loss": 1.2725830078125, "learning_rate": 2.130087938857732e-05, "epoch": 0.2971914319983075, "total_flos": 693270478252267680, "step": 22432 }, { "loss": 1.29150390625, "learning_rate": 2.1288029875118794e-05, "epoch": 0.2976153855389613, "total_flos": 694250067167405472, "step": 22464 }, { "loss": 1.25762939453125, "learning_rate": 2.1275180361660264e-05, "epoch": 0.2980393390796151, "total_flos": 695228481631242432, "step": 22496 }, { "loss": 1.25909423828125, "learning_rate": 2.1262330848201738e-05, "epoch": 0.2984632926202689, "total_flos": 696217514421908832, "step": 22528 }, { "loss": 1.22955322265625, "learning_rate": 2.124948133474321e-05, "epoch": 0.29888724616092266, "total_flos": 697225595847372288, "step": 22560 }, { "loss": 1.264404296875, "learning_rate": 2.1236631821284685e-05, "epoch": 0.29931119970157644, "total_flos": 698201822292347424, "step": 22592 }, { "loss": 1.26226806640625, "learning_rate": 2.1223782307826158e-05, "epoch": 0.29973515324223027, "total_flos": 699175619392850976, "step": 22624 }, { "loss": 1.271728515625, "learning_rate": 2.121093279436763e-05, "epoch": 0.30015910678288404, "total_flos": 700134566924167296, "step": 22656 }, { "loss": 1.28521728515625, "learning_rate": 2.1198083280909102e-05, "epoch": 0.3005830603235378, "total_flos": 701102105647191072, "step": 22688 }, { "loss": 1.2945556640625, "learning_rate": 2.1185233767450575e-05, "epoch": 0.3010070138641916, "total_flos": 702090301842410304, "step": 22720 }, { "loss": 1.26324462890625, "learning_rate": 2.117238425399205e-05, "epoch": 0.30143096740484543, "total_flos": 703079640312182400, "step": 22752 }, { "loss": 1.24737548828125, "learning_rate": 2.1159534740533522e-05, "epoch": 0.3018549209454992, "total_flos": 704057266445694144, "step": 22784 }, { "loss": 1.255615234375, "learning_rate": 2.1146685227075e-05, "epoch": 0.302278874486153, "total_flos": 705013446776685216, "step": 22816 }, { "loss": 1.26055908203125, "learning_rate": 2.113383571361647e-05, "epoch": 0.30270282802680676, "total_flos": 705978733127351232, "step": 22848 }, { "loss": 1.26666259765625, "learning_rate": 2.1120986200157943e-05, "epoch": 0.3031267815674606, "total_flos": 707001985889481600, "step": 22880 }, { "loss": 1.25506591796875, "learning_rate": 2.1108136686699416e-05, "epoch": 0.30355073510811437, "total_flos": 708019784692831392, "step": 22912 }, { "loss": 1.27850341796875, "learning_rate": 2.109528717324089e-05, "epoch": 0.30397468864876814, "total_flos": 708990251499920256, "step": 22944 }, { "loss": 1.283447265625, "learning_rate": 2.1082437659782363e-05, "epoch": 0.3043986421894219, "total_flos": 710012265457253856, "step": 22976 }, { "loss": 1.2620849609375, "learning_rate": 2.1069588146323833e-05, "epoch": 0.30482259573007575, "total_flos": 710996198233367328, "step": 23008 }, { "loss": 1.2623291015625, "learning_rate": 2.1056738632865307e-05, "epoch": 0.30524654927072953, "total_flos": 711959956188504864, "step": 23040 }, { "loss": 1.24066162109375, "learning_rate": 2.104388911940678e-05, "epoch": 0.3056705028113833, "total_flos": 712953799402992480, "step": 23072 }, { "loss": 1.24749755859375, "learning_rate": 2.1031039605948254e-05, "epoch": 0.30609445635203714, "total_flos": 713961736033090080, "step": 23104 }, { "loss": 1.2825927734375, "learning_rate": 2.1018190092489727e-05, "epoch": 0.3065184098926909, "total_flos": 714905367432373632, "step": 23136 }, { "loss": 1.28759765625, "learning_rate": 2.10053405790312e-05, "epoch": 0.3069423634333447, "total_flos": 715897939665316512, "step": 23168 }, { "loss": 1.298095703125, "learning_rate": 2.0992491065572674e-05, "epoch": 0.30736631697399847, "total_flos": 716863644313706112, "step": 23200 }, { "loss": 1.25714111328125, "learning_rate": 2.0979641552114148e-05, "epoch": 0.3077902705146523, "total_flos": 717864083761527168, "step": 23232 }, { "loss": 1.24261474609375, "learning_rate": 2.096679203865562e-05, "epoch": 0.3082142240553061, "total_flos": 718871586005527200, "step": 23264 }, { "loss": 1.2926025390625, "learning_rate": 2.0953942525197095e-05, "epoch": 0.30863817759595985, "total_flos": 719836116202615968, "step": 23296 }, { "loss": 1.27813720703125, "learning_rate": 2.0941093011738568e-05, "epoch": 0.3090621311366136, "total_flos": 720781871267265408, "step": 23328 }, { "loss": 1.28997802734375, "learning_rate": 2.0928243498280038e-05, "epoch": 0.30948608467726746, "total_flos": 721792864688419968, "step": 23360 }, { "loss": 1.26983642578125, "learning_rate": 2.0915393984821512e-05, "epoch": 0.30991003821792124, "total_flos": 722794623382907712, "step": 23392 }, { "loss": 1.29302978515625, "learning_rate": 2.0902544471362985e-05, "epoch": 0.310333991758575, "total_flos": 723793824025932000, "step": 23424 }, { "loss": 1.27264404296875, "learning_rate": 2.088969495790446e-05, "epoch": 0.3107579452992288, "total_flos": 724775359634321856, "step": 23456 }, { "loss": 1.2916259765625, "learning_rate": 2.0876845444445932e-05, "epoch": 0.3111818988398826, "total_flos": 725763877597020768, "step": 23488 }, { "loss": 1.26123046875, "learning_rate": 2.0863995930987402e-05, "epoch": 0.3116058523805364, "total_flos": 726758798732565312, "step": 23520 }, { "loss": 1.25982666015625, "learning_rate": 2.0851146417528876e-05, "epoch": 0.3120298059211902, "total_flos": 727763469422744160, "step": 23552 }, { "loss": 1.2640380859375, "learning_rate": 2.0838296904070353e-05, "epoch": 0.31245375946184395, "total_flos": 728785612087069632, "step": 23584 }, { "loss": 1.297607421875, "learning_rate": 2.0825447390611826e-05, "epoch": 0.3128777130024978, "total_flos": 729770477988874176, "step": 23616 }, { "loss": 1.26104736328125, "learning_rate": 2.08125978771533e-05, "epoch": 0.31330166654315156, "total_flos": 730782131033362080, "step": 23648 }, { "loss": 1.2725830078125, "learning_rate": 2.0799748363694773e-05, "epoch": 0.31372562008380533, "total_flos": 731780205490207488, "step": 23680 }, { "loss": 1.30377197265625, "learning_rate": 2.0786898850236243e-05, "epoch": 0.3141495736244591, "total_flos": 732747181120141824, "step": 23712 }, { "loss": 1.2537841796875, "learning_rate": 2.0774049336777717e-05, "epoch": 0.31457352716511294, "total_flos": 733782419720890272, "step": 23744 }, { "loss": 1.2733154296875, "learning_rate": 2.076119982331919e-05, "epoch": 0.3149974807057667, "total_flos": 734762861319849216, "step": 23776 }, { "loss": 1.281005859375, "learning_rate": 2.0748350309860664e-05, "epoch": 0.3154214342464205, "total_flos": 735709372538075904, "step": 23808 }, { "loss": 1.2520751953125, "learning_rate": 2.0735500796402137e-05, "epoch": 0.3158453877870743, "total_flos": 736698743184595968, "step": 23840 }, { "loss": 1.2628173828125, "learning_rate": 2.0722651282943607e-05, "epoch": 0.3162693413277281, "total_flos": 737690173142985984, "step": 23872 }, { "loss": 1.2674560546875, "learning_rate": 2.070980176948508e-05, "epoch": 0.3166932948683819, "total_flos": 738708647658043104, "step": 23904 }, { "loss": 1.2742919921875, "learning_rate": 2.0696952256026558e-05, "epoch": 0.31711724840903566, "total_flos": 739746717812612736, "step": 23936 }, { "loss": 1.26507568359375, "learning_rate": 2.068410274256803e-05, "epoch": 0.3175412019496895, "total_flos": 740731969835392896, "step": 23968 }, { "loss": 1.253173828125, "learning_rate": 2.0671253229109505e-05, "epoch": 0.31796515549034327, "total_flos": 741721099156303200, "step": 24000 }, { "loss": 1.2742919921875, "learning_rate": 2.0658403715650975e-05, "epoch": 0.31838910903099704, "total_flos": 742710984630790752, "step": 24032 }, { "loss": 1.2437744140625, "learning_rate": 2.0645554202192448e-05, "epoch": 0.3188130625716508, "total_flos": 743680647019180416, "step": 24064 }, { "loss": 1.24658203125, "learning_rate": 2.0632704688733922e-05, "epoch": 0.31923701611230465, "total_flos": 744660091138952352, "step": 24096 }, { "loss": 1.26507568359375, "learning_rate": 2.0619855175275395e-05, "epoch": 0.3196609696529584, "total_flos": 745653596497586304, "step": 24128 }, { "loss": 1.27264404296875, "learning_rate": 2.060700566181687e-05, "epoch": 0.3200849231936122, "total_flos": 746634681631504608, "step": 24160 }, { "loss": 1.265380859375, "learning_rate": 2.0594156148358342e-05, "epoch": 0.320508876734266, "total_flos": 747624567105992160, "step": 24192 }, { "loss": 1.26068115234375, "learning_rate": 2.0581306634899812e-05, "epoch": 0.3209328302749198, "total_flos": 748587922851780096, "step": 24224 }, { "loss": 1.24041748046875, "learning_rate": 2.0568457121441286e-05, "epoch": 0.3213567838155736, "total_flos": 749576505167974944, "step": 24256 }, { "loss": 1.254638671875, "learning_rate": 2.055560760798276e-05, "epoch": 0.32178073735622736, "total_flos": 750598229534576832, "step": 24288 }, { "loss": 1.2574462890625, "learning_rate": 2.0542758094524236e-05, "epoch": 0.32220469089688114, "total_flos": 751595049098251488, "step": 24320 }, { "loss": 1.24176025390625, "learning_rate": 2.052990858106571e-05, "epoch": 0.322628644437535, "total_flos": 752580799860625152, "step": 24352 }, { "loss": 1.2559814453125, "learning_rate": 2.051705906760718e-05, "epoch": 0.32305259797818875, "total_flos": 753605597106657984, "step": 24384 }, { "loss": 1.2564697265625, "learning_rate": 2.0504209554148653e-05, "epoch": 0.3234765515188425, "total_flos": 754576208709112704, "step": 24416 }, { "loss": 1.2882080078125, "learning_rate": 2.0491360040690127e-05, "epoch": 0.32390050505949636, "total_flos": 755521513299290592, "step": 24448 }, { "loss": 1.238037109375, "learning_rate": 2.04785105272316e-05, "epoch": 0.32432445860015013, "total_flos": 756518992486298592, "step": 24480 }, { "loss": 1.2742919921875, "learning_rate": 2.0465661013773074e-05, "epoch": 0.3247484121408039, "total_flos": 757483973157858912, "step": 24512 }, { "loss": 1.25726318359375, "learning_rate": 2.0452811500314544e-05, "epoch": 0.3251723656814577, "total_flos": 758493679509094752, "step": 24544 }, { "loss": 1.2562255859375, "learning_rate": 2.0439961986856017e-05, "epoch": 0.3255963192221115, "total_flos": 759486428714151456, "step": 24576 }, { "loss": 1.2603759765625, "learning_rate": 2.042711247339749e-05, "epoch": 0.3260202727627653, "total_flos": 760490182367013216, "step": 24608 }, { "loss": 1.2723388671875, "learning_rate": 2.0414262959938964e-05, "epoch": 0.32644422630341907, "total_flos": 761523007711664064, "step": 24640 }, { "loss": 1.244384765625, "learning_rate": 2.0401413446480438e-05, "epoch": 0.32686817984407285, "total_flos": 762505122501517344, "step": 24672 }, { "loss": 1.26953125, "learning_rate": 2.0388563933021915e-05, "epoch": 0.3272921333847267, "total_flos": 763502472981533472, "step": 24704 }, { "loss": 1.248046875, "learning_rate": 2.0375714419563385e-05, "epoch": 0.32771608692538046, "total_flos": 764497458470573952, "step": 24736 }, { "loss": 1.2730712890625, "learning_rate": 2.0362864906104858e-05, "epoch": 0.32814004046603423, "total_flos": 765457210420589472, "step": 24768 }, { "loss": 1.2459716796875, "learning_rate": 2.0350015392646332e-05, "epoch": 0.328563994006688, "total_flos": 766461124957191072, "step": 24800 }, { "loss": 1.2569580078125, "learning_rate": 2.0337165879187805e-05, "epoch": 0.32898794754734184, "total_flos": 767440488635093088, "step": 24832 }, { "loss": 1.2684326171875, "learning_rate": 2.032431636572928e-05, "epoch": 0.3294119010879956, "total_flos": 768409057014051840, "step": 24864 }, { "loss": 1.2652587890625, "learning_rate": 2.031146685227075e-05, "epoch": 0.3298358546286494, "total_flos": 769400310000328032, "step": 24896 }, { "loss": 1.2847900390625, "learning_rate": 2.0298617338812222e-05, "epoch": 0.33025980816930317, "total_flos": 770373318770506368, "step": 24928 }, { "loss": 1.234130859375, "learning_rate": 2.0285767825353696e-05, "epoch": 0.330683761709957, "total_flos": 771402990793856352, "step": 24960 }, { "loss": 1.2767333984375, "learning_rate": 2.027291831189517e-05, "epoch": 0.3311077152506108, "total_flos": 772353813696310752, "step": 24992 }, { "loss": 1.2696533203125, "learning_rate": 2.0260068798436643e-05, "epoch": 0.33153166879126456, "total_flos": 773304073505675712, "step": 25024 }, { "loss": 1.271240234375, "learning_rate": 2.0247219284978113e-05, "epoch": 0.3319556223319184, "total_flos": 774323095025448288, "step": 25056 }, { "loss": 1.2613525390625, "learning_rate": 2.023436977151959e-05, "epoch": 0.33237957587257216, "total_flos": 775365798631725312, "step": 25088 }, { "loss": 1.251220703125, "learning_rate": 2.0221520258061063e-05, "epoch": 0.33280352941322594, "total_flos": 776330473624179936, "step": 25120 }, { "loss": 1.2852783203125, "learning_rate": 2.0208670744602537e-05, "epoch": 0.3332274829538797, "total_flos": 777336881858749056, "step": 25152 }, { "loss": 1.2764892578125, "learning_rate": 2.019582123114401e-05, "epoch": 0.33365143649453355, "total_flos": 778316132918033184, "step": 25184 }, { "loss": 1.2685546875, "learning_rate": 2.0182971717685484e-05, "epoch": 0.3340753900351873, "total_flos": 779266891466991648, "step": 25216 }, { "loss": 1.288818359375, "learning_rate": 2.0170122204226954e-05, "epoch": 0.3344993435758411, "total_flos": 780245756405300160, "step": 25248 }, { "loss": 1.2705078125, "learning_rate": 2.0157272690768427e-05, "epoch": 0.3349232971164949, "total_flos": 781242125494503264, "step": 25280 }, { "loss": 1.2706298828125, "learning_rate": 2.01444231773099e-05, "epoch": 0.3353472506571487, "total_flos": 782221119139803648, "step": 25312 }, { "loss": 1.27294921875, "learning_rate": 2.0131573663851374e-05, "epoch": 0.3357712041978025, "total_flos": 783176655935835360, "step": 25344 }, { "loss": 1.25537109375, "learning_rate": 2.0118724150392848e-05, "epoch": 0.33619515773845626, "total_flos": 784137904104631392, "step": 25376 }, { "loss": 1.2452392578125, "learning_rate": 2.0105874636934318e-05, "epoch": 0.33661911127911004, "total_flos": 785103994873996608, "step": 25408 }, { "loss": 1.25390625, "learning_rate": 2.009302512347579e-05, "epoch": 0.33704306481976387, "total_flos": 786133908222956352, "step": 25440 }, { "loss": 1.2581787109375, "learning_rate": 2.008017561001727e-05, "epoch": 0.33746701836041765, "total_flos": 787098309713053248, "step": 25472 }, { "loss": 1.2779541015625, "learning_rate": 2.0067326096558742e-05, "epoch": 0.3378909719010714, "total_flos": 788084366154532608, "step": 25504 }, { "loss": 1.29150390625, "learning_rate": 2.0054476583100215e-05, "epoch": 0.3383149254417252, "total_flos": 789034786847637408, "step": 25536 }, { "loss": 1.2313232421875, "learning_rate": 2.0041627069641685e-05, "epoch": 0.33873887898237903, "total_flos": 790029933220417728, "step": 25568 }, { "loss": 1.2666015625, "learning_rate": 2.002877755618316e-05, "epoch": 0.3391628325230328, "total_flos": 791054360433848928, "step": 25600 }, { "loss": 1.2698974609375, "learning_rate": 2.0015928042724632e-05, "epoch": 0.3395867860636866, "total_flos": 792028350594840288, "step": 25632 }, { "loss": 1.2786865234375, "learning_rate": 2.0003078529266106e-05, "epoch": 0.3400107396043404, "total_flos": 793014294417701760, "step": 25664 }, { "loss": 1.2650146484375, "learning_rate": 1.999022901580758e-05, "epoch": 0.3404346931449942, "total_flos": 793982782354790592, "step": 25696 }, { "loss": 1.2861328125, "learning_rate": 1.9977379502349053e-05, "epoch": 0.34085864668564797, "total_flos": 794931433326757152, "step": 25728 }, { "loss": 1.2725830078125, "learning_rate": 1.9964529988890523e-05, "epoch": 0.34128260022630175, "total_flos": 795925212187748832, "step": 25760 }, { "loss": 1.2381591796875, "learning_rate": 1.9951680475431996e-05, "epoch": 0.3417065537669556, "total_flos": 796936784790366816, "step": 25792 }, { "loss": 1.26220703125, "learning_rate": 1.9938830961973473e-05, "epoch": 0.34213050730760936, "total_flos": 797943321731927808, "step": 25824 }, { "loss": 1.2496337890625, "learning_rate": 1.9925981448514947e-05, "epoch": 0.34255446084826313, "total_flos": 798947429329017216, "step": 25856 }, { "loss": 1.2230224609375, "learning_rate": 1.991313193505642e-05, "epoch": 0.3429784143889169, "total_flos": 799944956781147168, "step": 25888 }, { "loss": 1.258056640625, "learning_rate": 1.990028242159789e-05, "epoch": 0.34340236792957074, "total_flos": 800911304964496128, "step": 25920 }, { "loss": 1.27197265625, "learning_rate": 1.9887432908139364e-05, "epoch": 0.3438263214702245, "total_flos": 801931870968171168, "step": 25952 }, { "loss": 1.259765625, "learning_rate": 1.9874583394680837e-05, "epoch": 0.3442502750108783, "total_flos": 802909320129569088, "step": 25984 }, { "loss": 1.2518310546875, "learning_rate": 1.986173388122231e-05, "epoch": 0.34467422855153207, "total_flos": 803913717317390208, "step": 26016 }, { "loss": 1.2518310546875, "learning_rate": 1.9848884367763784e-05, "epoch": 0.3450981820921859, "total_flos": 804918227123829216, "step": 26048 }, { "loss": 1.2637939453125, "learning_rate": 1.9836034854305254e-05, "epoch": 0.3455221356328397, "total_flos": 805896641587666176, "step": 26080 }, { "loss": 1.2664794921875, "learning_rate": 1.9823185340846728e-05, "epoch": 0.34594608917349345, "total_flos": 806896373147031936, "step": 26112 }, { "loss": 1.2291259765625, "learning_rate": 1.98103358273882e-05, "epoch": 0.34637004271414723, "total_flos": 807883555774690176, "step": 26144 }, { "loss": 1.2802734375, "learning_rate": 1.9797486313929675e-05, "epoch": 0.34679399625480106, "total_flos": 808881533701291680, "step": 26176 }, { "loss": 1.248291015625, "learning_rate": 1.9784636800471152e-05, "epoch": 0.34721794979545484, "total_flos": 809889164652283584, "step": 26208 }, { "loss": 1.2681884765625, "learning_rate": 1.9771787287012625e-05, "epoch": 0.3476419033361086, "total_flos": 810884053611080160, "step": 26240 }, { "loss": 1.2620849609375, "learning_rate": 1.9758937773554095e-05, "epoch": 0.3480658568767624, "total_flos": 811832511522558912, "step": 26272 }, { "loss": 1.2769775390625, "learning_rate": 1.974608826009557e-05, "epoch": 0.3484898104174162, "total_flos": 812813001386639808, "step": 26304 }, { "loss": 1.2630615234375, "learning_rate": 1.9733238746637042e-05, "epoch": 0.34891376395807, "total_flos": 813820519719013824, "step": 26336 }, { "loss": 1.261962890625, "learning_rate": 1.9720389233178516e-05, "epoch": 0.3493377174987238, "total_flos": 814801090024964640, "step": 26368 }, { "loss": 1.2874755859375, "learning_rate": 1.970753971971999e-05, "epoch": 0.3497616710393776, "total_flos": 815777075144330016, "step": 26400 }, { "loss": 1.2454833984375, "learning_rate": 1.969469020626146e-05, "epoch": 0.3501856245800314, "total_flos": 816742667174101728, "step": 26432 }, { "loss": 1.2493896484375, "learning_rate": 1.9681840692802933e-05, "epoch": 0.35060957812068516, "total_flos": 817734354546475488, "step": 26464 }, { "loss": 1.23486328125, "learning_rate": 1.9668991179344406e-05, "epoch": 0.35103353166133894, "total_flos": 818721440643889824, "step": 26496 }, { "loss": 1.2542724609375, "learning_rate": 1.965614166588588e-05, "epoch": 0.35145748520199277, "total_flos": 819701930507970720, "step": 26528 }, { "loss": 1.2686767578125, "learning_rate": 1.9643292152427353e-05, "epoch": 0.35188143874264655, "total_flos": 820678156952945856, "step": 26560 }, { "loss": 1.2322998046875, "learning_rate": 1.9630442638968827e-05, "epoch": 0.3523053922833003, "total_flos": 821680398298653120, "step": 26592 }, { "loss": 1.2623291015625, "learning_rate": 1.96175931255103e-05, "epoch": 0.3527293458239541, "total_flos": 822678714081108288, "step": 26624 }, { "loss": 1.254150390625, "learning_rate": 1.9604743612051774e-05, "epoch": 0.35315329936460793, "total_flos": 823674584430717888, "step": 26656 }, { "loss": 1.2724609375, "learning_rate": 1.9591894098593247e-05, "epoch": 0.3535772529052617, "total_flos": 824660673048945216, "step": 26688 }, { "loss": 1.2606201171875, "learning_rate": 1.957904458513472e-05, "epoch": 0.3540012064459155, "total_flos": 825620601971074560, "step": 26720 }, { "loss": 1.266845703125, "learning_rate": 1.9566195071676194e-05, "epoch": 0.35442515998656926, "total_flos": 826608074189464512, "step": 26752 }, { "loss": 1.2474365234375, "learning_rate": 1.9553345558217665e-05, "epoch": 0.3548491135272231, "total_flos": 827614466335659648, "step": 26784 }, { "loss": 1.2523193359375, "learning_rate": 1.9540496044759138e-05, "epoch": 0.35527306706787687, "total_flos": 828613860039171744, "step": 26816 }, { "loss": 1.262939453125, "learning_rate": 1.952764653130061e-05, "epoch": 0.35569702060853065, "total_flos": 829621217487805920, "step": 26848 }, { "loss": 1.2540283203125, "learning_rate": 1.9514797017842085e-05, "epoch": 0.3561209741491844, "total_flos": 830619726330748896, "step": 26880 }, { "loss": 1.243408203125, "learning_rate": 1.950194750438356e-05, "epoch": 0.35654492768983825, "total_flos": 831660225829790112, "step": 26912 }, { "loss": 1.272705078125, "learning_rate": 1.948909799092503e-05, "epoch": 0.35696888123049203, "total_flos": 832649741271676032, "step": 26944 }, { "loss": 1.254638671875, "learning_rate": 1.9476248477466505e-05, "epoch": 0.3573928347711458, "total_flos": 833594547122260416, "step": 26976 }, { "loss": 1.2718505859375, "learning_rate": 1.946339896400798e-05, "epoch": 0.35781678831179964, "total_flos": 834576098819024256, "step": 27008 }, { "loss": 1.24951171875, "learning_rate": 1.9450549450549452e-05, "epoch": 0.3582407418524534, "total_flos": 835555253348064480, "step": 27040 }, { "loss": 1.2781982421875, "learning_rate": 1.9437699937090926e-05, "epoch": 0.3586646953931072, "total_flos": 836541036287186112, "step": 27072 }, { "loss": 1.2432861328125, "learning_rate": 1.9424850423632396e-05, "epoch": 0.35908864893376097, "total_flos": 837511663978014816, "step": 27104 }, { "loss": 1.2137451171875, "learning_rate": 1.941200091017387e-05, "epoch": 0.3595126024744148, "total_flos": 838512746960795232, "step": 27136 }, { "loss": 1.2576904296875, "learning_rate": 1.9399151396715343e-05, "epoch": 0.3599365560150686, "total_flos": 839477759809103520, "step": 27168 }, { "loss": 1.254150390625, "learning_rate": 1.9386301883256817e-05, "epoch": 0.36036050955572235, "total_flos": 840490345979282496, "step": 27200 }, { "loss": 1.2452392578125, "learning_rate": 1.937345236979829e-05, "epoch": 0.36078446309637613, "total_flos": 841521498133039008, "step": 27232 }, { "loss": 1.2608642578125, "learning_rate": 1.9360602856339764e-05, "epoch": 0.36120841663702996, "total_flos": 842533231619396832, "step": 27264 }, { "loss": 1.2647705078125, "learning_rate": 1.9347753342881234e-05, "epoch": 0.36163237017768374, "total_flos": 843506497803558912, "step": 27296 }, { "loss": 1.27685546875, "learning_rate": 1.933490382942271e-05, "epoch": 0.3620563237183375, "total_flos": 844449710905118880, "step": 27328 }, { "loss": 1.261474609375, "learning_rate": 1.9322054315964184e-05, "epoch": 0.3624802772589913, "total_flos": 845443232352126816, "step": 27360 }, { "loss": 1.2724609375, "learning_rate": 1.9309204802505657e-05, "epoch": 0.3629042307996451, "total_flos": 846451812517183776, "step": 27392 }, { "loss": 1.2841796875, "learning_rate": 1.929635528904713e-05, "epoch": 0.3633281843402989, "total_flos": 847417002337605888, "step": 27424 }, { "loss": 1.2374267578125, "learning_rate": 1.92835057755886e-05, "epoch": 0.3637521378809527, "total_flos": 848360907239247168, "step": 27456 }, { "loss": 1.232421875, "learning_rate": 1.9270656262130075e-05, "epoch": 0.36417609142160645, "total_flos": 849339434321702016, "step": 27488 }, { "loss": 1.2542724609375, "learning_rate": 1.9257806748671548e-05, "epoch": 0.3646000449622603, "total_flos": 850328885410092000, "step": 27520 }, { "loss": 1.240478515625, "learning_rate": 1.924495723521302e-05, "epoch": 0.36502399850291406, "total_flos": 851290551876611616, "step": 27552 }, { "loss": 1.25, "learning_rate": 1.9232107721754495e-05, "epoch": 0.36544795204356784, "total_flos": 852282673635082944, "step": 27584 }, { "loss": 1.2603759765625, "learning_rate": 1.9219258208295965e-05, "epoch": 0.36587190558422167, "total_flos": 853264820601684192, "step": 27616 }, { "loss": 1.2708740234375, "learning_rate": 1.920640869483744e-05, "epoch": 0.36629585912487544, "total_flos": 854266756268285760, "step": 27648 }, { "loss": 1.2720947265625, "learning_rate": 1.9193559181378912e-05, "epoch": 0.3667198126655292, "total_flos": 855240102894317760, "step": 27680 }, { "loss": 1.2613525390625, "learning_rate": 1.918070966792039e-05, "epoch": 0.367143766206183, "total_flos": 856221461530593792, "step": 27712 }, { "loss": 1.2257080078125, "learning_rate": 1.9167860154461862e-05, "epoch": 0.36756771974683683, "total_flos": 857181680043454848, "step": 27744 }, { "loss": 1.2613525390625, "learning_rate": 1.9155010641003336e-05, "epoch": 0.3679916732874906, "total_flos": 858185594580056448, "step": 27776 }, { "loss": 1.2777099609375, "learning_rate": 1.9142161127544806e-05, "epoch": 0.3684156268281444, "total_flos": 859154516903242848, "step": 27808 }, { "loss": 1.25537109375, "learning_rate": 1.912931161408628e-05, "epoch": 0.36883958036879816, "total_flos": 860119803253908864, "step": 27840 }, { "loss": 1.240966796875, "learning_rate": 1.9116462100627753e-05, "epoch": 0.369263533909452, "total_flos": 861117475501404672, "step": 27872 }, { "loss": 1.2269287109375, "learning_rate": 1.9103612587169227e-05, "epoch": 0.36968748745010577, "total_flos": 862127069234022624, "step": 27904 }, { "loss": 1.251953125, "learning_rate": 1.90907630737107e-05, "epoch": 0.37011144099075954, "total_flos": 863110422828672672, "step": 27936 }, { "loss": 1.243408203125, "learning_rate": 1.907791356025217e-05, "epoch": 0.3705353945314133, "total_flos": 864072684565029696, "step": 27968 }, { "loss": 1.2659912109375, "learning_rate": 1.9065064046793644e-05, "epoch": 0.37095934807206715, "total_flos": 865049458014720288, "step": 28000 }, { "loss": 1.245361328125, "learning_rate": 1.9052214533335117e-05, "epoch": 0.37138330161272093, "total_flos": 866029963967175168, "step": 28032 }, { "loss": 1.2467041015625, "learning_rate": 1.903936501987659e-05, "epoch": 0.3718072551533747, "total_flos": 867019302436947264, "step": 28064 }, { "loss": 1.2352294921875, "learning_rate": 1.9026515506418068e-05, "epoch": 0.3722312086940285, "total_flos": 868029845383630272, "step": 28096 }, { "loss": 1.2484130859375, "learning_rate": 1.9013665992959538e-05, "epoch": 0.3726551622346823, "total_flos": 869010978782670528, "step": 28128 }, { "loss": 1.2755126953125, "learning_rate": 1.900081647950101e-05, "epoch": 0.3730791157753361, "total_flos": 869975283742523520, "step": 28160 }, { "loss": 1.286865234375, "learning_rate": 1.8987966966042485e-05, "epoch": 0.37350306931598987, "total_flos": 870970092259450176, "step": 28192 }, { "loss": 1.243408203125, "learning_rate": 1.8975117452583958e-05, "epoch": 0.3739270228566437, "total_flos": 871967297944100448, "step": 28224 }, { "loss": 1.257568359375, "learning_rate": 1.896226793912543e-05, "epoch": 0.3743509763972975, "total_flos": 872973834885661440, "step": 28256 }, { "loss": 1.262939453125, "learning_rate": 1.8949418425666905e-05, "epoch": 0.37477492993795125, "total_flos": 873951541461043104, "step": 28288 }, { "loss": 1.265380859375, "learning_rate": 1.8936568912208375e-05, "epoch": 0.375198883478605, "total_flos": 874935184646424864, "step": 28320 }, { "loss": 1.27978515625, "learning_rate": 1.892371939874985e-05, "epoch": 0.37562283701925886, "total_flos": 875947030751400576, "step": 28352 }, { "loss": 1.2362060546875, "learning_rate": 1.8910869885291322e-05, "epoch": 0.37604679055991264, "total_flos": 876912976725399936, "step": 28384 }, { "loss": 1.2789306640625, "learning_rate": 1.8898020371832796e-05, "epoch": 0.3764707441005664, "total_flos": 877952414391758208, "step": 28416 }, { "loss": 1.2589111328125, "learning_rate": 1.888517085837427e-05, "epoch": 0.3768946976412202, "total_flos": 878948333006489760, "step": 28448 }, { "loss": 1.2857666015625, "learning_rate": 1.8872321344915743e-05, "epoch": 0.377318651181874, "total_flos": 879912654054716736, "step": 28480 }, { "loss": 1.2530517578125, "learning_rate": 1.8859471831457216e-05, "epoch": 0.3777426047225278, "total_flos": 880901702933757120, "step": 28512 }, { "loss": 1.258056640625, "learning_rate": 1.884662231799869e-05, "epoch": 0.3781665582631816, "total_flos": 881905038288895296, "step": 28544 }, { "loss": 1.2373046875, "learning_rate": 1.8833772804540163e-05, "epoch": 0.37859051180383535, "total_flos": 882921019105984896, "step": 28576 }, { "loss": 1.2742919921875, "learning_rate": 1.8820923291081637e-05, "epoch": 0.3790144653444892, "total_flos": 883885581479821632, "step": 28608 }, { "loss": 1.2542724609375, "learning_rate": 1.8808073777623107e-05, "epoch": 0.37943841888514296, "total_flos": 884850063411788448, "step": 28640 }, { "loss": 1.265869140625, "learning_rate": 1.879522426416458e-05, "epoch": 0.37986237242579673, "total_flos": 885815156701966656, "step": 28672 }, { "loss": 1.263671875, "learning_rate": 1.8782374750706054e-05, "epoch": 0.3802863259664505, "total_flos": 886832762444828640, "step": 28704 }, { "loss": 1.2484130859375, "learning_rate": 1.8769525237247527e-05, "epoch": 0.38071027950710434, "total_flos": 887827217017527648, "step": 28736 }, { "loss": 1.2857666015625, "learning_rate": 1.8756675723789e-05, "epoch": 0.3811342330477581, "total_flos": 888805583216242656, "step": 28768 }, { "loss": 1.2366943359375, "learning_rate": 1.8743826210330474e-05, "epoch": 0.3815581865884119, "total_flos": 889791768364713888, "step": 28800 }, { "loss": 1.275146484375, "learning_rate": 1.8730976696871948e-05, "epoch": 0.3819821401290657, "total_flos": 890755124110501824, "step": 28832 }, { "loss": 1.2608642578125, "learning_rate": 1.871812718341342e-05, "epoch": 0.3824060936697195, "total_flos": 891719348628484896, "step": 28864 }, { "loss": 1.2884521484375, "learning_rate": 1.8705277669954895e-05, "epoch": 0.3828300472103733, "total_flos": 892700900325248736, "step": 28896 }, { "loss": 1.2564697265625, "learning_rate": 1.8692428156496368e-05, "epoch": 0.38325400075102706, "total_flos": 893666830210874112, "step": 28928 }, { "loss": 1.25439453125, "learning_rate": 1.867957864303784e-05, "epoch": 0.3836779542916809, "total_flos": 894656844392353536, "step": 28960 }, { "loss": 1.232666015625, "learning_rate": 1.8666729129579312e-05, "epoch": 0.38410190783233467, "total_flos": 895666212887735712, "step": 28992 }, { "loss": 1.2503662109375, "learning_rate": 1.8653879616120785e-05, "epoch": 0.38452586137298844, "total_flos": 896677753313605728, "step": 29024 }, { "loss": 1.2637939453125, "learning_rate": 1.864103010266226e-05, "epoch": 0.3849498149136422, "total_flos": 897633032695653696, "step": 29056 }, { "loss": 1.2440185546875, "learning_rate": 1.8628180589203732e-05, "epoch": 0.38537376845429605, "total_flos": 898604480893555584, "step": 29088 }, { "loss": 1.2464599609375, "learning_rate": 1.8615331075745206e-05, "epoch": 0.3857977219949498, "total_flos": 899583410185360032, "step": 29120 }, { "loss": 1.2596435546875, "learning_rate": 1.8602481562286676e-05, "epoch": 0.3862216755356036, "total_flos": 900544803149521920, "step": 29152 }, { "loss": 1.2391357421875, "learning_rate": 1.858963204882815e-05, "epoch": 0.3866456290762574, "total_flos": 901543135020351072, "step": 29184 }, { "loss": 1.250244140625, "learning_rate": 1.8576782535369626e-05, "epoch": 0.3870695826169112, "total_flos": 902553726232156032, "step": 29216 }, { "loss": 1.2469482421875, "learning_rate": 1.85639330219111e-05, "epoch": 0.387493536157565, "total_flos": 903556997233798272, "step": 29248 }, { "loss": 1.2796630859375, "learning_rate": 1.8551083508452573e-05, "epoch": 0.38791748969821876, "total_flos": 904527898426984704, "step": 29280 }, { "loss": 1.2518310546875, "learning_rate": 1.8538233994994043e-05, "epoch": 0.38834144323887254, "total_flos": 905494278787081632, "step": 29312 }, { "loss": 1.233642578125, "learning_rate": 1.8525384481535517e-05, "epoch": 0.3887653967795264, "total_flos": 906480592642544736, "step": 29344 }, { "loss": 1.236572265625, "learning_rate": 1.851253496807699e-05, "epoch": 0.38918935032018015, "total_flos": 907480082876300736, "step": 29376 }, { "loss": 1.24755859375, "learning_rate": 1.8499685454618464e-05, "epoch": 0.3896133038608339, "total_flos": 908488807836723552, "step": 29408 }, { "loss": 1.2359619140625, "learning_rate": 1.8486835941159937e-05, "epoch": 0.3900372574014877, "total_flos": 909497741946008160, "step": 29440 }, { "loss": 1.254638671875, "learning_rate": 1.847398642770141e-05, "epoch": 0.39046121094214153, "total_flos": 910488834048544512, "step": 29472 }, { "loss": 1.2552490234375, "learning_rate": 1.846113691424288e-05, "epoch": 0.3908851644827953, "total_flos": 911442327621080256, "step": 29504 }, { "loss": 1.244873046875, "learning_rate": 1.8448287400784354e-05, "epoch": 0.3913091180234491, "total_flos": 912423187517762784, "step": 29536 }, { "loss": 1.219482421875, "learning_rate": 1.8435437887325828e-05, "epoch": 0.3917330715641029, "total_flos": 913404707037778656, "step": 29568 }, { "loss": 1.2484130859375, "learning_rate": 1.8422588373867305e-05, "epoch": 0.3921570251047567, "total_flos": 914353695865598880, "step": 29600 }, { "loss": 1.253662109375, "learning_rate": 1.8409738860408778e-05, "epoch": 0.39258097864541047, "total_flos": 915348617001143424, "step": 29632 }, { "loss": 1.251708984375, "learning_rate": 1.8396889346950248e-05, "epoch": 0.39300493218606425, "total_flos": 916331037470102400, "step": 29664 }, { "loss": 1.2845458984375, "learning_rate": 1.8384039833491722e-05, "epoch": 0.3934288857267181, "total_flos": 917298350955890400, "step": 29696 }, { "loss": 1.24755859375, "learning_rate": 1.8371190320033195e-05, "epoch": 0.39385283926737186, "total_flos": 918267176748832896, "step": 29728 }, { "loss": 1.2515869140625, "learning_rate": 1.835834080657467e-05, "epoch": 0.39427679280802563, "total_flos": 919211097738848160, "step": 29760 }, { "loss": 1.2127685546875, "learning_rate": 1.8345491293116142e-05, "epoch": 0.3947007463486794, "total_flos": 920272319063580768, "step": 29792 }, { "loss": 1.2415771484375, "learning_rate": 1.8332641779657616e-05, "epoch": 0.39512469988933324, "total_flos": 921244459061563968, "step": 29824 }, { "loss": 1.244140625, "learning_rate": 1.8319792266199086e-05, "epoch": 0.395548653429987, "total_flos": 922199802797107872, "step": 29856 }, { "loss": 1.2235107421875, "learning_rate": 1.830694275274056e-05, "epoch": 0.3959726069706408, "total_flos": 923176608423546432, "step": 29888 }, { "loss": 1.2298583984375, "learning_rate": 1.8294093239282033e-05, "epoch": 0.39639656051129457, "total_flos": 924152094803318304, "step": 29920 }, { "loss": 1.2490234375, "learning_rate": 1.8281243725823506e-05, "epoch": 0.3968205140519484, "total_flos": 925124846159512896, "step": 29952 }, { "loss": 1.2593994140625, "learning_rate": 1.8268394212364983e-05, "epoch": 0.3972444675926022, "total_flos": 926126250909772992, "step": 29984 }, { "loss": 1.2335205078125, "learning_rate": 1.8255544698906453e-05, "epoch": 0.39766842113325596, "total_flos": 927104842345723776, "step": 30016 }, { "loss": 1.2633056640625, "learning_rate": 1.8242695185447927e-05, "epoch": 0.39809237467390973, "total_flos": 928096433187853632, "step": 30048 }, { "loss": 1.2391357421875, "learning_rate": 1.82298456719894e-05, "epoch": 0.39851632821456356, "total_flos": 929110596018683040, "step": 30080 }, { "loss": 1.2476806640625, "learning_rate": 1.8216996158530874e-05, "epoch": 0.39894028175521734, "total_flos": 930075351453007584, "step": 30112 }, { "loss": 1.25927734375, "learning_rate": 1.8204146645072347e-05, "epoch": 0.3993642352958711, "total_flos": 931047250125381024, "step": 30144 }, { "loss": 1.2449951171875, "learning_rate": 1.8191297131613817e-05, "epoch": 0.39978818883652495, "total_flos": 932099799816536256, "step": 30176 }, { "loss": 1.239990234375, "learning_rate": 1.817844761815529e-05, "epoch": 0.4002121423771787, "total_flos": 933076283675495136, "step": 30208 }, { "loss": 1.26220703125, "learning_rate": 1.8165598104696764e-05, "epoch": 0.4006360959178325, "total_flos": 934012916632095648, "step": 30240 }, { "loss": 1.235595703125, "learning_rate": 1.8152748591238238e-05, "epoch": 0.4010600494584863, "total_flos": 935006824200079200, "step": 30272 }, { "loss": 1.2408447265625, "learning_rate": 1.813989907777971e-05, "epoch": 0.4014840029991401, "total_flos": 936030173492453472, "step": 30304 }, { "loss": 1.25634765625, "learning_rate": 1.8127049564321185e-05, "epoch": 0.4019079565397939, "total_flos": 937026574758404544, "step": 30336 }, { "loss": 1.24560546875, "learning_rate": 1.8114200050862658e-05, "epoch": 0.40233191008044766, "total_flos": 937994499602403936, "step": 30368 }, { "loss": 1.2362060546875, "learning_rate": 1.8101350537404132e-05, "epoch": 0.40275586362110144, "total_flos": 938961893530061856, "step": 30400 }, { "loss": 1.2305908203125, "learning_rate": 1.8088501023945605e-05, "epoch": 0.40317981716175527, "total_flos": 939947869529671296, "step": 30432 }, { "loss": 1.236328125, "learning_rate": 1.807565151048708e-05, "epoch": 0.40360377070240905, "total_flos": 940919542964808960, "step": 30464 }, { "loss": 1.2269287109375, "learning_rate": 1.8062801997028552e-05, "epoch": 0.4040277242430628, "total_flos": 941903620536288288, "step": 30496 }, { "loss": 1.22412109375, "learning_rate": 1.8049952483570022e-05, "epoch": 0.4044516777837166, "total_flos": 942875599650531648, "step": 30528 }, { "loss": 1.2896728515625, "learning_rate": 1.8037102970111496e-05, "epoch": 0.40487563132437043, "total_flos": 943858004031116640, "step": 30560 }, { "loss": 1.23095703125, "learning_rate": 1.802425345665297e-05, "epoch": 0.4052995848650242, "total_flos": 944874660559913568, "step": 30592 }, { "loss": 1.24072265625, "learning_rate": 1.8011403943194443e-05, "epoch": 0.405723538405678, "total_flos": 945906874546353024, "step": 30624 }, { "loss": 1.2392578125, "learning_rate": 1.7998554429735916e-05, "epoch": 0.40614749194633176, "total_flos": 946956158297589504, "step": 30656 }, { "loss": 1.24072265625, "learning_rate": 1.7985704916277386e-05, "epoch": 0.4065714454869856, "total_flos": 947935586328987456, "step": 30688 }, { "loss": 1.2423095703125, "learning_rate": 1.7972855402818863e-05, "epoch": 0.40699539902763937, "total_flos": 948882564110059680, "step": 30720 }, { "eval_loss": 1.1086247526090591, "epoch": 0.40699539902763937, "total_flos": 948882564110059680, "step": 30720 }, { "loss": 1.2176513671875, "learning_rate": 1.7960005889360337e-05, "epoch": 0.40741935256829315, "total_flos": 949894764159263040, "step": 30752 }, { "loss": 1.2452392578125, "learning_rate": 1.794715637590181e-05, "epoch": 0.407843306108947, "total_flos": 950904068301149280, "step": 30784 }, { "loss": 1.2431640625, "learning_rate": 1.7934306862443284e-05, "epoch": 0.40826725964960076, "total_flos": 951885314318807424, "step": 30816 }, { "loss": 1.2568359375, "learning_rate": 1.7921457348984754e-05, "epoch": 0.40869121319025453, "total_flos": 952878208319229984, "step": 30848 }, { "loss": 1.2808837890625, "learning_rate": 1.7908607835526227e-05, "epoch": 0.4091151667309083, "total_flos": 953846149251603360, "step": 30880 }, { "loss": 1.2550048828125, "learning_rate": 1.78957583220677e-05, "epoch": 0.40953912027156214, "total_flos": 954847489648367520, "step": 30912 }, { "loss": 1.25830078125, "learning_rate": 1.7882908808609174e-05, "epoch": 0.4099630738122159, "total_flos": 955872126010660512, "step": 30944 }, { "loss": 1.23828125, "learning_rate": 1.7870059295150648e-05, "epoch": 0.4103870273528697, "total_flos": 956859694759294368, "step": 30976 }, { "loss": 1.232421875, "learning_rate": 1.785720978169212e-05, "epoch": 0.41081098089352347, "total_flos": 957809600624431680, "step": 31008 }, { "loss": 1.241943359375, "learning_rate": 1.784436026823359e-05, "epoch": 0.4112349344341773, "total_flos": 958829732242009152, "step": 31040 }, { "loss": 1.2562255859375, "learning_rate": 1.7831510754775065e-05, "epoch": 0.4116588879748311, "total_flos": 959830413015439968, "step": 31072 }, { "loss": 1.23046875, "learning_rate": 1.7818661241316542e-05, "epoch": 0.41208284151548485, "total_flos": 960876141236025984, "step": 31104 }, { "loss": 1.2366943359375, "learning_rate": 1.7805811727858015e-05, "epoch": 0.41250679505613863, "total_flos": 961843953461407488, "step": 31136 }, { "loss": 1.25146484375, "learning_rate": 1.779296221439949e-05, "epoch": 0.41293074859679246, "total_flos": 962853016277683968, "step": 31168 }, { "loss": 1.24365234375, "learning_rate": 1.778011270094096e-05, "epoch": 0.41335470213744624, "total_flos": 963926416501522464, "step": 31200 }, { "loss": 1.251220703125, "learning_rate": 1.7767263187482432e-05, "epoch": 0.4137786556781, "total_flos": 964903559983814688, "step": 31232 }, { "loss": 1.2611083984375, "learning_rate": 1.7754413674023906e-05, "epoch": 0.4142026092187538, "total_flos": 965882634070984992, "step": 31264 }, { "loss": 1.243896484375, "learning_rate": 1.774156416056538e-05, "epoch": 0.4146265627594076, "total_flos": 966851298980187648, "step": 31296 }, { "loss": 1.22607421875, "learning_rate": 1.7728714647106853e-05, "epoch": 0.4150505163000614, "total_flos": 967869644788252896, "step": 31328 }, { "loss": 1.2347412109375, "learning_rate": 1.7715865133648323e-05, "epoch": 0.4154744698407152, "total_flos": 968878691516155392, "step": 31360 }, { "loss": 1.2525634765625, "learning_rate": 1.7703015620189796e-05, "epoch": 0.41589842338136895, "total_flos": 969906481199749248, "step": 31392 }, { "loss": 1.275390625, "learning_rate": 1.769016610673127e-05, "epoch": 0.4163223769220228, "total_flos": 970847731519683168, "step": 31424 }, { "loss": 1.251953125, "learning_rate": 1.7677316593272743e-05, "epoch": 0.41674633046267656, "total_flos": 971819501485064736, "step": 31456 }, { "loss": 1.2249755859375, "learning_rate": 1.766446707981422e-05, "epoch": 0.41717028400333034, "total_flos": 972847773819878112, "step": 31488 }, { "loss": 1.22607421875, "learning_rate": 1.7651617566355694e-05, "epoch": 0.41759423754398417, "total_flos": 973826365255828896, "step": 31520 }, { "loss": 1.22216796875, "learning_rate": 1.7638768052897164e-05, "epoch": 0.41801819108463795, "total_flos": 974861780828691168, "step": 31552 }, { "loss": 1.2611083984375, "learning_rate": 1.7625918539438637e-05, "epoch": 0.4184421446252917, "total_flos": 975830654886755616, "step": 31584 }, { "loss": 1.2606201171875, "learning_rate": 1.761306902598011e-05, "epoch": 0.4188660981659455, "total_flos": 976769250624982176, "step": 31616 }, { "loss": 1.2435302734375, "learning_rate": 1.7600219512521584e-05, "epoch": 0.41929005170659933, "total_flos": 977774355701258592, "step": 31648 }, { "loss": 1.2313232421875, "learning_rate": 1.7587369999063058e-05, "epoch": 0.4197140052472531, "total_flos": 978790191722982336, "step": 31680 }, { "loss": 1.2401123046875, "learning_rate": 1.7574520485604528e-05, "epoch": 0.4201379587879069, "total_flos": 979780334611453632, "step": 31712 }, { "loss": 1.242919921875, "learning_rate": 1.7561670972146e-05, "epoch": 0.42056191232856066, "total_flos": 980807062462364544, "step": 31744 }, { "loss": 1.2376708984375, "learning_rate": 1.7548821458687475e-05, "epoch": 0.4209858658692145, "total_flos": 981810735673356384, "step": 31776 }, { "loss": 1.2579345703125, "learning_rate": 1.753597194522895e-05, "epoch": 0.42140981940986827, "total_flos": 982774751042477664, "step": 31808 }, { "loss": 1.2413330078125, "learning_rate": 1.7523122431770422e-05, "epoch": 0.42183377295052205, "total_flos": 983777989867371936, "step": 31840 }, { "loss": 1.2325439453125, "learning_rate": 1.7510272918311895e-05, "epoch": 0.4222577264911758, "total_flos": 984746896102184352, "step": 31872 }, { "loss": 1.2200927734375, "learning_rate": 1.749742340485337e-05, "epoch": 0.42268168003182965, "total_flos": 985731520678379136, "step": 31904 }, { "loss": 1.2108154296875, "learning_rate": 1.7484573891394842e-05, "epoch": 0.42310563357248343, "total_flos": 986733182842622976, "step": 31936 }, { "loss": 1.240966796875, "learning_rate": 1.7471724377936316e-05, "epoch": 0.4235295871131372, "total_flos": 987751174706460576, "step": 31968 }, { "loss": 1.240478515625, "learning_rate": 1.745887486447779e-05, "epoch": 0.423953540653791, "total_flos": 988761846360135456, "step": 32000 }, { "loss": 1.2625732421875, "learning_rate": 1.7446025351019263e-05, "epoch": 0.4243774941944448, "total_flos": 989768206329582624, "step": 32032 }, { "loss": 1.2628173828125, "learning_rate": 1.7433175837560733e-05, "epoch": 0.4248014477350986, "total_flos": 990690906754312992, "step": 32064 }, { "loss": 1.26416015625, "learning_rate": 1.7420326324102206e-05, "epoch": 0.42522540127575237, "total_flos": 991684267317581088, "step": 32096 }, { "loss": 1.24462890625, "learning_rate": 1.740747681064368e-05, "epoch": 0.4256493548164062, "total_flos": 992654524975808160, "step": 32128 }, { "loss": 1.2275390625, "learning_rate": 1.7394627297185153e-05, "epoch": 0.42607330835706, "total_flos": 993628322076311712, "step": 32160 }, { "loss": 1.275634765625, "learning_rate": 1.7381777783726627e-05, "epoch": 0.42649726189771375, "total_flos": 994620443834783040, "step": 32192 }, { "loss": 1.2523193359375, "learning_rate": 1.73689282702681e-05, "epoch": 0.42692121543836753, "total_flos": 995586357632034432, "step": 32224 }, { "loss": 1.243896484375, "learning_rate": 1.7356078756809574e-05, "epoch": 0.42734516897902136, "total_flos": 996569212487090976, "step": 32256 }, { "loss": 1.230224609375, "learning_rate": 1.7343229243351047e-05, "epoch": 0.42776912251967514, "total_flos": 997570922916456768, "step": 32288 }, { "loss": 1.2222900390625, "learning_rate": 1.733037972989252e-05, "epoch": 0.4281930760603289, "total_flos": 998555965790375136, "step": 32320 }, { "loss": 1.2117919921875, "learning_rate": 1.7317530216433994e-05, "epoch": 0.4286170296009827, "total_flos": 999511422144536928, "step": 32352 }, { "loss": 1.2459716796875, "learning_rate": 1.7304680702975465e-05, "epoch": 0.4290409831416365, "total_flos": 1000513389987886464, "step": 32384 }, { "loss": 1.2255859375, "learning_rate": 1.7291831189516938e-05, "epoch": 0.4294649366822903, "total_flos": 1001532540214650912, "step": 32416 }, { "loss": 1.2393798828125, "learning_rate": 1.727898167605841e-05, "epoch": 0.4298888902229441, "total_flos": 1002522650926374240, "step": 32448 }, { "loss": 1.2406005859375, "learning_rate": 1.7266132162599885e-05, "epoch": 0.43031284376359785, "total_flos": 1003474519573137600, "step": 32480 }, { "loss": 1.2353515625, "learning_rate": 1.725328264914136e-05, "epoch": 0.4307367973042517, "total_flos": 1004470663425104928, "step": 32512 }, { "loss": 1.2615966796875, "learning_rate": 1.7240433135682832e-05, "epoch": 0.43116075084490546, "total_flos": 1005439360511055552, "step": 32544 }, { "loss": 1.2310791015625, "learning_rate": 1.7227583622224302e-05, "epoch": 0.43158470438555924, "total_flos": 1006393224116192928, "step": 32576 }, { "loss": 1.2099609375, "learning_rate": 1.721473410876578e-05, "epoch": 0.432008657926213, "total_flos": 1007416605585315168, "step": 32608 }, { "loss": 1.2413330078125, "learning_rate": 1.7201884595307252e-05, "epoch": 0.43243261146686685, "total_flos": 1008393845597851296, "step": 32640 }, { "loss": 1.2508544921875, "learning_rate": 1.7189035081848726e-05, "epoch": 0.4328565650075206, "total_flos": 1009409198968355520, "step": 32672 }, { "loss": 1.2310791015625, "learning_rate": 1.71761855683902e-05, "epoch": 0.4332805185481744, "total_flos": 1010399663624306496, "step": 32704 }, { "loss": 1.2281494140625, "learning_rate": 1.716333605493167e-05, "epoch": 0.43370447208882823, "total_flos": 1011395453532046176, "step": 32736 }, { "loss": 1.2327880859375, "learning_rate": 1.7150486541473143e-05, "epoch": 0.434128425629482, "total_flos": 1012393544077265568, "step": 32768 }, { "loss": 1.254150390625, "learning_rate": 1.7137637028014617e-05, "epoch": 0.4345523791701358, "total_flos": 1013397458613867168, "step": 32800 }, { "loss": 1.2508544921875, "learning_rate": 1.712478751455609e-05, "epoch": 0.43497633271078956, "total_flos": 1014407953295428224, "step": 32832 }, { "loss": 1.204833984375, "learning_rate": 1.7111938001097563e-05, "epoch": 0.4354002862514434, "total_flos": 1015393607527557984, "step": 32864 }, { "loss": 1.2550048828125, "learning_rate": 1.7099088487639034e-05, "epoch": 0.43582423979209717, "total_flos": 1016377524215297472, "step": 32896 }, { "loss": 1.2464599609375, "learning_rate": 1.7086238974180507e-05, "epoch": 0.43624819333275094, "total_flos": 1017369420736533024, "step": 32928 }, { "loss": 1.223388671875, "learning_rate": 1.707338946072198e-05, "epoch": 0.4366721468734047, "total_flos": 1018389922386712128, "step": 32960 }, { "loss": 1.2520751953125, "learning_rate": 1.7060539947263457e-05, "epoch": 0.43709610041405855, "total_flos": 1019371313199736128, "step": 32992 }, { "loss": 1.2713623046875, "learning_rate": 1.704769043380493e-05, "epoch": 0.43752005395471233, "total_flos": 1020326077753816608, "step": 33024 }, { "loss": 1.236572265625, "learning_rate": 1.7034840920346404e-05, "epoch": 0.4379440074953661, "total_flos": 1021315737991068384, "step": 33056 }, { "loss": 1.260498046875, "learning_rate": 1.7021991406887875e-05, "epoch": 0.4383679610360199, "total_flos": 1022277130955230272, "step": 33088 }, { "loss": 1.2257080078125, "learning_rate": 1.7009141893429348e-05, "epoch": 0.4387919145766737, "total_flos": 1023249045715977696, "step": 33120 }, { "loss": 1.225341796875, "learning_rate": 1.699629237997082e-05, "epoch": 0.4392158681173275, "total_flos": 1024213720708432320, "step": 33152 }, { "loss": 1.2158203125, "learning_rate": 1.6983442866512295e-05, "epoch": 0.43963982165798127, "total_flos": 1025224070594627520, "step": 33184 }, { "loss": 1.219970703125, "learning_rate": 1.697059335305377e-05, "epoch": 0.44006377519863504, "total_flos": 1026177322841553504, "step": 33216 }, { "loss": 1.2548828125, "learning_rate": 1.695774383959524e-05, "epoch": 0.4404877287392889, "total_flos": 1027193207128399200, "step": 33248 }, { "loss": 1.2467041015625, "learning_rate": 1.6944894326136712e-05, "epoch": 0.44091168227994265, "total_flos": 1028188466119797408, "step": 33280 }, { "loss": 1.2158203125, "learning_rate": 1.6932044812678186e-05, "epoch": 0.44133563582059643, "total_flos": 1029204237788025216, "step": 33312 }, { "loss": 1.2664794921875, "learning_rate": 1.691919529921966e-05, "epoch": 0.44175958936125026, "total_flos": 1030235293411537824, "step": 33344 }, { "loss": 1.2220458984375, "learning_rate": 1.6906345785761136e-05, "epoch": 0.44218354290190404, "total_flos": 1031242119943830528, "step": 33376 }, { "loss": 1.244873046875, "learning_rate": 1.6893496272302606e-05, "epoch": 0.4426074964425578, "total_flos": 1032219038188886976, "step": 33408 }, { "loss": 1.26171875, "learning_rate": 1.688064675884408e-05, "epoch": 0.4430314499832116, "total_flos": 1033217418324838080, "step": 33440 }, { "loss": 1.263671875, "learning_rate": 1.6867797245385553e-05, "epoch": 0.4434554035238654, "total_flos": 1034177041567861728, "step": 33472 }, { "loss": 1.244140625, "learning_rate": 1.6854947731927027e-05, "epoch": 0.4438793570645192, "total_flos": 1035130438610153568, "step": 33504 }, { "loss": 1.2325439453125, "learning_rate": 1.68420982184685e-05, "epoch": 0.444303310605173, "total_flos": 1036091767220819520, "step": 33536 }, { "loss": 1.2296142578125, "learning_rate": 1.6829248705009974e-05, "epoch": 0.44472726414582675, "total_flos": 1037094249892136544, "step": 33568 }, { "loss": 1.230712890625, "learning_rate": 1.6816399191551444e-05, "epoch": 0.4451512176864806, "total_flos": 1038061016373209088, "step": 33600 }, { "loss": 1.2161865234375, "learning_rate": 1.6803549678092917e-05, "epoch": 0.44557517122713436, "total_flos": 1039082579856071136, "step": 33632 }, { "loss": 1.2559814453125, "learning_rate": 1.679070016463439e-05, "epoch": 0.44599912476778814, "total_flos": 1040093605453973664, "step": 33664 }, { "loss": 1.21533203125, "learning_rate": 1.6777850651175864e-05, "epoch": 0.4464230783084419, "total_flos": 1041100995079355808, "step": 33696 }, { "loss": 1.243896484375, "learning_rate": 1.676500113771734e-05, "epoch": 0.44684703184909574, "total_flos": 1042106100155632224, "step": 33728 }, { "loss": 1.242919921875, "learning_rate": 1.675215162425881e-05, "epoch": 0.4472709853897495, "total_flos": 1043108775887437056, "step": 33760 }, { "loss": 1.2196044921875, "learning_rate": 1.6739302110800285e-05, "epoch": 0.4476949389304033, "total_flos": 1044073820912493312, "step": 33792 }, { "loss": 1.208251953125, "learning_rate": 1.6726452597341758e-05, "epoch": 0.4481188924710571, "total_flos": 1045109349103973472, "step": 33824 }, { "loss": 1.2333984375, "learning_rate": 1.671360308388323e-05, "epoch": 0.4485428460117109, "total_flos": 1046046352093175616, "step": 33856 }, { "loss": 1.2205810546875, "learning_rate": 1.6700753570424705e-05, "epoch": 0.4489667995523647, "total_flos": 1047072002023029600, "step": 33888 }, { "loss": 1.2255859375, "learning_rate": 1.6687904056966175e-05, "epoch": 0.44939075309301846, "total_flos": 1048056819659712192, "step": 33920 }, { "loss": 1.2271728515625, "learning_rate": 1.667505454350765e-05, "epoch": 0.44981470663367223, "total_flos": 1049016684228345600, "step": 33952 }, { "loss": 1.24853515625, "learning_rate": 1.6662205030049122e-05, "epoch": 0.45023866017432607, "total_flos": 1050022754607061056, "step": 33984 }, { "loss": 1.2425537109375, "learning_rate": 1.6649355516590596e-05, "epoch": 0.45066261371497984, "total_flos": 1051016983942524288, "step": 34016 }, { "loss": 1.2283935546875, "learning_rate": 1.663650600313207e-05, "epoch": 0.4510865672556336, "total_flos": 1051976864599531680, "step": 34048 }, { "loss": 1.252197265625, "learning_rate": 1.6623656489673543e-05, "epoch": 0.45151052079628745, "total_flos": 1052995194319222944, "step": 34080 }, { "loss": 1.250244140625, "learning_rate": 1.6610806976215016e-05, "epoch": 0.4519344743369412, "total_flos": 1054011738229401984, "step": 34112 }, { "loss": 1.22265625, "learning_rate": 1.659795746275649e-05, "epoch": 0.452358427877595, "total_flos": 1054979405659417632, "step": 34144 }, { "loss": 1.2393798828125, "learning_rate": 1.6585107949297963e-05, "epoch": 0.4527823814182488, "total_flos": 1055925273342684960, "step": 34176 }, { "loss": 1.2261962890625, "learning_rate": 1.6572258435839437e-05, "epoch": 0.4532063349589026, "total_flos": 1056932920382050848, "step": 34208 }, { "loss": 1.229248046875, "learning_rate": 1.655940892238091e-05, "epoch": 0.4536302884995564, "total_flos": 1057941082249384224, "step": 34240 }, { "loss": 1.2452392578125, "learning_rate": 1.654655940892238e-05, "epoch": 0.45405424204021017, "total_flos": 1058920783783139904, "step": 34272 }, { "loss": 1.2227783203125, "learning_rate": 1.6533709895463854e-05, "epoch": 0.45447819558086394, "total_flos": 1059953142564945216, "step": 34304 }, { "loss": 1.2650146484375, "learning_rate": 1.6520860382005327e-05, "epoch": 0.4549021491215178, "total_flos": 1060930800875204928, "step": 34336 }, { "loss": 1.2393798828125, "learning_rate": 1.65080108685468e-05, "epoch": 0.45532610266217155, "total_flos": 1061924853238554336, "step": 34368 }, { "loss": 1.21875, "learning_rate": 1.6495161355088274e-05, "epoch": 0.4557500562028253, "total_flos": 1062910362675318240, "step": 34400 }, { "loss": 1.24169921875, "learning_rate": 1.6482311841629744e-05, "epoch": 0.4561740097434791, "total_flos": 1063859029735658784, "step": 34432 }, { "loss": 1.2357177734375, "learning_rate": 1.6469462328171218e-05, "epoch": 0.45659796328413293, "total_flos": 1064833277310633888, "step": 34464 }, { "loss": 1.2412109375, "learning_rate": 1.6456612814712695e-05, "epoch": 0.4570219168247867, "total_flos": 1065857704524065088, "step": 34496 }, { "loss": 1.21630859375, "learning_rate": 1.6443763301254168e-05, "epoch": 0.4574458703654405, "total_flos": 1066824133149283968, "step": 34528 }, { "loss": 1.2525634765625, "learning_rate": 1.643091378779564e-05, "epoch": 0.45786982390609426, "total_flos": 1067812120195641408, "step": 34560 }, { "loss": 1.234130859375, "learning_rate": 1.6418064274337115e-05, "epoch": 0.4582937774467481, "total_flos": 1068781428639803424, "step": 34592 }, { "loss": 1.2257080078125, "learning_rate": 1.6405214760878585e-05, "epoch": 0.4587177309874019, "total_flos": 1069736724110225376, "step": 34624 }, { "loss": 1.244873046875, "learning_rate": 1.639236524742006e-05, "epoch": 0.45914168452805565, "total_flos": 1070721300421298208, "step": 34656 }, { "loss": 1.21435546875, "learning_rate": 1.6379515733961532e-05, "epoch": 0.4595656380687095, "total_flos": 1071723622208875392, "step": 34688 }, { "loss": 1.2442626953125, "learning_rate": 1.6366666220503006e-05, "epoch": 0.45998959160936326, "total_flos": 1072729209936371328, "step": 34720 }, { "loss": 1.2265625, "learning_rate": 1.635381670704448e-05, "epoch": 0.46041354515001703, "total_flos": 1073686757779151040, "step": 34752 }, { "loss": 1.2388916015625, "learning_rate": 1.634096719358595e-05, "epoch": 0.4608374986906708, "total_flos": 1074672347657784864, "step": 34784 }, { "loss": 1.2379150390625, "learning_rate": 1.6328117680127423e-05, "epoch": 0.46126145223132464, "total_flos": 1075652724903247872, "step": 34816 }, { "loss": 1.216796875, "learning_rate": 1.6315268166668896e-05, "epoch": 0.4616854057719784, "total_flos": 1076644782308223264, "step": 34848 }, { "loss": 1.209716796875, "learning_rate": 1.6302418653210373e-05, "epoch": 0.4621093593126322, "total_flos": 1077626655772466784, "step": 34880 }, { "loss": 1.2213134765625, "learning_rate": 1.6289569139751847e-05, "epoch": 0.46253331285328597, "total_flos": 1078664758103784384, "step": 34912 }, { "loss": 1.2547607421875, "learning_rate": 1.6276719626293317e-05, "epoch": 0.4629572663939398, "total_flos": 1079685018428353728, "step": 34944 }, { "loss": 1.2340087890625, "learning_rate": 1.626387011283479e-05, "epoch": 0.4633812199345936, "total_flos": 1080658397231133696, "step": 34976 }, { "loss": 1.197509765625, "learning_rate": 1.6251020599376264e-05, "epoch": 0.46380517347524736, "total_flos": 1081684642430825088, "step": 35008 }, { "loss": 1.227294921875, "learning_rate": 1.6238171085917737e-05, "epoch": 0.46422912701590113, "total_flos": 1082677375547507808, "step": 35040 }, { "loss": 1.2423095703125, "learning_rate": 1.622532157245921e-05, "epoch": 0.46465308055655496, "total_flos": 1083650191257198336, "step": 35072 }, { "loss": 1.2220458984375, "learning_rate": 1.6212472059000684e-05, "epoch": 0.46507703409720874, "total_flos": 1084634976717132960, "step": 35104 }, { "loss": 1.248046875, "learning_rate": 1.6199622545542154e-05, "epoch": 0.4655009876378625, "total_flos": 1085619971325929376, "step": 35136 }, { "loss": 1.22998046875, "learning_rate": 1.6186773032083628e-05, "epoch": 0.4659249411785163, "total_flos": 1086607234395457536, "step": 35168 }, { "loss": 1.2320556640625, "learning_rate": 1.61739235186251e-05, "epoch": 0.4663488947191701, "total_flos": 1087590941934335232, "step": 35200 }, { "loss": 1.2451171875, "learning_rate": 1.6161074005166575e-05, "epoch": 0.4667728482598239, "total_flos": 1088551803982155648, "step": 35232 }, { "loss": 1.2406005859375, "learning_rate": 1.614822449170805e-05, "epoch": 0.4671968018004777, "total_flos": 1089525118431439680, "step": 35264 }, { "loss": 1.2386474609375, "learning_rate": 1.6135374978249522e-05, "epoch": 0.4676207553411315, "total_flos": 1090497869787634272, "step": 35296 }, { "loss": 1.2357177734375, "learning_rate": 1.6122525464790995e-05, "epoch": 0.4680447088817853, "total_flos": 1091469060571552416, "step": 35328 }, { "loss": 1.222900390625, "learning_rate": 1.610967595133247e-05, "epoch": 0.46846866242243906, "total_flos": 1092522044648805216, "step": 35360 }, { "loss": 1.2203369140625, "learning_rate": 1.6096826437873942e-05, "epoch": 0.46889261596309284, "total_flos": 1093499799489308832, "step": 35392 }, { "loss": 1.2276611328125, "learning_rate": 1.6083976924415416e-05, "epoch": 0.4693165695037467, "total_flos": 1094499289723064832, "step": 35424 }, { "loss": 1.2493896484375, "learning_rate": 1.6071127410956886e-05, "epoch": 0.46974052304440045, "total_flos": 1095455534407551840, "step": 35456 }, { "loss": 1.231201171875, "learning_rate": 1.605827789749836e-05, "epoch": 0.4701644765850542, "total_flos": 1096461540432771360, "step": 35488 }, { "loss": 1.245849609375, "learning_rate": 1.6045428384039833e-05, "epoch": 0.470588430125708, "total_flos": 1097414503088965632, "step": 35520 }, { "loss": 1.231689453125, "learning_rate": 1.6032578870581306e-05, "epoch": 0.47101238366636183, "total_flos": 1098377858834753568, "step": 35552 }, { "loss": 1.25439453125, "learning_rate": 1.601972935712278e-05, "epoch": 0.4714363372070156, "total_flos": 1099375531082249376, "step": 35584 }, { "loss": 1.2271728515625, "learning_rate": 1.6006879843664257e-05, "epoch": 0.4718602907476694, "total_flos": 1100368553789663808, "step": 35616 }, { "loss": 1.244140625, "learning_rate": 1.5994030330205727e-05, "epoch": 0.47228424428832316, "total_flos": 1101351376467972384, "step": 35648 }, { "loss": 1.239990234375, "learning_rate": 1.59811808167472e-05, "epoch": 0.472708197828977, "total_flos": 1102349949664411296, "step": 35680 }, { "loss": 1.241455078125, "learning_rate": 1.5968331303288674e-05, "epoch": 0.47313215136963077, "total_flos": 1103334413356866240, "step": 35712 }, { "loss": 1.2266845703125, "learning_rate": 1.5955481789830147e-05, "epoch": 0.47355610491028455, "total_flos": 1104318217425987840, "step": 35744 }, { "loss": 1.22021484375, "learning_rate": 1.594263227637162e-05, "epoch": 0.4739800584509383, "total_flos": 1105273786398767520, "step": 35776 }, { "loss": 1.20947265625, "learning_rate": 1.592978276291309e-05, "epoch": 0.47440401199159216, "total_flos": 1106274081051222720, "step": 35808 }, { "loss": 1.243896484375, "learning_rate": 1.5916933249454564e-05, "epoch": 0.47482796553224593, "total_flos": 1107291027170751360, "step": 35840 }, { "loss": 1.23779296875, "learning_rate": 1.5904083735996038e-05, "epoch": 0.4752519190728997, "total_flos": 1108325396999304672, "step": 35872 }, { "loss": 1.234130859375, "learning_rate": 1.589123422253751e-05, "epoch": 0.47567587261355354, "total_flos": 1109360136860459616, "step": 35904 }, { "loss": 1.226806640625, "learning_rate": 1.5878384709078985e-05, "epoch": 0.4760998261542073, "total_flos": 1110340385398930752, "step": 35936 }, { "loss": 1.2305908203125, "learning_rate": 1.5865535195620455e-05, "epoch": 0.4765237796948611, "total_flos": 1111333762050572832, "step": 35968 }, { "loss": 1.1993408203125, "learning_rate": 1.5852685682161932e-05, "epoch": 0.47694773323551487, "total_flos": 1112315764221808224, "step": 36000 }, { "loss": 1.2354736328125, "learning_rate": 1.5839836168703405e-05, "epoch": 0.4773716867761687, "total_flos": 1113303461677433952, "step": 36032 }, { "loss": 1.2529296875, "learning_rate": 1.582698665524488e-05, "epoch": 0.4777956403168225, "total_flos": 1114316433968588544, "step": 36064 }, { "loss": 1.2333984375, "learning_rate": 1.5814137141786352e-05, "epoch": 0.47821959385747626, "total_flos": 1115292258204214080, "step": 36096 }, { "loss": 1.251708984375, "learning_rate": 1.5801287628327826e-05, "epoch": 0.47864354739813003, "total_flos": 1116271477086750240, "step": 36128 }, { "loss": 1.2249755859375, "learning_rate": 1.5788438114869296e-05, "epoch": 0.47906750093878386, "total_flos": 1117272350920668864, "step": 36160 }, { "loss": 1.235595703125, "learning_rate": 1.577558860141077e-05, "epoch": 0.47949145447943764, "total_flos": 1118258471715644160, "step": 36192 }, { "loss": 1.250244140625, "learning_rate": 1.5762739087952243e-05, "epoch": 0.4799154080200914, "total_flos": 1119217499688830400, "step": 36224 }, { "loss": 1.21826171875, "learning_rate": 1.5749889574493716e-05, "epoch": 0.4803393615607452, "total_flos": 1120258932313562688, "step": 36256 }, { "loss": 1.221923828125, "learning_rate": 1.573704006103519e-05, "epoch": 0.480763315101399, "total_flos": 1121274156977075040, "step": 36288 }, { "loss": 1.2177734375, "learning_rate": 1.572419054757666e-05, "epoch": 0.4811872686420528, "total_flos": 1122272923234001760, "step": 36320 }, { "loss": 1.24267578125, "learning_rate": 1.5711341034118133e-05, "epoch": 0.4816112221827066, "total_flos": 1123250951576863104, "step": 36352 }, { "loss": 1.2296142578125, "learning_rate": 1.569849152065961e-05, "epoch": 0.48203517572336035, "total_flos": 1124260529221107072, "step": 36384 }, { "loss": 1.2423095703125, "learning_rate": 1.5685642007201084e-05, "epoch": 0.4824591292640142, "total_flos": 1125213218374943616, "step": 36416 }, { "loss": 1.2125244140625, "learning_rate": 1.5672792493742557e-05, "epoch": 0.48288308280466796, "total_flos": 1126226287196342112, "step": 36448 }, { "loss": 1.2479248046875, "learning_rate": 1.5659942980284027e-05, "epoch": 0.48330703634532174, "total_flos": 1127205908288227872, "step": 36480 }, { "loss": 1.223876953125, "learning_rate": 1.56470934668255e-05, "epoch": 0.4837309898859755, "total_flos": 1128211544280845760, "step": 36512 }, { "loss": 1.25830078125, "learning_rate": 1.5634243953366974e-05, "epoch": 0.48415494342662935, "total_flos": 1129188993442243680, "step": 36544 }, { "loss": 1.201904296875, "learning_rate": 1.5621394439908448e-05, "epoch": 0.4845788969672831, "total_flos": 1130198329760877888, "step": 36576 }, { "loss": 1.218994140625, "learning_rate": 1.560854492644992e-05, "epoch": 0.4850028505079369, "total_flos": 1131187314286422336, "step": 36608 }, { "loss": 1.2291259765625, "learning_rate": 1.5595695412991395e-05, "epoch": 0.48542680404859073, "total_flos": 1132163621173267392, "step": 36640 }, { "loss": 1.2313232421875, "learning_rate": 1.5582845899532865e-05, "epoch": 0.4858507575892445, "total_flos": 1133139928060112448, "step": 36672 }, { "loss": 1.214111328125, "learning_rate": 1.556999638607434e-05, "epoch": 0.4862747111298983, "total_flos": 1134106710629558976, "step": 36704 }, { "loss": 1.2359619140625, "learning_rate": 1.5557146872615812e-05, "epoch": 0.48669866467055206, "total_flos": 1135099315039249824, "step": 36736 }, { "loss": 1.241455078125, "learning_rate": 1.554429735915729e-05, "epoch": 0.4871226182112059, "total_flos": 1136060434501053984, "step": 36768 }, { "loss": 1.2325439453125, "learning_rate": 1.5531447845698762e-05, "epoch": 0.48754657175185967, "total_flos": 1137070398266273568, "step": 36800 }, { "loss": 1.2431640625, "learning_rate": 1.5518598332240232e-05, "epoch": 0.48797052529251345, "total_flos": 1138065287225070144, "step": 36832 }, { "loss": 1.2313232421875, "learning_rate": 1.5505748818781706e-05, "epoch": 0.4883944788331672, "total_flos": 1139048866056955968, "step": 36864 }, { "loss": 1.25830078125, "learning_rate": 1.549289930532318e-05, "epoch": 0.48881843237382105, "total_flos": 1140025880832256320, "step": 36896 }, { "loss": 1.2294921875, "learning_rate": 1.5480049791864653e-05, "epoch": 0.48924238591447483, "total_flos": 1140967710333653664, "step": 36928 }, { "loss": 1.23046875, "learning_rate": 1.5467200278406126e-05, "epoch": 0.4896663394551286, "total_flos": 1141953573714645216, "step": 36960 }, { "loss": 1.248291015625, "learning_rate": 1.5454350764947596e-05, "epoch": 0.4900902929957824, "total_flos": 1142939887570108320, "step": 36992 }, { "loss": 1.2489013671875, "learning_rate": 1.544150125148907e-05, "epoch": 0.4905142465364362, "total_flos": 1143917771117603808, "step": 37024 }, { "loss": 1.2213134765625, "learning_rate": 1.5428651738030543e-05, "epoch": 0.49093820007709, "total_flos": 1144905645545343360, "step": 37056 }, { "loss": 1.2462158203125, "learning_rate": 1.5415802224572017e-05, "epoch": 0.49136215361774377, "total_flos": 1145860104420318144, "step": 37088 }, { "loss": 1.1944580078125, "learning_rate": 1.5402952711113494e-05, "epoch": 0.49178610715839755, "total_flos": 1146910900478709120, "step": 37120 }, { "loss": 1.2427978515625, "learning_rate": 1.5390103197654964e-05, "epoch": 0.4922100606990514, "total_flos": 1147867965670269312, "step": 37152 }, { "loss": 1.2374267578125, "learning_rate": 1.5377253684196437e-05, "epoch": 0.49263401423970515, "total_flos": 1148850032195000640, "step": 37184 }, { "loss": 1.2310791015625, "learning_rate": 1.536440417073791e-05, "epoch": 0.49305796778035893, "total_flos": 1149839113250788992, "step": 37216 }, { "loss": 1.214111328125, "learning_rate": 1.5351554657279384e-05, "epoch": 0.49348192132101276, "total_flos": 1150869332278854432, "step": 37248 }, { "loss": 1.2183837890625, "learning_rate": 1.5338705143820858e-05, "epoch": 0.49390587486166654, "total_flos": 1151861470125699744, "step": 37280 }, { "loss": 1.22607421875, "learning_rate": 1.532585563036233e-05, "epoch": 0.4943298284023203, "total_flos": 1152873541467911232, "step": 37312 }, { "loss": 1.224365234375, "learning_rate": 1.53130061169038e-05, "epoch": 0.4947537819429741, "total_flos": 1153851215866544928, "step": 37344 }, { "loss": 1.216064453125, "learning_rate": 1.5300156603445275e-05, "epoch": 0.4951777354836279, "total_flos": 1154861967962089728, "step": 37376 }, { "loss": 1.2393798828125, "learning_rate": 1.528730708998675e-05, "epoch": 0.4956016890242817, "total_flos": 1155840977695764096, "step": 37408 }, { "loss": 1.223876953125, "learning_rate": 1.5274457576528222e-05, "epoch": 0.4960256425649355, "total_flos": 1156828996918869504, "step": 37440 }, { "loss": 1.2271728515625, "learning_rate": 1.5261608063069695e-05, "epoch": 0.49644959610558925, "total_flos": 1157829388101568608, "step": 37472 }, { "loss": 1.2174072265625, "learning_rate": 1.524875854961117e-05, "epoch": 0.4968735496462431, "total_flos": 1158812854314836544, "step": 37504 }, { "loss": 1.23095703125, "learning_rate": 1.5235909036152642e-05, "epoch": 0.49729750318689686, "total_flos": 1159817718065503200, "step": 37536 }, { "loss": 1.2391357421875, "learning_rate": 1.5223059522694116e-05, "epoch": 0.49772145672755064, "total_flos": 1160793654919746624, "step": 37568 }, { "loss": 1.2393798828125, "learning_rate": 1.521021000923559e-05, "epoch": 0.4981454102682044, "total_flos": 1161758185116835392, "step": 37600 }, { "loss": 1.2568359375, "learning_rate": 1.5197360495777061e-05, "epoch": 0.49856936380885825, "total_flos": 1162727413119127488, "step": 37632 }, { "loss": 1.2469482421875, "learning_rate": 1.5184510982318535e-05, "epoch": 0.498993317349512, "total_flos": 1163705377108492896, "step": 37664 }, { "loss": 1.2315673828125, "learning_rate": 1.5171661468860008e-05, "epoch": 0.4994172708901658, "total_flos": 1164680734781272896, "step": 37696 }, { "loss": 1.2100830078125, "learning_rate": 1.515881195540148e-05, "epoch": 0.4998412244308196, "total_flos": 1165661498147711520, "step": 37728 }, { "loss": 1.2376708984375, "learning_rate": 1.5145962441942953e-05, "epoch": 0.5002651779714734, "total_flos": 1166640507881385888, "step": 37760 }, { "loss": 1.2210693359375, "learning_rate": 1.5133112928484425e-05, "epoch": 0.5006891315121271, "total_flos": 1167622976615466816, "step": 37792 }, { "loss": 1.2166748046875, "learning_rate": 1.5120263415025899e-05, "epoch": 0.501113085052781, "total_flos": 1168620600597840672, "step": 37824 }, { "loss": 1.2047119140625, "learning_rate": 1.5107413901567372e-05, "epoch": 0.5015370385934348, "total_flos": 1169611724877124992, "step": 37856 }, { "loss": 1.21337890625, "learning_rate": 1.5094564388108847e-05, "epoch": 0.5019609921340885, "total_flos": 1170592777834295328, "step": 37888 }, { "loss": 1.2164306640625, "learning_rate": 1.5081714874650321e-05, "epoch": 0.5023849456747423, "total_flos": 1171591318853986272, "step": 37920 }, { "loss": 1.235595703125, "learning_rate": 1.5068865361191794e-05, "epoch": 0.5028088992153962, "total_flos": 1172557956628066944, "step": 37952 }, { "loss": 1.2191162109375, "learning_rate": 1.5056015847733266e-05, "epoch": 0.5032328527560499, "total_flos": 1173503052069383040, "step": 37984 }, { "loss": 1.229736328125, "learning_rate": 1.504316633427474e-05, "epoch": 0.5036568062967037, "total_flos": 1174510892169236736, "step": 38016 }, { "loss": 1.25244140625, "learning_rate": 1.5030316820816211e-05, "epoch": 0.5040807598373576, "total_flos": 1175473330877707584, "step": 38048 }, { "loss": 1.2210693359375, "learning_rate": 1.5017467307357685e-05, "epoch": 0.5045047133780113, "total_flos": 1176487558062032928, "step": 38080 }, { "loss": 1.2274169921875, "learning_rate": 1.5004617793899158e-05, "epoch": 0.5049286669186651, "total_flos": 1177463929302373920, "step": 38112 }, { "loss": 1.254150390625, "learning_rate": 1.499176828044063e-05, "epoch": 0.5053526204593188, "total_flos": 1178448296464584960, "step": 38144 }, { "loss": 1.2452392578125, "learning_rate": 1.4978918766982105e-05, "epoch": 0.5057765739999727, "total_flos": 1179428915035657728, "step": 38176 }, { "loss": 1.2279052734375, "learning_rate": 1.4966069253523579e-05, "epoch": 0.5062005275406265, "total_flos": 1180405929810958080, "step": 38208 }, { "loss": 1.2418212890625, "learning_rate": 1.495321974006505e-05, "epoch": 0.5066244810812802, "total_flos": 1181399660406827808, "step": 38240 }, { "loss": 1.2135009765625, "learning_rate": 1.4940370226606524e-05, "epoch": 0.507048434621934, "total_flos": 1182301188531395232, "step": 38272 }, { "loss": 1.240966796875, "learning_rate": 1.4927520713147996e-05, "epoch": 0.5074723881625879, "total_flos": 1183257031006532640, "step": 38304 }, { "loss": 1.2271728515625, "learning_rate": 1.491467119968947e-05, "epoch": 0.5078963417032416, "total_flos": 1184235429381995616, "step": 38336 }, { "loss": 1.244384765625, "learning_rate": 1.4901821686230945e-05, "epoch": 0.5083202952438954, "total_flos": 1185202292393312064, "step": 38368 }, { "loss": 1.2476806640625, "learning_rate": 1.4888972172772416e-05, "epoch": 0.5087442487845492, "total_flos": 1186186563025279200, "step": 38400 }, { "loss": 1.233642578125, "learning_rate": 1.487612265931389e-05, "epoch": 0.509168202325203, "total_flos": 1187156659799766432, "step": 38432 }, { "loss": 1.2091064453125, "learning_rate": 1.4863273145855363e-05, "epoch": 0.5095921558658568, "total_flos": 1188147767990676768, "step": 38464 }, { "loss": 1.223388671875, "learning_rate": 1.4850423632396835e-05, "epoch": 0.5100161094065105, "total_flos": 1189121565091180320, "step": 38496 }, { "loss": 1.24462890625, "learning_rate": 1.4837574118938309e-05, "epoch": 0.5104400629471644, "total_flos": 1190131255354042176, "step": 38528 }, { "loss": 1.1910400390625, "learning_rate": 1.4824724605479782e-05, "epoch": 0.5108640164878182, "total_flos": 1191143680640481312, "step": 38560 }, { "loss": 1.2265625, "learning_rate": 1.4811875092021256e-05, "epoch": 0.5112879700284719, "total_flos": 1192171180733343456, "step": 38592 }, { "loss": 1.2489013671875, "learning_rate": 1.479902557856273e-05, "epoch": 0.5117119235691258, "total_flos": 1193116195732789632, "step": 38624 }, { "loss": 1.2724609375, "learning_rate": 1.4786176065104201e-05, "epoch": 0.5121358771097796, "total_flos": 1194099163206464064, "step": 38656 }, { "loss": 1.23046875, "learning_rate": 1.4773326551645675e-05, "epoch": 0.5125598306504333, "total_flos": 1195099908333390816, "step": 38688 }, { "loss": 1.21435546875, "learning_rate": 1.476047703818715e-05, "epoch": 0.5129837841910871, "total_flos": 1196058164064625824, "step": 38720 }, { "loss": 1.2166748046875, "learning_rate": 1.4747627524728621e-05, "epoch": 0.5134077377317409, "total_flos": 1197014778781714464, "step": 38752 }, { "loss": 1.2083740234375, "learning_rate": 1.4734778011270095e-05, "epoch": 0.5138316912723947, "total_flos": 1197998277171730368, "step": 38784 }, { "loss": 1.2022705078125, "learning_rate": 1.4721928497811567e-05, "epoch": 0.5142556448130485, "total_flos": 1199001322936136832, "step": 38816 }, { "loss": 1.21923828125, "learning_rate": 1.470907898435304e-05, "epoch": 0.5146795983537023, "total_flos": 1200006106244933568, "step": 38848 }, { "loss": 1.216064453125, "learning_rate": 1.4696229470894514e-05, "epoch": 0.5151035518943561, "total_flos": 1200994350705274752, "step": 38880 }, { "loss": 1.21484375, "learning_rate": 1.4683379957435987e-05, "epoch": 0.5155275054350099, "total_flos": 1201989480989681088, "step": 38912 }, { "loss": 1.2437744140625, "learning_rate": 1.467053044397746e-05, "epoch": 0.5159514589756636, "total_flos": 1202942314938883488, "step": 38944 }, { "loss": 1.24072265625, "learning_rate": 1.4657680930518934e-05, "epoch": 0.5163754125163175, "total_flos": 1203902436921500640, "step": 38976 }, { "loss": 1.2220458984375, "learning_rate": 1.4644831417060406e-05, "epoch": 0.5167993660569712, "total_flos": 1204948116876964704, "step": 39008 }, { "loss": 1.21728515625, "learning_rate": 1.463198190360188e-05, "epoch": 0.517223319597625, "total_flos": 1205961539642590848, "step": 39040 }, { "loss": 1.22607421875, "learning_rate": 1.4619132390143351e-05, "epoch": 0.5176472731382789, "total_flos": 1206927614323582080, "step": 39072 }, { "loss": 1.2181396484375, "learning_rate": 1.4606282876684827e-05, "epoch": 0.5180712266789326, "total_flos": 1207890616125142368, "step": 39104 }, { "loss": 1.2119140625, "learning_rate": 1.45934333632263e-05, "epoch": 0.5184951802195864, "total_flos": 1208874902845483488, "step": 39136 }, { "loss": 1.1920166015625, "learning_rate": 1.4580583849767772e-05, "epoch": 0.5189191337602402, "total_flos": 1209889194383304768, "step": 39168 }, { "loss": 1.22021484375, "learning_rate": 1.4567734336309245e-05, "epoch": 0.519343087300894, "total_flos": 1210887719314621728, "step": 39200 }, { "loss": 1.269287109375, "learning_rate": 1.4554884822850719e-05, "epoch": 0.5197670408415478, "total_flos": 1211831125476669504, "step": 39232 }, { "loss": 1.2152099609375, "learning_rate": 1.454203530939219e-05, "epoch": 0.5201909943822016, "total_flos": 1212860668793027616, "step": 39264 }, { "loss": 1.256591796875, "learning_rate": 1.4529185795933666e-05, "epoch": 0.5206149479228553, "total_flos": 1213866626553125184, "step": 39296 }, { "loss": 1.2144775390625, "learning_rate": 1.4516336282475138e-05, "epoch": 0.5210389014635092, "total_flos": 1214835339727449792, "step": 39328 }, { "loss": 1.2208251953125, "learning_rate": 1.4503486769016611e-05, "epoch": 0.5214628550041629, "total_flos": 1215808895502343584, "step": 39360 }, { "loss": 1.2359619140625, "learning_rate": 1.4490637255558085e-05, "epoch": 0.5218868085448167, "total_flos": 1216742568198131040, "step": 39392 }, { "loss": 1.2008056640625, "learning_rate": 1.4477787742099556e-05, "epoch": 0.5223107620854706, "total_flos": 1217752918084326240, "step": 39424 }, { "loss": 1.2099609375, "learning_rate": 1.446493822864103e-05, "epoch": 0.5227347156261243, "total_flos": 1218748257517594368, "step": 39456 }, { "loss": 1.240234375, "learning_rate": 1.4452088715182505e-05, "epoch": 0.5231586691667781, "total_flos": 1219705853625496032, "step": 39488 }, { "loss": 1.23388671875, "learning_rate": 1.4439239201723977e-05, "epoch": 0.5235826227074319, "total_flos": 1220671252594779936, "step": 39520 }, { "loss": 1.208251953125, "learning_rate": 1.442638968826545e-05, "epoch": 0.5240065762480857, "total_flos": 1221652305551950272, "step": 39552 }, { "loss": 1.2554931640625, "learning_rate": 1.4413540174806922e-05, "epoch": 0.5244305297887395, "total_flos": 1222606651808307168, "step": 39584 }, { "loss": 1.252197265625, "learning_rate": 1.4400690661348396e-05, "epoch": 0.5248544833293932, "total_flos": 1223565454544257632, "step": 39616 }, { "loss": 1.25390625, "learning_rate": 1.4387841147889869e-05, "epoch": 0.525278436870047, "total_flos": 1224561485777607072, "step": 39648 }, { "loss": 1.2060546875, "learning_rate": 1.4374991634431343e-05, "epoch": 0.5257023904107009, "total_flos": 1225522090411443744, "step": 39680 }, { "loss": 1.2213134765625, "learning_rate": 1.4362142120972816e-05, "epoch": 0.5261263439513546, "total_flos": 1226446447938694464, "step": 39712 }, { "loss": 1.219970703125, "learning_rate": 1.434929260751429e-05, "epoch": 0.5265502974920084, "total_flos": 1227441208190499168, "step": 39744 }, { "loss": 1.21728515625, "learning_rate": 1.4336443094055761e-05, "epoch": 0.5269742510326623, "total_flos": 1228458556519377408, "step": 39776 }, { "loss": 1.2288818359375, "learning_rate": 1.4323593580597235e-05, "epoch": 0.527398204573316, "total_flos": 1229433125861832192, "step": 39808 }, { "loss": 1.218505859375, "learning_rate": 1.4310744067138707e-05, "epoch": 0.5278221581139698, "total_flos": 1230442542622336320, "step": 39840 }, { "loss": 1.2235107421875, "learning_rate": 1.4297894553680182e-05, "epoch": 0.5282461116546237, "total_flos": 1231445668828612704, "step": 39872 }, { "loss": 1.2181396484375, "learning_rate": 1.4285045040221655e-05, "epoch": 0.5286700651952774, "total_flos": 1232407351383506304, "step": 39904 }, { "loss": 1.224365234375, "learning_rate": 1.4272195526763127e-05, "epoch": 0.5290940187359312, "total_flos": 1233395724550839360, "step": 39936 }, { "loss": 1.2259521484375, "learning_rate": 1.42593460133046e-05, "epoch": 0.5295179722765849, "total_flos": 1234374943433375520, "step": 39968 }, { "loss": 1.2100830078125, "learning_rate": 1.4246496499846074e-05, "epoch": 0.5299419258172388, "total_flos": 1235349046212984768, "step": 40000 }, { "loss": 1.2449951171875, "learning_rate": 1.4233646986387546e-05, "epoch": 0.5303658793578926, "total_flos": 1236320880531862272, "step": 40032 }, { "loss": 1.21484375, "learning_rate": 1.4220797472929021e-05, "epoch": 0.5307898328985463, "total_flos": 1237330796031959904, "step": 40064 }, { "loss": 1.1986083984375, "learning_rate": 1.4207947959470493e-05, "epoch": 0.5312137864392001, "total_flos": 1238322853436935296, "step": 40096 }, { "loss": 1.19873046875, "learning_rate": 1.4195098446011966e-05, "epoch": 0.531637739979854, "total_flos": 1239312803264918784, "step": 40128 }, { "loss": 1.2664794921875, "learning_rate": 1.418224893255344e-05, "epoch": 0.5320616935205077, "total_flos": 1240278057438836832, "step": 40160 }, { "loss": 1.220947265625, "learning_rate": 1.4169399419094912e-05, "epoch": 0.5324856470611615, "total_flos": 1241247639385356576, "step": 40192 }, { "loss": 1.2242431640625, "learning_rate": 1.4156549905636385e-05, "epoch": 0.5329096006018152, "total_flos": 1242170371986834912, "step": 40224 }, { "loss": 1.227783203125, "learning_rate": 1.414370039217786e-05, "epoch": 0.5333335541424691, "total_flos": 1243205063582867904, "step": 40256 }, { "loss": 1.2020263671875, "learning_rate": 1.4130850878719332e-05, "epoch": 0.5337575076831229, "total_flos": 1244193259778087136, "step": 40288 }, { "loss": 1.236083984375, "learning_rate": 1.4118001365260806e-05, "epoch": 0.5341814612237766, "total_flos": 1245157307323956384, "step": 40320 }, { "loss": 1.2169189453125, "learning_rate": 1.4105151851802277e-05, "epoch": 0.5346054147644305, "total_flos": 1246184582179582752, "step": 40352 }, { "loss": 1.2222900390625, "learning_rate": 1.4092302338343751e-05, "epoch": 0.5350293683050843, "total_flos": 1247171233890899520, "step": 40384 }, { "loss": 1.1922607421875, "learning_rate": 1.4079452824885226e-05, "epoch": 0.535453321845738, "total_flos": 1248143518684248576, "step": 40416 }, { "loss": 1.2325439453125, "learning_rate": 1.4066603311426698e-05, "epoch": 0.5358772753863918, "total_flos": 1249101629620117728, "step": 40448 }, { "loss": 1.252685546875, "learning_rate": 1.4053753797968171e-05, "epoch": 0.5363012289270457, "total_flos": 1250069200519889472, "step": 40480 }, { "loss": 1.212158203125, "learning_rate": 1.4040904284509645e-05, "epoch": 0.5367251824676994, "total_flos": 1251062030166816096, "step": 40512 }, { "loss": 1.229248046875, "learning_rate": 1.4028054771051117e-05, "epoch": 0.5371491360083532, "total_flos": 1252069435880572224, "step": 40544 }, { "loss": 1.2305908203125, "learning_rate": 1.401520525759259e-05, "epoch": 0.537573089549007, "total_flos": 1253092189903109088, "step": 40576 }, { "loss": 1.2188720703125, "learning_rate": 1.4002355744134064e-05, "epoch": 0.5379970430896608, "total_flos": 1254074803432555872, "step": 40608 }, { "loss": 1.19482421875, "learning_rate": 1.3989506230675537e-05, "epoch": 0.5384209966303146, "total_flos": 1255045768979238240, "step": 40640 }, { "loss": 1.201416015625, "learning_rate": 1.397665671721701e-05, "epoch": 0.5388449501709683, "total_flos": 1256073799988441856, "step": 40672 }, { "loss": 1.2230224609375, "learning_rate": 1.3963807203758482e-05, "epoch": 0.5392689037116222, "total_flos": 1257054370294392672, "step": 40704 }, { "loss": 1.21875, "learning_rate": 1.3950957690299956e-05, "epoch": 0.539692857252276, "total_flos": 1258038254805384192, "step": 40736 }, { "loss": 1.2188720703125, "learning_rate": 1.393810817684143e-05, "epoch": 0.5401168107929297, "total_flos": 1259015607436538208, "step": 40768 }, { "loss": 1.2025146484375, "learning_rate": 1.3925258663382903e-05, "epoch": 0.5405407643335836, "total_flos": 1259998993207936224, "step": 40800 }, { "loss": 1.2237548828125, "learning_rate": 1.3912409149924376e-05, "epoch": 0.5409647178742373, "total_flos": 1260985789714618848, "step": 40832 }, { "loss": 1.19775390625, "learning_rate": 1.3899559636465848e-05, "epoch": 0.5413886714148911, "total_flos": 1262020851343253472, "step": 40864 }, { "loss": 1.2288818359375, "learning_rate": 1.3886710123007322e-05, "epoch": 0.5418126249555449, "total_flos": 1263005926393919808, "step": 40896 }, { "loss": 1.2213134765625, "learning_rate": 1.3873860609548795e-05, "epoch": 0.5422365784961987, "total_flos": 1264004499590358720, "step": 40928 }, { "loss": 1.226318359375, "learning_rate": 1.3861011096090267e-05, "epoch": 0.5426605320368525, "total_flos": 1264959730707284736, "step": 40960 }, { "eval_loss": 1.0659530388489367, "epoch": 0.5426605320368525, "total_flos": 1264959730707284736, "step": 40960 }, { "loss": 1.2401123046875, "learning_rate": 1.3848161582631742e-05, "epoch": 0.5430844855775063, "total_flos": 1265907175051202496, "step": 40992 }, { "loss": 1.233642578125, "learning_rate": 1.3835312069173216e-05, "epoch": 0.54350843911816, "total_flos": 1266881615686665408, "step": 41024 }, { "loss": 1.2393798828125, "learning_rate": 1.3822462555714687e-05, "epoch": 0.5439323926588139, "total_flos": 1267906123341966528, "step": 41056 }, { "loss": 1.1962890625, "learning_rate": 1.3809613042256161e-05, "epoch": 0.5443563461994677, "total_flos": 1268913352083608832, "step": 41088 }, { "loss": 1.231689453125, "learning_rate": 1.3796763528797633e-05, "epoch": 0.5447802997401214, "total_flos": 1269906069111917568, "step": 41120 }, { "loss": 1.212158203125, "learning_rate": 1.3783914015339106e-05, "epoch": 0.5452042532807753, "total_flos": 1270871451992827488, "step": 41152 }, { "loss": 1.22802734375, "learning_rate": 1.3771064501880581e-05, "epoch": 0.545628206821429, "total_flos": 1271873081980323360, "step": 41184 }, { "loss": 1.266357421875, "learning_rate": 1.3758214988422053e-05, "epoch": 0.5460521603620828, "total_flos": 1272832447809363264, "step": 41216 }, { "loss": 1.2174072265625, "learning_rate": 1.3745365474963527e-05, "epoch": 0.5464761139027366, "total_flos": 1273842926402550336, "step": 41248 }, { "loss": 1.183349609375, "learning_rate": 1.3732515961505e-05, "epoch": 0.5469000674433904, "total_flos": 1274834292007444416, "step": 41280 }, { "loss": 1.2406005859375, "learning_rate": 1.3719666448046472e-05, "epoch": 0.5473240209840442, "total_flos": 1275808587847541472, "step": 41312 }, { "loss": 1.223876953125, "learning_rate": 1.3706816934587945e-05, "epoch": 0.547747974524698, "total_flos": 1276794966056500512, "step": 41344 }, { "loss": 1.21142578125, "learning_rate": 1.3693967421129419e-05, "epoch": 0.5481719280653518, "total_flos": 1277809965482777088, "step": 41376 }, { "loss": 1.2442626953125, "learning_rate": 1.3681117907670892e-05, "epoch": 0.5485958816060056, "total_flos": 1278801540236532960, "step": 41408 }, { "loss": 1.214599609375, "learning_rate": 1.3668268394212366e-05, "epoch": 0.5490198351466593, "total_flos": 1279781997923865888, "step": 41440 }, { "loss": 1.1876220703125, "learning_rate": 1.3655418880753838e-05, "epoch": 0.5494437886873131, "total_flos": 1280783450939247936, "step": 41472 }, { "loss": 1.2027587890625, "learning_rate": 1.3642569367295311e-05, "epoch": 0.549867742227967, "total_flos": 1281764552161540224, "step": 41504 }, { "loss": 1.2259521484375, "learning_rate": 1.3629719853836785e-05, "epoch": 0.5502916957686207, "total_flos": 1282818694601719872, "step": 41536 }, { "loss": 1.2154541015625, "learning_rate": 1.3616870340378258e-05, "epoch": 0.5507156493092745, "total_flos": 1283779363589052480, "step": 41568 }, { "loss": 1.2054443359375, "learning_rate": 1.3604020826919732e-05, "epoch": 0.5511396028499284, "total_flos": 1284759853453133376, "step": 41600 }, { "loss": 1.2476806640625, "learning_rate": 1.3591171313461204e-05, "epoch": 0.5515635563905821, "total_flos": 1285692802172091552, "step": 41632 }, { "loss": 1.2376708984375, "learning_rate": 1.3578321800002677e-05, "epoch": 0.5519875099312359, "total_flos": 1286656013122513632, "step": 41664 }, { "loss": 1.1878662109375, "learning_rate": 1.356547228654415e-05, "epoch": 0.5524114634718897, "total_flos": 1287620414612610528, "step": 41696 }, { "loss": 1.233642578125, "learning_rate": 1.3552622773085622e-05, "epoch": 0.5528354170125435, "total_flos": 1288626066693602400, "step": 41728 }, { "loss": 1.234619140625, "learning_rate": 1.3539773259627097e-05, "epoch": 0.5532593705531973, "total_flos": 1289596002584349792, "step": 41760 }, { "loss": 1.24072265625, "learning_rate": 1.3526923746168571e-05, "epoch": 0.553683324093851, "total_flos": 1290616166378675232, "step": 41792 }, { "loss": 1.244873046875, "learning_rate": 1.3514074232710043e-05, "epoch": 0.5541072776345048, "total_flos": 1291596447093894336, "step": 41824 }, { "loss": 1.205322265625, "learning_rate": 1.3501224719251516e-05, "epoch": 0.5545312311751587, "total_flos": 1292572818334235328, "step": 41856 }, { "loss": 1.222900390625, "learning_rate": 1.3488375205792988e-05, "epoch": 0.5549551847158124, "total_flos": 1293540711001486752, "step": 41888 }, { "loss": 1.2081298828125, "learning_rate": 1.3475525692334463e-05, "epoch": 0.5553791382564662, "total_flos": 1294557319265161728, "step": 41920 }, { "loss": 1.228759765625, "learning_rate": 1.3462676178875937e-05, "epoch": 0.5558030917971201, "total_flos": 1295561603834364960, "step": 41952 }, { "loss": 1.2100830078125, "learning_rate": 1.3449826665417409e-05, "epoch": 0.5562270453377738, "total_flos": 1296542109786819840, "step": 41984 }, { "loss": 1.2078857421875, "learning_rate": 1.3436977151958882e-05, "epoch": 0.5566509988784276, "total_flos": 1297503180983502048, "step": 42016 }, { "loss": 1.217529296875, "learning_rate": 1.3424127638500356e-05, "epoch": 0.5570749524190813, "total_flos": 1298521929000916896, "step": 42048 }, { "loss": 1.2061767578125, "learning_rate": 1.3411278125041827e-05, "epoch": 0.5574989059597352, "total_flos": 1299544232548982208, "step": 42080 }, { "loss": 1.234375, "learning_rate": 1.3398428611583303e-05, "epoch": 0.557922859500389, "total_flos": 1300532798776803072, "step": 42112 }, { "loss": 1.2431640625, "learning_rate": 1.3385579098124774e-05, "epoch": 0.5583468130410427, "total_flos": 1301515058362022208, "step": 42144 }, { "loss": 1.208984375, "learning_rate": 1.3372729584666248e-05, "epoch": 0.5587707665816966, "total_flos": 1302486796150655808, "step": 42176 }, { "loss": 1.2332763671875, "learning_rate": 1.3359880071207721e-05, "epoch": 0.5591947201223504, "total_flos": 1303468251317175744, "step": 42208 }, { "loss": 1.2108154296875, "learning_rate": 1.3347030557749193e-05, "epoch": 0.5596186736630041, "total_flos": 1304427922825321344, "step": 42240 }, { "loss": 1.2198486328125, "learning_rate": 1.3334181044290667e-05, "epoch": 0.5600426272036579, "total_flos": 1305389026198751520, "step": 42272 }, { "loss": 1.2177734375, "learning_rate": 1.3321331530832142e-05, "epoch": 0.5604665807443117, "total_flos": 1306376434063645536, "step": 42304 }, { "loss": 1.21337890625, "learning_rate": 1.3308482017373614e-05, "epoch": 0.5608905342849655, "total_flos": 1307326018161303168, "step": 42336 }, { "loss": 1.2191162109375, "learning_rate": 1.3295632503915087e-05, "epoch": 0.5613144878256193, "total_flos": 1308305076160099488, "step": 42368 }, { "loss": 1.2054443359375, "learning_rate": 1.3282782990456559e-05, "epoch": 0.561738441366273, "total_flos": 1309341859244750400, "step": 42400 }, { "loss": 1.224609375, "learning_rate": 1.3269933476998032e-05, "epoch": 0.5621623949069269, "total_flos": 1310346208167449568, "step": 42432 }, { "loss": 1.2159423828125, "learning_rate": 1.3257083963539506e-05, "epoch": 0.5625863484475807, "total_flos": 1311369927492425472, "step": 42464 }, { "loss": 1.1929931640625, "learning_rate": 1.324423445008098e-05, "epoch": 0.5630103019882344, "total_flos": 1312368323716750560, "step": 42496 }, { "loss": 1.2421875, "learning_rate": 1.3231384936622453e-05, "epoch": 0.5634342555288883, "total_flos": 1313363856210506496, "step": 42528 }, { "loss": 1.234619140625, "learning_rate": 1.3218535423163926e-05, "epoch": 0.5638582090695421, "total_flos": 1314344426516457312, "step": 42560 }, { "loss": 1.2156982421875, "learning_rate": 1.3205685909705398e-05, "epoch": 0.5642821626101958, "total_flos": 1315356336974928960, "step": 42592 }, { "loss": 1.21240234375, "learning_rate": 1.3192836396246872e-05, "epoch": 0.5647061161508496, "total_flos": 1316388422254376544, "step": 42624 }, { "loss": 1.2127685546875, "learning_rate": 1.3179986882788343e-05, "epoch": 0.5651300696915034, "total_flos": 1317396278442604224, "step": 42656 }, { "loss": 1.21533203125, "learning_rate": 1.3167137369329819e-05, "epoch": 0.5655540232321572, "total_flos": 1318405486054246560, "step": 42688 }, { "loss": 1.2025146484375, "learning_rate": 1.3154287855871292e-05, "epoch": 0.565977976772811, "total_flos": 1319378012173205376, "step": 42720 }, { "loss": 1.246337890625, "learning_rate": 1.3141438342412764e-05, "epoch": 0.5664019303134648, "total_flos": 1320324925600781664, "step": 42752 }, { "loss": 1.2386474609375, "learning_rate": 1.3128588828954237e-05, "epoch": 0.5668258838541186, "total_flos": 1321333489677464640, "step": 42784 }, { "loss": 1.2200927734375, "learning_rate": 1.311573931549571e-05, "epoch": 0.5672498373947724, "total_flos": 1322307608545447872, "step": 42816 }, { "loss": 1.219970703125, "learning_rate": 1.3102889802037183e-05, "epoch": 0.5676737909354261, "total_flos": 1323292088326276800, "step": 42848 }, { "loss": 1.21630859375, "learning_rate": 1.3090040288578658e-05, "epoch": 0.56809774447608, "total_flos": 1324296228100114176, "step": 42880 }, { "loss": 1.210693359375, "learning_rate": 1.307719077512013e-05, "epoch": 0.5685216980167337, "total_flos": 1325270218261105536, "step": 42912 }, { "loss": 1.21337890625, "learning_rate": 1.3064341261661603e-05, "epoch": 0.5689456515573875, "total_flos": 1326284155854699168, "step": 42944 }, { "loss": 1.210693359375, "learning_rate": 1.3051491748203077e-05, "epoch": 0.5693696050980414, "total_flos": 1327290306675284544, "step": 42976 }, { "loss": 1.2001953125, "learning_rate": 1.3038642234744548e-05, "epoch": 0.5697935586386951, "total_flos": 1328331819741886752, "step": 43008 }, { "loss": 1.1878662109375, "learning_rate": 1.3025792721286022e-05, "epoch": 0.5702175121793489, "total_flos": 1329326274314585760, "step": 43040 }, { "loss": 1.227294921875, "learning_rate": 1.3012943207827497e-05, "epoch": 0.5706414657200027, "total_flos": 1330308485634682944, "step": 43072 }, { "loss": 1.1944580078125, "learning_rate": 1.3000093694368969e-05, "epoch": 0.5710654192606565, "total_flos": 1331348550747626592, "step": 43104 }, { "loss": 1.21630859375, "learning_rate": 1.2987244180910442e-05, "epoch": 0.5714893728013103, "total_flos": 1332327689188292832, "step": 43136 }, { "loss": 1.2158203125, "learning_rate": 1.2974394667451914e-05, "epoch": 0.5719133263419641, "total_flos": 1333325924528878080, "step": 43168 }, { "loss": 1.2109375, "learning_rate": 1.2961545153993388e-05, "epoch": 0.5723372798826178, "total_flos": 1334348002839707616, "step": 43200 }, { "loss": 1.2362060546875, "learning_rate": 1.2948695640534861e-05, "epoch": 0.5727612334232717, "total_flos": 1335335491146471552, "step": 43232 }, { "loss": 1.2421875, "learning_rate": 1.2935846127076335e-05, "epoch": 0.5731851869639254, "total_flos": 1336360240127382432, "step": 43264 }, { "loss": 1.1947021484375, "learning_rate": 1.2922996613617808e-05, "epoch": 0.5736091405045792, "total_flos": 1337328454562113536, "step": 43296 }, { "loss": 1.206787109375, "learning_rate": 1.2910147100159282e-05, "epoch": 0.5740330940452331, "total_flos": 1338266278058388864, "step": 43328 }, { "loss": 1.2320556640625, "learning_rate": 1.2897297586700753e-05, "epoch": 0.5744570475858868, "total_flos": 1339255117788567456, "step": 43360 }, { "loss": 1.20751953125, "learning_rate": 1.2884448073242227e-05, "epoch": 0.5748810011265406, "total_flos": 1340239887160128096, "step": 43392 }, { "loss": 1.2183837890625, "learning_rate": 1.2871598559783699e-05, "epoch": 0.5753049546671944, "total_flos": 1341234792207298656, "step": 43424 }, { "loss": 1.2117919921875, "learning_rate": 1.2858749046325174e-05, "epoch": 0.5757289082078482, "total_flos": 1342214011089834816, "step": 43456 }, { "loss": 1.187255859375, "learning_rate": 1.2845899532866647e-05, "epoch": 0.576152861748502, "total_flos": 1343211120244241184, "step": 43488 }, { "loss": 1.2392578125, "learning_rate": 1.283305001940812e-05, "epoch": 0.5765768152891557, "total_flos": 1344209194701086592, "step": 43520 }, { "loss": 1.207275390625, "learning_rate": 1.2820200505949593e-05, "epoch": 0.5770007688298096, "total_flos": 1345210921218826368, "step": 43552 }, { "loss": 1.212646484375, "learning_rate": 1.2807350992491066e-05, "epoch": 0.5774247223704634, "total_flos": 1346134763918109600, "step": 43584 }, { "loss": 1.1900634765625, "learning_rate": 1.279450147903254e-05, "epoch": 0.5778486759111171, "total_flos": 1347143102757556800, "step": 43616 }, { "loss": 1.2215576171875, "learning_rate": 1.2781651965574013e-05, "epoch": 0.5782726294517709, "total_flos": 1348148304364077120, "step": 43648 }, { "loss": 1.21337890625, "learning_rate": 1.2768802452115485e-05, "epoch": 0.5786965829924248, "total_flos": 1349155774431329184, "step": 43680 }, { "loss": 1.2335205078125, "learning_rate": 1.2755952938656958e-05, "epoch": 0.5791205365330785, "total_flos": 1350121237754109024, "step": 43712 }, { "loss": 1.1943359375, "learning_rate": 1.2743103425198432e-05, "epoch": 0.5795444900737323, "total_flos": 1351117832080547904, "step": 43744 }, { "loss": 1.215087890625, "learning_rate": 1.2730253911739904e-05, "epoch": 0.5799684436143862, "total_flos": 1352068365392270592, "step": 43776 }, { "loss": 1.2001953125, "learning_rate": 1.2717404398281379e-05, "epoch": 0.5803923971550399, "total_flos": 1353043224325457088, "step": 43808 }, { "loss": 1.2188720703125, "learning_rate": 1.270455488482285e-05, "epoch": 0.5808163506956937, "total_flos": 1354046768829457056, "step": 43840 }, { "loss": 1.205322265625, "learning_rate": 1.2691705371364324e-05, "epoch": 0.5812403042363474, "total_flos": 1355060674246302720, "step": 43872 }, { "loss": 1.19580078125, "learning_rate": 1.2678855857905798e-05, "epoch": 0.5816642577770013, "total_flos": 1356059376149733504, "step": 43904 }, { "loss": 1.2249755859375, "learning_rate": 1.266600634444727e-05, "epoch": 0.5820882113176551, "total_flos": 1357071769259424672, "step": 43936 }, { "loss": 1.22412109375, "learning_rate": 1.2653156830988743e-05, "epoch": 0.5825121648583088, "total_flos": 1358066497334481408, "step": 43968 }, { "loss": 1.2330322265625, "learning_rate": 1.2640307317530218e-05, "epoch": 0.5829361183989626, "total_flos": 1359050285315229024, "step": 44000 }, { "loss": 1.234619140625, "learning_rate": 1.262745780407169e-05, "epoch": 0.5833600719396165, "total_flos": 1360000223357114304, "step": 44032 }, { "loss": 1.2132568359375, "learning_rate": 1.2614608290613163e-05, "epoch": 0.5837840254802702, "total_flos": 1361004009186724032, "step": 44064 }, { "loss": 1.239990234375, "learning_rate": 1.2601758777154637e-05, "epoch": 0.584207979020924, "total_flos": 1361969520774625824, "step": 44096 }, { "loss": 1.2164306640625, "learning_rate": 1.2588909263696109e-05, "epoch": 0.5846319325615777, "total_flos": 1362893009529681408, "step": 44128 }, { "loss": 1.1854248046875, "learning_rate": 1.2576059750237582e-05, "epoch": 0.5850558861022316, "total_flos": 1363946974997747232, "step": 44160 }, { "loss": 1.2120361328125, "learning_rate": 1.2563210236779056e-05, "epoch": 0.5854798396428854, "total_flos": 1364923442768332128, "step": 44192 }, { "loss": 1.219482421875, "learning_rate": 1.255036072332053e-05, "epoch": 0.5859037931835391, "total_flos": 1365889517449323360, "step": 44224 }, { "loss": 1.220458984375, "learning_rate": 1.2537511209862003e-05, "epoch": 0.586327746724193, "total_flos": 1366879966016900352, "step": 44256 }, { "loss": 1.21337890625, "learning_rate": 1.2524661696403474e-05, "epoch": 0.5867517002648468, "total_flos": 1367860568499599136, "step": 44288 }, { "loss": 1.217529296875, "learning_rate": 1.2511812182944948e-05, "epoch": 0.5871756538055005, "total_flos": 1368838870344818208, "step": 44320 }, { "loss": 1.2410888671875, "learning_rate": 1.2498962669486421e-05, "epoch": 0.5875996073461544, "total_flos": 1369839969415972608, "step": 44352 }, { "loss": 1.2254638671875, "learning_rate": 1.2486113156027895e-05, "epoch": 0.5880235608868082, "total_flos": 1370825028378264960, "step": 44384 }, { "loss": 1.2222900390625, "learning_rate": 1.2473263642569368e-05, "epoch": 0.5884475144274619, "total_flos": 1371825950477305536, "step": 44416 }, { "loss": 1.2213134765625, "learning_rate": 1.246041412911084e-05, "epoch": 0.5888714679681157, "total_flos": 1372796932112361888, "step": 44448 }, { "loss": 1.2049560546875, "learning_rate": 1.2447564615652314e-05, "epoch": 0.5892954215087695, "total_flos": 1373795360513434944, "step": 44480 }, { "loss": 1.23388671875, "learning_rate": 1.2434715102193787e-05, "epoch": 0.5897193750494233, "total_flos": 1374777861424263840, "step": 44512 }, { "loss": 1.1923828125, "learning_rate": 1.2421865588735259e-05, "epoch": 0.5901433285900771, "total_flos": 1375821836012085600, "step": 44544 }, { "loss": 1.1959228515625, "learning_rate": 1.2409016075276734e-05, "epoch": 0.5905672821307308, "total_flos": 1376816017082426880, "step": 44576 }, { "loss": 1.206298828125, "learning_rate": 1.2396166561818206e-05, "epoch": 0.5909912356713847, "total_flos": 1377765826417320288, "step": 44608 }, { "loss": 1.211181640625, "learning_rate": 1.238331704835968e-05, "epoch": 0.5914151892120385, "total_flos": 1378774454847499200, "step": 44640 }, { "loss": 1.2266845703125, "learning_rate": 1.2370467534901153e-05, "epoch": 0.5918391427526922, "total_flos": 1379773462430035680, "step": 44672 }, { "loss": 1.2127685546875, "learning_rate": 1.2357618021442625e-05, "epoch": 0.5922630962933461, "total_flos": 1380718943992327392, "step": 44704 }, { "loss": 1.1851806640625, "learning_rate": 1.2344768507984098e-05, "epoch": 0.5926870498339998, "total_flos": 1381747843773726144, "step": 44736 }, { "loss": 1.2142333984375, "learning_rate": 1.2331918994525573e-05, "epoch": 0.5931110033746536, "total_flos": 1382714883757156416, "step": 44768 }, { "loss": 1.1988525390625, "learning_rate": 1.2319069481067045e-05, "epoch": 0.5935349569153074, "total_flos": 1383723270861725568, "step": 44800 }, { "loss": 1.2069091796875, "learning_rate": 1.2306219967608519e-05, "epoch": 0.5939589104559612, "total_flos": 1384676217429545856, "step": 44832 }, { "loss": 1.215576171875, "learning_rate": 1.229337045414999e-05, "epoch": 0.594382863996615, "total_flos": 1385656594675008864, "step": 44864 }, { "loss": 1.2303466796875, "learning_rate": 1.2280520940691464e-05, "epoch": 0.5948068175372688, "total_flos": 1386655618345919328, "step": 44896 }, { "loss": 1.20947265625, "learning_rate": 1.2267671427232938e-05, "epoch": 0.5952307710779225, "total_flos": 1387702231427074464, "step": 44928 }, { "loss": 1.218505859375, "learning_rate": 1.2254821913774411e-05, "epoch": 0.5956547246185764, "total_flos": 1388662369498065600, "step": 44960 }, { "loss": 1.1864013671875, "learning_rate": 1.2241972400315885e-05, "epoch": 0.5960786781592302, "total_flos": 1389621896210845344, "step": 44992 }, { "loss": 1.1827392578125, "learning_rate": 1.2229122886857358e-05, "epoch": 0.5965026316998839, "total_flos": 1390603045698259584, "step": 45024 }, { "loss": 1.221435546875, "learning_rate": 1.221627337339883e-05, "epoch": 0.5969265852405378, "total_flos": 1391613315142584864, "step": 45056 }, { "loss": 1.2095947265625, "learning_rate": 1.2203423859940303e-05, "epoch": 0.5973505387811915, "total_flos": 1392598534988617056, "step": 45088 }, { "loss": 1.2276611328125, "learning_rate": 1.2190574346481775e-05, "epoch": 0.5977744923218453, "total_flos": 1393547266402453536, "step": 45120 }, { "loss": 1.1846923828125, "learning_rate": 1.217772483302325e-05, "epoch": 0.5981984458624992, "total_flos": 1394524876447591296, "step": 45152 }, { "loss": 1.222412109375, "learning_rate": 1.2164875319564724e-05, "epoch": 0.5986223994031529, "total_flos": 1395481652048419776, "step": 45184 }, { "loss": 1.21728515625, "learning_rate": 1.2152025806106196e-05, "epoch": 0.5990463529438067, "total_flos": 1396453132423069632, "step": 45216 }, { "loss": 1.197265625, "learning_rate": 1.2139176292647669e-05, "epoch": 0.5994703064844605, "total_flos": 1397392789993979136, "step": 45248 }, { "loss": 1.21826171875, "learning_rate": 1.2126326779189143e-05, "epoch": 0.5998942600251143, "total_flos": 1398376964095702368, "step": 45280 }, { "loss": 1.2213134765625, "learning_rate": 1.2113477265730616e-05, "epoch": 0.6003182135657681, "total_flos": 1399326548193360000, "step": 45312 }, { "loss": 1.2130126953125, "learning_rate": 1.210062775227209e-05, "epoch": 0.6007421671064218, "total_flos": 1400302356340611552, "step": 45344 }, { "loss": 1.2376708984375, "learning_rate": 1.2087778238813561e-05, "epoch": 0.6011661206470756, "total_flos": 1401298435839082944, "step": 45376 }, { "loss": 1.2177734375, "learning_rate": 1.2074928725355035e-05, "epoch": 0.6015900741877295, "total_flos": 1402286985978529824, "step": 45408 }, { "loss": 1.194091796875, "learning_rate": 1.2062079211896508e-05, "epoch": 0.6020140277283832, "total_flos": 1403292686324643648, "step": 45440 }, { "loss": 1.2325439453125, "learning_rate": 1.204922969843798e-05, "epoch": 0.602437981269037, "total_flos": 1404265952508805728, "step": 45472 }, { "loss": 1.197509765625, "learning_rate": 1.2036380184979455e-05, "epoch": 0.6028619348096909, "total_flos": 1405266456310122720, "step": 45504 }, { "loss": 1.2205810546875, "learning_rate": 1.2023530671520929e-05, "epoch": 0.6032858883503446, "total_flos": 1406211841342170528, "step": 45536 }, { "loss": 1.2239990234375, "learning_rate": 1.20106811580624e-05, "epoch": 0.6037098418909984, "total_flos": 1407167490756820128, "step": 45568 }, { "loss": 1.2252197265625, "learning_rate": 1.1997831644603874e-05, "epoch": 0.6041337954316522, "total_flos": 1408110639504884160, "step": 45600 }, { "loss": 1.22802734375, "learning_rate": 1.1984982131145346e-05, "epoch": 0.604557748972306, "total_flos": 1409107008594087264, "step": 45632 }, { "loss": 1.1737060546875, "learning_rate": 1.197213261768682e-05, "epoch": 0.6049817025129598, "total_flos": 1410091504463290176, "step": 45664 }, { "loss": 1.2353515625, "learning_rate": 1.1959283104228295e-05, "epoch": 0.6054056560536135, "total_flos": 1411051996478508960, "step": 45696 }, { "loss": 1.19775390625, "learning_rate": 1.1946433590769766e-05, "epoch": 0.6058296095942673, "total_flos": 1412057053289663424, "step": 45728 }, { "loss": 1.18017578125, "learning_rate": 1.193358407731124e-05, "epoch": 0.6062535631349212, "total_flos": 1413028276250329536, "step": 45760 }, { "loss": 1.191650390625, "learning_rate": 1.1920734563852713e-05, "epoch": 0.6066775166755749, "total_flos": 1413988478674816608, "step": 45792 }, { "loss": 1.229736328125, "learning_rate": 1.1907885050394185e-05, "epoch": 0.6071014702162287, "total_flos": 1414948729364425632, "step": 45824 }, { "loss": 1.216064453125, "learning_rate": 1.1895035536935659e-05, "epoch": 0.6075254237568826, "total_flos": 1415935155838506624, "step": 45856 }, { "loss": 1.2132568359375, "learning_rate": 1.1882186023477132e-05, "epoch": 0.6079493772975363, "total_flos": 1416892108411448928, "step": 45888 }, { "loss": 1.177001953125, "learning_rate": 1.1869336510018606e-05, "epoch": 0.6083733308381901, "total_flos": 1417926944802847776, "step": 45920 }, { "loss": 1.216064453125, "learning_rate": 1.1856486996560079e-05, "epoch": 0.6087972843788438, "total_flos": 1418901964619774112, "step": 45952 }, { "loss": 1.215576171875, "learning_rate": 1.1843637483101551e-05, "epoch": 0.6092212379194977, "total_flos": 1419876260459871168, "step": 45984 }, { "loss": 1.20263671875, "learning_rate": 1.1830787969643024e-05, "epoch": 0.6096451914601515, "total_flos": 1420849816234764960, "step": 46016 }, { "loss": 1.1962890625, "learning_rate": 1.1817938456184498e-05, "epoch": 0.6100691450008052, "total_flos": 1421822615856081504, "step": 46048 }, { "loss": 1.2193603515625, "learning_rate": 1.1805088942725971e-05, "epoch": 0.6104930985414591, "total_flos": 1422813225307398336, "step": 46080 }, { "loss": 1.218994140625, "learning_rate": 1.1792239429267445e-05, "epoch": 0.6109170520821129, "total_flos": 1423781664979365216, "step": 46112 }, { "loss": 1.2220458984375, "learning_rate": 1.1779389915808917e-05, "epoch": 0.6113410056227666, "total_flos": 1424732166114339936, "step": 46144 }, { "loss": 1.2158203125, "learning_rate": 1.176654040235039e-05, "epoch": 0.6117649591634204, "total_flos": 1425695087474030304, "step": 46176 }, { "loss": 1.221435546875, "learning_rate": 1.1753690888891864e-05, "epoch": 0.6121889127040743, "total_flos": 1426706467016160480, "step": 46208 }, { "loss": 1.230712890625, "learning_rate": 1.1740841375433335e-05, "epoch": 0.612612866244728, "total_flos": 1427679556228208736, "step": 46240 }, { "loss": 1.2100830078125, "learning_rate": 1.172799186197481e-05, "epoch": 0.6130368197853818, "total_flos": 1428655219580094432, "step": 46272 }, { "loss": 1.2042236328125, "learning_rate": 1.1715142348516284e-05, "epoch": 0.6134607733260355, "total_flos": 1429652457441492672, "step": 46304 }, { "loss": 1.1966552734375, "learning_rate": 1.1702292835057756e-05, "epoch": 0.6138847268666894, "total_flos": 1430662952123053728, "step": 46336 }, { "loss": 1.2081298828125, "learning_rate": 1.168944332159923e-05, "epoch": 0.6143086804073432, "total_flos": 1431642750187053312, "step": 46368 }, { "loss": 1.2071533203125, "learning_rate": 1.1676593808140701e-05, "epoch": 0.6147326339479969, "total_flos": 1432597209062028096, "step": 46400 }, { "loss": 1.217041015625, "learning_rate": 1.1663744294682175e-05, "epoch": 0.6151565874886508, "total_flos": 1433578873377409824, "step": 46432 }, { "loss": 1.18798828125, "learning_rate": 1.165089478122365e-05, "epoch": 0.6155805410293046, "total_flos": 1434583946276938272, "step": 46464 }, { "loss": 1.2314453125, "learning_rate": 1.1638045267765122e-05, "epoch": 0.6160044945699583, "total_flos": 1435566897662238720, "step": 46496 }, { "loss": 1.1849365234375, "learning_rate": 1.1625195754306595e-05, "epoch": 0.6164284481106121, "total_flos": 1436511703512823104, "step": 46528 }, { "loss": 1.1925048828125, "learning_rate": 1.1612346240848069e-05, "epoch": 0.6168524016512659, "total_flos": 1437515103221457216, "step": 46560 }, { "loss": 1.2154541015625, "learning_rate": 1.159949672738954e-05, "epoch": 0.6172763551919197, "total_flos": 1438472281031635296, "step": 46592 }, { "loss": 1.1943359375, "learning_rate": 1.1586647213931014e-05, "epoch": 0.6177003087325735, "total_flos": 1439462729599212288, "step": 46624 }, { "loss": 1.1959228515625, "learning_rate": 1.1573797700472487e-05, "epoch": 0.6181242622732273, "total_flos": 1440476313248578272, "step": 46656 }, { "loss": 1.225341796875, "learning_rate": 1.1560948187013961e-05, "epoch": 0.6185482158138811, "total_flos": 1441474291175179776, "step": 46688 }, { "loss": 1.2095947265625, "learning_rate": 1.1548098673555434e-05, "epoch": 0.6189721693545349, "total_flos": 1442478833158366752, "step": 46720 }, { "loss": 1.23046875, "learning_rate": 1.1535249160096906e-05, "epoch": 0.6193961228951886, "total_flos": 1443460529650496448, "step": 46752 }, { "loss": 1.2254638671875, "learning_rate": 1.152239964663838e-05, "epoch": 0.6198200764358425, "total_flos": 1444420796428479456, "step": 46784 }, { "loss": 1.2088623046875, "learning_rate": 1.1509550133179853e-05, "epoch": 0.6202440299764963, "total_flos": 1445437967785243872, "step": 46816 }, { "loss": 1.223876953125, "learning_rate": 1.1496700619721327e-05, "epoch": 0.62066798351715, "total_flos": 1446426324864202944, "step": 46848 }, { "loss": 1.1939697265625, "learning_rate": 1.14838511062628e-05, "epoch": 0.6210919370578039, "total_flos": 1447413893612836800, "step": 46880 }, { "loss": 1.1912841796875, "learning_rate": 1.1471001592804272e-05, "epoch": 0.6215158905984576, "total_flos": 1448431998095292288, "step": 46912 }, { "loss": 1.220947265625, "learning_rate": 1.1458152079345745e-05, "epoch": 0.6219398441391114, "total_flos": 1449429300310186464, "step": 46944 }, { "loss": 1.194580078125, "learning_rate": 1.1445302565887219e-05, "epoch": 0.6223637976797652, "total_flos": 1450453373579390016, "step": 46976 }, { "loss": 1.1954345703125, "learning_rate": 1.1432453052428692e-05, "epoch": 0.622787751220419, "total_flos": 1451412658966560000, "step": 47008 }, { "loss": 1.262451171875, "learning_rate": 1.1419603538970166e-05, "epoch": 0.6232117047610728, "total_flos": 1452413838479584320, "step": 47040 }, { "loss": 1.2049560546875, "learning_rate": 1.140675402551164e-05, "epoch": 0.6236356583017266, "total_flos": 1453389163975616352, "step": 47072 }, { "loss": 1.2135009765625, "learning_rate": 1.1393904512053111e-05, "epoch": 0.6240596118423803, "total_flos": 1454353147167989664, "step": 47104 }, { "loss": 1.2177734375, "learning_rate": 1.1381054998594585e-05, "epoch": 0.6244835653830342, "total_flos": 1455345558517192704, "step": 47136 }, { "loss": 1.181396484375, "learning_rate": 1.1368205485136057e-05, "epoch": 0.6249075189236879, "total_flos": 1456319387794444224, "step": 47168 }, { "loss": 1.213623046875, "learning_rate": 1.1355355971677532e-05, "epoch": 0.6253314724643417, "total_flos": 1457306039505760992, "step": 47200 }, { "loss": 1.180419921875, "learning_rate": 1.1342506458219005e-05, "epoch": 0.6257554260049956, "total_flos": 1458299947073744544, "step": 47232 }, { "loss": 1.2030029296875, "learning_rate": 1.1329656944760477e-05, "epoch": 0.6261793795456493, "total_flos": 1459280501291321376, "step": 47264 }, { "loss": 1.19921875, "learning_rate": 1.131680743130195e-05, "epoch": 0.6266033330863031, "total_flos": 1460239706236621440, "step": 47296 }, { "loss": 1.19384765625, "learning_rate": 1.1303957917843424e-05, "epoch": 0.627027286626957, "total_flos": 1461243669038344992, "step": 47328 }, { "loss": 1.2139892578125, "learning_rate": 1.1291108404384896e-05, "epoch": 0.6274512401676107, "total_flos": 1462239668094946464, "step": 47360 }, { "loss": 1.1956787109375, "learning_rate": 1.1278258890926371e-05, "epoch": 0.6278751937082645, "total_flos": 1463265028434068736, "step": 47392 }, { "loss": 1.2161865234375, "learning_rate": 1.1265409377467843e-05, "epoch": 0.6282991472489182, "total_flos": 1464257632843759584, "step": 47424 }, { "loss": 1.17236328125, "learning_rate": 1.1252559864009316e-05, "epoch": 0.628723100789572, "total_flos": 1465260855580279872, "step": 47456 }, { "loss": 1.1898193359375, "learning_rate": 1.123971035055079e-05, "epoch": 0.6291470543302259, "total_flos": 1466240766262897344, "step": 47488 }, { "loss": 1.1751708984375, "learning_rate": 1.1226860837092262e-05, "epoch": 0.6295710078708796, "total_flos": 1467239564696572032, "step": 47520 }, { "loss": 1.1929931640625, "learning_rate": 1.1214011323633735e-05, "epoch": 0.6299949614115334, "total_flos": 1468271473003905792, "step": 47552 }, { "loss": 1.21533203125, "learning_rate": 1.120116181017521e-05, "epoch": 0.6304189149521873, "total_flos": 1469296463310426432, "step": 47584 }, { "loss": 1.20166015625, "learning_rate": 1.1188312296716682e-05, "epoch": 0.630842868492841, "total_flos": 1470277870211824416, "step": 47616 }, { "loss": 1.2283935546875, "learning_rate": 1.1175462783258155e-05, "epoch": 0.6312668220334948, "total_flos": 1471255319373222336, "step": 47648 }, { "loss": 1.195068359375, "learning_rate": 1.1162613269799627e-05, "epoch": 0.6316907755741487, "total_flos": 1472269433938929792, "step": 47680 }, { "loss": 1.214111328125, "learning_rate": 1.11497637563411e-05, "epoch": 0.6321147291148024, "total_flos": 1473245708649026880, "step": 47712 }, { "loss": 1.19677734375, "learning_rate": 1.1136914242882574e-05, "epoch": 0.6325386826554562, "total_flos": 1474224637940831328, "step": 47744 }, { "loss": 1.201171875, "learning_rate": 1.1124064729424048e-05, "epoch": 0.6329626361961099, "total_flos": 1475204484269952864, "step": 47776 }, { "loss": 1.1988525390625, "learning_rate": 1.1111215215965521e-05, "epoch": 0.6333865897367638, "total_flos": 1476181547310375168, "step": 47808 }, { "loss": 1.224365234375, "learning_rate": 1.1098365702506995e-05, "epoch": 0.6338105432774176, "total_flos": 1477136344041203616, "step": 47840 }, { "loss": 1.1966552734375, "learning_rate": 1.1085516189048467e-05, "epoch": 0.6342344968180713, "total_flos": 1478098557512438688, "step": 47872 }, { "loss": 1.20458984375, "learning_rate": 1.107266667558994e-05, "epoch": 0.6346584503587251, "total_flos": 1479079706999852928, "step": 47904 }, { "loss": 1.2169189453125, "learning_rate": 1.1059817162131412e-05, "epoch": 0.635082403899379, "total_flos": 1480059199384746816, "step": 47936 }, { "loss": 1.224853515625, "learning_rate": 1.1046967648672887e-05, "epoch": 0.6355063574400327, "total_flos": 1481061762497933760, "step": 47968 }, { "loss": 1.2193603515625, "learning_rate": 1.103411813521436e-05, "epoch": 0.6359303109806865, "total_flos": 1482040611347868288, "step": 48000 }, { "loss": 1.21533203125, "learning_rate": 1.1021268621755832e-05, "epoch": 0.6363542645213403, "total_flos": 1483003677502924512, "step": 48032 }, { "loss": 1.217529296875, "learning_rate": 1.1008419108297306e-05, "epoch": 0.6367782180619941, "total_flos": 1484029681377006144, "step": 48064 }, { "loss": 1.19677734375, "learning_rate": 1.099556959483878e-05, "epoch": 0.6372021716026479, "total_flos": 1485037247974502112, "step": 48096 }, { "loss": 1.2117919921875, "learning_rate": 1.0982720081380251e-05, "epoch": 0.6376261251433016, "total_flos": 1486035949877932896, "step": 48128 }, { "loss": 1.21728515625, "learning_rate": 1.0969870567921726e-05, "epoch": 0.6380500786839555, "total_flos": 1486981109672744928, "step": 48160 }, { "loss": 1.191162109375, "learning_rate": 1.0957021054463198e-05, "epoch": 0.6384740322246093, "total_flos": 1487975145947720352, "step": 48192 }, { "loss": 1.2083740234375, "learning_rate": 1.0944171541004672e-05, "epoch": 0.638897985765263, "total_flos": 1488977725149281280, "step": 48224 }, { "loss": 1.2275390625, "learning_rate": 1.0931322027546145e-05, "epoch": 0.6393219393059169, "total_flos": 1489978695513443808, "step": 48256 }, { "loss": 1.22119140625, "learning_rate": 1.0918472514087617e-05, "epoch": 0.6397458928465707, "total_flos": 1490959668028744224, "step": 48288 }, { "loss": 1.2266845703125, "learning_rate": 1.090562300062909e-05, "epoch": 0.6401698463872244, "total_flos": 1491945821000467488, "step": 48320 }, { "loss": 1.2086181640625, "learning_rate": 1.0892773487170566e-05, "epoch": 0.6405937999278782, "total_flos": 1492909948988206656, "step": 48352 }, { "loss": 1.2222900390625, "learning_rate": 1.0879923973712037e-05, "epoch": 0.641017753468532, "total_flos": 1493882137251311808, "step": 48384 }, { "loss": 1.215576171875, "learning_rate": 1.086707446025351e-05, "epoch": 0.6414417070091858, "total_flos": 1494846265239050976, "step": 48416 }, { "loss": 1.19189453125, "learning_rate": 1.0854224946794983e-05, "epoch": 0.6418656605498396, "total_flos": 1495829892336058752, "step": 48448 }, { "loss": 1.18994140625, "learning_rate": 1.0841375433336456e-05, "epoch": 0.6422896140904933, "total_flos": 1496790496969895424, "step": 48480 }, { "loss": 1.2081298828125, "learning_rate": 1.0828525919877931e-05, "epoch": 0.6427135676311472, "total_flos": 1497789633259423776, "step": 48512 }, { "loss": 1.2386474609375, "learning_rate": 1.0815676406419403e-05, "epoch": 0.643137521171801, "total_flos": 1498769946151390848, "step": 48544 }, { "loss": 1.203125, "learning_rate": 1.0802826892960877e-05, "epoch": 0.6435614747124547, "total_flos": 1499756839188317376, "step": 48576 }, { "loss": 1.2069091796875, "learning_rate": 1.078997737950235e-05, "epoch": 0.6439854282531086, "total_flos": 1500763713985732032, "step": 48608 }, { "loss": 1.203369140625, "learning_rate": 1.0777127866043822e-05, "epoch": 0.6444093817937623, "total_flos": 1501727375410625664, "step": 48640 }, { "loss": 1.2039794921875, "learning_rate": 1.0764278352585295e-05, "epoch": 0.6448333353344161, "total_flos": 1502688285723568032, "step": 48672 }, { "loss": 1.2109375, "learning_rate": 1.0751428839126769e-05, "epoch": 0.64525728887507, "total_flos": 1503677302425860448, "step": 48704 }, { "loss": 1.2139892578125, "learning_rate": 1.0738579325668242e-05, "epoch": 0.6456812424157237, "total_flos": 1504667574021323616, "step": 48736 }, { "loss": 1.2025146484375, "learning_rate": 1.0725729812209716e-05, "epoch": 0.6461051959563775, "total_flos": 1505647259466705312, "step": 48768 }, { "loss": 1.2139892578125, "learning_rate": 1.0712880298751188e-05, "epoch": 0.6465291494970313, "total_flos": 1506656418813225696, "step": 48800 }, { "loss": 1.181396484375, "learning_rate": 1.0700030785292661e-05, "epoch": 0.646953103037685, "total_flos": 1507658531451941088, "step": 48832 }, { "loss": 1.1943359375, "learning_rate": 1.0687181271834135e-05, "epoch": 0.6473770565783389, "total_flos": 1508641354130249664, "step": 48864 }, { "loss": 1.219482421875, "learning_rate": 1.0674331758375608e-05, "epoch": 0.6478010101189927, "total_flos": 1509589329390508896, "step": 48896 }, { "loss": 1.1944580078125, "learning_rate": 1.0661482244917082e-05, "epoch": 0.6482249636596464, "total_flos": 1510572682985158944, "step": 48928 }, { "loss": 1.2003173828125, "learning_rate": 1.0648632731458553e-05, "epoch": 0.6486489172003003, "total_flos": 1511571014855988096, "step": 48960 }, { "loss": 1.19384765625, "learning_rate": 1.0635783218000027e-05, "epoch": 0.649072870740954, "total_flos": 1512623355398281536, "step": 48992 }, { "loss": 1.2186279296875, "learning_rate": 1.06229337045415e-05, "epoch": 0.6494968242816078, "total_flos": 1513622909985533472, "step": 49024 }, { "loss": 1.226806640625, "learning_rate": 1.0610084191082972e-05, "epoch": 0.6499207778222617, "total_flos": 1514581937958719712, "step": 49056 }, { "loss": 1.20751953125, "learning_rate": 1.0597234677624447e-05, "epoch": 0.6503447313629154, "total_flos": 1515574301042800800, "step": 49088 }, { "loss": 1.2095947265625, "learning_rate": 1.0584385164165921e-05, "epoch": 0.6507686849035692, "total_flos": 1516568433848020128, "step": 49120 }, { "loss": 1.1917724609375, "learning_rate": 1.0571535650707393e-05, "epoch": 0.651192638444223, "total_flos": 1517562743625353280, "step": 49152 }, { "loss": 1.1971435546875, "learning_rate": 1.0558686137248866e-05, "epoch": 0.6516165919848768, "total_flos": 1518570229780979328, "step": 49184 }, { "loss": 1.1820068359375, "learning_rate": 1.0545836623790338e-05, "epoch": 0.6520405455255306, "total_flos": 1519578616885548480, "step": 49216 }, { "loss": 1.2059326171875, "learning_rate": 1.0532987110331811e-05, "epoch": 0.6524644990661843, "total_flos": 1520583207133857408, "step": 49248 }, { "loss": 1.185791015625, "learning_rate": 1.0520137596873287e-05, "epoch": 0.6528884526068381, "total_flos": 1521557052499482912, "step": 49280 }, { "loss": 1.194091796875, "learning_rate": 1.0507288083414758e-05, "epoch": 0.653312406147492, "total_flos": 1522578262038117312, "step": 49312 }, { "loss": 1.22412109375, "learning_rate": 1.0494438569956232e-05, "epoch": 0.6537363596881457, "total_flos": 1523547425686913472, "step": 49344 }, { "loss": 1.206298828125, "learning_rate": 1.0481589056497705e-05, "epoch": 0.6541603132287995, "total_flos": 1524496334072863776, "step": 49376 }, { "loss": 1.194091796875, "learning_rate": 1.0468739543039177e-05, "epoch": 0.6545842667694534, "total_flos": 1525489115454668448, "step": 49408 }, { "loss": 1.2288818359375, "learning_rate": 1.045589002958065e-05, "epoch": 0.6550082203101071, "total_flos": 1526507670411595488, "step": 49440 }, { "loss": 1.2423095703125, "learning_rate": 1.0443040516122124e-05, "epoch": 0.6554321738507609, "total_flos": 1527486824940635712, "step": 49472 }, { "loss": 1.218017578125, "learning_rate": 1.0430191002663598e-05, "epoch": 0.6558561273914147, "total_flos": 1528497013943091072, "step": 49504 }, { "loss": 1.214599609375, "learning_rate": 1.0417341489205071e-05, "epoch": 0.6562800809320685, "total_flos": 1529483697831155808, "step": 49536 }, { "loss": 1.2183837890625, "learning_rate": 1.0404491975746543e-05, "epoch": 0.6567040344727223, "total_flos": 1530474082045236864, "step": 49568 }, { "loss": 1.1912841796875, "learning_rate": 1.0391642462288016e-05, "epoch": 0.657127988013376, "total_flos": 1531439818870374432, "step": 49600 }, { "loss": 1.1898193359375, "learning_rate": 1.037879294882949e-05, "epoch": 0.6575519415540299, "total_flos": 1532429720433235968, "step": 49632 }, { "loss": 1.1885986328125, "learning_rate": 1.0365943435370963e-05, "epoch": 0.6579758950946837, "total_flos": 1533400621626422400, "step": 49664 }, { "loss": 1.1845703125, "learning_rate": 1.0353093921912437e-05, "epoch": 0.6583998486353374, "total_flos": 1534419482262455136, "step": 49696 }, { "loss": 1.1842041015625, "learning_rate": 1.0340244408453909e-05, "epoch": 0.6588238021759912, "total_flos": 1535425005636455136, "step": 49728 }, { "loss": 1.2197265625, "learning_rate": 1.0327394894995382e-05, "epoch": 0.6592477557166451, "total_flos": 1536399349741674144, "step": 49760 }, { "loss": 1.177734375, "learning_rate": 1.0314545381536856e-05, "epoch": 0.6596717092572988, "total_flos": 1537400963640796032, "step": 49792 }, { "loss": 1.1971435546875, "learning_rate": 1.0301695868078327e-05, "epoch": 0.6600956627979526, "total_flos": 1538360635148941632, "step": 49824 }, { "loss": 1.18505859375, "learning_rate": 1.0288846354619803e-05, "epoch": 0.6605196163386063, "total_flos": 1539382906520258976, "step": 49856 }, { "loss": 1.192626953125, "learning_rate": 1.0275996841161276e-05, "epoch": 0.6609435698792602, "total_flos": 1540344042070437120, "step": 49888 }, { "loss": 1.1827392578125, "learning_rate": 1.0263147327702748e-05, "epoch": 0.661367523419914, "total_flos": 1541371928284274880, "step": 49920 }, { "loss": 1.1763916015625, "learning_rate": 1.0250297814244221e-05, "epoch": 0.6617914769605677, "total_flos": 1542408389601446112, "step": 49952 }, { "loss": 1.1895751953125, "learning_rate": 1.0237448300785693e-05, "epoch": 0.6622154305012216, "total_flos": 1543358054140973664, "step": 49984 }, { "loss": 1.20849609375, "learning_rate": 1.0224598787327167e-05, "epoch": 0.6626393840418754, "total_flos": 1544309922787737024, "step": 50016 }, { "loss": 1.2117919921875, "learning_rate": 1.0211749273868642e-05, "epoch": 0.6630633375825291, "total_flos": 1545283172883525120, "step": 50048 }, { "loss": 1.1885986328125, "learning_rate": 1.0198899760410114e-05, "epoch": 0.6634872911231829, "total_flos": 1546317446181834528, "step": 50080 }, { "loss": 1.1905517578125, "learning_rate": 1.0186050246951587e-05, "epoch": 0.6639112446638368, "total_flos": 1547320347150875136, "step": 50112 }, { "loss": 1.2216796875, "learning_rate": 1.017320073349306e-05, "epoch": 0.6643351982044905, "total_flos": 1548303153740809728, "step": 50144 }, { "loss": 1.2147216796875, "learning_rate": 1.0160351220034533e-05, "epoch": 0.6647591517451443, "total_flos": 1549321901758224576, "step": 50176 }, { "loss": 1.1883544921875, "learning_rate": 1.0147501706576008e-05, "epoch": 0.665183105285798, "total_flos": 1550326105885557888, "step": 50208 }, { "loss": 1.204833984375, "learning_rate": 1.013465219311748e-05, "epoch": 0.6656070588264519, "total_flos": 1551370900980452832, "step": 50240 }, { "loss": 1.1978759765625, "learning_rate": 1.0121802679658953e-05, "epoch": 0.6660310123671057, "total_flos": 1552391772663233568, "step": 50272 }, { "loss": 1.226318359375, "learning_rate": 1.0108953166200426e-05, "epoch": 0.6664549659077594, "total_flos": 1553348226496582368, "step": 50304 }, { "loss": 1.19677734375, "learning_rate": 1.0096103652741898e-05, "epoch": 0.6668789194484133, "total_flos": 1554356372275541760, "step": 50336 }, { "loss": 1.211181640625, "learning_rate": 1.0083254139283372e-05, "epoch": 0.6673028729890671, "total_flos": 1555314499299784896, "step": 50368 }, { "loss": 1.223876953125, "learning_rate": 1.0070404625824847e-05, "epoch": 0.6677268265297208, "total_flos": 1556301553220451264, "step": 50400 }, { "loss": 1.189208984375, "learning_rate": 1.0057555112366319e-05, "epoch": 0.6681507800703747, "total_flos": 1557253067922986976, "step": 50432 }, { "loss": 1.1695556640625, "learning_rate": 1.0044705598907792e-05, "epoch": 0.6685747336110284, "total_flos": 1558235054005848384, "step": 50464 }, { "loss": 1.1875, "learning_rate": 1.0031856085449264e-05, "epoch": 0.6689986871516822, "total_flos": 1559225132540823744, "step": 50496 }, { "loss": 1.1815185546875, "learning_rate": 1.0019006571990738e-05, "epoch": 0.669422640692336, "total_flos": 1560216079847994240, "step": 50528 }, { "loss": 1.189208984375, "learning_rate": 1.0006157058532211e-05, "epoch": 0.6698465942329898, "total_flos": 1561162462359229056, "step": 50560 }, { "loss": 1.1995849609375, "learning_rate": 9.993307545073685e-06, "epoch": 0.6702705477736436, "total_flos": 1562134795417700064, "step": 50592 }, { "loss": 1.2119140625, "learning_rate": 9.980458031615158e-06, "epoch": 0.6706945013142974, "total_flos": 1563112485904707744, "step": 50624 }, { "loss": 1.1981201171875, "learning_rate": 9.967608518156631e-06, "epoch": 0.6711184548549511, "total_flos": 1564080137246349408, "step": 50656 }, { "loss": 1.213134765625, "learning_rate": 9.954759004698103e-06, "epoch": 0.671542408395605, "total_flos": 1565064263082950688, "step": 50688 }, { "loss": 1.200439453125, "learning_rate": 9.941909491239577e-06, "epoch": 0.6719663619362588, "total_flos": 1566051654859470720, "step": 50720 }, { "loss": 1.21484375, "learning_rate": 9.929059977781049e-06, "epoch": 0.6723903154769125, "total_flos": 1567051161181600704, "step": 50752 }, { "loss": 1.1971435546875, "learning_rate": 9.916210464322524e-06, "epoch": 0.6728142690175664, "total_flos": 1568022657644624544, "step": 50784 }, { "loss": 1.1937255859375, "learning_rate": 9.903360950863997e-06, "epoch": 0.6732382225582201, "total_flos": 1568971775179436640, "step": 50816 }, { "loss": 1.2005615234375, "learning_rate": 9.890511437405469e-06, "epoch": 0.6736621760988739, "total_flos": 1569927295887094368, "step": 50848 }, { "loss": 1.1705322265625, "learning_rate": 9.877661923946943e-06, "epoch": 0.6740861296395277, "total_flos": 1570947749272151520, "step": 50880 }, { "loss": 1.219970703125, "learning_rate": 9.864812410488416e-06, "epoch": 0.6745100831801815, "total_flos": 1571880585372491808, "step": 50912 }, { "loss": 1.1990966796875, "learning_rate": 9.851962897029888e-06, "epoch": 0.6749340367208353, "total_flos": 1572860673027223104, "step": 50944 }, { "loss": 1.1754150390625, "learning_rate": 9.839113383571363e-06, "epoch": 0.6753579902614891, "total_flos": 1573843447440409728, "step": 50976 }, { "loss": 1.1689453125, "learning_rate": 9.826263870112835e-06, "epoch": 0.6757819438021428, "total_flos": 1574851866721726848, "step": 51008 }, { "loss": 1.220947265625, "learning_rate": 9.813414356654308e-06, "epoch": 0.6762058973427967, "total_flos": 1575816734774669280, "step": 51040 }, { "loss": 1.1884765625, "learning_rate": 9.800564843195782e-06, "epoch": 0.6766298508834504, "total_flos": 1576807875142327584, "step": 51072 }, { "loss": 1.1920166015625, "learning_rate": 9.787715329737254e-06, "epoch": 0.6770538044241042, "total_flos": 1577772083571936672, "step": 51104 }, { "loss": 1.1873779296875, "learning_rate": 9.774865816278727e-06, "epoch": 0.6774777579647581, "total_flos": 1578741601164960480, "step": 51136 }, { "loss": 1.1800537109375, "learning_rate": 9.7620163028202e-06, "epoch": 0.6779017115054118, "total_flos": 1579732789797740736, "step": 51168 }, { "loss": 1.1943359375, "learning_rate": 9.749166789361674e-06, "epoch": 0.6783256650460656, "total_flos": 1580682727839626016, "step": 51200 }, { "eval_loss": 1.0445926526136697, "epoch": 0.6783256650460656, "total_flos": 1580682727839626016, "step": 51200 }, { "loss": 1.1802978515625, "learning_rate": 9.736317275903148e-06, "epoch": 0.6787496185867194, "total_flos": 1581695297921431008, "step": 51232 }, { "loss": 1.183349609375, "learning_rate": 9.72346776244462e-06, "epoch": 0.6791735721273732, "total_flos": 1582641776962909728, "step": 51264 }, { "loss": 1.216064453125, "learning_rate": 9.710618248986093e-06, "epoch": 0.679597525668027, "total_flos": 1583612999923575840, "step": 51296 }, { "loss": 1.222900390625, "learning_rate": 9.697768735527566e-06, "epoch": 0.6800214792086808, "total_flos": 1584651971027088576, "step": 51328 }, { "loss": 1.195068359375, "learning_rate": 9.68491922206904e-06, "epoch": 0.6804454327493346, "total_flos": 1585602439985315328, "step": 51360 }, { "loss": 1.1826171875, "learning_rate": 9.672069708610513e-06, "epoch": 0.6808693862899884, "total_flos": 1586574274304192832, "step": 51392 }, { "loss": 1.212158203125, "learning_rate": 9.659220195151985e-06, "epoch": 0.6812933398306421, "total_flos": 1587599425494453312, "step": 51424 }, { "loss": 1.20947265625, "learning_rate": 9.646370681693459e-06, "epoch": 0.6817172933712959, "total_flos": 1588601763370404480, "step": 51456 }, { "loss": 1.169189453125, "learning_rate": 9.633521168234932e-06, "epoch": 0.6821412469119498, "total_flos": 1589659525694730528, "step": 51488 }, { "loss": 1.189453125, "learning_rate": 9.620671654776404e-06, "epoch": 0.6825652004526035, "total_flos": 1590697290170194464, "step": 51520 }, { "loss": 1.16845703125, "learning_rate": 9.607822141317879e-06, "epoch": 0.6829891539932573, "total_flos": 1591780922599886784, "step": 51552 }, { "loss": 1.185791015625, "learning_rate": 9.594972627859353e-06, "epoch": 0.6834131075339112, "total_flos": 1592813828386407552, "step": 51584 }, { "loss": 1.213134765625, "learning_rate": 9.582123114400824e-06, "epoch": 0.6838370610745649, "total_flos": 1593824918337806016, "step": 51616 }, { "loss": 1.21875, "learning_rate": 9.569273600942298e-06, "epoch": 0.6842610146152187, "total_flos": 1594836104819448384, "step": 51648 }, { "loss": 1.188720703125, "learning_rate": 9.55642408748377e-06, "epoch": 0.6846849681558724, "total_flos": 1595879902435156320, "step": 51680 }, { "loss": 1.20361328125, "learning_rate": 9.543574574025243e-06, "epoch": 0.6851089216965263, "total_flos": 1596873536500782144, "step": 51712 }, { "loss": 1.174072265625, "learning_rate": 9.530725060566718e-06, "epoch": 0.6855328752371801, "total_flos": 1597948368589905216, "step": 51744 }, { "loss": 1.196044921875, "learning_rate": 9.51787554710819e-06, "epoch": 0.6859568287778338, "total_flos": 1599020739157808736, "step": 51776 }, { "loss": 1.1962890625, "learning_rate": 9.505026033649664e-06, "epoch": 0.6863807823184876, "total_flos": 1600045166371239936, "step": 51808 }, { "loss": 1.17919921875, "learning_rate": 9.492176520191137e-06, "epoch": 0.6868047358591415, "total_flos": 1601062257286134432, "step": 51840 }, { "loss": 1.201904296875, "learning_rate": 9.479327006732609e-06, "epoch": 0.6872286893997952, "total_flos": 1602044742108589344, "step": 51872 }, { "loss": 1.200927734375, "learning_rate": 9.466477493274084e-06, "epoch": 0.687652642940449, "total_flos": 1603040387220963168, "step": 51904 }, { "loss": 1.201904296875, "learning_rate": 9.453627979815556e-06, "epoch": 0.6880765964811029, "total_flos": 1604071008458378208, "step": 51936 }, { "loss": 1.18603515625, "learning_rate": 9.44077846635703e-06, "epoch": 0.6885005500217566, "total_flos": 1605072027087662688, "step": 51968 }, { "loss": 1.207763671875, "learning_rate": 9.427928952898503e-06, "epoch": 0.6889245035624104, "total_flos": 1606128035779224480, "step": 52000 }, { "loss": 1.18896484375, "learning_rate": 9.415079439439975e-06, "epoch": 0.6893484571030641, "total_flos": 1607163789207940416, "step": 52032 }, { "loss": 1.19091796875, "learning_rate": 9.402229925981448e-06, "epoch": 0.689772410643718, "total_flos": 1608205833190884096, "step": 52064 }, { "loss": 1.207763671875, "learning_rate": 9.389380412522923e-06, "epoch": 0.6901963641843718, "total_flos": 1609236486605047104, "step": 52096 }, { "loss": 1.2021484375, "learning_rate": 9.376530899064395e-06, "epoch": 0.6906203177250255, "total_flos": 1610221979953437024, "step": 52128 }, { "loss": 1.216064453125, "learning_rate": 9.363681385605869e-06, "epoch": 0.6910442712656794, "total_flos": 1611233665174672896, "step": 52160 }, { "loss": 1.16748046875, "learning_rate": 9.35083187214734e-06, "epoch": 0.6914682248063332, "total_flos": 1612247586679892544, "step": 52192 }, { "loss": 1.189697265625, "learning_rate": 9.337982358688814e-06, "epoch": 0.6918921783469869, "total_flos": 1613259078840640608, "step": 52224 }, { "loss": 1.1611328125, "learning_rate": 9.325132845230287e-06, "epoch": 0.6923161318876407, "total_flos": 1614355501527650208, "step": 52256 }, { "loss": 1.175537109375, "learning_rate": 9.312283331771761e-06, "epoch": 0.6927400854282945, "total_flos": 1615354235607828960, "step": 52288 }, { "loss": 1.189208984375, "learning_rate": 9.299433818313234e-06, "epoch": 0.6931640389689483, "total_flos": 1616368334085162432, "step": 52320 }, { "loss": 1.21044921875, "learning_rate": 9.286584304854708e-06, "epoch": 0.6935879925096021, "total_flos": 1617384009223146336, "step": 52352 }, { "loss": 1.177734375, "learning_rate": 9.27373479139618e-06, "epoch": 0.6940119460502558, "total_flos": 1618373476399910304, "step": 52384 }, { "loss": 1.186767578125, "learning_rate": 9.260885277937653e-06, "epoch": 0.6944358995909097, "total_flos": 1619399271125130144, "step": 52416 }, { "loss": 1.213623046875, "learning_rate": 9.248035764479125e-06, "epoch": 0.6948598531315635, "total_flos": 1620451305988317888, "step": 52448 }, { "loss": 1.193603515625, "learning_rate": 9.2351862510206e-06, "epoch": 0.6952838066722172, "total_flos": 1621480784951180064, "step": 52480 }, { "loss": 1.1875, "learning_rate": 9.222336737562074e-06, "epoch": 0.6957077602128711, "total_flos": 1622497216242741216, "step": 52512 }, { "loss": 1.212646484375, "learning_rate": 9.209487224103545e-06, "epoch": 0.6961317137535248, "total_flos": 1623496722564871200, "step": 52544 }, { "loss": 1.184814453125, "learning_rate": 9.196637710645019e-06, "epoch": 0.6965556672941786, "total_flos": 1624542804729684864, "step": 52576 }, { "loss": 1.20556640625, "learning_rate": 9.183788197186492e-06, "epoch": 0.6969796208348324, "total_flos": 1625558914253766336, "step": 52608 }, { "loss": 1.19140625, "learning_rate": 9.170938683727964e-06, "epoch": 0.6974035743754862, "total_flos": 1626544681104513984, "step": 52640 }, { "loss": 1.187744140625, "learning_rate": 9.15808917026944e-06, "epoch": 0.69782752791614, "total_flos": 1627583121291685248, "step": 52672 }, { "loss": 1.16650390625, "learning_rate": 9.145239656810911e-06, "epoch": 0.6982514814567938, "total_flos": 1628616300580563744, "step": 52704 }, { "loss": 1.21435546875, "learning_rate": 9.132390143352385e-06, "epoch": 0.6986754349974476, "total_flos": 1629615275986352256, "step": 52736 }, { "loss": 1.1748046875, "learning_rate": 9.119540629893858e-06, "epoch": 0.6990993885381014, "total_flos": 1630613221736205792, "step": 52768 }, { "loss": 1.198486328125, "learning_rate": 9.10669111643533e-06, "epoch": 0.6995233420787552, "total_flos": 1631626483618092096, "step": 52800 }, { "loss": 1.224365234375, "learning_rate": 9.093841602976803e-06, "epoch": 0.6999472956194089, "total_flos": 1632608566231197408, "step": 52832 }, { "loss": 1.212890625, "learning_rate": 9.080992089518279e-06, "epoch": 0.7003712491600628, "total_flos": 1633598274733571136, "step": 52864 }, { "loss": 1.174560546875, "learning_rate": 9.06814257605975e-06, "epoch": 0.7007952027007165, "total_flos": 1634635041729848064, "step": 52896 }, { "loss": 1.2109375, "learning_rate": 9.055293062601224e-06, "epoch": 0.7012191562413703, "total_flos": 1635612812658725664, "step": 52928 }, { "loss": 1.19921875, "learning_rate": 9.042443549142696e-06, "epoch": 0.7016431097820242, "total_flos": 1636613123399554848, "step": 52960 }, { "loss": 1.226806640625, "learning_rate": 9.02959403568417e-06, "epoch": 0.7020670633226779, "total_flos": 1637605663455749760, "step": 52992 }, { "loss": 1.216796875, "learning_rate": 9.016744522225643e-06, "epoch": 0.7024910168633317, "total_flos": 1638659162360970048, "step": 53024 }, { "loss": 1.1962890625, "learning_rate": 9.003895008767116e-06, "epoch": 0.7029149704039855, "total_flos": 1639677524257409280, "step": 53056 }, { "loss": 1.2080078125, "learning_rate": 8.99104549530859e-06, "epoch": 0.7033389239446393, "total_flos": 1640700342633442080, "step": 53088 }, { "loss": 1.181884765625, "learning_rate": 8.978195981850063e-06, "epoch": 0.7037628774852931, "total_flos": 1641731478698824608, "step": 53120 }, { "loss": 1.21826171875, "learning_rate": 8.965346468391535e-06, "epoch": 0.7041868310259468, "total_flos": 1642735119733068480, "step": 53152 }, { "loss": 1.180908203125, "learning_rate": 8.952496954933008e-06, "epoch": 0.7046107845666006, "total_flos": 1643782279818939072, "step": 53184 }, { "loss": 1.190185546875, "learning_rate": 8.93964744147448e-06, "epoch": 0.7050347381072545, "total_flos": 1644841490096923680, "step": 53216 }, { "loss": 1.187255859375, "learning_rate": 8.926797928015955e-06, "epoch": 0.7054586916479082, "total_flos": 1645822945263443616, "step": 53248 }, { "loss": 1.18212890625, "learning_rate": 8.913948414557429e-06, "epoch": 0.705882645188562, "total_flos": 1646849303081752896, "step": 53280 }, { "loss": 1.20068359375, "learning_rate": 8.9010989010989e-06, "epoch": 0.7063065987292159, "total_flos": 1647874277299899552, "step": 53312 }, { "loss": 1.212890625, "learning_rate": 8.888249387640374e-06, "epoch": 0.7067305522698696, "total_flos": 1648853528359183680, "step": 53344 }, { "loss": 1.197998046875, "learning_rate": 8.875399874181848e-06, "epoch": 0.7071545058105234, "total_flos": 1649863862157004896, "step": 53376 }, { "loss": 1.2001953125, "learning_rate": 8.86255036072332e-06, "epoch": 0.7075784593511772, "total_flos": 1650902945879135520, "step": 53408 }, { "loss": 1.192626953125, "learning_rate": 8.849700847264795e-06, "epoch": 0.708002412891831, "total_flos": 1651966033455250272, "step": 53440 }, { "loss": 1.1884765625, "learning_rate": 8.836851333806267e-06, "epoch": 0.7084263664324848, "total_flos": 1652985505449494400, "step": 53472 }, { "loss": 1.211181640625, "learning_rate": 8.82400182034774e-06, "epoch": 0.7088503199731385, "total_flos": 1653955586135607648, "step": 53504 }, { "loss": 1.16259765625, "learning_rate": 8.811152306889214e-06, "epoch": 0.7092742735137924, "total_flos": 1655021666149283424, "step": 53536 }, { "loss": 1.177978515625, "learning_rate": 8.798302793430685e-06, "epoch": 0.7096982270544462, "total_flos": 1656009267074665248, "step": 53568 }, { "loss": 1.1669921875, "learning_rate": 8.78545327997216e-06, "epoch": 0.7101221805950999, "total_flos": 1657008371187445632, "step": 53600 }, { "loss": 1.165771484375, "learning_rate": 8.772603766513634e-06, "epoch": 0.7105461341357537, "total_flos": 1658070203870389632, "step": 53632 }, { "loss": 1.18896484375, "learning_rate": 8.759754253055106e-06, "epoch": 0.7109700876764076, "total_flos": 1659051964716015264, "step": 53664 }, { "loss": 1.204345703125, "learning_rate": 8.74690473959658e-06, "epoch": 0.7113940412170613, "total_flos": 1660087573349365344, "step": 53696 }, { "loss": 1.18212890625, "learning_rate": 8.734055226138051e-06, "epoch": 0.7118179947577151, "total_flos": 1661084473354909920, "step": 53728 }, { "loss": 1.1923828125, "learning_rate": 8.721205712679525e-06, "epoch": 0.7122419482983688, "total_flos": 1662099311897446656, "step": 53760 }, { "loss": 1.24169921875, "learning_rate": 8.708356199221e-06, "epoch": 0.7126659018390227, "total_flos": 1663132587716569056, "step": 53792 }, { "loss": 1.222412109375, "learning_rate": 8.695506685762472e-06, "epoch": 0.7130898553796765, "total_flos": 1664118563716178496, "step": 53824 }, { "loss": 1.19775390625, "learning_rate": 8.682657172303945e-06, "epoch": 0.7135138089203302, "total_flos": 1665128527481398080, "step": 53856 }, { "loss": 1.19189453125, "learning_rate": 8.669807658845419e-06, "epoch": 0.7139377624609841, "total_flos": 1666113473825072544, "step": 53888 }, { "loss": 1.177001953125, "learning_rate": 8.65695814538689e-06, "epoch": 0.7143617160016379, "total_flos": 1667116632208096896, "step": 53920 }, { "loss": 1.159912109375, "learning_rate": 8.644108631928364e-06, "epoch": 0.7147856695422916, "total_flos": 1668097846049007072, "step": 53952 }, { "loss": 1.17236328125, "learning_rate": 8.631259118469837e-06, "epoch": 0.7152096230829454, "total_flos": 1669119280824877248, "step": 53984 }, { "loss": 1.192138671875, "learning_rate": 8.61840960501131e-06, "epoch": 0.7156335766235993, "total_flos": 1670152733616113472, "step": 54016 }, { "loss": 1.205810546875, "learning_rate": 8.605560091552784e-06, "epoch": 0.716057530164253, "total_flos": 1671169551028650240, "step": 54048 }, { "loss": 1.188720703125, "learning_rate": 8.592710578094256e-06, "epoch": 0.7164814837049068, "total_flos": 1672175154844520160, "step": 54080 }, { "loss": 1.18798828125, "learning_rate": 8.57986106463573e-06, "epoch": 0.7169054372455606, "total_flos": 1673239368606813792, "step": 54112 }, { "loss": 1.176025390625, "learning_rate": 8.567011551177203e-06, "epoch": 0.7173293907862144, "total_flos": 1674309856834961184, "step": 54144 }, { "loss": 1.1962890625, "learning_rate": 8.554162037718677e-06, "epoch": 0.7177533443268682, "total_flos": 1675305148003107360, "step": 54176 }, { "loss": 1.191162109375, "learning_rate": 8.54131252426015e-06, "epoch": 0.7181772978675219, "total_flos": 1676298202887269760, "step": 54208 }, { "loss": 1.16455078125, "learning_rate": 8.528463010801622e-06, "epoch": 0.7186012514081758, "total_flos": 1677331784385497856, "step": 54240 }, { "loss": 1.168701171875, "learning_rate": 8.515613497343095e-06, "epoch": 0.7190252049488296, "total_flos": 1678327912149091200, "step": 54272 }, { "loss": 1.18701171875, "learning_rate": 8.502763983884569e-06, "epoch": 0.7194491584894833, "total_flos": 1679386671952604256, "step": 54304 }, { "loss": 1.185546875, "learning_rate": 8.48991447042604e-06, "epoch": 0.7198731120301372, "total_flos": 1680461230539369600, "step": 54336 }, { "loss": 1.1875, "learning_rate": 8.477064956967516e-06, "epoch": 0.7202970655707909, "total_flos": 1681492173544264320, "step": 54368 }, { "loss": 1.182861328125, "learning_rate": 8.46421544350899e-06, "epoch": 0.7207210191114447, "total_flos": 1682539365806882880, "step": 54400 }, { "loss": 1.156982421875, "learning_rate": 8.451365930050461e-06, "epoch": 0.7211449726520985, "total_flos": 1683586027153159968, "step": 54432 }, { "loss": 1.160888671875, "learning_rate": 8.438516416591935e-06, "epoch": 0.7215689261927523, "total_flos": 1684612513678461120, "step": 54464 }, { "loss": 1.211669921875, "learning_rate": 8.425666903133406e-06, "epoch": 0.7219928797334061, "total_flos": 1685665980406933440, "step": 54496 }, { "loss": 1.207275390625, "learning_rate": 8.41281738967488e-06, "epoch": 0.7224168332740599, "total_flos": 1686678019572396960, "step": 54528 }, { "loss": 1.174560546875, "learning_rate": 8.399967876216355e-06, "epoch": 0.7228407868147136, "total_flos": 1687717843359730848, "step": 54560 }, { "loss": 1.18359375, "learning_rate": 8.387118362757827e-06, "epoch": 0.7232647403553675, "total_flos": 1688785323061943232, "step": 54592 }, { "loss": 1.16162109375, "learning_rate": 8.3742688492993e-06, "epoch": 0.7236886938960213, "total_flos": 1689801191260414944, "step": 54624 }, { "loss": 1.2138671875, "learning_rate": 8.361419335840774e-06, "epoch": 0.724112647436675, "total_flos": 1690789612692869952, "step": 54656 }, { "loss": 1.2109375, "learning_rate": 8.348569822382246e-06, "epoch": 0.7245366009773289, "total_flos": 1691760578239552320, "step": 54688 }, { "loss": 1.210205078125, "learning_rate": 8.335720308923719e-06, "epoch": 0.7249605545179826, "total_flos": 1692764283627292128, "step": 54720 }, { "loss": 1.179931640625, "learning_rate": 8.322870795465193e-06, "epoch": 0.7253845080586364, "total_flos": 1693787069826576960, "step": 54752 }, { "loss": 1.19091796875, "learning_rate": 8.310021282006666e-06, "epoch": 0.7258084615992902, "total_flos": 1694808987253666656, "step": 54784 }, { "loss": 1.17529296875, "learning_rate": 8.29717176854814e-06, "epoch": 0.726232415139944, "total_flos": 1695860346405147072, "step": 54816 }, { "loss": 1.205810546875, "learning_rate": 8.284322255089611e-06, "epoch": 0.7266563686805978, "total_flos": 1696914360138334848, "step": 54848 }, { "loss": 1.179443359375, "learning_rate": 8.271472741631085e-06, "epoch": 0.7270803222212516, "total_flos": 1697965429699083552, "step": 54880 }, { "loss": 1.173828125, "learning_rate": 8.258623228172558e-06, "epoch": 0.7275042757619054, "total_flos": 1698984451218856128, "step": 54912 }, { "loss": 1.207275390625, "learning_rate": 8.245773714714032e-06, "epoch": 0.7279282293025592, "total_flos": 1699952376062855520, "step": 54944 }, { "loss": 1.182373046875, "learning_rate": 8.232924201255505e-06, "epoch": 0.7283521828432129, "total_flos": 1700969531331245952, "step": 54976 }, { "loss": 1.1708984375, "learning_rate": 8.220074687796977e-06, "epoch": 0.7287761363838667, "total_flos": 1702006555741506624, "step": 55008 }, { "loss": 1.17578125, "learning_rate": 8.20722517433845e-06, "epoch": 0.7292000899245206, "total_flos": 1703028199666238592, "step": 55040 }, { "loss": 1.216552734375, "learning_rate": 8.194375660879924e-06, "epoch": 0.7296240434651743, "total_flos": 1704024970964791296, "step": 55072 }, { "loss": 1.193359375, "learning_rate": 8.181526147421398e-06, "epoch": 0.7300479970058281, "total_flos": 1705035867855701952, "step": 55104 }, { "loss": 1.1796875, "learning_rate": 8.168676633962871e-06, "epoch": 0.730471950546482, "total_flos": 1706053521863685888, "step": 55136 }, { "loss": 1.2080078125, "learning_rate": 8.155827120504345e-06, "epoch": 0.7308959040871357, "total_flos": 1707086282854840800, "step": 55168 }, { "loss": 1.2109375, "learning_rate": 8.142977607045816e-06, "epoch": 0.7313198576277895, "total_flos": 1708089505591361088, "step": 55200 }, { "loss": 1.16455078125, "learning_rate": 8.13012809358729e-06, "epoch": 0.7317438111684433, "total_flos": 1709119949856662304, "step": 55232 }, { "loss": 1.2041015625, "learning_rate": 8.117278580128762e-06, "epoch": 0.7321677647090971, "total_flos": 1710085010970092544, "step": 55264 }, { "loss": 1.155029296875, "learning_rate": 8.104429066670237e-06, "epoch": 0.7325917182497509, "total_flos": 1711104096843361056, "step": 55296 }, { "loss": 1.21044921875, "learning_rate": 8.09157955321171e-06, "epoch": 0.7330156717904046, "total_flos": 1712105421151751232, "step": 55328 }, { "loss": 1.179443359375, "learning_rate": 8.078730039753182e-06, "epoch": 0.7334396253310584, "total_flos": 1713127515550954752, "step": 55360 }, { "loss": 1.18359375, "learning_rate": 8.065880526294656e-06, "epoch": 0.7338635788717123, "total_flos": 1714123305458694432, "step": 55392 }, { "loss": 1.19921875, "learning_rate": 8.05303101283613e-06, "epoch": 0.734287532412366, "total_flos": 1715173441893752064, "step": 55424 }, { "loss": 1.1826171875, "learning_rate": 8.040181499377601e-06, "epoch": 0.7347114859530198, "total_flos": 1716179013532874016, "step": 55456 }, { "loss": 1.17724609375, "learning_rate": 8.027331985919076e-06, "epoch": 0.7351354394936737, "total_flos": 1717222328497362432, "step": 55488 }, { "loss": 1.220947265625, "learning_rate": 8.014482472460548e-06, "epoch": 0.7355593930343274, "total_flos": 1718235300788517024, "step": 55520 }, { "loss": 1.177978515625, "learning_rate": 8.001632959002021e-06, "epoch": 0.7359833465749812, "total_flos": 1719258215694793728, "step": 55552 }, { "loss": 1.172607421875, "learning_rate": 7.988783445543495e-06, "epoch": 0.7364073001156349, "total_flos": 1720298136012371520, "step": 55584 }, { "loss": 1.180419921875, "learning_rate": 7.975933932084967e-06, "epoch": 0.7368312536562888, "total_flos": 1721312427550192800, "step": 55616 }, { "loss": 1.179931640625, "learning_rate": 7.96308441862644e-06, "epoch": 0.7372552071969426, "total_flos": 1722265792415736672, "step": 55648 }, { "loss": 1.190185546875, "learning_rate": 7.950234905167915e-06, "epoch": 0.7376791607375963, "total_flos": 1723308592552257600, "step": 55680 }, { "loss": 1.183837890625, "learning_rate": 7.937385391709387e-06, "epoch": 0.7381031142782501, "total_flos": 1724356492703331456, "step": 55712 }, { "loss": 1.17919921875, "learning_rate": 7.92453587825086e-06, "epoch": 0.738527067818904, "total_flos": 1725402848370502848, "step": 55744 }, { "loss": 1.20166015625, "learning_rate": 7.911686364792332e-06, "epoch": 0.7389510213595577, "total_flos": 1726392460342632672, "step": 55776 }, { "loss": 1.19775390625, "learning_rate": 7.898836851333806e-06, "epoch": 0.7393749749002115, "total_flos": 1727447342848015584, "step": 55808 }, { "loss": 1.2021484375, "learning_rate": 7.88598733787528e-06, "epoch": 0.7397989284408654, "total_flos": 1728432417898681920, "step": 55840 }, { "loss": 1.17578125, "learning_rate": 7.873137824416753e-06, "epoch": 0.7402228819815191, "total_flos": 1729459387075202592, "step": 55872 }, { "loss": 1.19091796875, "learning_rate": 7.860288310958226e-06, "epoch": 0.7406468355221729, "total_flos": 1730502267653593440, "step": 55904 }, { "loss": 1.187744140625, "learning_rate": 7.8474387974997e-06, "epoch": 0.7410707890628266, "total_flos": 1731548703762634752, "step": 55936 }, { "loss": 1.1845703125, "learning_rate": 7.834589284041172e-06, "epoch": 0.7414947426034805, "total_flos": 1732594415894846784, "step": 55968 }, { "loss": 1.18359375, "learning_rate": 7.821739770582645e-06, "epoch": 0.7419186961441343, "total_flos": 1733622366462180480, "step": 56000 }, { "loss": 1.183837890625, "learning_rate": 7.808890257124117e-06, "epoch": 0.742342649684788, "total_flos": 1734623111589107232, "step": 56032 }, { "loss": 1.173583984375, "learning_rate": 7.796040743665592e-06, "epoch": 0.7427666032254419, "total_flos": 1735622086994895744, "step": 56064 }, { "loss": 1.20068359375, "learning_rate": 7.783191230207066e-06, "epoch": 0.7431905567660957, "total_flos": 1736658258721335264, "step": 56096 }, { "loss": 1.173095703125, "learning_rate": 7.770341716748538e-06, "epoch": 0.7436145103067494, "total_flos": 1737679387818099744, "step": 56128 }, { "loss": 1.194580078125, "learning_rate": 7.757492203290011e-06, "epoch": 0.7440384638474032, "total_flos": 1738695384723563328, "step": 56160 }, { "loss": 1.1494140625, "learning_rate": 7.744642689831484e-06, "epoch": 0.744462417388057, "total_flos": 1739722563048945792, "step": 56192 }, { "loss": 1.188720703125, "learning_rate": 7.731793176372956e-06, "epoch": 0.7448863709287108, "total_flos": 1740747714239206272, "step": 56224 }, { "loss": 1.1748046875, "learning_rate": 7.718943662914431e-06, "epoch": 0.7453103244693646, "total_flos": 1741768457214995136, "step": 56256 }, { "loss": 1.186279296875, "learning_rate": 7.706094149455903e-06, "epoch": 0.7457342780100183, "total_flos": 1742836918308020544, "step": 56288 }, { "loss": 1.16796875, "learning_rate": 7.693244635997377e-06, "epoch": 0.7461582315506722, "total_flos": 1743892492613484768, "step": 56320 }, { "loss": 1.16259765625, "learning_rate": 7.68039512253885e-06, "epoch": 0.746582185091326, "total_flos": 1744873529482281120, "step": 56352 }, { "loss": 1.211181640625, "learning_rate": 7.667545609080322e-06, "epoch": 0.7470061386319797, "total_flos": 1745848661917825344, "step": 56384 }, { "loss": 1.154296875, "learning_rate": 7.654696095621796e-06, "epoch": 0.7474300921726336, "total_flos": 1746888260467923456, "step": 56416 }, { "loss": 1.172607421875, "learning_rate": 7.64184658216327e-06, "epoch": 0.7478540457132874, "total_flos": 1747914360872248992, "step": 56448 }, { "loss": 1.15478515625, "learning_rate": 7.6289970687047425e-06, "epoch": 0.7482779992539411, "total_flos": 1748916103478362752, "step": 56480 }, { "loss": 1.153076171875, "learning_rate": 7.616147555246216e-06, "epoch": 0.748701952794595, "total_flos": 1749996872177485920, "step": 56512 }, { "loss": 1.16943359375, "learning_rate": 7.603298041787689e-06, "epoch": 0.7491259063352487, "total_flos": 1751014365301730016, "step": 56544 }, { "loss": 1.17431640625, "learning_rate": 7.590448528329161e-06, "epoch": 0.7495498598759025, "total_flos": 1752055765749714336, "step": 56576 }, { "loss": 1.195068359375, "learning_rate": 7.577599014870634e-06, "epoch": 0.7499738134165563, "total_flos": 1753049174578104384, "step": 56608 }, { "loss": 1.17138671875, "learning_rate": 7.564749501412109e-06, "epoch": 0.75039776695721, "total_flos": 1754085153244056096, "step": 56640 }, { "loss": 1.173095703125, "learning_rate": 7.551899987953582e-06, "epoch": 0.7508217204978639, "total_flos": 1755146937661878144, "step": 56672 }, { "loss": 1.150390625, "learning_rate": 7.539050474495054e-06, "epoch": 0.7512456740385177, "total_flos": 1756178363317992384, "step": 56704 }, { "loss": 1.181884765625, "learning_rate": 7.526200961036527e-06, "epoch": 0.7516696275791714, "total_flos": 1757224316775814176, "step": 56736 }, { "loss": 1.176025390625, "learning_rate": 7.5133514475780005e-06, "epoch": 0.7520935811198253, "total_flos": 1758203809160708064, "step": 56768 }, { "loss": 1.208984375, "learning_rate": 7.500501934119475e-06, "epoch": 0.752517534660479, "total_flos": 1759219291238204160, "step": 56800 }, { "loss": 1.1669921875, "learning_rate": 7.487652420660947e-06, "epoch": 0.7529414882011328, "total_flos": 1760266933975294272, "step": 56832 }, { "loss": 1.193359375, "learning_rate": 7.47480290720242e-06, "epoch": 0.7533654417417867, "total_flos": 1761279182289619584, "step": 56864 }, { "loss": 1.1875, "learning_rate": 7.461953393743894e-06, "epoch": 0.7537893952824404, "total_flos": 1762312248959880192, "step": 56896 }, { "loss": 1.189208984375, "learning_rate": 7.449103880285366e-06, "epoch": 0.7542133488230942, "total_flos": 1763338091950221984, "step": 56928 }, { "loss": 1.181640625, "learning_rate": 7.43625436682684e-06, "epoch": 0.754637302363748, "total_flos": 1764370080699425664, "step": 56960 }, { "loss": 1.15771484375, "learning_rate": 7.4234048533683124e-06, "epoch": 0.7550612559044018, "total_flos": 1765425767623507776, "step": 56992 }, { "loss": 1.197265625, "learning_rate": 7.410555339909786e-06, "epoch": 0.7554852094450556, "total_flos": 1766463371215231872, "step": 57024 }, { "loss": 1.164794921875, "learning_rate": 7.3977058264512594e-06, "epoch": 0.7559091629857093, "total_flos": 1767491064368581824, "step": 57056 }, { "loss": 1.176025390625, "learning_rate": 7.384856312992732e-06, "epoch": 0.7563331165263631, "total_flos": 1768482494326971840, "step": 57088 }, { "loss": 1.188720703125, "learning_rate": 7.372006799534205e-06, "epoch": 0.756757070067017, "total_flos": 1769468373796337376, "step": 57120 }, { "loss": 1.178466796875, "learning_rate": 7.359157286075679e-06, "epoch": 0.7571810236076707, "total_flos": 1770499831629199584, "step": 57152 }, { "loss": 1.199462890625, "learning_rate": 7.346307772617152e-06, "epoch": 0.7576049771483245, "total_flos": 1771496747723118144, "step": 57184 }, { "loss": 1.1748046875, "learning_rate": 7.333458259158624e-06, "epoch": 0.7580289306889784, "total_flos": 1772520853169069664, "step": 57216 }, { "loss": 1.20166015625, "learning_rate": 7.320608745700098e-06, "epoch": 0.7584528842296321, "total_flos": 1773518348444451648, "step": 57248 }, { "loss": 1.216064453125, "learning_rate": 7.307759232241571e-06, "epoch": 0.7588768377702859, "total_flos": 1774464666602190528, "step": 57280 }, { "loss": 1.177001953125, "learning_rate": 7.294909718783044e-06, "epoch": 0.7593007913109397, "total_flos": 1775495287839605568, "step": 57312 }, { "loss": 1.177490234375, "learning_rate": 7.2820602053245175e-06, "epoch": 0.7597247448515935, "total_flos": 1776543686730272928, "step": 57344 }, { "loss": 1.19140625, "learning_rate": 7.26921069186599e-06, "epoch": 0.7601486983922473, "total_flos": 1777563625287362592, "step": 57376 }, { "loss": 1.177978515625, "learning_rate": 7.256361178407464e-06, "epoch": 0.760572651932901, "total_flos": 1778608243410143712, "step": 57408 }, { "loss": 1.1806640625, "learning_rate": 7.243511664948937e-06, "epoch": 0.7609966054735549, "total_flos": 1779646683597314976, "step": 57440 }, { "loss": 1.17529296875, "learning_rate": 7.23066215149041e-06, "epoch": 0.7614205590142087, "total_flos": 1780684721575136640, "step": 57472 }, { "loss": 1.168701171875, "learning_rate": 7.217812638031882e-06, "epoch": 0.7618445125548624, "total_flos": 1781698771787348160, "step": 57504 }, { "loss": 1.1796875, "learning_rate": 7.204963124573357e-06, "epoch": 0.7622684660955162, "total_flos": 1782739174756145472, "step": 57536 }, { "loss": 1.162841796875, "learning_rate": 7.192113611114829e-06, "epoch": 0.7626924196361701, "total_flos": 1783751020861121184, "step": 57568 }, { "loss": 1.1796875, "learning_rate": 7.179264097656303e-06, "epoch": 0.7631163731768238, "total_flos": 1784798599244715360, "step": 57600 }, { "loss": 1.181396484375, "learning_rate": 7.1664145841977755e-06, "epoch": 0.7635403267174776, "total_flos": 1785891385959204576, "step": 57632 }, { "loss": 1.171142578125, "learning_rate": 7.153565070739249e-06, "epoch": 0.7639642802581313, "total_flos": 1786881705819789696, "step": 57664 }, { "loss": 1.169677734375, "learning_rate": 7.1407155572807225e-06, "epoch": 0.7643882337987852, "total_flos": 1787915721704115360, "step": 57696 }, { "loss": 1.187744140625, "learning_rate": 7.127866043822195e-06, "epoch": 0.764812187339439, "total_flos": 1788970202000148672, "step": 57728 }, { "loss": 1.17919921875, "learning_rate": 7.115016530363668e-06, "epoch": 0.7652361408800927, "total_flos": 1790008239977970336, "step": 57760 }, { "loss": 1.19140625, "learning_rate": 7.102167016905142e-06, "epoch": 0.7656600944207466, "total_flos": 1790973864184490016, "step": 57792 }, { "loss": 1.18896484375, "learning_rate": 7.089317503446615e-06, "epoch": 0.7660840479614004, "total_flos": 1792014894599872704, "step": 57824 }, { "loss": 1.18701171875, "learning_rate": 7.076467989988087e-06, "epoch": 0.7665080015020541, "total_flos": 1793036988999076224, "step": 57856 }, { "loss": 1.20068359375, "learning_rate": 7.063618476529561e-06, "epoch": 0.766931955042708, "total_flos": 1794005316052425216, "step": 57888 }, { "loss": 1.163330078125, "learning_rate": 7.050768963071034e-06, "epoch": 0.7673559085833618, "total_flos": 1795071315624231072, "step": 57920 }, { "loss": 1.18212890625, "learning_rate": 7.037919449612507e-06, "epoch": 0.7677798621240155, "total_flos": 1796118813565955328, "step": 57952 }, { "loss": 1.189208984375, "learning_rate": 7.0250699361539805e-06, "epoch": 0.7682038156646693, "total_flos": 1797164525698167360, "step": 57984 }, { "loss": 1.16015625, "learning_rate": 7.012220422695453e-06, "epoch": 0.768627769205323, "total_flos": 1798146913990378368, "step": 58016 }, { "loss": 1.1845703125, "learning_rate": 6.999370909236927e-06, "epoch": 0.7690517227459769, "total_flos": 1799160867672345984, "step": 58048 }, { "loss": 1.1728515625, "learning_rate": 6.9865213957784e-06, "epoch": 0.7694756762866307, "total_flos": 1800107732834800320, "step": 58080 }, { "loss": 1.1787109375, "learning_rate": 6.973671882319873e-06, "epoch": 0.7698996298272844, "total_flos": 1801125869494003776, "step": 58112 }, { "loss": 1.176513671875, "learning_rate": 6.9608223688613454e-06, "epoch": 0.7703235833679383, "total_flos": 1802185948544183520, "step": 58144 }, { "loss": 1.188720703125, "learning_rate": 6.94797285540282e-06, "epoch": 0.7707475369085921, "total_flos": 1803238482146964768, "step": 58176 }, { "loss": 1.19189453125, "learning_rate": 6.935123341944292e-06, "epoch": 0.7711714904492458, "total_flos": 1804260705253160160, "step": 58208 }, { "loss": 1.222412109375, "learning_rate": 6.922273828485765e-06, "epoch": 0.7715954439898997, "total_flos": 1805257894849436448, "step": 58240 }, { "loss": 1.19775390625, "learning_rate": 6.9094243150272385e-06, "epoch": 0.7720193975305534, "total_flos": 1806264753558477120, "step": 58272 }, { "loss": 1.16845703125, "learning_rate": 6.896574801568712e-06, "epoch": 0.7724433510712072, "total_flos": 1807295937888981600, "step": 58304 }, { "loss": 1.171142578125, "learning_rate": 6.883725288110185e-06, "epoch": 0.772867304611861, "total_flos": 1808330484689648736, "step": 58336 }, { "loss": 1.171630859375, "learning_rate": 6.870875774651658e-06, "epoch": 0.7732912581525148, "total_flos": 1809353850070396992, "step": 58368 }, { "loss": 1.1806640625, "learning_rate": 6.858026261193131e-06, "epoch": 0.7737152116931686, "total_flos": 1810362639384315744, "step": 58400 }, { "loss": 1.166259765625, "learning_rate": 6.845176747734604e-06, "epoch": 0.7741391652338224, "total_flos": 1811389013290999008, "step": 58432 }, { "loss": 1.169677734375, "learning_rate": 6.832327234276078e-06, "epoch": 0.7745631187744761, "total_flos": 1812388085227031424, "step": 58464 }, { "loss": 1.1962890625, "learning_rate": 6.8194777208175504e-06, "epoch": 0.77498707231513, "total_flos": 1813401491904283584, "step": 58496 }, { "loss": 1.17431640625, "learning_rate": 6.806628207359023e-06, "epoch": 0.7754110258557838, "total_flos": 1814404714640803872, "step": 58528 }, { "loss": 1.189453125, "learning_rate": 6.7937786939004966e-06, "epoch": 0.7758349793964375, "total_flos": 1815400102339193952, "step": 58560 }, { "loss": 1.19873046875, "learning_rate": 6.78092918044197e-06, "epoch": 0.7762589329370914, "total_flos": 1816405255680592320, "step": 58592 }, { "loss": 1.1953125, "learning_rate": 6.768079666983443e-06, "epoch": 0.7766828864777451, "total_flos": 1817415975599389152, "step": 58624 }, { "loss": 1.170654296875, "learning_rate": 6.755230153524916e-06, "epoch": 0.7771068400183989, "total_flos": 1818452243856072576, "step": 58656 }, { "loss": 1.213623046875, "learning_rate": 6.742380640066389e-06, "epoch": 0.7775307935590527, "total_flos": 1819397500181128512, "step": 58688 }, { "loss": 1.21435546875, "learning_rate": 6.729531126607862e-06, "epoch": 0.7779547470997065, "total_flos": 1820355482410005792, "step": 58720 }, { "loss": 1.207763671875, "learning_rate": 6.716681613149336e-06, "epoch": 0.7783787006403603, "total_flos": 1821388597345388352, "step": 58752 }, { "loss": 1.1865234375, "learning_rate": 6.7038320996908085e-06, "epoch": 0.7788026541810141, "total_flos": 1822400121682884384, "step": 58784 }, { "loss": 1.187744140625, "learning_rate": 6.690982586232281e-06, "epoch": 0.7792266077216679, "total_flos": 1823391776878510176, "step": 58816 }, { "loss": 1.17822265625, "learning_rate": 6.6781330727737555e-06, "epoch": 0.7796505612623217, "total_flos": 1824430265330803392, "step": 58848 }, { "loss": 1.20166015625, "learning_rate": 6.665283559315228e-06, "epoch": 0.7800745148029754, "total_flos": 1825400297751794688, "step": 58880 }, { "loss": 1.1357421875, "learning_rate": 6.652434045856701e-06, "epoch": 0.7804984683436292, "total_flos": 1826459154085551648, "step": 58912 }, { "loss": 1.192138671875, "learning_rate": 6.639584532398174e-06, "epoch": 0.7809224218842831, "total_flos": 1827482085080202336, "step": 58944 }, { "loss": 1.15478515625, "learning_rate": 6.626735018939648e-06, "epoch": 0.7813463754249368, "total_flos": 1828474850373633024, "step": 58976 }, { "loss": 1.20361328125, "learning_rate": 6.61388550548112e-06, "epoch": 0.7817703289655906, "total_flos": 1829457126047226144, "step": 59008 }, { "loss": 1.1748046875, "learning_rate": 6.601035992022594e-06, "epoch": 0.7821942825062445, "total_flos": 1830458305560250464, "step": 59040 }, { "loss": 1.1884765625, "learning_rate": 6.5881864785640665e-06, "epoch": 0.7826182360468982, "total_flos": 1831453918495876320, "step": 59072 }, { "loss": 1.176513671875, "learning_rate": 6.57533696510554e-06, "epoch": 0.783042189587552, "total_flos": 1832472827397031008, "step": 59104 }, { "loss": 1.15576171875, "learning_rate": 6.5624874516470135e-06, "epoch": 0.7834661431282058, "total_flos": 1833464933067128352, "step": 59136 }, { "loss": 1.18310546875, "learning_rate": 6.549637938188486e-06, "epoch": 0.7838900966688596, "total_flos": 1834493076694949856, "step": 59168 }, { "loss": 1.221923828125, "learning_rate": 6.53678842472996e-06, "epoch": 0.7843140502095134, "total_flos": 1835444173099761984, "step": 59200 }, { "loss": 1.1796875, "learning_rate": 6.523938911271433e-06, "epoch": 0.7847380037501671, "total_flos": 1836471415778640384, "step": 59232 }, { "loss": 1.19482421875, "learning_rate": 6.511089397812906e-06, "epoch": 0.7851619572908209, "total_flos": 1837498690634266752, "step": 59264 }, { "loss": 1.201171875, "learning_rate": 6.498239884354379e-06, "epoch": 0.7855859108314748, "total_flos": 1838486838564364032, "step": 59296 }, { "loss": 1.18896484375, "learning_rate": 6.485390370895852e-06, "epoch": 0.7860098643721285, "total_flos": 1839488500728607872, "step": 59328 }, { "loss": 1.167236328125, "learning_rate": 6.472540857437325e-06, "epoch": 0.7864338179127823, "total_flos": 1840541533070982624, "step": 59360 }, { "loss": 1.205078125, "learning_rate": 6.459691343978799e-06, "epoch": 0.7868577714534362, "total_flos": 1841571591215308224, "step": 59392 }, { "loss": 1.189697265625, "learning_rate": 6.4468418305202715e-06, "epoch": 0.7872817249940899, "total_flos": 1842605478392642016, "step": 59424 }, { "loss": 1.1259765625, "learning_rate": 6.433992317061744e-06, "epoch": 0.7877056785347437, "total_flos": 1843634120760057024, "step": 59456 }, { "loss": 1.190673828125, "learning_rate": 6.4211428036032185e-06, "epoch": 0.7881296320753974, "total_flos": 1844652289596008448, "step": 59488 }, { "loss": 1.14794921875, "learning_rate": 6.408293290144691e-06, "epoch": 0.7885535856160513, "total_flos": 1845723871833586752, "step": 59520 }, { "loss": 1.14794921875, "learning_rate": 6.395443776686164e-06, "epoch": 0.7889775391567051, "total_flos": 1846799878374010656, "step": 59552 }, { "loss": 1.158447265625, "learning_rate": 6.382594263227637e-06, "epoch": 0.7894014926973588, "total_flos": 1847770924362562944, "step": 59584 }, { "loss": 1.176513671875, "learning_rate": 6.369744749769111e-06, "epoch": 0.7898254462380127, "total_flos": 1848829668077702016, "step": 59616 }, { "loss": 1.158447265625, "learning_rate": 6.356895236310583e-06, "epoch": 0.7902493997786665, "total_flos": 1849882507359588960, "step": 59648 }, { "loss": 1.154541015625, "learning_rate": 6.344045722852057e-06, "epoch": 0.7906733533193202, "total_flos": 1850938033399931232, "step": 59680 }, { "loss": 1.175048828125, "learning_rate": 6.3311962093935296e-06, "epoch": 0.791097306859974, "total_flos": 1851930106893280608, "step": 59712 }, { "loss": 1.200927734375, "learning_rate": 6.318346695935003e-06, "epoch": 0.7915212604006279, "total_flos": 1852950849869069472, "step": 59744 }, { "loss": 1.1845703125, "learning_rate": 6.3054971824764765e-06, "epoch": 0.7919452139412816, "total_flos": 1853938273822337472, "step": 59776 }, { "loss": 1.171875, "learning_rate": 6.292647669017949e-06, "epoch": 0.7923691674819354, "total_flos": 1854995489141948064, "step": 59808 }, { "loss": 1.1865234375, "learning_rate": 6.279798155559422e-06, "epoch": 0.7927931210225891, "total_flos": 1856003490125541600, "step": 59840 }, { "loss": 1.183349609375, "learning_rate": 6.266948642100896e-06, "epoch": 0.793217074563243, "total_flos": 1857032148581330592, "step": 59872 }, { "loss": 1.179931640625, "learning_rate": 6.254099128642369e-06, "epoch": 0.7936410281038968, "total_flos": 1858046198793542112, "step": 59904 }, { "loss": 1.18017578125, "learning_rate": 6.2412496151838415e-06, "epoch": 0.7940649816445505, "total_flos": 1859046589976241216, "step": 59936 }, { "loss": 1.190673828125, "learning_rate": 6.228400101725315e-06, "epoch": 0.7944889351852044, "total_flos": 1860031697203655520, "step": 59968 }, { "loss": 1.19287109375, "learning_rate": 6.2155505882667884e-06, "epoch": 0.7949128887258582, "total_flos": 1861067289748631616, "step": 60000 }, { "loss": 1.205078125, "learning_rate": 6.202701074808261e-06, "epoch": 0.7953368422665119, "total_flos": 1862106357382388256, "step": 60032 }, { "loss": 1.150634765625, "learning_rate": 6.1898515613497346e-06, "epoch": 0.7957607958071657, "total_flos": 1863170571144681888, "step": 60064 }, { "loss": 1.15576171875, "learning_rate": 6.177002047891207e-06, "epoch": 0.7961847493478195, "total_flos": 1864181484123966528, "step": 60096 }, { "loss": 1.1728515625, "learning_rate": 6.164152534432681e-06, "epoch": 0.7966087028884733, "total_flos": 1865172061398535392, "step": 60128 }, { "loss": 1.188720703125, "learning_rate": 6.151303020974154e-06, "epoch": 0.7970326564291271, "total_flos": 1866177021679445952, "step": 60160 }, { "loss": 1.182373046875, "learning_rate": 6.138453507515627e-06, "epoch": 0.7974566099697808, "total_flos": 1867224133500194592, "step": 60192 }, { "loss": 1.184814453125, "learning_rate": 6.1256039940570995e-06, "epoch": 0.7978805635104347, "total_flos": 1868259951282406464, "step": 60224 }, { "loss": 1.216796875, "learning_rate": 6.112754480598574e-06, "epoch": 0.7983045170510885, "total_flos": 1869298085790472032, "step": 60256 }, { "loss": 1.203857421875, "learning_rate": 6.0999049671400465e-06, "epoch": 0.7987284705917422, "total_flos": 1870296305042683296, "step": 60288 }, { "loss": 1.161376953125, "learning_rate": 6.087055453681519e-06, "epoch": 0.7991524241323961, "total_flos": 1871305898775301248, "step": 60320 }, { "loss": 1.206298828125, "learning_rate": 6.074205940222993e-06, "epoch": 0.7995763776730499, "total_flos": 1872284313239138208, "step": 60352 }, { "loss": 1.166015625, "learning_rate": 6.061356426764466e-06, "epoch": 0.8000003312137036, "total_flos": 1873281969398260032, "step": 60384 }, { "loss": 1.187744140625, "learning_rate": 6.048506913305939e-06, "epoch": 0.8004242847543575, "total_flos": 1874299382080634208, "step": 60416 }, { "loss": 1.19873046875, "learning_rate": 6.035657399847412e-06, "epoch": 0.8008482382950112, "total_flos": 1875292501318292544, "step": 60448 }, { "loss": 1.17529296875, "learning_rate": 6.022807886388885e-06, "epoch": 0.801272191835665, "total_flos": 1876312439875382208, "step": 60480 }, { "loss": 1.201416015625, "learning_rate": 6.009958372930358e-06, "epoch": 0.8016961453763188, "total_flos": 1877320682184585504, "step": 60512 }, { "loss": 1.170166015625, "learning_rate": 5.997108859471832e-06, "epoch": 0.8021200989169726, "total_flos": 1878348407514683424, "step": 60544 }, { "loss": 1.195068359375, "learning_rate": 5.9842593460133045e-06, "epoch": 0.8025440524576264, "total_flos": 1879350246651041088, "step": 60576 }, { "loss": 1.19677734375, "learning_rate": 5.971409832554777e-06, "epoch": 0.8029680059982802, "total_flos": 1880346117000650688, "step": 60608 }, { "loss": 1.200439453125, "learning_rate": 5.9585603190962515e-06, "epoch": 0.8033919595389339, "total_flos": 1881352879179447456, "step": 60640 }, { "loss": 1.208251953125, "learning_rate": 5.945710805637724e-06, "epoch": 0.8038159130795878, "total_flos": 1882352996859788832, "step": 60672 }, { "loss": 1.187255859375, "learning_rate": 5.932861292179197e-06, "epoch": 0.8042398666202415, "total_flos": 1883433990796147776, "step": 60704 }, { "loss": 1.1943359375, "learning_rate": 5.92001177872067e-06, "epoch": 0.8046638201608953, "total_flos": 1884452465311204896, "step": 60736 }, { "loss": 1.18115234375, "learning_rate": 5.907162265262144e-06, "epoch": 0.8050877737015492, "total_flos": 1885475605454717376, "step": 60768 }, { "loss": 1.18798828125, "learning_rate": 5.894312751803616e-06, "epoch": 0.8055117272422029, "total_flos": 1886517440288799264, "step": 60800 }, { "loss": 1.20263671875, "learning_rate": 5.88146323834509e-06, "epoch": 0.8059356807828567, "total_flos": 1887549606010116768, "step": 60832 }, { "loss": 1.158203125, "learning_rate": 5.8686137248865625e-06, "epoch": 0.8063596343235105, "total_flos": 1888568643618263328, "step": 60864 }, { "loss": 1.1943359375, "learning_rate": 5.855764211428037e-06, "epoch": 0.8067835878641643, "total_flos": 1889566428484377024, "step": 60896 }, { "loss": 1.167724609375, "learning_rate": 5.8429146979695095e-06, "epoch": 0.8072075414048181, "total_flos": 1890571710532767264, "step": 60928 }, { "loss": 1.18505859375, "learning_rate": 5.830065184510982e-06, "epoch": 0.8076314949454719, "total_flos": 1891599612834979008, "step": 60960 }, { "loss": 1.134033203125, "learning_rate": 5.817215671052456e-06, "epoch": 0.8080554484861256, "total_flos": 1892650183656134208, "step": 60992 }, { "loss": 1.181640625, "learning_rate": 5.804366157593929e-06, "epoch": 0.8084794020267795, "total_flos": 1893679163879402880, "step": 61024 }, { "loss": 1.175537109375, "learning_rate": 5.791516644135402e-06, "epoch": 0.8089033555674332, "total_flos": 1894699794236573856, "step": 61056 }, { "loss": 1.1806640625, "learning_rate": 5.778667130676875e-06, "epoch": 0.809327309108087, "total_flos": 1895723465296427808, "step": 61088 }, { "loss": 1.1787109375, "learning_rate": 5.765817617218348e-06, "epoch": 0.8097512626487409, "total_flos": 1896737402890021440, "step": 61120 }, { "loss": 1.202880859375, "learning_rate": 5.752968103759821e-06, "epoch": 0.8101752161893946, "total_flos": 1897774829509631712, "step": 61152 }, { "loss": 1.181396484375, "learning_rate": 5.740118590301295e-06, "epoch": 0.8105991697300484, "total_flos": 1898785710312168384, "step": 61184 }, { "loss": 1.170654296875, "learning_rate": 5.7272690768427675e-06, "epoch": 0.8110231232707023, "total_flos": 1899817634707876128, "step": 61216 }, { "loss": 1.1708984375, "learning_rate": 5.71441956338424e-06, "epoch": 0.811447076811356, "total_flos": 1900841563181713824, "step": 61248 }, { "loss": 1.18798828125, "learning_rate": 5.7015700499257145e-06, "epoch": 0.8118710303520098, "total_flos": 1901827748330185056, "step": 61280 }, { "loss": 1.178955078125, "learning_rate": 5.688720536467187e-06, "epoch": 0.8122949838926635, "total_flos": 1902875873718494688, "step": 61312 }, { "loss": 1.1845703125, "learning_rate": 5.67587102300866e-06, "epoch": 0.8127189374333174, "total_flos": 1903860015643469952, "step": 61344 }, { "loss": 1.191650390625, "learning_rate": 5.663021509550133e-06, "epoch": 0.8131428909739712, "total_flos": 1904894996830234656, "step": 61376 }, { "loss": 1.179931640625, "learning_rate": 5.650171996091607e-06, "epoch": 0.8135668445146249, "total_flos": 1905921531620657760, "step": 61408 }, { "loss": 1.207275390625, "learning_rate": 5.6373224826330794e-06, "epoch": 0.8139907980552787, "total_flos": 1906894057739616576, "step": 61440 }, { "eval_loss": 1.0126022362804041, "epoch": 0.8139907980552787, "total_flos": 1906894057739616576, "step": 61440 }, { "loss": 1.1884765625, "learning_rate": 5.624472969174553e-06, "epoch": 0.8144147515959326, "total_flos": 1907917632269226624, "step": 61472 }, { "loss": 1.172607421875, "learning_rate": 5.611623455716026e-06, "epoch": 0.8148387051365863, "total_flos": 1908918715252007040, "step": 61504 }, { "loss": 1.157470703125, "learning_rate": 5.598773942257499e-06, "epoch": 0.8152626586772401, "total_flos": 1910009378301130368, "step": 61536 }, { "loss": 1.182373046875, "learning_rate": 5.5859244287989726e-06, "epoch": 0.815686612217894, "total_flos": 1911044890404236544, "step": 61568 }, { "loss": 1.16162109375, "learning_rate": 5.573074915340445e-06, "epoch": 0.8161105657585477, "total_flos": 1912069623296773440, "step": 61600 }, { "loss": 1.190185546875, "learning_rate": 5.560225401881918e-06, "epoch": 0.8165345192992015, "total_flos": 1913049807481748640, "step": 61632 }, { "loss": 1.19482421875, "learning_rate": 5.547375888423392e-06, "epoch": 0.8169584728398552, "total_flos": 1914040609993553280, "step": 61664 }, { "loss": 1.17919921875, "learning_rate": 5.534526374964865e-06, "epoch": 0.8173824263805091, "total_flos": 1915055577243081888, "step": 61696 }, { "loss": 1.170654296875, "learning_rate": 5.5216768615063375e-06, "epoch": 0.8178063799211629, "total_flos": 1916057512909683456, "step": 61728 }, { "loss": 1.170654296875, "learning_rate": 5.508827348047811e-06, "epoch": 0.8182303334618166, "total_flos": 1917081457471895136, "step": 61760 }, { "loss": 1.1640625, "learning_rate": 5.4959778345892845e-06, "epoch": 0.8186542870024704, "total_flos": 1918100639875407552, "step": 61792 }, { "loss": 1.182373046875, "learning_rate": 5.483128321130757e-06, "epoch": 0.8190782405431243, "total_flos": 1919102849044366848, "step": 61824 }, { "loss": 1.189697265625, "learning_rate": 5.470278807672231e-06, "epoch": 0.819502194083778, "total_flos": 1920105235185439968, "step": 61856 }, { "loss": 1.19970703125, "learning_rate": 5.457429294213703e-06, "epoch": 0.8199261476244318, "total_flos": 1921117451323017312, "step": 61888 }, { "loss": 1.1640625, "learning_rate": 5.444579780755177e-06, "epoch": 0.8203501011650856, "total_flos": 1922134960535635392, "step": 61920 }, { "loss": 1.150390625, "learning_rate": 5.43173026729665e-06, "epoch": 0.8207740547057394, "total_flos": 1923153612022806336, "step": 61952 }, { "loss": 1.187744140625, "learning_rate": 5.418880753838123e-06, "epoch": 0.8211980082463932, "total_flos": 1924187450935018176, "step": 61984 }, { "loss": 1.177001953125, "learning_rate": 5.4060312403795955e-06, "epoch": 0.8216219617870469, "total_flos": 1925194856648774304, "step": 62016 }, { "loss": 1.181884765625, "learning_rate": 5.39318172692107e-06, "epoch": 0.8220459153277008, "total_flos": 1926184034234806560, "step": 62048 }, { "loss": 1.1728515625, "learning_rate": 5.3803322134625425e-06, "epoch": 0.8224698688683546, "total_flos": 1927216682607343584, "step": 62080 }, { "loss": 1.17333984375, "learning_rate": 5.367482700004015e-06, "epoch": 0.8228938224090083, "total_flos": 1928203639997766048, "step": 62112 }, { "loss": 1.161376953125, "learning_rate": 5.354633186545489e-06, "epoch": 0.8233177759496622, "total_flos": 1929200282589326880, "step": 62144 }, { "loss": 1.185546875, "learning_rate": 5.341783673086962e-06, "epoch": 0.8237417294903159, "total_flos": 1930192903087391712, "step": 62176 }, { "loss": 1.1318359375, "learning_rate": 5.328934159628435e-06, "epoch": 0.8241656830309697, "total_flos": 1931195820144806304, "step": 62208 }, { "loss": 1.166015625, "learning_rate": 5.316084646169908e-06, "epoch": 0.8245896365716235, "total_flos": 1932251635775880288, "step": 62240 }, { "loss": 1.152587890625, "learning_rate": 5.303235132711381e-06, "epoch": 0.8250135901122773, "total_flos": 1933274132384433408, "step": 62272 }, { "loss": 1.168212890625, "learning_rate": 5.290385619252854e-06, "epoch": 0.8254375436529311, "total_flos": 1934329079243312256, "step": 62304 }, { "loss": 1.156494140625, "learning_rate": 5.277536105794328e-06, "epoch": 0.8258614971935849, "total_flos": 1935354053461458912, "step": 62336 }, { "loss": 1.190185546875, "learning_rate": 5.2646865923358005e-06, "epoch": 0.8262854507342386, "total_flos": 1936343536726596864, "step": 62368 }, { "loss": 1.150634765625, "learning_rate": 5.251837078877273e-06, "epoch": 0.8267094042748925, "total_flos": 1937402907888321312, "step": 62400 }, { "loss": 1.180419921875, "learning_rate": 5.2389875654187475e-06, "epoch": 0.8271333578155463, "total_flos": 1938425436673622400, "step": 62432 }, { "loss": 1.190673828125, "learning_rate": 5.22613805196022e-06, "epoch": 0.8275573113562, "total_flos": 1939466885386728672, "step": 62464 }, { "loss": 1.185302734375, "learning_rate": 5.213288538501694e-06, "epoch": 0.8279812648968539, "total_flos": 1940468885406826176, "step": 62496 }, { "loss": 1.18505859375, "learning_rate": 5.200439025043166e-06, "epoch": 0.8284052184375076, "total_flos": 1941462004644484512, "step": 62528 }, { "loss": 1.170166015625, "learning_rate": 5.18758951158464e-06, "epoch": 0.8288291719781614, "total_flos": 1942489022086127136, "step": 62560 }, { "loss": 1.17041015625, "learning_rate": 5.174739998126113e-06, "epoch": 0.8292531255188152, "total_flos": 1943520399477119424, "step": 62592 }, { "loss": 1.16162109375, "learning_rate": 5.161890484667586e-06, "epoch": 0.829677079059469, "total_flos": 1944554415361445088, "step": 62624 }, { "loss": 1.177978515625, "learning_rate": 5.1490409712090586e-06, "epoch": 0.8301010326001228, "total_flos": 1945559230846989792, "step": 62656 }, { "loss": 1.187255859375, "learning_rate": 5.136191457750533e-06, "epoch": 0.8305249861407766, "total_flos": 1946600985239201760, "step": 62688 }, { "loss": 1.17041015625, "learning_rate": 5.1233419442920055e-06, "epoch": 0.8309489396814304, "total_flos": 1947592817406941376, "step": 62720 }, { "loss": 1.176513671875, "learning_rate": 5.110492430833478e-06, "epoch": 0.8313728932220842, "total_flos": 1948609699172974080, "step": 62752 }, { "loss": 1.17724609375, "learning_rate": 5.097642917374952e-06, "epoch": 0.8317968467627379, "total_flos": 1949600019033559200, "step": 62784 }, { "loss": 1.154541015625, "learning_rate": 5.084793403916425e-06, "epoch": 0.8322208003033917, "total_flos": 1950586381154144256, "step": 62816 }, { "loss": 1.166748046875, "learning_rate": 5.071943890457898e-06, "epoch": 0.8326447538440456, "total_flos": 1951616632358957664, "step": 62848 }, { "loss": 1.152587890625, "learning_rate": 5.059094376999371e-06, "epoch": 0.8330687073846993, "total_flos": 1952609992922225760, "step": 62880 }, { "loss": 1.171630859375, "learning_rate": 5.046244863540844e-06, "epoch": 0.8334926609253531, "total_flos": 1953638120461673280, "step": 62912 }, { "loss": 1.18896484375, "learning_rate": 5.0333953500823174e-06, "epoch": 0.833916614466007, "total_flos": 1954640458337624448, "step": 62944 }, { "loss": 1.18798828125, "learning_rate": 5.020545836623791e-06, "epoch": 0.8343405680066607, "total_flos": 1955664161574226368, "step": 62976 }, { "loss": 1.1708984375, "learning_rate": 5.007696323165264e-06, "epoch": 0.8347645215473145, "total_flos": 1956701845607820384, "step": 63008 }, { "loss": 1.187744140625, "learning_rate": 4.994846809706736e-06, "epoch": 0.8351884750879683, "total_flos": 1957745401897918560, "step": 63040 }, { "loss": 1.187744140625, "learning_rate": 4.9819972962482106e-06, "epoch": 0.8356124286286221, "total_flos": 1958767238883138336, "step": 63072 }, { "loss": 1.177978515625, "learning_rate": 4.969147782789683e-06, "epoch": 0.8360363821692759, "total_flos": 1959804086321285184, "step": 63104 }, { "loss": 1.191650390625, "learning_rate": 4.956298269331156e-06, "epoch": 0.8364603357099296, "total_flos": 1960793601763171104, "step": 63136 }, { "loss": 1.156005859375, "learning_rate": 4.943448755872629e-06, "epoch": 0.8368842892505834, "total_flos": 1961854694380911840, "step": 63168 }, { "loss": 1.15869140625, "learning_rate": 4.930599242414103e-06, "epoch": 0.8373082427912373, "total_flos": 1962874504231009632, "step": 63200 }, { "loss": 1.19140625, "learning_rate": 4.9177497289555755e-06, "epoch": 0.837732196331891, "total_flos": 1963907184780294624, "step": 63232 }, { "loss": 1.1767578125, "learning_rate": 4.904900215497049e-06, "epoch": 0.8381561498725448, "total_flos": 1964906095832587200, "step": 63264 }, { "loss": 1.17041015625, "learning_rate": 4.892050702038522e-06, "epoch": 0.8385801034131987, "total_flos": 1965953915541791136, "step": 63296 }, { "loss": 1.1533203125, "learning_rate": 4.879201188579995e-06, "epoch": 0.8390040569538524, "total_flos": 1967002523581320288, "step": 63328 }, { "loss": 1.170166015625, "learning_rate": 4.866351675121469e-06, "epoch": 0.8394280104945062, "total_flos": 1968040255880036256, "step": 63360 }, { "loss": 1.183837890625, "learning_rate": 4.853502161662941e-06, "epoch": 0.8398519640351599, "total_flos": 1969061497595418624, "step": 63392 }, { "loss": 1.176513671875, "learning_rate": 4.840652648204414e-06, "epoch": 0.8402759175758138, "total_flos": 1970086729227549024, "step": 63424 }, { "loss": 1.170654296875, "learning_rate": 4.827803134745888e-06, "epoch": 0.8406998711164676, "total_flos": 1971116739106752672, "step": 63456 }, { "loss": 1.1865234375, "learning_rate": 4.814953621287361e-06, "epoch": 0.8411238246571213, "total_flos": 1972114298735630592, "step": 63488 }, { "loss": 1.17333984375, "learning_rate": 4.8021041078288335e-06, "epoch": 0.8415477781977752, "total_flos": 1973156101392964512, "step": 63520 }, { "loss": 1.15771484375, "learning_rate": 4.789254594370307e-06, "epoch": 0.841971731738429, "total_flos": 1974181236494851008, "step": 63552 }, { "loss": 1.17724609375, "learning_rate": 4.7764050809117805e-06, "epoch": 0.8423956852790827, "total_flos": 1975193710046412096, "step": 63584 }, { "loss": 1.18505859375, "learning_rate": 4.763555567453253e-06, "epoch": 0.8428196388197365, "total_flos": 1976251922845209696, "step": 63616 }, { "loss": 1.170166015625, "learning_rate": 4.750706053994727e-06, "epoch": 0.8432435923603904, "total_flos": 1977277588863437664, "step": 63648 }, { "loss": 1.180419921875, "learning_rate": 4.737856540536199e-06, "epoch": 0.8436675459010441, "total_flos": 1978275196757437536, "step": 63680 }, { "loss": 1.173095703125, "learning_rate": 4.725007027077673e-06, "epoch": 0.8440914994416979, "total_flos": 1979273512539892704, "step": 63712 }, { "loss": 1.171142578125, "learning_rate": 4.712157513619146e-06, "epoch": 0.8445154529823516, "total_flos": 1980311277015356640, "step": 63744 }, { "loss": 1.2060546875, "learning_rate": 4.699308000160619e-06, "epoch": 0.8449394065230055, "total_flos": 1981285524590331744, "step": 63776 }, { "loss": 1.1552734375, "learning_rate": 4.6864584867020915e-06, "epoch": 0.8453633600636593, "total_flos": 1982320135744494816, "step": 63808 }, { "loss": 1.17626953125, "learning_rate": 4.673608973243566e-06, "epoch": 0.845787313604313, "total_flos": 1983371012244755712, "step": 63840 }, { "loss": 1.2001953125, "learning_rate": 4.6607594597850385e-06, "epoch": 0.8462112671449669, "total_flos": 1984377758335178496, "step": 63872 }, { "loss": 1.169189453125, "learning_rate": 4.647909946326511e-06, "epoch": 0.8466352206856207, "total_flos": 1985400013618121856, "step": 63904 }, { "loss": 1.13037109375, "learning_rate": 4.635060432867985e-06, "epoch": 0.8470591742262744, "total_flos": 1986410717448544704, "step": 63936 }, { "loss": 1.179931640625, "learning_rate": 4.622210919409458e-06, "epoch": 0.8474831277669282, "total_flos": 1987439086313601984, "step": 63968 }, { "loss": 1.1748046875, "learning_rate": 4.609361405950931e-06, "epoch": 0.847907081307582, "total_flos": 1988482948282805856, "step": 64000 }, { "loss": 1.16064453125, "learning_rate": 4.596511892492404e-06, "epoch": 0.8483310348482358, "total_flos": 1989500135727944256, "step": 64032 }, { "loss": 1.201416015625, "learning_rate": 4.583662379033877e-06, "epoch": 0.8487549883888896, "total_flos": 1990479724643082048, "step": 64064 }, { "loss": 1.1484375, "learning_rate": 4.57081286557535e-06, "epoch": 0.8491789419295434, "total_flos": 1991465845438057344, "step": 64096 }, { "loss": 1.18310546875, "learning_rate": 4.557963352116824e-06, "epoch": 0.8496028954701972, "total_flos": 1992453784219292832, "step": 64128 }, { "loss": 1.155029296875, "learning_rate": 4.5451138386582966e-06, "epoch": 0.850026849010851, "total_flos": 1993458100965244032, "step": 64160 }, { "loss": 1.187744140625, "learning_rate": 4.53226432519977e-06, "epoch": 0.8504508025515047, "total_flos": 1994423628641519808, "step": 64192 }, { "loss": 1.162841796875, "learning_rate": 4.5194148117412435e-06, "epoch": 0.8508747560921586, "total_flos": 1995491188785602112, "step": 64224 }, { "loss": 1.1640625, "learning_rate": 4.506565298282716e-06, "epoch": 0.8512987096328124, "total_flos": 1996519589827407360, "step": 64256 }, { "loss": 1.192626953125, "learning_rate": 4.49371578482419e-06, "epoch": 0.8517226631734661, "total_flos": 1997486436750349824, "step": 64288 }, { "loss": 1.183349609375, "learning_rate": 4.480866271365662e-06, "epoch": 0.85214661671412, "total_flos": 1998516591424919328, "step": 64320 }, { "loss": 1.169677734375, "learning_rate": 4.468016757907136e-06, "epoch": 0.8525705702547737, "total_flos": 1999530368134773120, "step": 64352 }, { "loss": 1.140380859375, "learning_rate": 4.455167244448609e-06, "epoch": 0.8529945237954275, "total_flos": 2000575838941375392, "step": 64384 }, { "loss": 1.189697265625, "learning_rate": 4.442317730990082e-06, "epoch": 0.8534184773360813, "total_flos": 2001610096151310816, "step": 64416 }, { "loss": 1.151611328125, "learning_rate": 4.429468217531555e-06, "epoch": 0.8538424308767351, "total_flos": 2002613302799457120, "step": 64448 }, { "loss": 1.191650390625, "learning_rate": 4.416618704073029e-06, "epoch": 0.8542663844173889, "total_flos": 2003588917886220864, "step": 64480 }, { "loss": 1.140625, "learning_rate": 4.4037691906145016e-06, "epoch": 0.8546903379580427, "total_flos": 2004639601325993952, "step": 64512 }, { "loss": 1.187744140625, "learning_rate": 4.390919677155974e-06, "epoch": 0.8551142914986964, "total_flos": 2005660247771538912, "step": 64544 }, { "loss": 1.190185546875, "learning_rate": 4.378070163697448e-06, "epoch": 0.8555382450393503, "total_flos": 2006632983039359520, "step": 64576 }, { "loss": 1.190185546875, "learning_rate": 4.365220650238921e-06, "epoch": 0.855962198580004, "total_flos": 2007667191984172992, "step": 64608 }, { "loss": 1.166015625, "learning_rate": 4.352371136780394e-06, "epoch": 0.8563861521206578, "total_flos": 2008702044463945824, "step": 64640 }, { "loss": 1.186279296875, "learning_rate": 4.339521623321867e-06, "epoch": 0.8568101056613117, "total_flos": 2009709997182417408, "step": 64672 }, { "loss": 1.162109375, "learning_rate": 4.32667210986334e-06, "epoch": 0.8572340592019654, "total_flos": 2010747134211295968, "step": 64704 }, { "loss": 1.19384765625, "learning_rate": 4.3138225964048135e-06, "epoch": 0.8576580127426192, "total_flos": 2011743326328385248, "step": 64736 }, { "loss": 1.166259765625, "learning_rate": 4.300973082946287e-06, "epoch": 0.858081966283273, "total_flos": 2012752775265637344, "step": 64768 }, { "loss": 1.1689453125, "learning_rate": 4.28812356948776e-06, "epoch": 0.8585059198239268, "total_flos": 2013749337415328256, "step": 64800 }, { "loss": 1.154052734375, "learning_rate": 4.275274056029232e-06, "epoch": 0.8589298733645806, "total_flos": 2014780103448109152, "step": 64832 }, { "loss": 1.169677734375, "learning_rate": 4.262424542570707e-06, "epoch": 0.8593538269052344, "total_flos": 2015806123410564768, "step": 64864 }, { "loss": 1.1689453125, "learning_rate": 4.249575029112179e-06, "epoch": 0.8597777804458882, "total_flos": 2016806530681637856, "step": 64896 }, { "loss": 1.1796875, "learning_rate": 4.236725515653652e-06, "epoch": 0.860201733986542, "total_flos": 2017862024545232160, "step": 64928 }, { "loss": 1.130859375, "learning_rate": 4.223876002195125e-06, "epoch": 0.8606256875271957, "total_flos": 2018950242161509920, "step": 64960 }, { "loss": 1.175537109375, "learning_rate": 4.211026488736599e-06, "epoch": 0.8610496410678495, "total_flos": 2019935204593558368, "step": 64992 }, { "loss": 1.1806640625, "learning_rate": 4.1981769752780715e-06, "epoch": 0.8614735946085034, "total_flos": 2020954740941298432, "step": 65024 }, { "loss": 1.16748046875, "learning_rate": 4.185327461819545e-06, "epoch": 0.8618975481491571, "total_flos": 2021928843720907680, "step": 65056 }, { "loss": 1.17724609375, "learning_rate": 4.172477948361018e-06, "epoch": 0.8623215016898109, "total_flos": 2022914160097183776, "step": 65088 }, { "loss": 1.1806640625, "learning_rate": 4.159628434902491e-06, "epoch": 0.8627454552304648, "total_flos": 2023925636169557856, "step": 65120 }, { "loss": 1.16748046875, "learning_rate": 4.146778921443965e-06, "epoch": 0.8631694087711185, "total_flos": 2024889812422418976, "step": 65152 }, { "loss": 1.154296875, "learning_rate": 4.133929407985437e-06, "epoch": 0.8635933623117723, "total_flos": 2025936843801297696, "step": 65184 }, { "loss": 1.185791015625, "learning_rate": 4.12107989452691e-06, "epoch": 0.864017315852426, "total_flos": 2026950861836761248, "step": 65216 }, { "loss": 1.1474609375, "learning_rate": 4.108230381068384e-06, "epoch": 0.8644412693930799, "total_flos": 2027971106072956608, "step": 65248 }, { "loss": 1.172119140625, "learning_rate": 4.095380867609857e-06, "epoch": 0.8648652229337337, "total_flos": 2028998863579802496, "step": 65280 }, { "loss": 1.181884765625, "learning_rate": 4.0825313541513295e-06, "epoch": 0.8652891764743874, "total_flos": 2029981235783639520, "step": 65312 }, { "loss": 1.185302734375, "learning_rate": 4.069681840692803e-06, "epoch": 0.8657131300150412, "total_flos": 2030988866734631424, "step": 65344 }, { "loss": 1.164306640625, "learning_rate": 4.0568323272342765e-06, "epoch": 0.8661370835556951, "total_flos": 2031968487826517184, "step": 65376 }, { "loss": 1.168701171875, "learning_rate": 4.043982813775749e-06, "epoch": 0.8665610370963488, "total_flos": 2033007603725395776, "step": 65408 }, { "loss": 1.182373046875, "learning_rate": 4.031133300317223e-06, "epoch": 0.8669849906370026, "total_flos": 2034017680109233248, "step": 65440 }, { "loss": 1.1728515625, "learning_rate": 4.018283786858695e-06, "epoch": 0.8674089441776565, "total_flos": 2035016832487135584, "step": 65472 }, { "loss": 1.208984375, "learning_rate": 4.005434273400169e-06, "epoch": 0.8678328977183102, "total_flos": 2036004449500891392, "step": 65504 }, { "loss": 1.138427734375, "learning_rate": 3.992584759941642e-06, "epoch": 0.868256851258964, "total_flos": 2036990007202777248, "step": 65536 }, { "loss": 1.153564453125, "learning_rate": 3.979735246483115e-06, "epoch": 0.8686808047996177, "total_flos": 2037979780058646912, "step": 65568 }, { "loss": 1.172119140625, "learning_rate": 3.9668857330245876e-06, "epoch": 0.8691047583402716, "total_flos": 2038984112892972096, "step": 65600 }, { "loss": 1.144287109375, "learning_rate": 3.954036219566062e-06, "epoch": 0.8695287118809254, "total_flos": 2040046396050387648, "step": 65632 }, { "loss": 1.190185546875, "learning_rate": 3.9411867061075345e-06, "epoch": 0.8699526654215791, "total_flos": 2040994676989752576, "step": 65664 }, { "loss": 1.154541015625, "learning_rate": 3.928337192649007e-06, "epoch": 0.870376618962233, "total_flos": 2041991753967410976, "step": 65696 }, { "loss": 1.164306640625, "learning_rate": 3.915487679190481e-06, "epoch": 0.8708005725028868, "total_flos": 2043056595176289984, "step": 65728 }, { "loss": 1.185791015625, "learning_rate": 3.902638165731954e-06, "epoch": 0.8712245260435405, "total_flos": 2044094391828501888, "step": 65760 }, { "loss": 1.171630859375, "learning_rate": 3.889788652273428e-06, "epoch": 0.8716484795841943, "total_flos": 2045133250313396736, "step": 65792 }, { "loss": 1.16357421875, "learning_rate": 3.8769391388149e-06, "epoch": 0.8720724331248481, "total_flos": 2046195211703332608, "step": 65824 }, { "loss": 1.180419921875, "learning_rate": 3.864089625356373e-06, "epoch": 0.8724963866655019, "total_flos": 2047211707348389696, "step": 65856 }, { "loss": 1.183837890625, "learning_rate": 3.8512401118978464e-06, "epoch": 0.8729203402061557, "total_flos": 2048217246810763680, "step": 65888 }, { "loss": 1.1943359375, "learning_rate": 3.83839059843932e-06, "epoch": 0.8733442937468094, "total_flos": 2049216978370129440, "step": 65920 }, { "loss": 1.195556640625, "learning_rate": 3.825541084980793e-06, "epoch": 0.8737682472874633, "total_flos": 2050263575362910592, "step": 65952 }, { "loss": 1.17919921875, "learning_rate": 3.812691571522266e-06, "epoch": 0.8741922008281171, "total_flos": 2051280247980081504, "step": 65984 }, { "loss": 1.189453125, "learning_rate": 3.799842058063739e-06, "epoch": 0.8746161543687708, "total_flos": 2052294571694650752, "step": 66016 }, { "loss": 1.16748046875, "learning_rate": 3.7869925446052118e-06, "epoch": 0.8750401079094247, "total_flos": 2053306063855398816, "step": 66048 }, { "loss": 1.1728515625, "learning_rate": 3.7741430311466857e-06, "epoch": 0.8754640614500785, "total_flos": 2054394812388018048, "step": 66080 }, { "loss": 1.14697265625, "learning_rate": 3.7612935176881583e-06, "epoch": 0.8758880149907322, "total_flos": 2055448246939742400, "step": 66112 }, { "loss": 1.1396484375, "learning_rate": 3.7484440042296314e-06, "epoch": 0.876311968531386, "total_flos": 2056474041664962240, "step": 66144 }, { "loss": 1.15869140625, "learning_rate": 3.7355944907711045e-06, "epoch": 0.8767359220720398, "total_flos": 2057493272333596608, "step": 66176 }, { "loss": 1.166015625, "learning_rate": 3.722744977312578e-06, "epoch": 0.8771598756126936, "total_flos": 2058463127782474080, "step": 66208 }, { "loss": 1.16748046875, "learning_rate": 3.709895463854051e-06, "epoch": 0.8775838291533474, "total_flos": 2059501455351027456, "step": 66240 }, { "loss": 1.187744140625, "learning_rate": 3.697045950395524e-06, "epoch": 0.8780077826940011, "total_flos": 2060517291372751200, "step": 66272 }, { "loss": 1.171142578125, "learning_rate": 3.684196436936997e-06, "epoch": 0.878431736234655, "total_flos": 2061599491937158944, "step": 66304 }, { "loss": 1.175537109375, "learning_rate": 3.6713469234784707e-06, "epoch": 0.8788556897753088, "total_flos": 2062632429900427680, "step": 66336 }, { "loss": 1.2109375, "learning_rate": 3.6584974100199433e-06, "epoch": 0.8792796433159625, "total_flos": 2063673830348412000, "step": 66368 }, { "loss": 1.130126953125, "learning_rate": 3.645647896561417e-06, "epoch": 0.8797035968566164, "total_flos": 2064723821988103776, "step": 66400 }, { "loss": 1.166015625, "learning_rate": 3.63279838310289e-06, "epoch": 0.8801275503972701, "total_flos": 2065745884210559328, "step": 66432 }, { "loss": 1.177490234375, "learning_rate": 3.619948869644363e-06, "epoch": 0.8805515039379239, "total_flos": 2066746098421144608, "step": 66464 }, { "loss": 1.146728515625, "learning_rate": 3.607099356185836e-06, "epoch": 0.8809754574785778, "total_flos": 2067832337167422336, "step": 66496 }, { "loss": 1.154052734375, "learning_rate": 3.5942498427273095e-06, "epoch": 0.8813994110192315, "total_flos": 2068888008003130464, "step": 66528 }, { "loss": 1.18115234375, "learning_rate": 3.581400329268782e-06, "epoch": 0.8818233645598853, "total_flos": 2069898068298593952, "step": 66560 }, { "loss": 1.14892578125, "learning_rate": 3.5685508158102556e-06, "epoch": 0.8822473181005391, "total_flos": 2070917347232350272, "step": 66592 }, { "loss": 1.16650390625, "learning_rate": 3.5557013023517287e-06, "epoch": 0.8826712716411929, "total_flos": 2071952537567976768, "step": 66624 }, { "loss": 1.1796875, "learning_rate": 3.5428517888932018e-06, "epoch": 0.8830952251818467, "total_flos": 2072925063686935584, "step": 66656 }, { "loss": 1.152587890625, "learning_rate": 3.530002275434675e-06, "epoch": 0.8835191787225005, "total_flos": 2073991481556465024, "step": 66688 }, { "loss": 1.17626953125, "learning_rate": 3.5171527619761483e-06, "epoch": 0.8839431322631542, "total_flos": 2075077768567864704, "step": 66720 }, { "loss": 1.1533203125, "learning_rate": 3.504303248517621e-06, "epoch": 0.8843670858038081, "total_flos": 2076090676505523360, "step": 66752 }, { "loss": 1.17626953125, "learning_rate": 3.4914537350590945e-06, "epoch": 0.8847910393444618, "total_flos": 2077136903465702880, "step": 66784 }, { "loss": 1.162353515625, "learning_rate": 3.4786042216005675e-06, "epoch": 0.8852149928851156, "total_flos": 2078134752685312512, "step": 66816 }, { "loss": 1.177734375, "learning_rate": 3.4657547081420406e-06, "epoch": 0.8856389464257695, "total_flos": 2079164263824922656, "step": 66848 }, { "loss": 1.17578125, "learning_rate": 3.4529051946835137e-06, "epoch": 0.8860628999664232, "total_flos": 2080188996717459552, "step": 66880 }, { "loss": 1.158935546875, "learning_rate": 3.440055681224987e-06, "epoch": 0.886486853507077, "total_flos": 2081229641011866624, "step": 66912 }, { "loss": 1.1728515625, "learning_rate": 3.42720616776646e-06, "epoch": 0.8869108070477308, "total_flos": 2082254132578793760, "step": 66944 }, { "loss": 1.1611328125, "learning_rate": 3.4143566543079333e-06, "epoch": 0.8873347605883846, "total_flos": 2083265737358159712, "step": 66976 }, { "loss": 1.1669921875, "learning_rate": 3.4015071408494064e-06, "epoch": 0.8877587141290384, "total_flos": 2084294395813948704, "step": 67008 }, { "loss": 1.156982421875, "learning_rate": 3.38865762739088e-06, "epoch": 0.8881826676696921, "total_flos": 2085324437869900320, "step": 67040 }, { "loss": 1.195068359375, "learning_rate": 3.3758081139323525e-06, "epoch": 0.888606621210346, "total_flos": 2086318329349509888, "step": 67072 }, { "loss": 1.190185546875, "learning_rate": 3.362958600473826e-06, "epoch": 0.8890305747509998, "total_flos": 2087294845385216736, "step": 67104 }, { "loss": 1.1748046875, "learning_rate": 3.350109087015299e-06, "epoch": 0.8894545282916535, "total_flos": 2088314767853932416, "step": 67136 }, { "loss": 1.16357421875, "learning_rate": 3.337259573556772e-06, "epoch": 0.8898784818323073, "total_flos": 2089350038631428832, "step": 67168 }, { "loss": 1.181640625, "learning_rate": 3.324410060098245e-06, "epoch": 0.8903024353729612, "total_flos": 2090337703910306592, "step": 67200 }, { "loss": 1.173828125, "learning_rate": 3.3115605466397187e-06, "epoch": 0.8907263889136149, "total_flos": 2091335987516013792, "step": 67232 }, { "loss": 1.171142578125, "learning_rate": 3.2987110331811913e-06, "epoch": 0.8911503424542687, "total_flos": 2092337746210501536, "step": 67264 }, { "loss": 1.1357421875, "learning_rate": 3.285861519722665e-06, "epoch": 0.8915742959949224, "total_flos": 2093385437212713600, "step": 67296 }, { "loss": 1.1728515625, "learning_rate": 3.273012006264138e-06, "epoch": 0.8919982495355763, "total_flos": 2094395561861673024, "step": 67328 }, { "loss": 1.163818359375, "learning_rate": 3.260162492805611e-06, "epoch": 0.8924222030762301, "total_flos": 2095409885576242272, "step": 67360 }, { "loss": 1.155517578125, "learning_rate": 3.247312979347084e-06, "epoch": 0.8928461566168838, "total_flos": 2096417371731868320, "step": 67392 }, { "loss": 1.181884765625, "learning_rate": 3.2344634658885575e-06, "epoch": 0.8932701101575377, "total_flos": 2097405037010746080, "step": 67424 }, { "loss": 1.14990234375, "learning_rate": 3.22161395243003e-06, "epoch": 0.8936940636981915, "total_flos": 2098438795481088000, "step": 67456 }, { "loss": 1.16552734375, "learning_rate": 3.2087644389715036e-06, "epoch": 0.8941180172388452, "total_flos": 2099446362078583968, "step": 67488 }, { "loss": 1.164306640625, "learning_rate": 3.1959149255129767e-06, "epoch": 0.894541970779499, "total_flos": 2100495919332178176, "step": 67520 }, { "loss": 1.148681640625, "learning_rate": 3.1830654120544498e-06, "epoch": 0.8949659243201529, "total_flos": 2101497549319674048, "step": 67552 }, { "loss": 1.201171875, "learning_rate": 3.170215898595923e-06, "epoch": 0.8953898778608066, "total_flos": 2102502220009852896, "step": 67584 }, { "loss": 1.203125, "learning_rate": 3.1573663851373963e-06, "epoch": 0.8958138314014604, "total_flos": 2103436021412632224, "step": 67616 }, { "loss": 1.145751953125, "learning_rate": 3.144516871678869e-06, "epoch": 0.8962377849421141, "total_flos": 2104471083041266848, "step": 67648 }, { "loss": 1.182861328125, "learning_rate": 3.1316673582203425e-06, "epoch": 0.896661738482768, "total_flos": 2105456399417542944, "step": 67680 }, { "loss": 1.19921875, "learning_rate": 3.1188178447618155e-06, "epoch": 0.8970856920234218, "total_flos": 2106444531259266240, "step": 67712 }, { "loss": 1.185546875, "learning_rate": 3.1059683313032886e-06, "epoch": 0.8975096455640755, "total_flos": 2107465821239770560, "step": 67744 }, { "loss": 1.18603515625, "learning_rate": 3.0931188178447617e-06, "epoch": 0.8979335991047294, "total_flos": 2108485711531738272, "step": 67776 }, { "loss": 1.188232421875, "learning_rate": 3.080269304386235e-06, "epoch": 0.8983575526453832, "total_flos": 2109494806524762720, "step": 67808 }, { "loss": 1.160400390625, "learning_rate": 3.0674197909277082e-06, "epoch": 0.8987815061860369, "total_flos": 2110511511318681600, "step": 67840 }, { "loss": 1.194091796875, "learning_rate": 3.0545702774691813e-06, "epoch": 0.8992054597266907, "total_flos": 2111502522979348032, "step": 67872 }, { "loss": 1.17822265625, "learning_rate": 3.0417207640106544e-06, "epoch": 0.8996294132673445, "total_flos": 2112494162086599840, "step": 67904 }, { "loss": 1.168212890625, "learning_rate": 3.028871250552128e-06, "epoch": 0.9000533668079983, "total_flos": 2113512089596941504, "step": 67936 }, { "loss": 1.1787109375, "learning_rate": 3.0160217370936005e-06, "epoch": 0.9004773203486521, "total_flos": 2114544625350860640, "step": 67968 }, { "loss": 1.17626953125, "learning_rate": 3.003172223635074e-06, "epoch": 0.9009012738893059, "total_flos": 2115534977388193728, "step": 68000 }, { "loss": 1.17529296875, "learning_rate": 2.990322710176547e-06, "epoch": 0.9013252274299597, "total_flos": 2116547434851380832, "step": 68032 }, { "loss": 1.149658203125, "learning_rate": 2.97747319671802e-06, "epoch": 0.9017491809706135, "total_flos": 2117543176493998560, "step": 68064 }, { "loss": 1.1884765625, "learning_rate": 2.964623683259493e-06, "epoch": 0.9021731345112672, "total_flos": 2118611203200926400, "step": 68096 }, { "loss": 1.139404296875, "learning_rate": 2.9517741698009667e-06, "epoch": 0.9025970880519211, "total_flos": 2119681031805740448, "step": 68128 }, { "loss": 1.168701171875, "learning_rate": 2.9389246563424393e-06, "epoch": 0.9030210415925749, "total_flos": 2120674826755106112, "step": 68160 }, { "loss": 1.189453125, "learning_rate": 2.926075142883913e-06, "epoch": 0.9034449951332286, "total_flos": 2121670085746504320, "step": 68192 }, { "loss": 1.1591796875, "learning_rate": 2.913225629425386e-06, "epoch": 0.9038689486738825, "total_flos": 2122687675400992320, "step": 68224 }, { "loss": 1.1953125, "learning_rate": 2.900376115966859e-06, "epoch": 0.9042929022145362, "total_flos": 2123722704852878976, "step": 68256 }, { "loss": 1.18505859375, "learning_rate": 2.887526602508332e-06, "epoch": 0.90471685575519, "total_flos": 2124733199534440032, "step": 68288 }, { "loss": 1.16650390625, "learning_rate": 2.8746770890498055e-06, "epoch": 0.9051408092958438, "total_flos": 2125718419380472224, "step": 68320 }, { "loss": 1.1904296875, "learning_rate": 2.861827575591278e-06, "epoch": 0.9055647628364976, "total_flos": 2126724666731301504, "step": 68352 }, { "loss": 1.192138671875, "learning_rate": 2.8489780621327517e-06, "epoch": 0.9059887163771514, "total_flos": 2127717190699122432, "step": 68384 }, { "loss": 1.17333984375, "learning_rate": 2.8361285486742247e-06, "epoch": 0.9064126699178052, "total_flos": 2128730935232228256, "step": 68416 }, { "loss": 1.180908203125, "learning_rate": 2.823279035215698e-06, "epoch": 0.906836623458459, "total_flos": 2129732774368585920, "step": 68448 }, { "loss": 1.167724609375, "learning_rate": 2.810429521757171e-06, "epoch": 0.9072605769991128, "total_flos": 2130729256076406912, "step": 68480 }, { "loss": 1.2001953125, "learning_rate": 2.7975800082986444e-06, "epoch": 0.9076845305397665, "total_flos": 2131752412308293376, "step": 68512 }, { "loss": 1.1708984375, "learning_rate": 2.784730494840117e-06, "epoch": 0.9081084840804203, "total_flos": 2132785028504082432, "step": 68544 }, { "loss": 1.17724609375, "learning_rate": 2.7718809813815905e-06, "epoch": 0.9085324376210742, "total_flos": 2133884958456620544, "step": 68576 }, { "loss": 1.16064453125, "learning_rate": 2.7590314679230636e-06, "epoch": 0.9089563911617279, "total_flos": 2134882775499482208, "step": 68608 }, { "loss": 1.148681640625, "learning_rate": 2.7461819544645366e-06, "epoch": 0.9093803447023817, "total_flos": 2135890454715596064, "step": 68640 }, { "loss": 1.155517578125, "learning_rate": 2.7333324410060097e-06, "epoch": 0.9098042982430355, "total_flos": 2136925661139596544, "step": 68672 }, { "loss": 1.177490234375, "learning_rate": 2.720482927547483e-06, "epoch": 0.9102282517836893, "total_flos": 2137975331011808640, "step": 68704 }, { "loss": 1.164794921875, "learning_rate": 2.7076334140889562e-06, "epoch": 0.9106522053243431, "total_flos": 2138985552191011968, "step": 68736 }, { "loss": 1.162109375, "learning_rate": 2.6947839006304293e-06, "epoch": 0.9110761588649969, "total_flos": 2139982066075580928, "step": 68768 }, { "loss": 1.171142578125, "learning_rate": 2.6819343871719024e-06, "epoch": 0.9115001124056507, "total_flos": 2140973817801450624, "step": 68800 }, { "loss": 1.170166015625, "learning_rate": 2.669084873713376e-06, "epoch": 0.9119240659463045, "total_flos": 2142006981001955136, "step": 68832 }, { "loss": 1.171142578125, "learning_rate": 2.6562353602548485e-06, "epoch": 0.9123480194869582, "total_flos": 2142999698030263872, "step": 68864 }, { "loss": 1.178466796875, "learning_rate": 2.643385846796322e-06, "epoch": 0.912771973027612, "total_flos": 2144043157790118144, "step": 68896 }, { "loss": 1.168701171875, "learning_rate": 2.630536333337795e-06, "epoch": 0.9131959265682659, "total_flos": 2145073022873955936, "step": 68928 }, { "loss": 1.175537109375, "learning_rate": 2.617686819879268e-06, "epoch": 0.9136198801089196, "total_flos": 2146031246428442976, "step": 68960 }, { "loss": 1.141357421875, "learning_rate": 2.6048373064207412e-06, "epoch": 0.9140438336495734, "total_flos": 2147035724058134016, "step": 68992 }, { "loss": 1.166748046875, "learning_rate": 2.5919877929622147e-06, "epoch": 0.9144677871902273, "total_flos": 2148110572235631072, "step": 69024 }, { "loss": 1.154541015625, "learning_rate": 2.5791382795036874e-06, "epoch": 0.914891740730881, "total_flos": 2149118267540118912, "step": 69056 }, { "loss": 1.151123046875, "learning_rate": 2.566288766045161e-06, "epoch": 0.9153156942715348, "total_flos": 2150138576129810208, "step": 69088 }, { "loss": 1.1357421875, "learning_rate": 2.553439252586634e-06, "epoch": 0.9157396478121885, "total_flos": 2151194343495762240, "step": 69120 }, { "loss": 1.199462890625, "learning_rate": 2.540589739128107e-06, "epoch": 0.9161636013528424, "total_flos": 2152161737423420160, "step": 69152 }, { "loss": 1.1748046875, "learning_rate": 2.52774022566958e-06, "epoch": 0.9165875548934962, "total_flos": 2153239787187340032, "step": 69184 }, { "loss": 1.17822265625, "learning_rate": 2.5148907122110535e-06, "epoch": 0.9170115084341499, "total_flos": 2154260900195730528, "step": 69216 }, { "loss": 1.159912109375, "learning_rate": 2.502041198752526e-06, "epoch": 0.9174354619748037, "total_flos": 2155248131088510720, "step": 69248 }, { "loss": 1.175537109375, "learning_rate": 2.4891916852939997e-06, "epoch": 0.9178594155154576, "total_flos": 2156264948501047488, "step": 69280 }, { "loss": 1.175537109375, "learning_rate": 2.4763421718354727e-06, "epoch": 0.9182833690561113, "total_flos": 2157240933620412864, "step": 69312 }, { "loss": 1.17431640625, "learning_rate": 2.463492658376946e-06, "epoch": 0.9187073225967651, "total_flos": 2158251653539209696, "step": 69344 }, { "loss": 1.152099609375, "learning_rate": 2.450643144918419e-06, "epoch": 0.919131276137419, "total_flos": 2159301194704429920, "step": 69376 }, { "loss": 1.152587890625, "learning_rate": 2.4377936314598924e-06, "epoch": 0.9195552296780727, "total_flos": 2160316773312169920, "step": 69408 }, { "loss": 1.176025390625, "learning_rate": 2.424944118001365e-06, "epoch": 0.9199791832187265, "total_flos": 2161311034824381120, "step": 69440 }, { "loss": 1.167236328125, "learning_rate": 2.4120946045428385e-06, "epoch": 0.9204031367593802, "total_flos": 2162355685123910208, "step": 69472 }, { "loss": 1.195068359375, "learning_rate": 2.3992450910843116e-06, "epoch": 0.9208270903000341, "total_flos": 2163362012916609408, "step": 69504 }, { "loss": 1.160400390625, "learning_rate": 2.386395577625785e-06, "epoch": 0.9212510438406879, "total_flos": 2164421432343455808, "step": 69536 }, { "loss": 1.16748046875, "learning_rate": 2.3735460641672577e-06, "epoch": 0.9216749973813416, "total_flos": 2165459261172415680, "step": 69568 }, { "loss": 1.167724609375, "learning_rate": 2.360696550708731e-06, "epoch": 0.9220989509219955, "total_flos": 2166465411993001056, "step": 69600 }, { "loss": 1.141357421875, "learning_rate": 2.3478470372502043e-06, "epoch": 0.9225229044626493, "total_flos": 2167547194259685216, "step": 69632 }, { "loss": 1.15283203125, "learning_rate": 2.3349975237916773e-06, "epoch": 0.922946858003303, "total_flos": 2168599695685718496, "step": 69664 }, { "loss": 1.115234375, "learning_rate": 2.3221480103331504e-06, "epoch": 0.9233708115439568, "total_flos": 2169675139133052960, "step": 69696 }, { "loss": 1.175537109375, "learning_rate": 2.309298496874624e-06, "epoch": 0.9237947650846106, "total_flos": 2170695302927378400, "step": 69728 }, { "loss": 1.130615234375, "learning_rate": 2.2964489834160965e-06, "epoch": 0.9242187186252644, "total_flos": 2171770070663005536, "step": 69760 }, { "loss": 1.187255859375, "learning_rate": 2.28359946995757e-06, "epoch": 0.9246426721659182, "total_flos": 2172759795253753248, "step": 69792 }, { "loss": 1.154296875, "learning_rate": 2.270749956499043e-06, "epoch": 0.9250666257065719, "total_flos": 2173794470761412256, "step": 69824 }, { "loss": 1.15576171875, "learning_rate": 2.257900443040516e-06, "epoch": 0.9254905792472258, "total_flos": 2174759853642322176, "step": 69856 }, { "loss": 1.154541015625, "learning_rate": 2.2450509295819892e-06, "epoch": 0.9259145327878796, "total_flos": 2175726990155996352, "step": 69888 }, { "loss": 1.183837890625, "learning_rate": 2.2322014161234627e-06, "epoch": 0.9263384863285333, "total_flos": 2176747620513167328, "step": 69920 }, { "loss": 1.181884765625, "learning_rate": 2.2193519026649354e-06, "epoch": 0.9267624398691872, "total_flos": 2177727161163183168, "step": 69952 }, { "loss": 1.17041015625, "learning_rate": 2.206502389206409e-06, "epoch": 0.927186393409841, "total_flos": 2178730641313687200, "step": 69984 }, { "loss": 1.154541015625, "learning_rate": 2.193652875747882e-06, "epoch": 0.9276103469504947, "total_flos": 2179784268925899360, "step": 70016 }, { "loss": 1.172119140625, "learning_rate": 2.180803362289355e-06, "epoch": 0.9280343004911485, "total_flos": 2180775457558679616, "step": 70048 }, { "loss": 1.1689453125, "learning_rate": 2.167953848830828e-06, "epoch": 0.9284582540318023, "total_flos": 2181763685930646816, "step": 70080 }, { "loss": 1.11181640625, "learning_rate": 2.1551043353723015e-06, "epoch": 0.9288822075724561, "total_flos": 2182845709522940736, "step": 70112 }, { "loss": 1.171630859375, "learning_rate": 2.142254821913774e-06, "epoch": 0.9293061611131099, "total_flos": 2183872469550599616, "step": 70144 }, { "loss": 1.1796875, "learning_rate": 2.1294053084552477e-06, "epoch": 0.9297301146537637, "total_flos": 2184836774510452608, "step": 70176 }, { "loss": 1.190673828125, "learning_rate": 2.1165557949967208e-06, "epoch": 0.9301540681944175, "total_flos": 2185809863722500864, "step": 70208 }, { "loss": 1.171630859375, "learning_rate": 2.103706281538194e-06, "epoch": 0.9305780217350713, "total_flos": 2186815660598858592, "step": 70240 }, { "loss": 1.165771484375, "learning_rate": 2.090856768079667e-06, "epoch": 0.931001975275725, "total_flos": 2187841181821720704, "step": 70272 }, { "loss": 1.17529296875, "learning_rate": 2.0780072546211404e-06, "epoch": 0.9314259288163789, "total_flos": 2188847252200436160, "step": 70304 }, { "loss": 1.180908203125, "learning_rate": 2.0651577411626134e-06, "epoch": 0.9318498823570326, "total_flos": 2189896005035331168, "step": 70336 }, { "loss": 1.14404296875, "learning_rate": 2.0523082277040865e-06, "epoch": 0.9322738358976864, "total_flos": 2190937341129819552, "step": 70368 }, { "loss": 1.156494140625, "learning_rate": 2.0394587142455596e-06, "epoch": 0.9326977894383403, "total_flos": 2191951761374632704, "step": 70400 }, { "loss": 1.160888671875, "learning_rate": 2.026609200787033e-06, "epoch": 0.933121742978994, "total_flos": 2192977266509120832, "step": 70432 }, { "loss": 1.175048828125, "learning_rate": 2.0137596873285057e-06, "epoch": 0.9335456965196478, "total_flos": 2193998218633771488, "step": 70464 }, { "loss": 1.173583984375, "learning_rate": 2.000910173869979e-06, "epoch": 0.9339696500603016, "total_flos": 2195014489041592800, "step": 70496 }, { "loss": 1.145751953125, "learning_rate": 1.9880606604114523e-06, "epoch": 0.9343936036009554, "total_flos": 2196069580695837504, "step": 70528 }, { "loss": 1.155517578125, "learning_rate": 1.9752111469529253e-06, "epoch": 0.9348175571416092, "total_flos": 2197083405670813248, "step": 70560 }, { "loss": 1.15380859375, "learning_rate": 1.9623616334943984e-06, "epoch": 0.935241510682263, "total_flos": 2198118242062212096, "step": 70592 }, { "loss": 1.158203125, "learning_rate": 1.949512120035872e-06, "epoch": 0.9356654642229167, "total_flos": 2199195600026050656, "step": 70624 }, { "loss": 1.1630859375, "learning_rate": 1.9366626065773445e-06, "epoch": 0.9360894177635706, "total_flos": 2200165889861025696, "step": 70656 }, { "loss": 1.18310546875, "learning_rate": 1.923813093118818e-06, "epoch": 0.9365133713042243, "total_flos": 2201159797429009248, "step": 70688 }, { "loss": 1.189453125, "learning_rate": 1.910963579660291e-06, "epoch": 0.9369373248448781, "total_flos": 2202131454775772928, "step": 70720 }, { "loss": 1.16357421875, "learning_rate": 1.898114066201764e-06, "epoch": 0.937361278385532, "total_flos": 2203171680772456416, "step": 70752 }, { "loss": 1.132080078125, "learning_rate": 1.8852645527432372e-06, "epoch": 0.9377852319261857, "total_flos": 2204159490846700032, "step": 70784 }, { "loss": 1.16650390625, "learning_rate": 1.8724150392847103e-06, "epoch": 0.9382091854668395, "total_flos": 2205181295655171840, "step": 70816 }, { "loss": 1.196533203125, "learning_rate": 1.8595655258261836e-06, "epoch": 0.9386331390074933, "total_flos": 2206179707967870912, "step": 70848 }, { "loss": 1.1494140625, "learning_rate": 1.8467160123676567e-06, "epoch": 0.9390570925481471, "total_flos": 2207215541838456768, "step": 70880 }, { "loss": 1.143310546875, "learning_rate": 1.83386649890913e-06, "epoch": 0.9394810460888009, "total_flos": 2208198573665627136, "step": 70912 }, { "loss": 1.166748046875, "learning_rate": 1.821016985450603e-06, "epoch": 0.9399049996294546, "total_flos": 2209247197793530272, "step": 70944 }, { "loss": 1.166259765625, "learning_rate": 1.808167471992076e-06, "epoch": 0.9403289531701085, "total_flos": 2210271383681351712, "step": 70976 }, { "loss": 1.17822265625, "learning_rate": 1.7953179585335494e-06, "epoch": 0.9407529067107623, "total_flos": 2211308311561368480, "step": 71008 }, { "loss": 1.165283203125, "learning_rate": 1.7824684450750224e-06, "epoch": 0.941176860251416, "total_flos": 2212306176869352096, "step": 71040 }, { "loss": 1.189453125, "learning_rate": 1.7696189316164955e-06, "epoch": 0.9416008137920698, "total_flos": 2213269725675627840, "step": 71072 }, { "loss": 1.147705078125, "learning_rate": 1.7567694181579688e-06, "epoch": 0.9420247673327237, "total_flos": 2214285400813611744, "step": 71104 }, { "loss": 1.18359375, "learning_rate": 1.7439199046994418e-06, "epoch": 0.9424487208733774, "total_flos": 2215275881557936704, "step": 71136 }, { "loss": 1.18115234375, "learning_rate": 1.731070391240915e-06, "epoch": 0.9428726744140312, "total_flos": 2216288210314131936, "step": 71168 }, { "loss": 1.176025390625, "learning_rate": 1.7182208777823882e-06, "epoch": 0.943296627954685, "total_flos": 2217329369436506496, "step": 71200 }, { "loss": 1.1884765625, "learning_rate": 1.7053713643238612e-06, "epoch": 0.9437205814953388, "total_flos": 2218341006392620416, "step": 71232 }, { "loss": 1.14794921875, "learning_rate": 1.6925218508653343e-06, "epoch": 0.9441445350359926, "total_flos": 2219418943537922400, "step": 71264 }, { "loss": 1.16162109375, "learning_rate": 1.6796723374068076e-06, "epoch": 0.9445684885766463, "total_flos": 2220462419386150656, "step": 71296 }, { "loss": 1.180908203125, "learning_rate": 1.6668228239482807e-06, "epoch": 0.9449924421173002, "total_flos": 2221448974567223520, "step": 71328 }, { "loss": 1.181396484375, "learning_rate": 1.653973310489754e-06, "epoch": 0.945416395657954, "total_flos": 2222440565409353376, "step": 71360 }, { "loss": 1.17822265625, "learning_rate": 1.641123797031227e-06, "epoch": 0.9458403491986077, "total_flos": 2223472312832947296, "step": 71392 }, { "loss": 1.16552734375, "learning_rate": 1.6282742835727e-06, "epoch": 0.9462643027392615, "total_flos": 2224465576865971488, "step": 71424 }, { "loss": 1.166259765625, "learning_rate": 1.6154247701141734e-06, "epoch": 0.9466882562799154, "total_flos": 2225428176458182176, "step": 71456 }, { "loss": 1.152099609375, "learning_rate": 1.6025752566556464e-06, "epoch": 0.9471122098205691, "total_flos": 2226450174327141792, "step": 71488 }, { "loss": 1.17041015625, "learning_rate": 1.5897257431971195e-06, "epoch": 0.9475361633612229, "total_flos": 2227476950443174656, "step": 71520 }, { "loss": 1.175048828125, "learning_rate": 1.5768762297385928e-06, "epoch": 0.9479601169018766, "total_flos": 2228524625357012736, "step": 71552 }, { "loss": 1.19287109375, "learning_rate": 1.5640267162800658e-06, "epoch": 0.9483840704425305, "total_flos": 2229547990737760992, "step": 71584 }, { "loss": 1.1845703125, "learning_rate": 1.551177202821539e-06, "epoch": 0.9488080239831843, "total_flos": 2230548510627451968, "step": 71616 }, { "loss": 1.18115234375, "learning_rate": 1.5383276893630122e-06, "epoch": 0.949231977523838, "total_flos": 2231593418340964800, "step": 71648 }, { "loss": 1.15625, "learning_rate": 1.5254781759044853e-06, "epoch": 0.9496559310644919, "total_flos": 2232610412725615392, "step": 71680 }, { "eval_loss": 1.0298347629368305, "epoch": 0.9496559310644919, "total_flos": 2232610412725615392, "step": 71680 }, { "loss": 1.150146484375, "learning_rate": 1.5126286624459583e-06, "epoch": 0.9500798846051457, "total_flos": 2233624961677420416, "step": 71712 }, { "loss": 1.138427734375, "learning_rate": 1.4997791489874316e-06, "epoch": 0.9505038381457994, "total_flos": 2234701161278332128, "step": 71744 }, { "loss": 1.159423828125, "learning_rate": 1.4869296355289047e-06, "epoch": 0.9509277916864532, "total_flos": 2235727937394364992, "step": 71776 }, { "loss": 1.153076171875, "learning_rate": 1.474080122070378e-06, "epoch": 0.9513517452271071, "total_flos": 2236748358602674176, "step": 71808 }, { "loss": 1.21044921875, "learning_rate": 1.461230608611851e-06, "epoch": 0.9517756987677608, "total_flos": 2237746577854885440, "step": 71840 }, { "loss": 1.15478515625, "learning_rate": 1.448381095153324e-06, "epoch": 0.9521996523084146, "total_flos": 2238796279903845504, "step": 71872 }, { "loss": 1.1923828125, "learning_rate": 1.4355315816947974e-06, "epoch": 0.9526236058490684, "total_flos": 2239791941104593312, "step": 71904 }, { "loss": 1.142822265625, "learning_rate": 1.4226820682362704e-06, "epoch": 0.9530475593897222, "total_flos": 2240812957582739904, "step": 71936 }, { "loss": 1.1796875, "learning_rate": 1.4098325547777435e-06, "epoch": 0.953471512930376, "total_flos": 2241834923274951552, "step": 71968 }, { "loss": 1.15625, "learning_rate": 1.3969830413192168e-06, "epoch": 0.9538954664710297, "total_flos": 2242841186714154816, "step": 72000 }, { "loss": 1.150390625, "learning_rate": 1.3841335278606898e-06, "epoch": 0.9543194200116836, "total_flos": 2243877744561569952, "step": 72032 }, { "loss": 1.167724609375, "learning_rate": 1.371284014402163e-06, "epoch": 0.9547433735523374, "total_flos": 2244899790695651520, "step": 72064 }, { "loss": 1.1767578125, "learning_rate": 1.3584345009436362e-06, "epoch": 0.9551673270929911, "total_flos": 2245953900959083200, "step": 72096 }, { "loss": 1.1513671875, "learning_rate": 1.3455849874851093e-06, "epoch": 0.955591280633645, "total_flos": 2246931527092594944, "step": 72128 }, { "loss": 1.12744140625, "learning_rate": 1.3327354740265825e-06, "epoch": 0.9560152341742987, "total_flos": 2247973571075538624, "step": 72160 }, { "loss": 1.159912109375, "learning_rate": 1.3198859605680556e-06, "epoch": 0.9564391877149525, "total_flos": 2249021342519620608, "step": 72192 }, { "loss": 1.177978515625, "learning_rate": 1.3070364471095287e-06, "epoch": 0.9568631412556063, "total_flos": 2250009474361343904, "step": 72224 }, { "loss": 1.1513671875, "learning_rate": 1.294186933651002e-06, "epoch": 0.9572870947962601, "total_flos": 2251047641046157440, "step": 72256 }, { "loss": 1.157958984375, "learning_rate": 1.281337420192475e-06, "epoch": 0.9577110483369139, "total_flos": 2252078310548694432, "step": 72288 }, { "loss": 1.169677734375, "learning_rate": 1.268487906733948e-06, "epoch": 0.9581350018775677, "total_flos": 2253063964780824192, "step": 72320 }, { "loss": 1.19287109375, "learning_rate": 1.2556383932754214e-06, "epoch": 0.9585589554182214, "total_flos": 2254019228074498176, "step": 72352 }, { "loss": 1.16943359375, "learning_rate": 1.2427888798168944e-06, "epoch": 0.9589829089588753, "total_flos": 2254998865254757920, "step": 72384 }, { "loss": 1.1259765625, "learning_rate": 1.2299393663583675e-06, "epoch": 0.959406862499529, "total_flos": 2256026719291847712, "step": 72416 }, { "loss": 1.145263671875, "learning_rate": 1.2170898528998408e-06, "epoch": 0.9598308160401828, "total_flos": 2257019420231782464, "step": 72448 }, { "loss": 1.19091796875, "learning_rate": 1.2042403394413139e-06, "epoch": 0.9602547695808367, "total_flos": 2258033019969522432, "step": 72480 }, { "loss": 1.14208984375, "learning_rate": 1.191390825982787e-06, "epoch": 0.9606787231214904, "total_flos": 2259039476469213504, "step": 72512 }, { "loss": 1.1279296875, "learning_rate": 1.1785413125242602e-06, "epoch": 0.9611026766621442, "total_flos": 2260084030238498688, "step": 72544 }, { "loss": 1.16943359375, "learning_rate": 1.1656917990657333e-06, "epoch": 0.961526630202798, "total_flos": 2261061865520872224, "step": 72576 }, { "loss": 1.160888671875, "learning_rate": 1.1528422856072065e-06, "epoch": 0.9619505837434518, "total_flos": 2262063865540969728, "step": 72608 }, { "loss": 1.1708984375, "learning_rate": 1.1399927721486796e-06, "epoch": 0.9623745372841056, "total_flos": 2263038048762448896, "step": 72640 }, { "loss": 1.18017578125, "learning_rate": 1.1271432586901527e-06, "epoch": 0.9627984908247594, "total_flos": 2264003302936366944, "step": 72672 }, { "loss": 1.150390625, "learning_rate": 1.114293745231626e-06, "epoch": 0.9632224443654132, "total_flos": 2265027585354432288, "step": 72704 }, { "loss": 1.19091796875, "learning_rate": 1.101444231773099e-06, "epoch": 0.963646397906067, "total_flos": 2266017776508025536, "step": 72736 }, { "loss": 1.160888671875, "learning_rate": 1.088594718314572e-06, "epoch": 0.9640703514467207, "total_flos": 2267079721809587424, "step": 72768 }, { "loss": 1.194091796875, "learning_rate": 1.0757452048560454e-06, "epoch": 0.9644943049873745, "total_flos": 2268083925936920736, "step": 72800 }, { "loss": 1.154541015625, "learning_rate": 1.0628956913975184e-06, "epoch": 0.9649182585280284, "total_flos": 2269100083726124160, "step": 72832 }, { "loss": 1.148681640625, "learning_rate": 1.0500461779389915e-06, "epoch": 0.9653422120686821, "total_flos": 2270179179234352992, "step": 72864 }, { "loss": 1.15576171875, "learning_rate": 1.0371966644804648e-06, "epoch": 0.9657661656093359, "total_flos": 2271230860153313088, "step": 72896 }, { "loss": 1.16259765625, "learning_rate": 1.0243471510219379e-06, "epoch": 0.9661901191499898, "total_flos": 2272236834001784640, "step": 72928 }, { "loss": 1.148193359375, "learning_rate": 1.0114976375634111e-06, "epoch": 0.9666140726906435, "total_flos": 2273274324974890848, "step": 72960 }, { "loss": 1.158935546875, "learning_rate": 9.986481241048842e-07, "epoch": 0.9670380262312973, "total_flos": 2274257276360191296, "step": 72992 }, { "loss": 1.1767578125, "learning_rate": 9.857986106463573e-07, "epoch": 0.967461979771951, "total_flos": 2275275155605411008, "step": 73024 }, { "loss": 1.125244140625, "learning_rate": 9.729490971878306e-07, "epoch": 0.9678859333126049, "total_flos": 2276354846383477248, "step": 73056 }, { "loss": 1.19970703125, "learning_rate": 9.600995837293036e-07, "epoch": 0.9683098868532587, "total_flos": 2277367014255932640, "step": 73088 }, { "loss": 1.1845703125, "learning_rate": 9.472500702707767e-07, "epoch": 0.9687338403939124, "total_flos": 2278361404475135712, "step": 73120 }, { "loss": 1.17919921875, "learning_rate": 9.344005568122499e-07, "epoch": 0.9691577939345662, "total_flos": 2279408902416859968, "step": 73152 }, { "loss": 1.17431640625, "learning_rate": 9.21551043353723e-07, "epoch": 0.9695817474752201, "total_flos": 2280422164298746272, "step": 73184 }, { "loss": 1.19189453125, "learning_rate": 9.087015298951962e-07, "epoch": 0.9700057010158738, "total_flos": 2281393049403558720, "step": 73216 }, { "loss": 1.1328125, "learning_rate": 8.958520164366693e-07, "epoch": 0.9704296545565276, "total_flos": 2282486431387885344, "step": 73248 }, { "loss": 1.1630859375, "learning_rate": 8.830025029781425e-07, "epoch": 0.9708536080971815, "total_flos": 2283476622541478592, "step": 73280 }, { "loss": 1.174072265625, "learning_rate": 8.701529895196156e-07, "epoch": 0.9712775616378352, "total_flos": 2284507324220763552, "step": 73312 }, { "loss": 1.186279296875, "learning_rate": 8.573034760610888e-07, "epoch": 0.971701515178489, "total_flos": 2285508761147771616, "step": 73344 }, { "loss": 1.177490234375, "learning_rate": 8.444539626025619e-07, "epoch": 0.9721254687191427, "total_flos": 2286527670048926304, "step": 73376 }, { "loss": 1.16455078125, "learning_rate": 8.31604449144035e-07, "epoch": 0.9725494222597966, "total_flos": 2287544149605609408, "step": 73408 }, { "loss": 1.19287109375, "learning_rate": 8.187549356855082e-07, "epoch": 0.9729733758004504, "total_flos": 2288582782853268480, "step": 73440 }, { "loss": 1.187255859375, "learning_rate": 8.059054222269813e-07, "epoch": 0.9733973293411041, "total_flos": 2289570448132146240, "step": 73472 }, { "loss": 1.150634765625, "learning_rate": 7.930559087684545e-07, "epoch": 0.973821282881758, "total_flos": 2290626022437610464, "step": 73504 }, { "loss": 1.177001953125, "learning_rate": 7.802063953099276e-07, "epoch": 0.9742452364224118, "total_flos": 2291636227528439808, "step": 73536 }, { "loss": 1.202880859375, "learning_rate": 7.673568818514008e-07, "epoch": 0.9746691899630655, "total_flos": 2292670533003497184, "step": 73568 }, { "loss": 1.15869140625, "learning_rate": 7.545073683928739e-07, "epoch": 0.9750931435037193, "total_flos": 2293681606866521664, "step": 73600 }, { "loss": 1.142578125, "learning_rate": 7.41657854934347e-07, "epoch": 0.9755170970443731, "total_flos": 2294670639657188064, "step": 73632 }, { "loss": 1.168701171875, "learning_rate": 7.288083414758202e-07, "epoch": 0.9759410505850269, "total_flos": 2295693345414602976, "step": 73664 }, { "loss": 1.16162109375, "learning_rate": 7.159588280172934e-07, "epoch": 0.9763650041256807, "total_flos": 2296716871679091072, "step": 73696 }, { "loss": 1.180419921875, "learning_rate": 7.031093145587665e-07, "epoch": 0.9767889576663344, "total_flos": 2297746608055936992, "step": 73728 }, { "loss": 1.170166015625, "learning_rate": 6.902598011002396e-07, "epoch": 0.9772129112069883, "total_flos": 2298766723585140480, "step": 73760 }, { "loss": 1.15673828125, "learning_rate": 6.774102876417128e-07, "epoch": 0.9776368647476421, "total_flos": 2299776252964262496, "step": 73792 }, { "loss": 1.15966796875, "learning_rate": 6.645607741831859e-07, "epoch": 0.9780608182882958, "total_flos": 2300854817556149856, "step": 73824 }, { "loss": 1.154296875, "learning_rate": 6.51711260724659e-07, "epoch": 0.9784847718289497, "total_flos": 2301862641567629568, "step": 73856 }, { "loss": 1.15869140625, "learning_rate": 6.388617472661322e-07, "epoch": 0.9789087253696035, "total_flos": 2302899247680166656, "step": 73888 }, { "loss": 1.15185546875, "learning_rate": 6.260122338076054e-07, "epoch": 0.9793326789102572, "total_flos": 2303974465890265344, "step": 73920 }, { "loss": 1.180419921875, "learning_rate": 6.131627203490785e-07, "epoch": 0.979756632450911, "total_flos": 2304955357963695840, "step": 73952 }, { "loss": 1.175537109375, "learning_rate": 6.003132068905516e-07, "epoch": 0.9801805859915648, "total_flos": 2305932630152979936, "step": 73984 }, { "loss": 1.189697265625, "learning_rate": 5.874636934320248e-07, "epoch": 0.9806045395322186, "total_flos": 2306923786609012224, "step": 74016 }, { "loss": 1.183349609375, "learning_rate": 5.746141799734979e-07, "epoch": 0.9810284930728724, "total_flos": 2307952380711305280, "step": 74048 }, { "loss": 1.156982421875, "learning_rate": 5.61764666514971e-07, "epoch": 0.9814524466135262, "total_flos": 2308966929663110304, "step": 74080 }, { "loss": 1.182373046875, "learning_rate": 5.489151530564442e-07, "epoch": 0.98187640015418, "total_flos": 2309959228393695456, "step": 74112 }, { "loss": 1.189208984375, "learning_rate": 5.360656395979174e-07, "epoch": 0.9823003536948338, "total_flos": 2310961566269646624, "step": 74144 }, { "loss": 1.177734375, "learning_rate": 5.232161261393905e-07, "epoch": 0.9827243072354875, "total_flos": 2312004543378281376, "step": 74176 }, { "loss": 1.1552734375, "learning_rate": 5.103666126808636e-07, "epoch": 0.9831482607761414, "total_flos": 2313042677886346944, "step": 74208 }, { "loss": 1.205078125, "learning_rate": 4.975170992223368e-07, "epoch": 0.9835722143167951, "total_flos": 2314053800014493376, "step": 74240 }, { "loss": 1.148681640625, "learning_rate": 4.846675857638099e-07, "epoch": 0.9839961678574489, "total_flos": 2315116807148738208, "step": 74272 }, { "loss": 1.17578125, "learning_rate": 4.71818072305283e-07, "epoch": 0.9844201213981028, "total_flos": 2316089671123550688, "step": 74304 }, { "loss": 1.162109375, "learning_rate": 4.589685588467562e-07, "epoch": 0.9848440749387565, "total_flos": 2317105153201046784, "step": 74336 }, { "loss": 1.139404296875, "learning_rate": 4.461190453882293e-07, "epoch": 0.9852680284794103, "total_flos": 2318135323963990272, "step": 74368 }, { "loss": 1.18212890625, "learning_rate": 4.3326953192970247e-07, "epoch": 0.9856919820200641, "total_flos": 2319131853936933216, "step": 74400 }, { "loss": 1.136962890625, "learning_rate": 4.204200184711756e-07, "epoch": 0.9861159355607179, "total_flos": 2320157021215567680, "step": 74432 }, { "loss": 1.161376953125, "learning_rate": 4.0757050501264876e-07, "epoch": 0.9865398891013717, "total_flos": 2321179244321763072, "step": 74464 }, { "loss": 1.172119140625, "learning_rate": 3.947209915541219e-07, "epoch": 0.9869638426420255, "total_flos": 2322183207123486624, "step": 74496 }, { "loss": 1.157470703125, "learning_rate": 3.81871478095595e-07, "epoch": 0.9873877961826792, "total_flos": 2323188489171876864, "step": 74528 }, { "loss": 1.193115234375, "learning_rate": 3.690219646370682e-07, "epoch": 0.9878117497233331, "total_flos": 2324145843954168768, "step": 74560 }, { "loss": 1.19970703125, "learning_rate": 3.561724511785413e-07, "epoch": 0.9882357032639868, "total_flos": 2325149951551258176, "step": 74592 }, { "loss": 1.158447265625, "learning_rate": 3.433229377200145e-07, "epoch": 0.9886596568046406, "total_flos": 2326148396040705216, "step": 74624 }, { "loss": 1.173095703125, "learning_rate": 3.304734242614876e-07, "epoch": 0.9890836103452945, "total_flos": 2327158488512916672, "step": 74656 }, { "loss": 1.17138671875, "learning_rate": 3.1762391080296077e-07, "epoch": 0.9895075638859482, "total_flos": 2328159957616672704, "step": 74688 }, { "loss": 1.15380859375, "learning_rate": 3.047743973444339e-07, "epoch": 0.989931517426602, "total_flos": 2329174860512705376, "step": 74720 }, { "loss": 1.163330078125, "learning_rate": 2.91924883885907e-07, "epoch": 0.9903554709672558, "total_flos": 2330139438974916096, "step": 74752 }, { "loss": 1.19775390625, "learning_rate": 2.790753704273802e-07, "epoch": 0.9907794245079096, "total_flos": 2331092787752085984, "step": 74784 }, { "loss": 1.149169921875, "learning_rate": 2.662258569688533e-07, "epoch": 0.9912033780485634, "total_flos": 2332150421369420160, "step": 74816 }, { "loss": 1.166748046875, "learning_rate": 2.533763435103265e-07, "epoch": 0.9916273315892171, "total_flos": 2333180045127648192, "step": 74848 }, { "loss": 1.16650390625, "learning_rate": 2.405268300517996e-07, "epoch": 0.992051285129871, "total_flos": 2334232433935063584, "step": 74880 }, { "loss": 1.17919921875, "learning_rate": 2.2767731659327274e-07, "epoch": 0.9924752386705248, "total_flos": 2335188308586948960, "step": 74912 }, { "loss": 1.171630859375, "learning_rate": 2.148278031347459e-07, "epoch": 0.9928991922111785, "total_flos": 2336243030208592032, "step": 74944 }, { "loss": 1.185302734375, "learning_rate": 2.0197828967621904e-07, "epoch": 0.9933231457518323, "total_flos": 2337244998051941568, "step": 74976 }, { "loss": 1.18896484375, "learning_rate": 1.8912877621769219e-07, "epoch": 0.9937470992924862, "total_flos": 2338221417557404512, "step": 75008 }, { "loss": 1.16015625, "learning_rate": 1.7627926275916533e-07, "epoch": 0.9941710528331399, "total_flos": 2339228678475794784, "step": 75040 }, { "loss": 1.166748046875, "learning_rate": 1.6342974930063848e-07, "epoch": 0.9945950063737937, "total_flos": 2340249534070201536, "step": 75072 }, { "loss": 1.161376953125, "learning_rate": 1.5058023584211163e-07, "epoch": 0.9950189599144476, "total_flos": 2341278771707453952, "step": 75104 }, { "loss": 1.161865234375, "learning_rate": 1.3773072238358475e-07, "epoch": 0.9954429134551013, "total_flos": 2342291583114868704, "step": 75136 }, { "loss": 1.171875, "learning_rate": 1.248812089250579e-07, "epoch": 0.9958668669957551, "total_flos": 2343302560447649280, "step": 75168 }, { "loss": 1.12158203125, "learning_rate": 1.1203169546653103e-07, "epoch": 0.9962908205364088, "total_flos": 2344421732095472256, "step": 75200 }, { "loss": 1.186279296875, "learning_rate": 9.918218200800418e-08, "epoch": 0.9967147740770627, "total_flos": 2345452996867846656, "step": 75232 }, { "loss": 1.15576171875, "learning_rate": 8.633266854947732e-08, "epoch": 0.9971387276177165, "total_flos": 2346456879227700288, "step": 75264 }, { "loss": 1.1494140625, "learning_rate": 7.348315509095047e-08, "epoch": 0.9975626811583702, "total_flos": 2347497973996578912, "step": 75296 }, { "loss": 1.161865234375, "learning_rate": 6.06336416324236e-08, "epoch": 0.997986634699024, "total_flos": 2348528997443343552, "step": 75328 }, { "loss": 1.167724609375, "learning_rate": 4.778412817389675e-08, "epoch": 0.9984105882396779, "total_flos": 2349551381433278784, "step": 75360 }, { "loss": 1.17822265625, "learning_rate": 3.493461471536989e-08, "epoch": 0.9988345417803316, "total_flos": 2350548780178416864, "step": 75392 }, { "loss": 1.16552734375, "learning_rate": 2.2085101256843033e-08, "epoch": 0.9992584953209854, "total_flos": 2351528851744774176, "step": 75424 }, { "loss": 1.173095703125, "learning_rate": 9.235587798316178e-09, "epoch": 0.9996824488616392, "total_flos": 2352517611033082848, "step": 75456 } ]