{ "best_metric": 0.14823544025421143, "best_model_checkpoint": "./mistral7b/13-02-24-Weni-ZeroShot-3.3.0-Mistral-7b-Multilanguage-3.1.0_zeroshot-2_max_steps-4968_batch_128_2024-02-13_03/checkpoint-4780", "epoch": 147.07692307692307, "eval_steps": 20, "global_step": 4780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.62, "learning_rate": 2.82258064516129e-06, "loss": 1.6698, "step": 20 }, { "epoch": 0.62, "eval_loss": 1.6584945917129517, "eval_runtime": 26.1472, "eval_samples_per_second": 17.669, "eval_steps_per_second": 0.574, "step": 20 }, { "epoch": 1.23, "learning_rate": 1.0483870967741936e-05, "loss": 1.6069, "step": 40 }, { "epoch": 1.23, "eval_loss": 1.5037014484405518, "eval_runtime": 26.055, "eval_samples_per_second": 17.732, "eval_steps_per_second": 0.576, "step": 40 }, { "epoch": 1.85, "learning_rate": 1.8548387096774193e-05, "loss": 1.3817, "step": 60 }, { "epoch": 1.85, "eval_loss": 1.246311902999878, "eval_runtime": 26.0429, "eval_samples_per_second": 17.74, "eval_steps_per_second": 0.576, "step": 60 }, { "epoch": 2.46, "learning_rate": 2.661290322580645e-05, "loss": 1.1419, "step": 80 }, { "epoch": 2.46, "eval_loss": 1.0472239255905151, "eval_runtime": 26.0923, "eval_samples_per_second": 17.706, "eval_steps_per_second": 0.575, "step": 80 }, { "epoch": 3.08, "learning_rate": 3.467741935483872e-05, "loss": 0.9906, "step": 100 }, { "epoch": 3.08, "eval_loss": 0.9301682710647583, "eval_runtime": 26.1937, "eval_samples_per_second": 17.638, "eval_steps_per_second": 0.573, "step": 100 }, { "epoch": 3.69, "learning_rate": 4.2741935483870973e-05, "loss": 0.8634, "step": 120 }, { "epoch": 3.69, "eval_loss": 0.7793559432029724, "eval_runtime": 26.0879, "eval_samples_per_second": 17.709, "eval_steps_per_second": 0.575, "step": 120 }, { "epoch": 4.31, "learning_rate": 5.080645161290323e-05, "loss": 0.7382, "step": 140 }, { "epoch": 4.31, "eval_loss": 0.7010539174079895, "eval_runtime": 26.032, "eval_samples_per_second": 17.747, "eval_steps_per_second": 0.576, "step": 140 }, { "epoch": 4.92, "learning_rate": 5.887096774193549e-05, "loss": 0.6869, "step": 160 }, { "epoch": 4.92, "eval_loss": 0.6714752316474915, "eval_runtime": 44.7747, "eval_samples_per_second": 10.318, "eval_steps_per_second": 0.335, "step": 160 }, { "epoch": 5.54, "learning_rate": 6.693548387096774e-05, "loss": 0.6623, "step": 180 }, { "epoch": 5.54, "eval_loss": 0.6569082140922546, "eval_runtime": 44.9201, "eval_samples_per_second": 10.285, "eval_steps_per_second": 0.334, "step": 180 }, { "epoch": 6.15, "learning_rate": 7.500000000000001e-05, "loss": 0.6508, "step": 200 }, { "epoch": 6.15, "eval_loss": 0.6456889510154724, "eval_runtime": 44.7964, "eval_samples_per_second": 10.313, "eval_steps_per_second": 0.335, "step": 200 }, { "epoch": 6.77, "learning_rate": 8.306451612903227e-05, "loss": 0.6394, "step": 220 }, { "epoch": 6.77, "eval_loss": 0.6361492276191711, "eval_runtime": 26.7207, "eval_samples_per_second": 17.29, "eval_steps_per_second": 0.561, "step": 220 }, { "epoch": 7.38, "learning_rate": 9.112903225806452e-05, "loss": 0.6289, "step": 240 }, { "epoch": 7.38, "eval_loss": 0.6280443072319031, "eval_runtime": 26.6969, "eval_samples_per_second": 17.305, "eval_steps_per_second": 0.562, "step": 240 }, { "epoch": 8.0, "learning_rate": 9.919354838709678e-05, "loss": 0.6239, "step": 260 }, { "epoch": 8.0, "eval_loss": 0.6212936043739319, "eval_runtime": 26.7173, "eval_samples_per_second": 17.292, "eval_steps_per_second": 0.561, "step": 260 }, { "epoch": 8.62, "learning_rate": 0.00010725806451612903, "loss": 0.6171, "step": 280 }, { "epoch": 8.62, "eval_loss": 0.614993155002594, "eval_runtime": 26.7242, "eval_samples_per_second": 17.288, "eval_steps_per_second": 0.561, "step": 280 }, { "epoch": 9.23, "learning_rate": 0.00011532258064516131, "loss": 0.6096, "step": 300 }, { "epoch": 9.23, "eval_loss": 0.608863353729248, "eval_runtime": 26.7229, "eval_samples_per_second": 17.289, "eval_steps_per_second": 0.561, "step": 300 }, { "epoch": 9.85, "learning_rate": 0.00012338709677419356, "loss": 0.6048, "step": 320 }, { "epoch": 9.85, "eval_loss": 0.6036637425422668, "eval_runtime": 26.725, "eval_samples_per_second": 17.287, "eval_steps_per_second": 0.561, "step": 320 }, { "epoch": 10.46, "learning_rate": 0.0001314516129032258, "loss": 0.5986, "step": 340 }, { "epoch": 10.46, "eval_loss": 0.5977433323860168, "eval_runtime": 26.7485, "eval_samples_per_second": 17.272, "eval_steps_per_second": 0.561, "step": 340 }, { "epoch": 11.08, "learning_rate": 0.0001395161290322581, "loss": 0.5914, "step": 360 }, { "epoch": 11.08, "eval_loss": 0.591983437538147, "eval_runtime": 44.868, "eval_samples_per_second": 10.297, "eval_steps_per_second": 0.334, "step": 360 }, { "epoch": 11.69, "learning_rate": 0.00014758064516129032, "loss": 0.5871, "step": 380 }, { "epoch": 11.69, "eval_loss": 0.5865333676338196, "eval_runtime": 44.9887, "eval_samples_per_second": 10.269, "eval_steps_per_second": 0.333, "step": 380 }, { "epoch": 12.31, "learning_rate": 0.0001556451612903226, "loss": 0.5808, "step": 400 }, { "epoch": 12.31, "eval_loss": 0.5812229514122009, "eval_runtime": 26.7165, "eval_samples_per_second": 17.293, "eval_steps_per_second": 0.561, "step": 400 }, { "epoch": 12.92, "learning_rate": 0.00016370967741935485, "loss": 0.5746, "step": 420 }, { "epoch": 12.92, "eval_loss": 0.5760770440101624, "eval_runtime": 26.7316, "eval_samples_per_second": 17.283, "eval_steps_per_second": 0.561, "step": 420 }, { "epoch": 13.54, "learning_rate": 0.00017177419354838711, "loss": 0.5684, "step": 440 }, { "epoch": 13.54, "eval_loss": 0.5711672306060791, "eval_runtime": 26.708, "eval_samples_per_second": 17.298, "eval_steps_per_second": 0.562, "step": 440 }, { "epoch": 14.15, "learning_rate": 0.00017983870967741935, "loss": 0.5641, "step": 460 }, { "epoch": 14.15, "eval_loss": 0.5648314356803894, "eval_runtime": 26.7361, "eval_samples_per_second": 17.28, "eval_steps_per_second": 0.561, "step": 460 }, { "epoch": 14.77, "learning_rate": 0.00018790322580645164, "loss": 0.5573, "step": 480 }, { "epoch": 14.77, "eval_loss": 0.5593515634536743, "eval_runtime": 26.7412, "eval_samples_per_second": 17.277, "eval_steps_per_second": 0.561, "step": 480 }, { "epoch": 15.38, "learning_rate": 0.00019596774193548388, "loss": 0.5517, "step": 500 }, { "epoch": 15.38, "eval_loss": 0.5539582967758179, "eval_runtime": 26.732, "eval_samples_per_second": 17.283, "eval_steps_per_second": 0.561, "step": 500 }, { "epoch": 16.0, "learning_rate": 0.00019999753245902063, "loss": 0.5447, "step": 520 }, { "epoch": 16.0, "eval_loss": 0.54853355884552, "eval_runtime": 26.7332, "eval_samples_per_second": 17.282, "eval_steps_per_second": 0.561, "step": 520 }, { "epoch": 16.62, "learning_rate": 0.00019997779286183058, "loss": 0.5372, "step": 540 }, { "epoch": 16.62, "eval_loss": 0.5420479774475098, "eval_runtime": 26.733, "eval_samples_per_second": 17.282, "eval_steps_per_second": 0.561, "step": 540 }, { "epoch": 17.23, "learning_rate": 0.00019993831756406357, "loss": 0.5314, "step": 560 }, { "epoch": 17.23, "eval_loss": 0.5360643267631531, "eval_runtime": 26.736, "eval_samples_per_second": 17.28, "eval_steps_per_second": 0.561, "step": 560 }, { "epoch": 17.85, "learning_rate": 0.0001998791143581767, "loss": 0.5248, "step": 580 }, { "epoch": 17.85, "eval_loss": 0.5307183265686035, "eval_runtime": 26.7116, "eval_samples_per_second": 17.296, "eval_steps_per_second": 0.562, "step": 580 }, { "epoch": 18.46, "learning_rate": 0.00019980019493093267, "loss": 0.5195, "step": 600 }, { "epoch": 18.46, "eval_loss": 0.5241357684135437, "eval_runtime": 26.7372, "eval_samples_per_second": 17.279, "eval_steps_per_second": 0.561, "step": 600 }, { "epoch": 19.08, "learning_rate": 0.00019970157486109296, "loss": 0.5136, "step": 620 }, { "epoch": 19.08, "eval_loss": 0.5182597637176514, "eval_runtime": 26.7038, "eval_samples_per_second": 17.301, "eval_steps_per_second": 0.562, "step": 620 }, { "epoch": 19.69, "learning_rate": 0.00019958327361634248, "loss": 0.5036, "step": 640 }, { "epoch": 19.69, "eval_loss": 0.5129547715187073, "eval_runtime": 26.7294, "eval_samples_per_second": 17.284, "eval_steps_per_second": 0.561, "step": 640 }, { "epoch": 20.31, "learning_rate": 0.00019944531454944663, "loss": 0.4996, "step": 660 }, { "epoch": 20.31, "eval_loss": 0.5069959163665771, "eval_runtime": 26.7503, "eval_samples_per_second": 17.271, "eval_steps_per_second": 0.561, "step": 660 }, { "epoch": 20.92, "learning_rate": 0.0001992877248936415, "loss": 0.4941, "step": 680 }, { "epoch": 20.92, "eval_loss": 0.5006260871887207, "eval_runtime": 26.7128, "eval_samples_per_second": 17.295, "eval_steps_per_second": 0.562, "step": 680 }, { "epoch": 21.54, "learning_rate": 0.000199110535757258, "loss": 0.4838, "step": 700 }, { "epoch": 21.54, "eval_loss": 0.4946294128894806, "eval_runtime": 26.7236, "eval_samples_per_second": 17.288, "eval_steps_per_second": 0.561, "step": 700 }, { "epoch": 22.15, "learning_rate": 0.00019891378211758096, "loss": 0.4795, "step": 720 }, { "epoch": 22.15, "eval_loss": 0.4879631996154785, "eval_runtime": 26.733, "eval_samples_per_second": 17.282, "eval_steps_per_second": 0.561, "step": 720 }, { "epoch": 22.77, "learning_rate": 0.0001986975028139447, "loss": 0.4722, "step": 740 }, { "epoch": 22.77, "eval_loss": 0.48206356167793274, "eval_runtime": 26.7335, "eval_samples_per_second": 17.282, "eval_steps_per_second": 0.561, "step": 740 }, { "epoch": 23.38, "learning_rate": 0.00019846174054006607, "loss": 0.464, "step": 760 }, { "epoch": 23.38, "eval_loss": 0.4757327735424042, "eval_runtime": 26.722, "eval_samples_per_second": 17.289, "eval_steps_per_second": 0.561, "step": 760 }, { "epoch": 24.0, "learning_rate": 0.00019820654183561658, "loss": 0.4605, "step": 780 }, { "epoch": 24.0, "eval_loss": 0.47157037258148193, "eval_runtime": 26.6987, "eval_samples_per_second": 17.304, "eval_steps_per_second": 0.562, "step": 780 }, { "epoch": 24.62, "learning_rate": 0.00019793195707703567, "loss": 0.4524, "step": 800 }, { "epoch": 24.62, "eval_loss": 0.46418875455856323, "eval_runtime": 26.7165, "eval_samples_per_second": 17.293, "eval_steps_per_second": 0.561, "step": 800 }, { "epoch": 25.23, "learning_rate": 0.00019763804046758602, "loss": 0.4461, "step": 820 }, { "epoch": 25.23, "eval_loss": 0.45834338665008545, "eval_runtime": 26.7468, "eval_samples_per_second": 17.273, "eval_steps_per_second": 0.561, "step": 820 }, { "epoch": 25.85, "learning_rate": 0.00019732485002665415, "loss": 0.4393, "step": 840 }, { "epoch": 25.85, "eval_loss": 0.4538223147392273, "eval_runtime": 26.746, "eval_samples_per_second": 17.274, "eval_steps_per_second": 0.561, "step": 840 }, { "epoch": 26.46, "learning_rate": 0.00019699244757829702, "loss": 0.4337, "step": 860 }, { "epoch": 26.46, "eval_loss": 0.4459408223628998, "eval_runtime": 26.705, "eval_samples_per_second": 17.3, "eval_steps_per_second": 0.562, "step": 860 }, { "epoch": 27.08, "learning_rate": 0.0001966408987390381, "loss": 0.4269, "step": 880 }, { "epoch": 27.08, "eval_loss": 0.43985316157341003, "eval_runtime": 26.7756, "eval_samples_per_second": 17.255, "eval_steps_per_second": 0.56, "step": 880 }, { "epoch": 27.69, "learning_rate": 0.00019627027290491458, "loss": 0.4191, "step": 900 }, { "epoch": 27.69, "eval_loss": 0.43426012992858887, "eval_runtime": 26.7408, "eval_samples_per_second": 17.277, "eval_steps_per_second": 0.561, "step": 900 }, { "epoch": 28.31, "learning_rate": 0.00019588064323777853, "loss": 0.4138, "step": 920 }, { "epoch": 28.31, "eval_loss": 0.4298732876777649, "eval_runtime": 26.7628, "eval_samples_per_second": 17.263, "eval_steps_per_second": 0.56, "step": 920 }, { "epoch": 28.92, "learning_rate": 0.00019549296276462325, "loss": 0.408, "step": 940 }, { "epoch": 28.92, "eval_loss": 0.42369845509529114, "eval_runtime": 26.7274, "eval_samples_per_second": 17.286, "eval_steps_per_second": 0.561, "step": 940 }, { "epoch": 29.54, "learning_rate": 0.00019506650024792317, "loss": 0.4001, "step": 960 }, { "epoch": 29.54, "eval_loss": 0.4199902415275574, "eval_runtime": 26.7231, "eval_samples_per_second": 17.288, "eval_steps_per_second": 0.561, "step": 960 }, { "epoch": 30.15, "learning_rate": 0.0001946212715239476, "loss": 0.3978, "step": 980 }, { "epoch": 30.15, "eval_loss": 0.41251733899116516, "eval_runtime": 26.7147, "eval_samples_per_second": 17.294, "eval_steps_per_second": 0.561, "step": 980 }, { "epoch": 30.77, "learning_rate": 0.00019415736448122193, "loss": 0.3891, "step": 1000 }, { "epoch": 30.77, "eval_loss": 0.40809690952301025, "eval_runtime": 26.7253, "eval_samples_per_second": 17.287, "eval_steps_per_second": 0.561, "step": 1000 }, { "epoch": 31.38, "learning_rate": 0.0001936748706953874, "loss": 0.3861, "step": 1020 }, { "epoch": 31.38, "eval_loss": 0.40387141704559326, "eval_runtime": 26.7175, "eval_samples_per_second": 17.292, "eval_steps_per_second": 0.561, "step": 1020 }, { "epoch": 32.0, "learning_rate": 0.00019317388541112396, "loss": 0.3806, "step": 1040 }, { "epoch": 32.0, "eval_loss": 0.3994995355606079, "eval_runtime": 26.6886, "eval_samples_per_second": 17.311, "eval_steps_per_second": 0.562, "step": 1040 }, { "epoch": 32.62, "learning_rate": 0.000192654507523349, "loss": 0.3744, "step": 1060 }, { "epoch": 32.62, "eval_loss": 0.3944377303123474, "eval_runtime": 43.1554, "eval_samples_per_second": 10.705, "eval_steps_per_second": 0.348, "step": 1060 }, { "epoch": 33.23, "learning_rate": 0.00019211683955769538, "loss": 0.3704, "step": 1080 }, { "epoch": 33.23, "eval_loss": 0.3890739977359772, "eval_runtime": 44.8442, "eval_samples_per_second": 10.302, "eval_steps_per_second": 0.334, "step": 1080 }, { "epoch": 33.85, "learning_rate": 0.00019156098765027262, "loss": 0.3642, "step": 1100 }, { "epoch": 33.85, "eval_loss": 0.3840695321559906, "eval_runtime": 44.6934, "eval_samples_per_second": 10.337, "eval_steps_per_second": 0.336, "step": 1100 }, { "epoch": 34.46, "learning_rate": 0.00019098706152671576, "loss": 0.3578, "step": 1120 }, { "epoch": 34.46, "eval_loss": 0.37998247146606445, "eval_runtime": 44.349, "eval_samples_per_second": 10.417, "eval_steps_per_second": 0.338, "step": 1120 }, { "epoch": 35.08, "learning_rate": 0.00019039517448052535, "loss": 0.3547, "step": 1140 }, { "epoch": 35.08, "eval_loss": 0.3763927221298218, "eval_runtime": 44.8048, "eval_samples_per_second": 10.311, "eval_steps_per_second": 0.335, "step": 1140 }, { "epoch": 35.69, "learning_rate": 0.00018978544335070314, "loss": 0.3494, "step": 1160 }, { "epoch": 35.69, "eval_loss": 0.37159162759780884, "eval_runtime": 44.97, "eval_samples_per_second": 10.274, "eval_steps_per_second": 0.334, "step": 1160 }, { "epoch": 36.31, "learning_rate": 0.0001891579884986881, "loss": 0.3449, "step": 1180 }, { "epoch": 36.31, "eval_loss": 0.36737060546875, "eval_runtime": 44.3891, "eval_samples_per_second": 10.408, "eval_steps_per_second": 0.338, "step": 1180 }, { "epoch": 36.92, "learning_rate": 0.00018851293378459685, "loss": 0.3409, "step": 1200 }, { "epoch": 36.92, "eval_loss": 0.3632607161998749, "eval_runtime": 44.9198, "eval_samples_per_second": 10.285, "eval_steps_per_second": 0.334, "step": 1200 }, { "epoch": 37.54, "learning_rate": 0.0001878504065427736, "loss": 0.3339, "step": 1220 }, { "epoch": 37.54, "eval_loss": 0.3598220944404602, "eval_runtime": 44.8761, "eval_samples_per_second": 10.295, "eval_steps_per_second": 0.334, "step": 1220 }, { "epoch": 38.15, "learning_rate": 0.00018717053755665437, "loss": 0.3301, "step": 1240 }, { "epoch": 38.15, "eval_loss": 0.35608917474746704, "eval_runtime": 44.9887, "eval_samples_per_second": 10.269, "eval_steps_per_second": 0.333, "step": 1240 }, { "epoch": 38.77, "learning_rate": 0.00018647346103295003, "loss": 0.3267, "step": 1260 }, { "epoch": 38.77, "eval_loss": 0.3520090579986572, "eval_runtime": 40.8773, "eval_samples_per_second": 11.302, "eval_steps_per_second": 0.367, "step": 1260 }, { "epoch": 39.38, "learning_rate": 0.00018575931457515382, "loss": 0.3247, "step": 1280 }, { "epoch": 39.38, "eval_loss": 0.34774109721183777, "eval_runtime": 45.7461, "eval_samples_per_second": 10.099, "eval_steps_per_second": 0.328, "step": 1280 }, { "epoch": 40.0, "learning_rate": 0.00018502823915637846, "loss": 0.3196, "step": 1300 }, { "epoch": 40.0, "eval_loss": 0.34475430846214294, "eval_runtime": 44.903, "eval_samples_per_second": 10.289, "eval_steps_per_second": 0.334, "step": 1300 }, { "epoch": 40.62, "learning_rate": 0.00018428037909152785, "loss": 0.3155, "step": 1320 }, { "epoch": 40.62, "eval_loss": 0.3413088619709015, "eval_runtime": 44.6571, "eval_samples_per_second": 10.345, "eval_steps_per_second": 0.336, "step": 1320 }, { "epoch": 41.23, "learning_rate": 0.00018351588200880907, "loss": 0.311, "step": 1340 }, { "epoch": 41.23, "eval_loss": 0.3366176187992096, "eval_runtime": 45.0459, "eval_samples_per_second": 10.256, "eval_steps_per_second": 0.333, "step": 1340 }, { "epoch": 41.85, "learning_rate": 0.00018273489882059062, "loss": 0.3059, "step": 1360 }, { "epoch": 41.85, "eval_loss": 0.3341914713382721, "eval_runtime": 45.038, "eval_samples_per_second": 10.258, "eval_steps_per_second": 0.333, "step": 1360 }, { "epoch": 42.46, "learning_rate": 0.0001819375836936121, "loss": 0.3047, "step": 1380 }, { "epoch": 42.46, "eval_loss": 0.33061912655830383, "eval_runtime": 26.7201, "eval_samples_per_second": 17.29, "eval_steps_per_second": 0.561, "step": 1380 }, { "epoch": 43.08, "learning_rate": 0.00018112409401855158, "loss": 0.3006, "step": 1400 }, { "epoch": 43.08, "eval_loss": 0.32672399282455444, "eval_runtime": 45.0029, "eval_samples_per_second": 10.266, "eval_steps_per_second": 0.333, "step": 1400 }, { "epoch": 43.69, "learning_rate": 0.00018029459037895658, "loss": 0.2967, "step": 1420 }, { "epoch": 43.69, "eval_loss": 0.32409900426864624, "eval_runtime": 45.8902, "eval_samples_per_second": 10.067, "eval_steps_per_second": 0.327, "step": 1420 }, { "epoch": 44.31, "learning_rate": 0.00017944923651954474, "loss": 0.2924, "step": 1440 }, { "epoch": 44.31, "eval_loss": 0.3199877142906189, "eval_runtime": 44.6863, "eval_samples_per_second": 10.339, "eval_steps_per_second": 0.336, "step": 1440 }, { "epoch": 44.92, "learning_rate": 0.00017858819931388032, "loss": 0.2876, "step": 1460 }, { "epoch": 44.92, "eval_loss": 0.3171309530735016, "eval_runtime": 45.7535, "eval_samples_per_second": 10.098, "eval_steps_per_second": 0.328, "step": 1460 }, { "epoch": 45.54, "learning_rate": 0.0001777116487314335, "loss": 0.2848, "step": 1480 }, { "epoch": 45.54, "eval_loss": 0.3142802119255066, "eval_runtime": 45.7609, "eval_samples_per_second": 10.096, "eval_steps_per_second": 0.328, "step": 1480 }, { "epoch": 46.15, "learning_rate": 0.00017681975780402807, "loss": 0.2836, "step": 1500 }, { "epoch": 46.15, "eval_loss": 0.31119367480278015, "eval_runtime": 45.7107, "eval_samples_per_second": 10.107, "eval_steps_per_second": 0.328, "step": 1500 }, { "epoch": 46.77, "learning_rate": 0.00017591270259168477, "loss": 0.2786, "step": 1520 }, { "epoch": 46.77, "eval_loss": 0.3083397448062897, "eval_runtime": 45.8269, "eval_samples_per_second": 10.081, "eval_steps_per_second": 0.327, "step": 1520 }, { "epoch": 47.38, "learning_rate": 0.00017499066214786708, "loss": 0.2766, "step": 1540 }, { "epoch": 47.38, "eval_loss": 0.307574063539505, "eval_runtime": 45.6083, "eval_samples_per_second": 10.13, "eval_steps_per_second": 0.329, "step": 1540 }, { "epoch": 48.0, "learning_rate": 0.00017405381848413571, "loss": 0.273, "step": 1560 }, { "epoch": 48.0, "eval_loss": 0.3024856150150299, "eval_runtime": 45.6794, "eval_samples_per_second": 10.114, "eval_steps_per_second": 0.328, "step": 1560 }, { "epoch": 48.62, "learning_rate": 0.0001731023565342195, "loss": 0.2691, "step": 1580 }, { "epoch": 48.62, "eval_loss": 0.3002566397190094, "eval_runtime": 45.8955, "eval_samples_per_second": 10.066, "eval_steps_per_second": 0.327, "step": 1580 }, { "epoch": 49.23, "learning_rate": 0.00017213646411750935, "loss": 0.2657, "step": 1600 }, { "epoch": 49.23, "eval_loss": 0.29747503995895386, "eval_runtime": 45.7917, "eval_samples_per_second": 10.089, "eval_steps_per_second": 0.328, "step": 1600 }, { "epoch": 49.85, "learning_rate": 0.00017115633190198238, "loss": 0.2615, "step": 1620 }, { "epoch": 49.85, "eval_loss": 0.2955474853515625, "eval_runtime": 45.8407, "eval_samples_per_second": 10.078, "eval_steps_per_second": 0.327, "step": 1620 }, { "epoch": 50.46, "learning_rate": 0.000170162153366564, "loss": 0.2614, "step": 1640 }, { "epoch": 50.46, "eval_loss": 0.2920401394367218, "eval_runtime": 45.835, "eval_samples_per_second": 10.08, "eval_steps_per_second": 0.327, "step": 1640 }, { "epoch": 51.08, "learning_rate": 0.00016915412476293512, "loss": 0.2587, "step": 1660 }, { "epoch": 51.08, "eval_loss": 0.28886348009109497, "eval_runtime": 45.8928, "eval_samples_per_second": 10.067, "eval_steps_per_second": 0.327, "step": 1660 }, { "epoch": 51.69, "learning_rate": 0.00016813244507679165, "loss": 0.2543, "step": 1680 }, { "epoch": 51.69, "eval_loss": 0.28654780983924866, "eval_runtime": 45.8541, "eval_samples_per_second": 10.075, "eval_steps_per_second": 0.327, "step": 1680 }, { "epoch": 52.31, "learning_rate": 0.0001670973159885648, "loss": 0.2507, "step": 1700 }, { "epoch": 52.31, "eval_loss": 0.28392159938812256, "eval_runtime": 46.0394, "eval_samples_per_second": 10.035, "eval_steps_per_second": 0.326, "step": 1700 }, { "epoch": 52.92, "learning_rate": 0.000166048941833609, "loss": 0.2512, "step": 1720 }, { "epoch": 52.92, "eval_loss": 0.28163596987724304, "eval_runtime": 45.9037, "eval_samples_per_second": 10.065, "eval_steps_per_second": 0.327, "step": 1720 }, { "epoch": 53.54, "learning_rate": 0.00016498752956186605, "loss": 0.2446, "step": 1740 }, { "epoch": 53.54, "eval_loss": 0.27993378043174744, "eval_runtime": 45.8612, "eval_samples_per_second": 10.074, "eval_steps_per_second": 0.327, "step": 1740 }, { "epoch": 54.15, "learning_rate": 0.00016391328869701306, "loss": 0.2428, "step": 1760 }, { "epoch": 54.15, "eval_loss": 0.27705731987953186, "eval_runtime": 45.8648, "eval_samples_per_second": 10.073, "eval_steps_per_second": 0.327, "step": 1760 }, { "epoch": 54.77, "learning_rate": 0.00016282643129510212, "loss": 0.2421, "step": 1780 }, { "epoch": 54.77, "eval_loss": 0.27512410283088684, "eval_runtime": 45.742, "eval_samples_per_second": 10.1, "eval_steps_per_second": 0.328, "step": 1780 }, { "epoch": 55.38, "learning_rate": 0.00016172717190270045, "loss": 0.24, "step": 1800 }, { "epoch": 55.38, "eval_loss": 0.2725893259048462, "eval_runtime": 26.75, "eval_samples_per_second": 17.271, "eval_steps_per_second": 0.561, "step": 1800 }, { "epoch": 56.0, "learning_rate": 0.00016061572751453862, "loss": 0.2379, "step": 1820 }, { "epoch": 56.0, "eval_loss": 0.2703319787979126, "eval_runtime": 26.7376, "eval_samples_per_second": 17.279, "eval_steps_per_second": 0.561, "step": 1820 }, { "epoch": 56.62, "learning_rate": 0.0001594923175306756, "loss": 0.2348, "step": 1840 }, { "epoch": 56.62, "eval_loss": 0.2680712342262268, "eval_runtime": 26.7571, "eval_samples_per_second": 17.266, "eval_steps_per_second": 0.561, "step": 1840 }, { "epoch": 57.23, "learning_rate": 0.00015835716371318908, "loss": 0.2318, "step": 1860 }, { "epoch": 57.23, "eval_loss": 0.2659159302711487, "eval_runtime": 26.7348, "eval_samples_per_second": 17.281, "eval_steps_per_second": 0.561, "step": 1860 }, { "epoch": 57.85, "learning_rate": 0.00015721049014239943, "loss": 0.2288, "step": 1880 }, { "epoch": 57.85, "eval_loss": 0.263480007648468, "eval_runtime": 26.7538, "eval_samples_per_second": 17.269, "eval_steps_per_second": 0.561, "step": 1880 }, { "epoch": 58.46, "learning_rate": 0.0001560525231726359, "loss": 0.2288, "step": 1900 }, { "epoch": 58.46, "eval_loss": 0.2611861228942871, "eval_runtime": 26.7314, "eval_samples_per_second": 17.283, "eval_steps_per_second": 0.561, "step": 1900 }, { "epoch": 59.08, "learning_rate": 0.00015488349138755448, "loss": 0.2239, "step": 1920 }, { "epoch": 59.08, "eval_loss": 0.25924989581108093, "eval_runtime": 26.7587, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 1920 }, { "epoch": 59.69, "learning_rate": 0.0001537036255550147, "loss": 0.2233, "step": 1940 }, { "epoch": 59.69, "eval_loss": 0.2566944360733032, "eval_runtime": 26.7636, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 1940 }, { "epoch": 60.31, "learning_rate": 0.0001525131585815264, "loss": 0.2199, "step": 1960 }, { "epoch": 60.31, "eval_loss": 0.25529178977012634, "eval_runtime": 26.7593, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 1960 }, { "epoch": 60.92, "learning_rate": 0.00015131232546627355, "loss": 0.219, "step": 1980 }, { "epoch": 60.92, "eval_loss": 0.2528415322303772, "eval_runtime": 26.7591, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 1980 }, { "epoch": 61.54, "learning_rate": 0.0001501013632547252, "loss": 0.217, "step": 2000 }, { "epoch": 61.54, "eval_loss": 0.25113263726234436, "eval_runtime": 26.7433, "eval_samples_per_second": 17.275, "eval_steps_per_second": 0.561, "step": 2000 }, { "epoch": 62.15, "learning_rate": 0.00014888051099184256, "loss": 0.2154, "step": 2020 }, { "epoch": 62.15, "eval_loss": 0.24899105727672577, "eval_runtime": 26.7472, "eval_samples_per_second": 17.273, "eval_steps_per_second": 0.561, "step": 2020 }, { "epoch": 62.77, "learning_rate": 0.0001476500096748913, "loss": 0.2126, "step": 2040 }, { "epoch": 62.77, "eval_loss": 0.24699197709560394, "eval_runtime": 26.7455, "eval_samples_per_second": 17.274, "eval_steps_per_second": 0.561, "step": 2040 }, { "epoch": 63.38, "learning_rate": 0.00014641010220586858, "loss": 0.2085, "step": 2060 }, { "epoch": 63.38, "eval_loss": 0.24530422687530518, "eval_runtime": 26.7332, "eval_samples_per_second": 17.282, "eval_steps_per_second": 0.561, "step": 2060 }, { "epoch": 64.0, "learning_rate": 0.0001451610333435538, "loss": 0.2088, "step": 2080 }, { "epoch": 64.0, "eval_loss": 0.24252080917358398, "eval_runtime": 26.7344, "eval_samples_per_second": 17.281, "eval_steps_per_second": 0.561, "step": 2080 }, { "epoch": 64.62, "learning_rate": 0.00014390304965519312, "loss": 0.207, "step": 2100 }, { "epoch": 64.62, "eval_loss": 0.24115830659866333, "eval_runtime": 26.7241, "eval_samples_per_second": 17.288, "eval_steps_per_second": 0.561, "step": 2100 }, { "epoch": 65.23, "learning_rate": 0.00014263639946782695, "loss": 0.2066, "step": 2120 }, { "epoch": 65.23, "eval_loss": 0.23876874148845673, "eval_runtime": 26.7538, "eval_samples_per_second": 17.269, "eval_steps_per_second": 0.561, "step": 2120 }, { "epoch": 65.85, "learning_rate": 0.00014136133281926987, "loss": 0.2021, "step": 2140 }, { "epoch": 65.85, "eval_loss": 0.2371101826429367, "eval_runtime": 26.7047, "eval_samples_per_second": 17.3, "eval_steps_per_second": 0.562, "step": 2140 }, { "epoch": 66.46, "learning_rate": 0.00014007810140875295, "loss": 0.2016, "step": 2160 }, { "epoch": 66.46, "eval_loss": 0.2353435754776001, "eval_runtime": 26.7324, "eval_samples_per_second": 17.282, "eval_steps_per_second": 0.561, "step": 2160 }, { "epoch": 67.08, "learning_rate": 0.00013878695854723826, "loss": 0.1986, "step": 2180 }, { "epoch": 67.08, "eval_loss": 0.23351863026618958, "eval_runtime": 45.6122, "eval_samples_per_second": 10.129, "eval_steps_per_second": 0.329, "step": 2180 }, { "epoch": 67.69, "learning_rate": 0.0001374881591074148, "loss": 0.1965, "step": 2200 }, { "epoch": 67.69, "eval_loss": 0.23166298866271973, "eval_runtime": 45.6494, "eval_samples_per_second": 10.121, "eval_steps_per_second": 0.329, "step": 2200 }, { "epoch": 68.31, "learning_rate": 0.0001361819594733868, "loss": 0.1969, "step": 2220 }, { "epoch": 68.31, "eval_loss": 0.23032891750335693, "eval_runtime": 26.7108, "eval_samples_per_second": 17.296, "eval_steps_per_second": 0.562, "step": 2220 }, { "epoch": 68.92, "learning_rate": 0.00013486861749006286, "loss": 0.1957, "step": 2240 }, { "epoch": 68.92, "eval_loss": 0.22889761626720428, "eval_runtime": 26.7424, "eval_samples_per_second": 17.276, "eval_steps_per_second": 0.561, "step": 2240 }, { "epoch": 69.54, "learning_rate": 0.0001335483924122575, "loss": 0.1918, "step": 2260 }, { "epoch": 69.54, "eval_loss": 0.2264855057001114, "eval_runtime": 26.7477, "eval_samples_per_second": 17.273, "eval_steps_per_second": 0.561, "step": 2260 }, { "epoch": 70.15, "learning_rate": 0.00013222154485351375, "loss": 0.1913, "step": 2280 }, { "epoch": 70.15, "eval_loss": 0.22507672011852264, "eval_runtime": 45.5727, "eval_samples_per_second": 10.138, "eval_steps_per_second": 0.329, "step": 2280 }, { "epoch": 70.77, "learning_rate": 0.0001308883367346581, "loss": 0.1892, "step": 2300 }, { "epoch": 70.77, "eval_loss": 0.22298868000507355, "eval_runtime": 26.731, "eval_samples_per_second": 17.283, "eval_steps_per_second": 0.561, "step": 2300 }, { "epoch": 71.38, "learning_rate": 0.00012954903123209687, "loss": 0.1885, "step": 2320 }, { "epoch": 71.38, "eval_loss": 0.2219810038805008, "eval_runtime": 26.7121, "eval_samples_per_second": 17.296, "eval_steps_per_second": 0.562, "step": 2320 }, { "epoch": 72.0, "learning_rate": 0.0001282038927258651, "loss": 0.1876, "step": 2340 }, { "epoch": 72.0, "eval_loss": 0.2204855978488922, "eval_runtime": 45.041, "eval_samples_per_second": 10.257, "eval_steps_per_second": 0.333, "step": 2340 }, { "epoch": 72.62, "learning_rate": 0.0001268531867474377, "loss": 0.1855, "step": 2360 }, { "epoch": 72.62, "eval_loss": 0.21908599138259888, "eval_runtime": 45.1286, "eval_samples_per_second": 10.237, "eval_steps_per_second": 0.332, "step": 2360 }, { "epoch": 73.23, "learning_rate": 0.00012549717992731317, "loss": 0.1841, "step": 2380 }, { "epoch": 73.23, "eval_loss": 0.21735349297523499, "eval_runtime": 26.5835, "eval_samples_per_second": 17.379, "eval_steps_per_second": 0.564, "step": 2380 }, { "epoch": 73.85, "learning_rate": 0.0001241361399423808, "loss": 0.1819, "step": 2400 }, { "epoch": 73.85, "eval_loss": 0.21623647212982178, "eval_runtime": 26.7905, "eval_samples_per_second": 17.245, "eval_steps_per_second": 0.56, "step": 2400 }, { "epoch": 74.46, "learning_rate": 0.0001227703354630807, "loss": 0.1812, "step": 2420 }, { "epoch": 74.46, "eval_loss": 0.21434533596038818, "eval_runtime": 26.7405, "eval_samples_per_second": 17.277, "eval_steps_per_second": 0.561, "step": 2420 }, { "epoch": 75.08, "learning_rate": 0.0001214000361003683, "loss": 0.1801, "step": 2440 }, { "epoch": 75.08, "eval_loss": 0.21285748481750488, "eval_runtime": 26.7466, "eval_samples_per_second": 17.273, "eval_steps_per_second": 0.561, "step": 2440 }, { "epoch": 75.69, "learning_rate": 0.00012002551235249268, "loss": 0.1773, "step": 2460 }, { "epoch": 75.69, "eval_loss": 0.21103879809379578, "eval_runtime": 26.7635, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 2460 }, { "epoch": 76.31, "learning_rate": 0.00011864703555160028, "loss": 0.1771, "step": 2480 }, { "epoch": 76.31, "eval_loss": 0.21016329526901245, "eval_runtime": 26.7904, "eval_samples_per_second": 17.245, "eval_steps_per_second": 0.56, "step": 2480 }, { "epoch": 76.92, "learning_rate": 0.00011726487781017337, "loss": 0.1752, "step": 2500 }, { "epoch": 76.92, "eval_loss": 0.2086782455444336, "eval_runtime": 44.9559, "eval_samples_per_second": 10.277, "eval_steps_per_second": 0.334, "step": 2500 }, { "epoch": 77.54, "learning_rate": 0.00011587931196731505, "loss": 0.1742, "step": 2520 }, { "epoch": 77.54, "eval_loss": 0.2075587958097458, "eval_runtime": 44.9658, "eval_samples_per_second": 10.274, "eval_steps_per_second": 0.334, "step": 2520 }, { "epoch": 78.15, "learning_rate": 0.00011449061153489055, "loss": 0.1735, "step": 2540 }, { "epoch": 78.15, "eval_loss": 0.20602142810821533, "eval_runtime": 26.6678, "eval_samples_per_second": 17.324, "eval_steps_per_second": 0.562, "step": 2540 }, { "epoch": 78.77, "learning_rate": 0.00011309905064353575, "loss": 0.1715, "step": 2560 }, { "epoch": 78.77, "eval_loss": 0.20492884516716003, "eval_runtime": 26.7865, "eval_samples_per_second": 17.248, "eval_steps_per_second": 0.56, "step": 2560 }, { "epoch": 79.38, "learning_rate": 0.00011170490398854336, "loss": 0.1707, "step": 2580 }, { "epoch": 79.38, "eval_loss": 0.2027878314256668, "eval_runtime": 42.4616, "eval_samples_per_second": 10.88, "eval_steps_per_second": 0.353, "step": 2580 }, { "epoch": 80.0, "learning_rate": 0.0001103084467756382, "loss": 0.1701, "step": 2600 }, { "epoch": 80.0, "eval_loss": 0.201686292886734, "eval_runtime": 45.0476, "eval_samples_per_second": 10.256, "eval_steps_per_second": 0.333, "step": 2600 }, { "epoch": 80.62, "learning_rate": 0.00010890995466665108, "loss": 0.1675, "step": 2620 }, { "epoch": 80.62, "eval_loss": 0.2003440409898758, "eval_runtime": 45.0995, "eval_samples_per_second": 10.244, "eval_steps_per_second": 0.333, "step": 2620 }, { "epoch": 81.23, "learning_rate": 0.00010750970372510307, "loss": 0.1663, "step": 2640 }, { "epoch": 81.23, "eval_loss": 0.19951596856117249, "eval_runtime": 45.0074, "eval_samples_per_second": 10.265, "eval_steps_per_second": 0.333, "step": 2640 }, { "epoch": 81.85, "learning_rate": 0.00010610797036171014, "loss": 0.1653, "step": 2660 }, { "epoch": 81.85, "eval_loss": 0.19835925102233887, "eval_runtime": 45.0031, "eval_samples_per_second": 10.266, "eval_steps_per_second": 0.333, "step": 2660 }, { "epoch": 82.46, "learning_rate": 0.00010470503127981977, "loss": 0.165, "step": 2680 }, { "epoch": 82.46, "eval_loss": 0.19678974151611328, "eval_runtime": 45.1089, "eval_samples_per_second": 10.242, "eval_steps_per_second": 0.333, "step": 2680 }, { "epoch": 83.08, "learning_rate": 0.0001033011634207891, "loss": 0.1644, "step": 2700 }, { "epoch": 83.08, "eval_loss": 0.19566014409065247, "eval_runtime": 26.7641, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 2700 }, { "epoch": 83.69, "learning_rate": 0.00010189664390931682, "loss": 0.1631, "step": 2720 }, { "epoch": 83.69, "eval_loss": 0.19463180005550385, "eval_runtime": 26.7406, "eval_samples_per_second": 17.277, "eval_steps_per_second": 0.561, "step": 2720 }, { "epoch": 84.31, "learning_rate": 0.00010049174999873823, "loss": 0.162, "step": 2740 }, { "epoch": 84.31, "eval_loss": 0.1935625970363617, "eval_runtime": 26.7779, "eval_samples_per_second": 17.253, "eval_steps_per_second": 0.56, "step": 2740 }, { "epoch": 84.92, "learning_rate": 9.908675901629543e-05, "loss": 0.1604, "step": 2760 }, { "epoch": 84.92, "eval_loss": 0.19222331047058105, "eval_runtime": 26.7706, "eval_samples_per_second": 17.258, "eval_steps_per_second": 0.56, "step": 2760 }, { "epoch": 85.54, "learning_rate": 9.768194830839252e-05, "loss": 0.1598, "step": 2780 }, { "epoch": 85.54, "eval_loss": 0.19124871492385864, "eval_runtime": 26.778, "eval_samples_per_second": 17.253, "eval_steps_per_second": 0.56, "step": 2780 }, { "epoch": 86.15, "learning_rate": 9.627759518584733e-05, "loss": 0.1583, "step": 2800 }, { "epoch": 86.15, "eval_loss": 0.19053253531455994, "eval_runtime": 26.7797, "eval_samples_per_second": 17.252, "eval_steps_per_second": 0.56, "step": 2800 }, { "epoch": 86.77, "learning_rate": 9.487397686914985e-05, "loss": 0.1581, "step": 2820 }, { "epoch": 86.77, "eval_loss": 0.18871891498565674, "eval_runtime": 26.767, "eval_samples_per_second": 17.26, "eval_steps_per_second": 0.56, "step": 2820 }, { "epoch": 87.38, "learning_rate": 9.347137043373885e-05, "loss": 0.1569, "step": 2840 }, { "epoch": 87.38, "eval_loss": 0.18785762786865234, "eval_runtime": 26.7805, "eval_samples_per_second": 17.251, "eval_steps_per_second": 0.56, "step": 2840 }, { "epoch": 88.0, "learning_rate": 9.20700527553069e-05, "loss": 0.1553, "step": 2860 }, { "epoch": 88.0, "eval_loss": 0.18669484555721283, "eval_runtime": 26.7742, "eval_samples_per_second": 17.255, "eval_steps_per_second": 0.56, "step": 2860 }, { "epoch": 88.62, "learning_rate": 9.067030045514476e-05, "loss": 0.154, "step": 2880 }, { "epoch": 88.62, "eval_loss": 0.1860661506652832, "eval_runtime": 26.7794, "eval_samples_per_second": 17.252, "eval_steps_per_second": 0.56, "step": 2880 }, { "epoch": 89.23, "learning_rate": 8.927238984553626e-05, "loss": 0.1549, "step": 2900 }, { "epoch": 89.23, "eval_loss": 0.18506208062171936, "eval_runtime": 26.7725, "eval_samples_per_second": 17.257, "eval_steps_per_second": 0.56, "step": 2900 }, { "epoch": 89.85, "learning_rate": 8.787659687521403e-05, "loss": 0.1528, "step": 2920 }, { "epoch": 89.85, "eval_loss": 0.18385158479213715, "eval_runtime": 26.763, "eval_samples_per_second": 17.263, "eval_steps_per_second": 0.56, "step": 2920 }, { "epoch": 90.46, "learning_rate": 8.648319707488682e-05, "loss": 0.1523, "step": 2940 }, { "epoch": 90.46, "eval_loss": 0.18269173800945282, "eval_runtime": 26.7762, "eval_samples_per_second": 17.254, "eval_steps_per_second": 0.56, "step": 2940 }, { "epoch": 91.08, "learning_rate": 8.509246550284961e-05, "loss": 0.1513, "step": 2960 }, { "epoch": 91.08, "eval_loss": 0.18222320079803467, "eval_runtime": 26.788, "eval_samples_per_second": 17.247, "eval_steps_per_second": 0.56, "step": 2960 }, { "epoch": 91.69, "learning_rate": 8.37046766906869e-05, "loss": 0.1503, "step": 2980 }, { "epoch": 91.69, "eval_loss": 0.18125151097774506, "eval_runtime": 26.7603, "eval_samples_per_second": 17.264, "eval_steps_per_second": 0.561, "step": 2980 }, { "epoch": 92.31, "learning_rate": 8.232010458907992e-05, "loss": 0.1502, "step": 3000 }, { "epoch": 92.31, "eval_loss": 0.1806618869304657, "eval_runtime": 26.7921, "eval_samples_per_second": 17.244, "eval_steps_per_second": 0.56, "step": 3000 }, { "epoch": 92.92, "learning_rate": 8.093902251372853e-05, "loss": 0.1481, "step": 3020 }, { "epoch": 92.92, "eval_loss": 0.17959125339984894, "eval_runtime": 26.7772, "eval_samples_per_second": 17.254, "eval_steps_per_second": 0.56, "step": 3020 }, { "epoch": 93.54, "learning_rate": 7.956170309139842e-05, "loss": 0.1475, "step": 3040 }, { "epoch": 93.54, "eval_loss": 0.1786828190088272, "eval_runtime": 26.773, "eval_samples_per_second": 17.256, "eval_steps_per_second": 0.56, "step": 3040 }, { "epoch": 94.15, "learning_rate": 7.825698244184431e-05, "loss": 0.1469, "step": 3060 }, { "epoch": 94.15, "eval_loss": 0.17820928990840912, "eval_runtime": 26.7735, "eval_samples_per_second": 17.256, "eval_steps_per_second": 0.56, "step": 3060 }, { "epoch": 94.77, "learning_rate": 7.68877814745228e-05, "loss": 0.1472, "step": 3080 }, { "epoch": 94.77, "eval_loss": 0.17709802091121674, "eval_runtime": 26.7597, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 3080 }, { "epoch": 95.38, "learning_rate": 7.552314287861831e-05, "loss": 0.1461, "step": 3100 }, { "epoch": 95.38, "eval_loss": 0.1761600822210312, "eval_runtime": 26.7646, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 3100 }, { "epoch": 96.0, "learning_rate": 7.416333603493977e-05, "loss": 0.145, "step": 3120 }, { "epoch": 96.0, "eval_loss": 0.17534850537776947, "eval_runtime": 26.7471, "eval_samples_per_second": 17.273, "eval_steps_per_second": 0.561, "step": 3120 }, { "epoch": 96.62, "learning_rate": 7.280862937050435e-05, "loss": 0.143, "step": 3140 }, { "epoch": 96.62, "eval_loss": 0.1751878708600998, "eval_runtime": 26.7568, "eval_samples_per_second": 17.267, "eval_steps_per_second": 0.561, "step": 3140 }, { "epoch": 97.23, "learning_rate": 7.152662566194701e-05, "loss": 0.1436, "step": 3160 }, { "epoch": 97.23, "eval_loss": 0.17516781389713287, "eval_runtime": 26.8034, "eval_samples_per_second": 17.237, "eval_steps_per_second": 0.56, "step": 3160 }, { "epoch": 97.85, "learning_rate": 7.018263255002402e-05, "loss": 0.1426, "step": 3180 }, { "epoch": 97.85, "eval_loss": 0.17369630932807922, "eval_runtime": 26.781, "eval_samples_per_second": 17.251, "eval_steps_per_second": 0.56, "step": 3180 }, { "epoch": 98.46, "learning_rate": 6.884452541156719e-05, "loss": 0.1427, "step": 3200 }, { "epoch": 98.46, "eval_loss": 0.17294321954250336, "eval_runtime": 26.7722, "eval_samples_per_second": 17.257, "eval_steps_per_second": 0.56, "step": 3200 }, { "epoch": 99.08, "learning_rate": 6.751256839005342e-05, "loss": 0.142, "step": 3220 }, { "epoch": 99.08, "eval_loss": 0.17208707332611084, "eval_runtime": 26.7495, "eval_samples_per_second": 17.271, "eval_steps_per_second": 0.561, "step": 3220 }, { "epoch": 99.69, "learning_rate": 6.625314525914243e-05, "loss": 0.1411, "step": 3240 }, { "epoch": 99.69, "eval_loss": 0.17149858176708221, "eval_runtime": 26.756, "eval_samples_per_second": 17.267, "eval_steps_per_second": 0.561, "step": 3240 }, { "epoch": 100.31, "learning_rate": 6.493393606401967e-05, "loss": 0.1406, "step": 3260 }, { "epoch": 100.31, "eval_loss": 0.1708817183971405, "eval_runtime": 26.7681, "eval_samples_per_second": 17.259, "eval_steps_per_second": 0.56, "step": 3260 }, { "epoch": 100.92, "learning_rate": 6.36216489394732e-05, "loss": 0.1403, "step": 3280 }, { "epoch": 100.92, "eval_loss": 0.16994836926460266, "eval_runtime": 26.7644, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 3280 }, { "epoch": 101.54, "learning_rate": 6.231654293208744e-05, "loss": 0.1401, "step": 3300 }, { "epoch": 101.54, "eval_loss": 0.16944177448749542, "eval_runtime": 26.7612, "eval_samples_per_second": 17.264, "eval_steps_per_second": 0.561, "step": 3300 }, { "epoch": 102.15, "learning_rate": 6.101887567088831e-05, "loss": 0.1377, "step": 3320 }, { "epoch": 102.15, "eval_loss": 0.16865964233875275, "eval_runtime": 26.7843, "eval_samples_per_second": 17.249, "eval_steps_per_second": 0.56, "step": 3320 }, { "epoch": 102.77, "learning_rate": 5.972890331648686e-05, "loss": 0.1383, "step": 3340 }, { "epoch": 102.77, "eval_loss": 0.16790008544921875, "eval_runtime": 26.7761, "eval_samples_per_second": 17.254, "eval_steps_per_second": 0.56, "step": 3340 }, { "epoch": 103.38, "learning_rate": 5.8446880510513144e-05, "loss": 0.1378, "step": 3360 }, { "epoch": 103.38, "eval_loss": 0.1674834042787552, "eval_runtime": 26.7598, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 3360 }, { "epoch": 104.0, "learning_rate": 5.717306032534962e-05, "loss": 0.1372, "step": 3380 }, { "epoch": 104.0, "eval_loss": 0.16623561084270477, "eval_runtime": 26.77, "eval_samples_per_second": 17.258, "eval_steps_per_second": 0.56, "step": 3380 }, { "epoch": 104.62, "learning_rate": 5.5907694214174344e-05, "loss": 0.1362, "step": 3400 }, { "epoch": 104.62, "eval_loss": 0.16605305671691895, "eval_runtime": 26.7686, "eval_samples_per_second": 17.259, "eval_steps_per_second": 0.56, "step": 3400 }, { "epoch": 105.23, "learning_rate": 5.4651031961324364e-05, "loss": 0.1343, "step": 3420 }, { "epoch": 105.23, "eval_loss": 0.16553008556365967, "eval_runtime": 26.7736, "eval_samples_per_second": 17.256, "eval_steps_per_second": 0.56, "step": 3420 }, { "epoch": 105.85, "learning_rate": 5.3403321632987425e-05, "loss": 0.1357, "step": 3440 }, { "epoch": 105.85, "eval_loss": 0.16530947387218475, "eval_runtime": 26.7838, "eval_samples_per_second": 17.249, "eval_steps_per_second": 0.56, "step": 3440 }, { "epoch": 106.46, "learning_rate": 5.2164809528234015e-05, "loss": 0.1344, "step": 3460 }, { "epoch": 106.46, "eval_loss": 0.1647026091814041, "eval_runtime": 26.7756, "eval_samples_per_second": 17.254, "eval_steps_per_second": 0.56, "step": 3460 }, { "epoch": 107.08, "learning_rate": 5.0935740130397494e-05, "loss": 0.1339, "step": 3480 }, { "epoch": 107.08, "eval_loss": 0.16388827562332153, "eval_runtime": 26.7918, "eval_samples_per_second": 17.244, "eval_steps_per_second": 0.56, "step": 3480 }, { "epoch": 107.69, "learning_rate": 4.971635605881291e-05, "loss": 0.1336, "step": 3500 }, { "epoch": 107.69, "eval_loss": 0.16345228254795074, "eval_runtime": 26.7727, "eval_samples_per_second": 17.256, "eval_steps_per_second": 0.56, "step": 3500 }, { "epoch": 108.31, "learning_rate": 4.850689802092378e-05, "loss": 0.1333, "step": 3520 }, { "epoch": 108.31, "eval_loss": 0.16290676593780518, "eval_runtime": 26.7927, "eval_samples_per_second": 17.243, "eval_steps_per_second": 0.56, "step": 3520 }, { "epoch": 108.92, "learning_rate": 4.730760476476611e-05, "loss": 0.1332, "step": 3540 }, { "epoch": 108.92, "eval_loss": 0.1624392867088318, "eval_runtime": 26.7947, "eval_samples_per_second": 17.242, "eval_steps_per_second": 0.56, "step": 3540 }, { "epoch": 109.54, "learning_rate": 4.611871303183952e-05, "loss": 0.1322, "step": 3560 }, { "epoch": 109.54, "eval_loss": 0.1618904024362564, "eval_runtime": 26.7757, "eval_samples_per_second": 17.254, "eval_steps_per_second": 0.56, "step": 3560 }, { "epoch": 110.15, "learning_rate": 4.4940457510374136e-05, "loss": 0.1327, "step": 3580 }, { "epoch": 110.15, "eval_loss": 0.16122287511825562, "eval_runtime": 26.8152, "eval_samples_per_second": 17.229, "eval_steps_per_second": 0.559, "step": 3580 }, { "epoch": 110.77, "learning_rate": 4.3773070789003026e-05, "loss": 0.1311, "step": 3600 }, { "epoch": 110.77, "eval_loss": 0.16065527498722076, "eval_runtime": 26.7937, "eval_samples_per_second": 17.243, "eval_steps_per_second": 0.56, "step": 3600 }, { "epoch": 111.38, "learning_rate": 4.261678331084884e-05, "loss": 0.132, "step": 3620 }, { "epoch": 111.38, "eval_loss": 0.16051311790943146, "eval_runtime": 26.7716, "eval_samples_per_second": 17.257, "eval_steps_per_second": 0.56, "step": 3620 }, { "epoch": 112.0, "learning_rate": 4.147182332803439e-05, "loss": 0.131, "step": 3640 }, { "epoch": 112.0, "eval_loss": 0.16002397239208221, "eval_runtime": 26.809, "eval_samples_per_second": 17.233, "eval_steps_per_second": 0.56, "step": 3640 }, { "epoch": 112.62, "learning_rate": 4.0338416856625294e-05, "loss": 0.1298, "step": 3660 }, { "epoch": 112.62, "eval_loss": 0.15938027203083038, "eval_runtime": 26.7668, "eval_samples_per_second": 17.26, "eval_steps_per_second": 0.56, "step": 3660 }, { "epoch": 113.23, "learning_rate": 3.921678763201434e-05, "loss": 0.13, "step": 3680 }, { "epoch": 113.23, "eval_loss": 0.1591978669166565, "eval_runtime": 26.7683, "eval_samples_per_second": 17.259, "eval_steps_per_second": 0.56, "step": 3680 }, { "epoch": 113.85, "learning_rate": 3.810715706475575e-05, "loss": 0.1302, "step": 3700 }, { "epoch": 113.85, "eval_loss": 0.15868616104125977, "eval_runtime": 26.7596, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 3700 }, { "epoch": 114.46, "learning_rate": 3.70097441968588e-05, "loss": 0.1292, "step": 3720 }, { "epoch": 114.46, "eval_loss": 0.1582469791173935, "eval_runtime": 26.8259, "eval_samples_per_second": 17.222, "eval_steps_per_second": 0.559, "step": 3720 }, { "epoch": 115.08, "learning_rate": 3.592476565854854e-05, "loss": 0.1284, "step": 3740 }, { "epoch": 115.08, "eval_loss": 0.15772594511508942, "eval_runtime": 26.7663, "eval_samples_per_second": 17.261, "eval_steps_per_second": 0.56, "step": 3740 }, { "epoch": 115.69, "learning_rate": 3.485243562550297e-05, "loss": 0.1278, "step": 3760 }, { "epoch": 115.69, "eval_loss": 0.1572510004043579, "eval_runtime": 26.8195, "eval_samples_per_second": 17.226, "eval_steps_per_second": 0.559, "step": 3760 }, { "epoch": 116.31, "learning_rate": 3.379296577657434e-05, "loss": 0.1281, "step": 3780 }, { "epoch": 116.31, "eval_loss": 0.156888946890831, "eval_runtime": 26.7809, "eval_samples_per_second": 17.251, "eval_steps_per_second": 0.56, "step": 3780 }, { "epoch": 116.92, "learning_rate": 3.2746565252003815e-05, "loss": 0.1277, "step": 3800 }, { "epoch": 116.92, "eval_loss": 0.15669023990631104, "eval_runtime": 26.7251, "eval_samples_per_second": 17.287, "eval_steps_per_second": 0.561, "step": 3800 }, { "epoch": 117.54, "learning_rate": 3.1713440612136924e-05, "loss": 0.1266, "step": 3820 }, { "epoch": 117.54, "eval_loss": 0.1565510779619217, "eval_runtime": 26.7847, "eval_samples_per_second": 17.249, "eval_steps_per_second": 0.56, "step": 3820 }, { "epoch": 118.15, "learning_rate": 3.069379579664835e-05, "loss": 0.1279, "step": 3840 }, { "epoch": 118.15, "eval_loss": 0.15575794875621796, "eval_runtime": 26.785, "eval_samples_per_second": 17.248, "eval_steps_per_second": 0.56, "step": 3840 }, { "epoch": 118.77, "learning_rate": 2.9737802267115754e-05, "loss": 0.1261, "step": 3860 }, { "epoch": 118.77, "eval_loss": 0.15544316172599792, "eval_runtime": 26.805, "eval_samples_per_second": 17.236, "eval_steps_per_second": 0.56, "step": 3860 }, { "epoch": 119.38, "learning_rate": 2.8745019577809483e-05, "loss": 0.1271, "step": 3880 }, { "epoch": 119.38, "eval_loss": 0.15522195398807526, "eval_runtime": 26.7839, "eval_samples_per_second": 17.249, "eval_steps_per_second": 0.56, "step": 3880 }, { "epoch": 120.0, "learning_rate": 2.7766302681695688e-05, "loss": 0.1263, "step": 3900 }, { "epoch": 120.0, "eval_loss": 0.1547752469778061, "eval_runtime": 26.8002, "eval_samples_per_second": 17.239, "eval_steps_per_second": 0.56, "step": 3900 }, { "epoch": 120.62, "learning_rate": 2.6801844778314467e-05, "loss": 0.1254, "step": 3920 }, { "epoch": 120.62, "eval_loss": 0.15464647114276886, "eval_runtime": 26.8029, "eval_samples_per_second": 17.237, "eval_steps_per_second": 0.56, "step": 3920 }, { "epoch": 121.23, "learning_rate": 2.5851836252468897e-05, "loss": 0.1255, "step": 3940 }, { "epoch": 121.23, "eval_loss": 0.15429826080799103, "eval_runtime": 26.8105, "eval_samples_per_second": 17.232, "eval_steps_per_second": 0.559, "step": 3940 }, { "epoch": 121.85, "learning_rate": 2.491646463664261e-05, "loss": 0.1261, "step": 3960 }, { "epoch": 121.85, "eval_loss": 0.1540435552597046, "eval_runtime": 26.8082, "eval_samples_per_second": 17.234, "eval_steps_per_second": 0.56, "step": 3960 }, { "epoch": 122.46, "learning_rate": 2.399591457398106e-05, "loss": 0.1257, "step": 3980 }, { "epoch": 122.46, "eval_loss": 0.15359282493591309, "eval_runtime": 26.7751, "eval_samples_per_second": 17.255, "eval_steps_per_second": 0.56, "step": 3980 }, { "epoch": 123.08, "learning_rate": 2.3090367781842413e-05, "loss": 0.1246, "step": 4000 }, { "epoch": 123.08, "eval_loss": 0.15332242846488953, "eval_runtime": 26.7777, "eval_samples_per_second": 17.253, "eval_steps_per_second": 0.56, "step": 4000 }, { "epoch": 123.69, "learning_rate": 2.2200003015926705e-05, "loss": 0.1247, "step": 4020 }, { "epoch": 123.69, "eval_loss": 0.15318149328231812, "eval_runtime": 26.7776, "eval_samples_per_second": 17.253, "eval_steps_per_second": 0.56, "step": 4020 }, { "epoch": 124.31, "learning_rate": 2.1324996034989165e-05, "loss": 0.1252, "step": 4040 }, { "epoch": 124.31, "eval_loss": 0.15291614830493927, "eval_runtime": 26.7677, "eval_samples_per_second": 17.26, "eval_steps_per_second": 0.56, "step": 4040 }, { "epoch": 124.92, "learning_rate": 2.046551956614534e-05, "loss": 0.1249, "step": 4060 }, { "epoch": 124.92, "eval_loss": 0.15260463953018188, "eval_runtime": 26.7848, "eval_samples_per_second": 17.249, "eval_steps_per_second": 0.56, "step": 4060 }, { "epoch": 125.54, "learning_rate": 1.9621743270774597e-05, "loss": 0.1242, "step": 4080 }, { "epoch": 125.54, "eval_loss": 0.15242013335227966, "eval_runtime": 26.7819, "eval_samples_per_second": 17.25, "eval_steps_per_second": 0.56, "step": 4080 }, { "epoch": 126.15, "learning_rate": 1.8793833711028773e-05, "loss": 0.1239, "step": 4100 }, { "epoch": 126.15, "eval_loss": 0.1519923359155655, "eval_runtime": 26.789, "eval_samples_per_second": 17.246, "eval_steps_per_second": 0.56, "step": 4100 }, { "epoch": 126.77, "learning_rate": 1.7981954316952786e-05, "loss": 0.1231, "step": 4120 }, { "epoch": 126.77, "eval_loss": 0.15172038972377777, "eval_runtime": 26.788, "eval_samples_per_second": 17.246, "eval_steps_per_second": 0.56, "step": 4120 }, { "epoch": 127.38, "learning_rate": 1.718626535422332e-05, "loss": 0.1235, "step": 4140 }, { "epoch": 127.38, "eval_loss": 0.15152348577976227, "eval_runtime": 26.7862, "eval_samples_per_second": 17.248, "eval_steps_per_second": 0.56, "step": 4140 }, { "epoch": 128.0, "learning_rate": 1.6406923892512284e-05, "loss": 0.123, "step": 4160 }, { "epoch": 128.0, "eval_loss": 0.151360422372818, "eval_runtime": 26.8008, "eval_samples_per_second": 17.238, "eval_steps_per_second": 0.56, "step": 4160 }, { "epoch": 128.62, "learning_rate": 1.5644083774481043e-05, "loss": 0.123, "step": 4180 }, { "epoch": 128.62, "eval_loss": 0.1512284278869629, "eval_runtime": 26.7984, "eval_samples_per_second": 17.24, "eval_steps_per_second": 0.56, "step": 4180 }, { "epoch": 129.23, "learning_rate": 1.489789558541187e-05, "loss": 0.1235, "step": 4200 }, { "epoch": 129.23, "eval_loss": 0.15105997025966644, "eval_runtime": 26.7894, "eval_samples_per_second": 17.246, "eval_steps_per_second": 0.56, "step": 4200 }, { "epoch": 129.85, "learning_rate": 1.4168506623482202e-05, "loss": 0.1222, "step": 4220 }, { "epoch": 129.85, "eval_loss": 0.15094506740570068, "eval_runtime": 26.7743, "eval_samples_per_second": 17.255, "eval_steps_per_second": 0.56, "step": 4220 }, { "epoch": 130.46, "learning_rate": 1.3456060870687937e-05, "loss": 0.1221, "step": 4240 }, { "epoch": 130.46, "eval_loss": 0.1505899429321289, "eval_runtime": 26.7534, "eval_samples_per_second": 17.269, "eval_steps_per_second": 0.561, "step": 4240 }, { "epoch": 131.08, "learning_rate": 1.2760698964421091e-05, "loss": 0.1212, "step": 4260 }, { "epoch": 131.08, "eval_loss": 0.15049409866333008, "eval_runtime": 26.7596, "eval_samples_per_second": 17.265, "eval_steps_per_second": 0.561, "step": 4260 }, { "epoch": 131.69, "learning_rate": 1.2082558169708081e-05, "loss": 0.122, "step": 4280 }, { "epoch": 131.69, "eval_loss": 0.15041232109069824, "eval_runtime": 26.7564, "eval_samples_per_second": 17.267, "eval_steps_per_second": 0.561, "step": 4280 }, { "epoch": 132.31, "learning_rate": 1.1421772352113336e-05, "loss": 0.1225, "step": 4300 }, { "epoch": 132.31, "eval_loss": 0.1501646488904953, "eval_runtime": 26.7249, "eval_samples_per_second": 17.287, "eval_steps_per_second": 0.561, "step": 4300 }, { "epoch": 132.92, "learning_rate": 1.0778471951314229e-05, "loss": 0.1213, "step": 4320 }, { "epoch": 132.92, "eval_loss": 0.15006287395954132, "eval_runtime": 26.7285, "eval_samples_per_second": 17.285, "eval_steps_per_second": 0.561, "step": 4320 }, { "epoch": 133.54, "learning_rate": 1.015278395535203e-05, "loss": 0.1225, "step": 4340 }, { "epoch": 133.54, "eval_loss": 0.14982885122299194, "eval_runtime": 26.7639, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 4340 }, { "epoch": 134.15, "learning_rate": 9.54483187556453e-06, "loss": 0.1219, "step": 4360 }, { "epoch": 134.15, "eval_loss": 0.14970383048057556, "eval_runtime": 26.7778, "eval_samples_per_second": 17.253, "eval_steps_per_second": 0.56, "step": 4360 }, { "epoch": 134.77, "learning_rate": 8.954735722204689e-06, "loss": 0.1213, "step": 4380 }, { "epoch": 134.77, "eval_loss": 0.14960302412509918, "eval_runtime": 26.7677, "eval_samples_per_second": 17.26, "eval_steps_per_second": 0.56, "step": 4380 }, { "epoch": 135.38, "learning_rate": 8.382611980750532e-06, "loss": 0.1216, "step": 4400 }, { "epoch": 135.38, "eval_loss": 0.14945241808891296, "eval_runtime": 26.7712, "eval_samples_per_second": 17.257, "eval_steps_per_second": 0.56, "step": 4400 }, { "epoch": 136.0, "learning_rate": 7.828573588910859e-06, "loss": 0.1211, "step": 4420 }, { "epoch": 136.0, "eval_loss": 0.1492658108472824, "eval_runtime": 26.7652, "eval_samples_per_second": 17.261, "eval_steps_per_second": 0.56, "step": 4420 }, { "epoch": 136.62, "learning_rate": 7.292729914331142e-06, "loss": 0.1216, "step": 4440 }, { "epoch": 136.62, "eval_loss": 0.14914917945861816, "eval_runtime": 26.7705, "eval_samples_per_second": 17.258, "eval_steps_per_second": 0.56, "step": 4440 }, { "epoch": 137.23, "learning_rate": 6.775186733004424e-06, "loss": 0.1197, "step": 4460 }, { "epoch": 137.23, "eval_loss": 0.14917601644992828, "eval_runtime": 26.7525, "eval_samples_per_second": 17.269, "eval_steps_per_second": 0.561, "step": 4460 }, { "epoch": 137.85, "learning_rate": 6.276046208390873e-06, "loss": 0.1203, "step": 4480 }, { "epoch": 137.85, "eval_loss": 0.14903923869132996, "eval_runtime": 26.762, "eval_samples_per_second": 17.263, "eval_steps_per_second": 0.56, "step": 4480 }, { "epoch": 138.46, "learning_rate": 5.795406871250797e-06, "loss": 0.1209, "step": 4500 }, { "epoch": 138.46, "eval_loss": 0.14884509146213531, "eval_runtime": 26.758, "eval_samples_per_second": 17.266, "eval_steps_per_second": 0.561, "step": 4500 }, { "epoch": 139.08, "learning_rate": 5.333363600194396e-06, "loss": 0.1197, "step": 4520 }, { "epoch": 139.08, "eval_loss": 0.14882220327854156, "eval_runtime": 26.765, "eval_samples_per_second": 17.261, "eval_steps_per_second": 0.56, "step": 4520 }, { "epoch": 139.69, "learning_rate": 4.890007602952828e-06, "loss": 0.1202, "step": 4540 }, { "epoch": 139.69, "eval_loss": 0.1487365961074829, "eval_runtime": 26.7652, "eval_samples_per_second": 17.261, "eval_steps_per_second": 0.56, "step": 4540 }, { "epoch": 140.31, "learning_rate": 4.46542639837364e-06, "loss": 0.121, "step": 4560 }, { "epoch": 140.31, "eval_loss": 0.1486121267080307, "eval_runtime": 26.748, "eval_samples_per_second": 17.272, "eval_steps_per_second": 0.561, "step": 4560 }, { "epoch": 140.92, "learning_rate": 4.059703799144476e-06, "loss": 0.1202, "step": 4580 }, { "epoch": 140.92, "eval_loss": 0.1485925018787384, "eval_runtime": 26.769, "eval_samples_per_second": 17.259, "eval_steps_per_second": 0.56, "step": 4580 }, { "epoch": 141.54, "learning_rate": 3.6729198952483724e-06, "loss": 0.1194, "step": 4600 }, { "epoch": 141.54, "eval_loss": 0.14861957728862762, "eval_runtime": 26.7652, "eval_samples_per_second": 17.261, "eval_steps_per_second": 0.56, "step": 4600 }, { "epoch": 142.15, "learning_rate": 3.305151038153964e-06, "loss": 0.1199, "step": 4620 }, { "epoch": 142.15, "eval_loss": 0.1484871208667755, "eval_runtime": 26.7519, "eval_samples_per_second": 17.27, "eval_steps_per_second": 0.561, "step": 4620 }, { "epoch": 142.77, "learning_rate": 2.956469825743613e-06, "loss": 0.1201, "step": 4640 }, { "epoch": 142.77, "eval_loss": 0.14845435321331024, "eval_runtime": 26.7438, "eval_samples_per_second": 17.275, "eval_steps_per_second": 0.561, "step": 4640 }, { "epoch": 143.38, "learning_rate": 2.6269450879825243e-06, "loss": 0.1198, "step": 4660 }, { "epoch": 143.38, "eval_loss": 0.1484329104423523, "eval_runtime": 26.7558, "eval_samples_per_second": 17.267, "eval_steps_per_second": 0.561, "step": 4660 }, { "epoch": 144.0, "learning_rate": 2.316641873331704e-06, "loss": 0.12, "step": 4680 }, { "epoch": 144.0, "eval_loss": 0.14837703108787537, "eval_runtime": 26.7449, "eval_samples_per_second": 17.274, "eval_steps_per_second": 0.561, "step": 4680 }, { "epoch": 144.62, "learning_rate": 2.025621435907221e-06, "loss": 0.1197, "step": 4700 }, { "epoch": 144.62, "eval_loss": 0.14830969274044037, "eval_runtime": 26.7686, "eval_samples_per_second": 17.259, "eval_steps_per_second": 0.56, "step": 4700 }, { "epoch": 145.23, "learning_rate": 1.753941223388733e-06, "loss": 0.1195, "step": 4720 }, { "epoch": 145.23, "eval_loss": 0.14829565584659576, "eval_runtime": 26.7574, "eval_samples_per_second": 17.266, "eval_steps_per_second": 0.561, "step": 4720 }, { "epoch": 145.85, "learning_rate": 1.5016548656791697e-06, "loss": 0.1206, "step": 4740 }, { "epoch": 145.85, "eval_loss": 0.1482698619365692, "eval_runtime": 26.744, "eval_samples_per_second": 17.275, "eval_steps_per_second": 0.561, "step": 4740 }, { "epoch": 146.46, "learning_rate": 1.2688121643181893e-06, "loss": 0.1211, "step": 4760 }, { "epoch": 146.46, "eval_loss": 0.1482834368944168, "eval_runtime": 26.7646, "eval_samples_per_second": 17.262, "eval_steps_per_second": 0.56, "step": 4760 }, { "epoch": 147.08, "learning_rate": 1.0554590826512778e-06, "loss": 0.1196, "step": 4780 }, { "epoch": 147.08, "eval_loss": 0.14823544025421143, "eval_runtime": 26.7343, "eval_samples_per_second": 17.281, "eval_steps_per_second": 0.561, "step": 4780 } ], "logging_steps": 20, "max_steps": 4968, "num_input_tokens_seen": 0, "num_train_epochs": 156, "save_steps": 20, "total_flos": 9874827745689600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }