{ "best_metric": 2.2190773487091064, "best_model_checkpoint": "./model_tweets_2020_Q1_25/checkpoint-1952000", "epoch": 6.556194784547049, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_loss": 2.5883126258850098, "eval_runtime": 332.1552, "eval_samples_per_second": 928.078, "eval_steps_per_second": 58.006, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.0726666666666665e-07, "loss": 2.7561, "step": 16000 }, { "epoch": 0.04, "eval_loss": 2.4957592487335205, "eval_runtime": 332.3107, "eval_samples_per_second": 927.644, "eval_steps_per_second": 57.979, "step": 16000 }, { "epoch": 0.07, "eval_loss": 2.437556266784668, "eval_runtime": 332.4781, "eval_samples_per_second": 927.177, "eval_steps_per_second": 57.95, "step": 24000 }, { "epoch": 0.09, "learning_rate": 4.0453333333333336e-07, "loss": 2.531, "step": 32000 }, { "epoch": 0.09, "eval_loss": 2.409024477005005, "eval_runtime": 332.2129, "eval_samples_per_second": 927.917, "eval_steps_per_second": 57.996, "step": 32000 }, { "epoch": 0.11, "eval_loss": 2.3791210651397705, "eval_runtime": 331.9964, "eval_samples_per_second": 928.522, "eval_steps_per_second": 58.034, "step": 40000 }, { "epoch": 0.13, "learning_rate": 4.018e-07, "loss": 2.4627, "step": 48000 }, { "epoch": 0.13, "eval_loss": 2.3666348457336426, "eval_runtime": 332.5374, "eval_samples_per_second": 927.011, "eval_steps_per_second": 57.939, "step": 48000 }, { "epoch": 0.15, "eval_loss": 2.3457391262054443, "eval_runtime": 333.7346, "eval_samples_per_second": 923.686, "eval_steps_per_second": 57.732, "step": 56000 }, { "epoch": 0.17, "learning_rate": 3.9906666666666667e-07, "loss": 2.4252, "step": 64000 }, { "epoch": 0.17, "eval_loss": 2.337965726852417, "eval_runtime": 331.9355, "eval_samples_per_second": 928.693, "eval_steps_per_second": 58.044, "step": 64000 }, { "epoch": 0.2, "eval_loss": 2.329805374145508, "eval_runtime": 333.3329, "eval_samples_per_second": 924.799, "eval_steps_per_second": 57.801, "step": 72000 }, { "epoch": 0.22, "learning_rate": 3.963333333333333e-07, "loss": 2.4061, "step": 80000 }, { "epoch": 0.22, "eval_loss": 2.325295925140381, "eval_runtime": 334.0512, "eval_samples_per_second": 922.811, "eval_steps_per_second": 57.677, "step": 80000 }, { "epoch": 0.24, "eval_loss": 2.317714214324951, "eval_runtime": 334.2839, "eval_samples_per_second": 922.168, "eval_steps_per_second": 57.637, "step": 88000 }, { "epoch": 0.26, "learning_rate": 3.936e-07, "loss": 2.395, "step": 96000 }, { "epoch": 0.26, "eval_loss": 2.3130738735198975, "eval_runtime": 334.6418, "eval_samples_per_second": 921.182, "eval_steps_per_second": 57.575, "step": 96000 }, { "epoch": 0.28, "eval_loss": 2.305846691131592, "eval_runtime": 333.8278, "eval_samples_per_second": 923.428, "eval_steps_per_second": 57.715, "step": 104000 }, { "epoch": 0.31, "learning_rate": 3.908666666666667e-07, "loss": 2.3843, "step": 112000 }, { "epoch": 0.31, "eval_loss": 2.3009700775146484, "eval_runtime": 334.6239, "eval_samples_per_second": 921.231, "eval_steps_per_second": 57.578, "step": 112000 }, { "epoch": 0.33, "eval_loss": 2.2925422191619873, "eval_runtime": 335.4315, "eval_samples_per_second": 919.013, "eval_steps_per_second": 57.439, "step": 120000 }, { "epoch": 0.35, "learning_rate": 3.8813333333333334e-07, "loss": 2.3738, "step": 128000 }, { "epoch": 0.35, "eval_loss": 2.2916228771209717, "eval_runtime": 334.3802, "eval_samples_per_second": 921.903, "eval_steps_per_second": 57.62, "step": 128000 }, { "epoch": 0.37, "eval_loss": 2.29465389251709, "eval_runtime": 333.899, "eval_samples_per_second": 923.231, "eval_steps_per_second": 57.703, "step": 136000 }, { "epoch": 0.39, "learning_rate": 3.854e-07, "loss": 2.3686, "step": 144000 }, { "epoch": 0.39, "eval_loss": 2.2834742069244385, "eval_runtime": 334.2854, "eval_samples_per_second": 922.164, "eval_steps_per_second": 57.636, "step": 144000 }, { "epoch": 0.42, "eval_loss": 2.2863762378692627, "eval_runtime": 334.9469, "eval_samples_per_second": 920.343, "eval_steps_per_second": 57.523, "step": 152000 }, { "epoch": 0.44, "learning_rate": 3.8266666666666665e-07, "loss": 2.3615, "step": 160000 }, { "epoch": 0.44, "eval_loss": 2.283402919769287, "eval_runtime": 340.7251, "eval_samples_per_second": 904.735, "eval_steps_per_second": 56.547, "step": 160000 }, { "epoch": 0.46, "eval_loss": 2.276758909225464, "eval_runtime": 337.1601, "eval_samples_per_second": 914.301, "eval_steps_per_second": 57.145, "step": 168000 }, { "epoch": 0.48, "learning_rate": 3.799333333333333e-07, "loss": 2.3515, "step": 176000 }, { "epoch": 0.48, "eval_loss": 2.280259847640991, "eval_runtime": 339.3606, "eval_samples_per_second": 908.373, "eval_steps_per_second": 56.774, "step": 176000 }, { "epoch": 0.5, "eval_loss": 2.280385732650757, "eval_runtime": 337.3298, "eval_samples_per_second": 913.841, "eval_steps_per_second": 57.116, "step": 184000 }, { "epoch": 0.52, "learning_rate": 3.772e-07, "loss": 2.3508, "step": 192000 }, { "epoch": 0.52, "eval_loss": 2.275428295135498, "eval_runtime": 335.956, "eval_samples_per_second": 917.578, "eval_steps_per_second": 57.35, "step": 192000 }, { "epoch": 0.55, "eval_loss": 2.2767016887664795, "eval_runtime": 335.4697, "eval_samples_per_second": 918.909, "eval_steps_per_second": 57.433, "step": 200000 }, { "epoch": 0.57, "learning_rate": 3.7446666666666667e-07, "loss": 2.35, "step": 208000 }, { "epoch": 0.57, "eval_loss": 2.2741713523864746, "eval_runtime": 335.6132, "eval_samples_per_second": 918.516, "eval_steps_per_second": 57.408, "step": 208000 }, { "epoch": 0.59, "eval_loss": 2.272230386734009, "eval_runtime": 334.8971, "eval_samples_per_second": 920.48, "eval_steps_per_second": 57.531, "step": 216000 }, { "epoch": 0.61, "learning_rate": 3.7173333333333333e-07, "loss": 2.3385, "step": 224000 }, { "epoch": 0.61, "eval_loss": 2.266052484512329, "eval_runtime": 336.4044, "eval_samples_per_second": 916.356, "eval_steps_per_second": 57.273, "step": 224000 }, { "epoch": 0.63, "eval_loss": 2.2705941200256348, "eval_runtime": 333.5697, "eval_samples_per_second": 924.143, "eval_steps_per_second": 57.76, "step": 232000 }, { "epoch": 0.66, "learning_rate": 3.69e-07, "loss": 2.3393, "step": 240000 }, { "epoch": 0.66, "eval_loss": 2.2633328437805176, "eval_runtime": 337.2689, "eval_samples_per_second": 914.007, "eval_steps_per_second": 57.127, "step": 240000 }, { "epoch": 0.68, "eval_loss": 2.26476788520813, "eval_runtime": 335.5887, "eval_samples_per_second": 918.583, "eval_steps_per_second": 57.413, "step": 248000 }, { "epoch": 0.7, "learning_rate": 3.6626666666666664e-07, "loss": 2.3392, "step": 256000 }, { "epoch": 0.7, "eval_loss": 2.2656445503234863, "eval_runtime": 336.8746, "eval_samples_per_second": 915.076, "eval_steps_per_second": 57.193, "step": 256000 }, { "epoch": 0.72, "eval_loss": 2.2660086154937744, "eval_runtime": 335.5863, "eval_samples_per_second": 918.589, "eval_steps_per_second": 57.413, "step": 264000 }, { "epoch": 0.74, "learning_rate": 3.6353333333333335e-07, "loss": 2.3336, "step": 272000 }, { "epoch": 0.74, "eval_loss": 2.265676736831665, "eval_runtime": 336.9338, "eval_samples_per_second": 914.916, "eval_steps_per_second": 57.183, "step": 272000 }, { "epoch": 0.76, "eval_loss": 2.2604660987854004, "eval_runtime": 334.7066, "eval_samples_per_second": 921.004, "eval_steps_per_second": 57.564, "step": 280000 }, { "epoch": 0.79, "learning_rate": 3.608e-07, "loss": 2.3324, "step": 288000 }, { "epoch": 0.79, "eval_loss": 2.261540651321411, "eval_runtime": 336.5364, "eval_samples_per_second": 915.996, "eval_steps_per_second": 57.251, "step": 288000 }, { "epoch": 0.81, "eval_loss": 2.255148410797119, "eval_runtime": 334.2192, "eval_samples_per_second": 922.347, "eval_steps_per_second": 57.648, "step": 296000 }, { "epoch": 0.83, "learning_rate": 3.5806666666666666e-07, "loss": 2.3312, "step": 304000 }, { "epoch": 0.83, "eval_loss": 2.2581000328063965, "eval_runtime": 334.0976, "eval_samples_per_second": 922.683, "eval_steps_per_second": 57.669, "step": 304000 }, { "epoch": 0.85, "eval_loss": 2.262571334838867, "eval_runtime": 333.6502, "eval_samples_per_second": 923.92, "eval_steps_per_second": 57.746, "step": 312000 }, { "epoch": 0.87, "learning_rate": 3.553333333333333e-07, "loss": 2.3352, "step": 320000 }, { "epoch": 0.87, "eval_loss": 2.2576231956481934, "eval_runtime": 334.3477, "eval_samples_per_second": 921.992, "eval_steps_per_second": 57.626, "step": 320000 }, { "epoch": 0.9, "eval_loss": 2.2552905082702637, "eval_runtime": 334.1252, "eval_samples_per_second": 922.606, "eval_steps_per_second": 57.664, "step": 328000 }, { "epoch": 0.92, "learning_rate": 3.5259999999999997e-07, "loss": 2.3287, "step": 336000 }, { "epoch": 0.92, "eval_loss": 2.259079694747925, "eval_runtime": 336.4319, "eval_samples_per_second": 916.281, "eval_steps_per_second": 57.269, "step": 336000 }, { "epoch": 0.94, "eval_loss": 2.2557525634765625, "eval_runtime": 334.0372, "eval_samples_per_second": 922.849, "eval_steps_per_second": 57.679, "step": 344000 }, { "epoch": 0.96, "learning_rate": 3.498666666666667e-07, "loss": 2.321, "step": 352000 }, { "epoch": 0.96, "eval_loss": 2.260307788848877, "eval_runtime": 335.3355, "eval_samples_per_second": 919.276, "eval_steps_per_second": 57.456, "step": 352000 }, { "epoch": 0.98, "eval_loss": 2.256932258605957, "eval_runtime": 334.5306, "eval_samples_per_second": 921.488, "eval_steps_per_second": 57.594, "step": 360000 }, { "epoch": 1.01, "learning_rate": 3.4713333333333333e-07, "loss": 2.3278, "step": 368000 }, { "epoch": 1.01, "eval_loss": 2.254404067993164, "eval_runtime": 335.0696, "eval_samples_per_second": 920.006, "eval_steps_per_second": 57.501, "step": 368000 }, { "epoch": 1.03, "eval_loss": 2.260352373123169, "eval_runtime": 334.4596, "eval_samples_per_second": 921.684, "eval_steps_per_second": 57.606, "step": 376000 }, { "epoch": 1.05, "learning_rate": 3.444e-07, "loss": 2.319, "step": 384000 }, { "epoch": 1.05, "eval_loss": 2.25346040725708, "eval_runtime": 334.8356, "eval_samples_per_second": 920.649, "eval_steps_per_second": 57.542, "step": 384000 }, { "epoch": 1.07, "eval_loss": 2.241976499557495, "eval_runtime": 335.4873, "eval_samples_per_second": 918.86, "eval_steps_per_second": 57.43, "step": 392000 }, { "epoch": 1.09, "learning_rate": 3.416666666666667e-07, "loss": 2.3151, "step": 400000 }, { "epoch": 1.09, "eval_loss": 2.2583224773406982, "eval_runtime": 335.4778, "eval_samples_per_second": 918.887, "eval_steps_per_second": 57.432, "step": 400000 }, { "epoch": 1.11, "eval_loss": 2.2534608840942383, "eval_runtime": 335.9955, "eval_samples_per_second": 917.471, "eval_steps_per_second": 57.343, "step": 408000 }, { "epoch": 1.14, "learning_rate": 3.3893333333333335e-07, "loss": 2.3144, "step": 416000 }, { "epoch": 1.14, "eval_loss": 2.258167266845703, "eval_runtime": 335.4405, "eval_samples_per_second": 918.989, "eval_steps_per_second": 57.438, "step": 416000 }, { "epoch": 1.16, "eval_loss": 2.249640464782715, "eval_runtime": 334.4567, "eval_samples_per_second": 921.692, "eval_steps_per_second": 57.607, "step": 424000 }, { "epoch": 1.18, "learning_rate": 3.3619999999999995e-07, "loss": 2.3191, "step": 432000 }, { "epoch": 1.18, "eval_loss": 2.2531871795654297, "eval_runtime": 335.2874, "eval_samples_per_second": 919.408, "eval_steps_per_second": 57.464, "step": 432000 }, { "epoch": 1.2, "eval_loss": 2.2515294551849365, "eval_runtime": 335.0984, "eval_samples_per_second": 919.927, "eval_steps_per_second": 57.497, "step": 440000 }, { "epoch": 1.22, "learning_rate": 3.3346666666666666e-07, "loss": 2.3168, "step": 448000 }, { "epoch": 1.22, "eval_loss": 2.2500967979431152, "eval_runtime": 336.248, "eval_samples_per_second": 916.782, "eval_steps_per_second": 57.3, "step": 448000 }, { "epoch": 1.25, "eval_loss": 2.245288848876953, "eval_runtime": 337.1188, "eval_samples_per_second": 914.414, "eval_steps_per_second": 57.152, "step": 456000 }, { "epoch": 1.27, "learning_rate": 3.307333333333333e-07, "loss": 2.3156, "step": 464000 }, { "epoch": 1.27, "eval_loss": 2.2444746494293213, "eval_runtime": 335.5009, "eval_samples_per_second": 918.823, "eval_steps_per_second": 57.428, "step": 464000 }, { "epoch": 1.29, "eval_loss": 2.2484536170959473, "eval_runtime": 334.7608, "eval_samples_per_second": 920.855, "eval_steps_per_second": 57.555, "step": 472000 }, { "epoch": 1.31, "learning_rate": 3.28e-07, "loss": 2.3178, "step": 480000 }, { "epoch": 1.31, "eval_loss": 2.249347448348999, "eval_runtime": 335.2634, "eval_samples_per_second": 919.474, "eval_steps_per_second": 57.468, "step": 480000 }, { "epoch": 1.33, "eval_loss": 2.244321346282959, "eval_runtime": 335.8842, "eval_samples_per_second": 917.775, "eval_steps_per_second": 57.362, "step": 488000 }, { "epoch": 1.35, "learning_rate": 3.252666666666667e-07, "loss": 2.3113, "step": 496000 }, { "epoch": 1.35, "eval_loss": 2.2493255138397217, "eval_runtime": 336.0104, "eval_samples_per_second": 917.43, "eval_steps_per_second": 57.34, "step": 496000 }, { "epoch": 1.38, "eval_loss": 2.2493276596069336, "eval_runtime": 336.0104, "eval_samples_per_second": 917.43, "eval_steps_per_second": 57.34, "step": 504000 }, { "epoch": 1.4, "learning_rate": 3.2253333333333334e-07, "loss": 2.3116, "step": 512000 }, { "epoch": 1.4, "eval_loss": 2.2483131885528564, "eval_runtime": 335.3926, "eval_samples_per_second": 919.12, "eval_steps_per_second": 57.446, "step": 512000 }, { "epoch": 1.42, "eval_loss": 2.245854616165161, "eval_runtime": 335.4507, "eval_samples_per_second": 918.961, "eval_steps_per_second": 57.436, "step": 520000 }, { "epoch": 1.44, "learning_rate": 3.198e-07, "loss": 2.3166, "step": 528000 }, { "epoch": 1.44, "eval_loss": 2.248081922531128, "eval_runtime": 336.9072, "eval_samples_per_second": 914.988, "eval_steps_per_second": 57.188, "step": 528000 }, { "epoch": 1.46, "eval_loss": 2.254246711730957, "eval_runtime": 335.1811, "eval_samples_per_second": 919.7, "eval_steps_per_second": 57.482, "step": 536000 }, { "epoch": 1.49, "learning_rate": 3.1706666666666665e-07, "loss": 2.3158, "step": 544000 }, { "epoch": 1.49, "eval_loss": 2.244293451309204, "eval_runtime": 336.0363, "eval_samples_per_second": 917.359, "eval_steps_per_second": 57.336, "step": 544000 }, { "epoch": 1.51, "eval_loss": 2.240211009979248, "eval_runtime": 335.2536, "eval_samples_per_second": 919.501, "eval_steps_per_second": 57.47, "step": 552000 }, { "epoch": 1.53, "learning_rate": 3.1433333333333336e-07, "loss": 2.3148, "step": 560000 }, { "epoch": 1.53, "eval_loss": 2.2449421882629395, "eval_runtime": 335.5679, "eval_samples_per_second": 918.64, "eval_steps_per_second": 57.416, "step": 560000 }, { "epoch": 1.55, "eval_loss": 2.2415246963500977, "eval_runtime": 337.8499, "eval_samples_per_second": 912.435, "eval_steps_per_second": 57.028, "step": 568000 }, { "epoch": 1.57, "learning_rate": 3.116e-07, "loss": 2.3145, "step": 576000 }, { "epoch": 1.57, "eval_loss": 2.2471208572387695, "eval_runtime": 338.2882, "eval_samples_per_second": 911.253, "eval_steps_per_second": 56.954, "step": 576000 }, { "epoch": 1.6, "eval_loss": 2.2469303607940674, "eval_runtime": 338.2218, "eval_samples_per_second": 911.431, "eval_steps_per_second": 56.966, "step": 584000 }, { "epoch": 1.62, "learning_rate": 3.0886666666666667e-07, "loss": 2.3119, "step": 592000 }, { "epoch": 1.62, "eval_loss": 2.244541645050049, "eval_runtime": 336.9016, "eval_samples_per_second": 915.003, "eval_steps_per_second": 57.189, "step": 592000 }, { "epoch": 1.64, "eval_loss": 2.248655080795288, "eval_runtime": 335.5748, "eval_samples_per_second": 918.621, "eval_steps_per_second": 57.415, "step": 600000 }, { "epoch": 1.66, "learning_rate": 3.061333333333333e-07, "loss": 2.3045, "step": 608000 }, { "epoch": 1.66, "eval_loss": 2.2455668449401855, "eval_runtime": 335.9119, "eval_samples_per_second": 917.699, "eval_steps_per_second": 57.357, "step": 608000 }, { "epoch": 1.68, "eval_loss": 2.246622085571289, "eval_runtime": 335.9733, "eval_samples_per_second": 917.531, "eval_steps_per_second": 57.347, "step": 616000 }, { "epoch": 1.7, "learning_rate": 3.034e-07, "loss": 2.3046, "step": 624000 }, { "epoch": 1.7, "eval_loss": 2.2357234954833984, "eval_runtime": 337.5898, "eval_samples_per_second": 913.138, "eval_steps_per_second": 57.072, "step": 624000 }, { "epoch": 1.73, "eval_loss": 2.244824171066284, "eval_runtime": 336.5413, "eval_samples_per_second": 915.983, "eval_steps_per_second": 57.25, "step": 632000 }, { "epoch": 1.75, "learning_rate": 3.0066666666666663e-07, "loss": 2.3083, "step": 640000 }, { "epoch": 1.75, "eval_loss": 2.2381463050842285, "eval_runtime": 337.7594, "eval_samples_per_second": 912.679, "eval_steps_per_second": 57.044, "step": 640000 }, { "epoch": 1.77, "eval_loss": 2.24391770362854, "eval_runtime": 337.5805, "eval_samples_per_second": 913.163, "eval_steps_per_second": 57.074, "step": 648000 }, { "epoch": 1.79, "learning_rate": 2.9793333333333334e-07, "loss": 2.3065, "step": 656000 }, { "epoch": 1.79, "eval_loss": 2.2402257919311523, "eval_runtime": 336.8532, "eval_samples_per_second": 915.135, "eval_steps_per_second": 57.197, "step": 656000 }, { "epoch": 1.81, "eval_loss": 2.2439496517181396, "eval_runtime": 337.4274, "eval_samples_per_second": 913.577, "eval_steps_per_second": 57.1, "step": 664000 }, { "epoch": 1.84, "learning_rate": 2.952e-07, "loss": 2.307, "step": 672000 }, { "epoch": 1.84, "eval_loss": 2.2409019470214844, "eval_runtime": 336.4075, "eval_samples_per_second": 916.347, "eval_steps_per_second": 57.273, "step": 672000 }, { "epoch": 1.86, "eval_loss": 2.2425954341888428, "eval_runtime": 336.4355, "eval_samples_per_second": 916.271, "eval_steps_per_second": 57.268, "step": 680000 }, { "epoch": 1.88, "learning_rate": 2.9246666666666665e-07, "loss": 2.3026, "step": 688000 }, { "epoch": 1.88, "eval_loss": 2.2386796474456787, "eval_runtime": 336.9441, "eval_samples_per_second": 914.888, "eval_steps_per_second": 57.182, "step": 688000 }, { "epoch": 1.9, "eval_loss": 2.235677719116211, "eval_runtime": 337.5327, "eval_samples_per_second": 913.292, "eval_steps_per_second": 57.082, "step": 696000 }, { "epoch": 1.92, "learning_rate": 2.897333333333333e-07, "loss": 2.2949, "step": 704000 }, { "epoch": 1.92, "eval_loss": 2.237877607345581, "eval_runtime": 336.876, "eval_samples_per_second": 915.073, "eval_steps_per_second": 57.193, "step": 704000 }, { "epoch": 1.95, "eval_loss": 2.2408130168914795, "eval_runtime": 338.2422, "eval_samples_per_second": 911.376, "eval_steps_per_second": 56.962, "step": 712000 }, { "epoch": 1.97, "learning_rate": 2.8699999999999996e-07, "loss": 2.2951, "step": 720000 }, { "epoch": 1.97, "eval_loss": 2.2431986331939697, "eval_runtime": 337.6174, "eval_samples_per_second": 913.063, "eval_steps_per_second": 57.068, "step": 720000 }, { "epoch": 1.99, "eval_loss": 2.244434118270874, "eval_runtime": 337.68, "eval_samples_per_second": 912.894, "eval_steps_per_second": 57.057, "step": 728000 }, { "epoch": 2.01, "learning_rate": 2.8426666666666667e-07, "loss": 2.3011, "step": 736000 }, { "epoch": 2.01, "eval_loss": 2.2381932735443115, "eval_runtime": 336.7238, "eval_samples_per_second": 915.486, "eval_steps_per_second": 57.219, "step": 736000 }, { "epoch": 2.03, "eval_loss": 2.2391436100006104, "eval_runtime": 337.1171, "eval_samples_per_second": 914.418, "eval_steps_per_second": 57.152, "step": 744000 }, { "epoch": 2.05, "learning_rate": 2.815333333333333e-07, "loss": 2.3017, "step": 752000 }, { "epoch": 2.05, "eval_loss": 2.236323833465576, "eval_runtime": 338.3716, "eval_samples_per_second": 911.028, "eval_steps_per_second": 56.94, "step": 752000 }, { "epoch": 2.08, "eval_loss": 2.2444024085998535, "eval_runtime": 337.1979, "eval_samples_per_second": 914.199, "eval_steps_per_second": 57.139, "step": 760000 }, { "epoch": 2.1, "learning_rate": 2.7880000000000003e-07, "loss": 2.2978, "step": 768000 }, { "epoch": 2.1, "eval_loss": 2.2370431423187256, "eval_runtime": 338.942, "eval_samples_per_second": 909.495, "eval_steps_per_second": 56.845, "step": 768000 }, { "epoch": 2.12, "eval_loss": 2.2350406646728516, "eval_runtime": 337.2566, "eval_samples_per_second": 914.04, "eval_steps_per_second": 57.129, "step": 776000 }, { "epoch": 2.14, "learning_rate": 2.7606666666666664e-07, "loss": 2.2961, "step": 784000 }, { "epoch": 2.14, "eval_loss": 2.234744071960449, "eval_runtime": 338.2021, "eval_samples_per_second": 911.485, "eval_steps_per_second": 56.969, "step": 784000 }, { "epoch": 2.16, "eval_loss": 2.238616704940796, "eval_runtime": 338.0434, "eval_samples_per_second": 911.913, "eval_steps_per_second": 56.996, "step": 792000 }, { "epoch": 2.19, "learning_rate": 2.733333333333333e-07, "loss": 2.2968, "step": 800000 }, { "epoch": 2.19, "eval_loss": 2.2322075366973877, "eval_runtime": 337.896, "eval_samples_per_second": 912.31, "eval_steps_per_second": 57.021, "step": 800000 }, { "epoch": 2.21, "eval_loss": 2.240255117416382, "eval_runtime": 338.7394, "eval_samples_per_second": 910.039, "eval_steps_per_second": 56.879, "step": 808000 }, { "epoch": 2.23, "learning_rate": 2.706e-07, "loss": 2.2962, "step": 816000 }, { "epoch": 2.23, "eval_loss": 2.2347311973571777, "eval_runtime": 339.8784, "eval_samples_per_second": 906.989, "eval_steps_per_second": 56.688, "step": 816000 }, { "epoch": 2.25, "eval_loss": 2.239776372909546, "eval_runtime": 338.4053, "eval_samples_per_second": 910.937, "eval_steps_per_second": 56.935, "step": 824000 }, { "epoch": 2.27, "learning_rate": 2.6786666666666666e-07, "loss": 2.2984, "step": 832000 }, { "epoch": 2.27, "eval_loss": 2.235778570175171, "eval_runtime": 338.4942, "eval_samples_per_second": 910.698, "eval_steps_per_second": 56.92, "step": 832000 }, { "epoch": 2.29, "eval_loss": 2.2412359714508057, "eval_runtime": 338.0942, "eval_samples_per_second": 911.775, "eval_steps_per_second": 56.987, "step": 840000 }, { "epoch": 2.32, "learning_rate": 2.651333333333333e-07, "loss": 2.3029, "step": 848000 }, { "epoch": 2.32, "eval_loss": 2.238647937774658, "eval_runtime": 338.342, "eval_samples_per_second": 911.108, "eval_steps_per_second": 56.945, "step": 848000 }, { "epoch": 2.34, "eval_loss": 2.234633445739746, "eval_runtime": 339.1684, "eval_samples_per_second": 908.888, "eval_steps_per_second": 56.807, "step": 856000 }, { "epoch": 2.36, "learning_rate": 2.624e-07, "loss": 2.2985, "step": 864000 }, { "epoch": 2.36, "eval_loss": 2.2323224544525146, "eval_runtime": 340.1121, "eval_samples_per_second": 906.366, "eval_steps_per_second": 56.649, "step": 864000 }, { "epoch": 2.38, "eval_loss": 2.2386910915374756, "eval_runtime": 338.6678, "eval_samples_per_second": 910.231, "eval_steps_per_second": 56.891, "step": 872000 }, { "epoch": 2.4, "learning_rate": 2.596666666666667e-07, "loss": 2.2922, "step": 880000 }, { "epoch": 2.4, "eval_loss": 2.230320453643799, "eval_runtime": 338.2571, "eval_samples_per_second": 911.336, "eval_steps_per_second": 56.96, "step": 880000 }, { "epoch": 2.43, "eval_loss": 2.232644557952881, "eval_runtime": 338.4677, "eval_samples_per_second": 910.769, "eval_steps_per_second": 56.924, "step": 888000 }, { "epoch": 2.45, "learning_rate": 2.5693333333333333e-07, "loss": 2.2967, "step": 896000 }, { "epoch": 2.45, "eval_loss": 2.2422056198120117, "eval_runtime": 338.7421, "eval_samples_per_second": 910.031, "eval_steps_per_second": 56.878, "step": 896000 }, { "epoch": 2.47, "eval_loss": 2.235010862350464, "eval_runtime": 339.4694, "eval_samples_per_second": 908.082, "eval_steps_per_second": 56.756, "step": 904000 }, { "epoch": 2.49, "learning_rate": 2.542e-07, "loss": 2.2917, "step": 912000 }, { "epoch": 2.49, "eval_loss": 2.2299275398254395, "eval_runtime": 339.4601, "eval_samples_per_second": 908.107, "eval_steps_per_second": 56.758, "step": 912000 }, { "epoch": 2.51, "eval_loss": 2.2307727336883545, "eval_runtime": 339.5941, "eval_samples_per_second": 907.748, "eval_steps_per_second": 56.735, "step": 920000 }, { "epoch": 2.54, "learning_rate": 2.5146666666666664e-07, "loss": 2.2912, "step": 928000 }, { "epoch": 2.54, "eval_loss": 2.23453688621521, "eval_runtime": 339.4986, "eval_samples_per_second": 908.004, "eval_steps_per_second": 56.751, "step": 928000 }, { "epoch": 2.56, "eval_loss": 2.2263941764831543, "eval_runtime": 340.6179, "eval_samples_per_second": 905.02, "eval_steps_per_second": 56.565, "step": 936000 }, { "epoch": 2.58, "learning_rate": 2.4873333333333335e-07, "loss": 2.2887, "step": 944000 }, { "epoch": 2.58, "eval_loss": 2.236109972000122, "eval_runtime": 339.6326, "eval_samples_per_second": 907.646, "eval_steps_per_second": 56.729, "step": 944000 }, { "epoch": 2.6, "eval_loss": 2.2318520545959473, "eval_runtime": 342.6146, "eval_samples_per_second": 899.746, "eval_steps_per_second": 56.235, "step": 952000 }, { "epoch": 2.62, "learning_rate": 2.46e-07, "loss": 2.2956, "step": 960000 }, { "epoch": 2.62, "eval_loss": 2.2339940071105957, "eval_runtime": 342.333, "eval_samples_per_second": 900.486, "eval_steps_per_second": 56.281, "step": 960000 }, { "epoch": 2.64, "eval_loss": 2.235605478286743, "eval_runtime": 342.7919, "eval_samples_per_second": 899.28, "eval_steps_per_second": 56.206, "step": 968000 }, { "epoch": 2.67, "learning_rate": 2.4326666666666666e-07, "loss": 2.2927, "step": 976000 }, { "epoch": 2.67, "eval_loss": 2.2365500926971436, "eval_runtime": 342.6264, "eval_samples_per_second": 899.715, "eval_steps_per_second": 56.233, "step": 976000 }, { "epoch": 2.69, "eval_loss": 2.2334821224212646, "eval_runtime": 342.0197, "eval_samples_per_second": 901.311, "eval_steps_per_second": 56.333, "step": 984000 }, { "epoch": 2.71, "learning_rate": 2.405333333333333e-07, "loss": 2.2872, "step": 992000 }, { "epoch": 2.71, "eval_loss": 2.232952833175659, "eval_runtime": 343.3686, "eval_samples_per_second": 897.77, "eval_steps_per_second": 56.112, "step": 992000 }, { "epoch": 2.73, "eval_loss": 2.225148916244507, "eval_runtime": 342.4862, "eval_samples_per_second": 900.083, "eval_steps_per_second": 56.256, "step": 1000000 }, { "epoch": 2.75, "learning_rate": 2.3779999999999997e-07, "loss": 2.2936, "step": 1008000 }, { "epoch": 2.75, "eval_loss": 2.232741594314575, "eval_runtime": 342.7157, "eval_samples_per_second": 899.48, "eval_steps_per_second": 56.219, "step": 1008000 }, { "epoch": 2.78, "eval_loss": 2.2326343059539795, "eval_runtime": 344.8133, "eval_samples_per_second": 894.008, "eval_steps_per_second": 55.877, "step": 1016000 }, { "epoch": 2.8, "learning_rate": 2.3506666666666668e-07, "loss": 2.2899, "step": 1024000 }, { "epoch": 2.8, "eval_loss": 2.2306628227233887, "eval_runtime": 344.8753, "eval_samples_per_second": 893.848, "eval_steps_per_second": 55.867, "step": 1024000 }, { "epoch": 2.82, "eval_loss": 2.2291181087493896, "eval_runtime": 343.9896, "eval_samples_per_second": 896.149, "eval_steps_per_second": 56.01, "step": 1032000 }, { "epoch": 2.84, "learning_rate": 2.3233333333333334e-07, "loss": 2.2931, "step": 1040000 }, { "epoch": 2.84, "eval_loss": 2.228482723236084, "eval_runtime": 343.2535, "eval_samples_per_second": 898.071, "eval_steps_per_second": 56.131, "step": 1040000 }, { "epoch": 2.86, "eval_loss": 2.2326762676239014, "eval_runtime": 340.5017, "eval_samples_per_second": 905.329, "eval_steps_per_second": 56.584, "step": 1048000 }, { "epoch": 2.88, "learning_rate": 2.2960000000000002e-07, "loss": 2.3042, "step": 1056000 }, { "epoch": 2.88, "eval_loss": 2.2366533279418945, "eval_runtime": 342.3036, "eval_samples_per_second": 900.563, "eval_steps_per_second": 56.286, "step": 1056000 }, { "epoch": 2.91, "eval_loss": 2.2344892024993896, "eval_runtime": 341.9516, "eval_samples_per_second": 901.49, "eval_steps_per_second": 56.344, "step": 1064000 }, { "epoch": 2.93, "learning_rate": 2.2686666666666667e-07, "loss": 2.2864, "step": 1072000 }, { "epoch": 2.93, "eval_loss": 2.2267308235168457, "eval_runtime": 341.6099, "eval_samples_per_second": 902.392, "eval_steps_per_second": 56.401, "step": 1072000 }, { "epoch": 2.95, "eval_loss": 2.2342631816864014, "eval_runtime": 343.6113, "eval_samples_per_second": 897.136, "eval_steps_per_second": 56.072, "step": 1080000 }, { "epoch": 2.97, "learning_rate": 2.2413333333333333e-07, "loss": 2.2933, "step": 1088000 }, { "epoch": 2.97, "eval_loss": 2.235445261001587, "eval_runtime": 342.1009, "eval_samples_per_second": 901.097, "eval_steps_per_second": 56.32, "step": 1088000 }, { "epoch": 2.99, "eval_loss": 2.226022243499756, "eval_runtime": 343.4712, "eval_samples_per_second": 897.502, "eval_steps_per_second": 56.095, "step": 1096000 }, { "epoch": 3.02, "learning_rate": 2.214e-07, "loss": 2.2909, "step": 1104000 }, { "epoch": 3.02, "eval_loss": 2.2340822219848633, "eval_runtime": 342.2127, "eval_samples_per_second": 900.802, "eval_steps_per_second": 56.301, "step": 1104000 }, { "epoch": 3.04, "eval_loss": 2.2265801429748535, "eval_runtime": 344.5846, "eval_samples_per_second": 894.602, "eval_steps_per_second": 55.914, "step": 1112000 }, { "epoch": 3.06, "learning_rate": 2.1866666666666667e-07, "loss": 2.2889, "step": 1120000 }, { "epoch": 3.06, "eval_loss": 2.225277900695801, "eval_runtime": 343.7811, "eval_samples_per_second": 896.693, "eval_steps_per_second": 56.044, "step": 1120000 }, { "epoch": 3.08, "eval_loss": 2.225517511367798, "eval_runtime": 344.8771, "eval_samples_per_second": 893.843, "eval_steps_per_second": 55.866, "step": 1128000 }, { "epoch": 3.1, "learning_rate": 2.1593333333333332e-07, "loss": 2.292, "step": 1136000 }, { "epoch": 3.1, "eval_loss": 2.219359874725342, "eval_runtime": 342.5656, "eval_samples_per_second": 899.874, "eval_steps_per_second": 56.243, "step": 1136000 }, { "epoch": 3.13, "eval_loss": 2.2318532466888428, "eval_runtime": 342.1989, "eval_samples_per_second": 900.839, "eval_steps_per_second": 56.304, "step": 1144000 }, { "epoch": 3.15, "learning_rate": 2.132e-07, "loss": 2.282, "step": 1152000 }, { "epoch": 3.15, "eval_loss": 2.2221035957336426, "eval_runtime": 342.4265, "eval_samples_per_second": 900.24, "eval_steps_per_second": 56.266, "step": 1152000 }, { "epoch": 3.17, "eval_loss": 2.2272608280181885, "eval_runtime": 341.618, "eval_samples_per_second": 902.371, "eval_steps_per_second": 56.399, "step": 1160000 }, { "epoch": 3.19, "learning_rate": 2.1046666666666666e-07, "loss": 2.2827, "step": 1168000 }, { "epoch": 3.19, "eval_loss": 2.2295727729797363, "eval_runtime": 344.8334, "eval_samples_per_second": 893.956, "eval_steps_per_second": 55.873, "step": 1168000 }, { "epoch": 3.21, "eval_loss": 2.2331955432891846, "eval_runtime": 343.112, "eval_samples_per_second": 898.441, "eval_steps_per_second": 56.154, "step": 1176000 }, { "epoch": 3.23, "learning_rate": 2.0773333333333334e-07, "loss": 2.2937, "step": 1184000 }, { "epoch": 3.23, "eval_loss": 2.230241298675537, "eval_runtime": 342.0014, "eval_samples_per_second": 901.359, "eval_steps_per_second": 56.336, "step": 1184000 }, { "epoch": 3.26, "eval_loss": 2.2262063026428223, "eval_runtime": 344.0166, "eval_samples_per_second": 896.079, "eval_steps_per_second": 56.006, "step": 1192000 }, { "epoch": 3.28, "learning_rate": 2.05e-07, "loss": 2.2845, "step": 1200000 }, { "epoch": 3.28, "eval_loss": 2.231752872467041, "eval_runtime": 342.9925, "eval_samples_per_second": 898.754, "eval_steps_per_second": 56.173, "step": 1200000 }, { "epoch": 3.3, "eval_loss": 2.229050636291504, "eval_runtime": 342.7011, "eval_samples_per_second": 899.519, "eval_steps_per_second": 56.221, "step": 1208000 }, { "epoch": 3.32, "learning_rate": 2.0226666666666668e-07, "loss": 2.284, "step": 1216000 }, { "epoch": 3.32, "eval_loss": 2.232661008834839, "eval_runtime": 343.2876, "eval_samples_per_second": 897.982, "eval_steps_per_second": 56.125, "step": 1216000 }, { "epoch": 3.34, "eval_loss": 2.230750560760498, "eval_runtime": 343.2951, "eval_samples_per_second": 897.962, "eval_steps_per_second": 56.124, "step": 1224000 }, { "epoch": 3.37, "learning_rate": 1.9953333333333333e-07, "loss": 2.2923, "step": 1232000 }, { "epoch": 3.37, "eval_loss": 2.226369857788086, "eval_runtime": 343.209, "eval_samples_per_second": 898.188, "eval_steps_per_second": 56.138, "step": 1232000 }, { "epoch": 3.39, "eval_loss": 2.2390074729919434, "eval_runtime": 342.5512, "eval_samples_per_second": 899.912, "eval_steps_per_second": 56.246, "step": 1240000 }, { "epoch": 3.41, "learning_rate": 1.968e-07, "loss": 2.2859, "step": 1248000 }, { "epoch": 3.41, "eval_loss": 2.2309505939483643, "eval_runtime": 343.4164, "eval_samples_per_second": 897.645, "eval_steps_per_second": 56.104, "step": 1248000 }, { "epoch": 3.43, "eval_loss": 2.22867751121521, "eval_runtime": 342.6067, "eval_samples_per_second": 899.766, "eval_steps_per_second": 56.236, "step": 1256000 }, { "epoch": 3.45, "learning_rate": 1.9406666666666667e-07, "loss": 2.2879, "step": 1264000 }, { "epoch": 3.45, "eval_loss": 2.228405714035034, "eval_runtime": 345.1005, "eval_samples_per_second": 893.264, "eval_steps_per_second": 55.83, "step": 1264000 }, { "epoch": 3.47, "eval_loss": 2.2228379249572754, "eval_runtime": 343.2335, "eval_samples_per_second": 898.123, "eval_steps_per_second": 56.134, "step": 1272000 }, { "epoch": 3.5, "learning_rate": 1.9133333333333333e-07, "loss": 2.292, "step": 1280000 }, { "epoch": 3.5, "eval_loss": 2.2295541763305664, "eval_runtime": 343.6906, "eval_samples_per_second": 896.929, "eval_steps_per_second": 56.059, "step": 1280000 }, { "epoch": 3.52, "eval_loss": 2.232851982116699, "eval_runtime": 343.5216, "eval_samples_per_second": 897.37, "eval_steps_per_second": 56.087, "step": 1288000 }, { "epoch": 3.54, "learning_rate": 1.886e-07, "loss": 2.2827, "step": 1296000 }, { "epoch": 3.54, "eval_loss": 2.226313352584839, "eval_runtime": 343.9653, "eval_samples_per_second": 896.213, "eval_steps_per_second": 56.014, "step": 1296000 }, { "epoch": 3.56, "eval_loss": 2.2323992252349854, "eval_runtime": 345.0144, "eval_samples_per_second": 893.487, "eval_steps_per_second": 55.844, "step": 1304000 }, { "epoch": 3.58, "learning_rate": 1.8586666666666666e-07, "loss": 2.2829, "step": 1312000 }, { "epoch": 3.58, "eval_loss": 2.2231664657592773, "eval_runtime": 343.513, "eval_samples_per_second": 897.393, "eval_steps_per_second": 56.088, "step": 1312000 }, { "epoch": 3.61, "eval_loss": 2.2273144721984863, "eval_runtime": 344.0809, "eval_samples_per_second": 895.911, "eval_steps_per_second": 55.996, "step": 1320000 }, { "epoch": 3.63, "learning_rate": 1.8313333333333332e-07, "loss": 2.2863, "step": 1328000 }, { "epoch": 3.63, "eval_loss": 2.2296173572540283, "eval_runtime": 344.7931, "eval_samples_per_second": 894.061, "eval_steps_per_second": 55.88, "step": 1328000 }, { "epoch": 3.65, "eval_loss": 2.2293524742126465, "eval_runtime": 343.7387, "eval_samples_per_second": 896.803, "eval_steps_per_second": 56.051, "step": 1336000 }, { "epoch": 3.67, "learning_rate": 1.804e-07, "loss": 2.2796, "step": 1344000 }, { "epoch": 3.67, "eval_loss": 2.228300094604492, "eval_runtime": 345.0604, "eval_samples_per_second": 893.368, "eval_steps_per_second": 55.837, "step": 1344000 }, { "epoch": 3.69, "eval_loss": 2.2279834747314453, "eval_runtime": 343.4098, "eval_samples_per_second": 897.662, "eval_steps_per_second": 56.105, "step": 1352000 }, { "epoch": 3.72, "learning_rate": 1.7766666666666666e-07, "loss": 2.2835, "step": 1360000 }, { "epoch": 3.72, "eval_loss": 2.226436138153076, "eval_runtime": 344.8134, "eval_samples_per_second": 894.008, "eval_steps_per_second": 55.877, "step": 1360000 }, { "epoch": 3.74, "eval_loss": 2.222442150115967, "eval_runtime": 344.7708, "eval_samples_per_second": 894.119, "eval_steps_per_second": 55.883, "step": 1368000 }, { "epoch": 3.76, "learning_rate": 1.7493333333333334e-07, "loss": 2.2875, "step": 1376000 }, { "epoch": 3.76, "eval_loss": 2.2218754291534424, "eval_runtime": 346.0197, "eval_samples_per_second": 890.891, "eval_steps_per_second": 55.682, "step": 1376000 }, { "epoch": 3.78, "eval_loss": 2.224281072616577, "eval_runtime": 344.7226, "eval_samples_per_second": 894.244, "eval_steps_per_second": 55.891, "step": 1384000 }, { "epoch": 3.8, "learning_rate": 1.722e-07, "loss": 2.2792, "step": 1392000 }, { "epoch": 3.8, "eval_loss": 2.232009172439575, "eval_runtime": 344.233, "eval_samples_per_second": 895.516, "eval_steps_per_second": 55.971, "step": 1392000 }, { "epoch": 3.82, "eval_loss": 2.227288246154785, "eval_runtime": 344.3453, "eval_samples_per_second": 895.223, "eval_steps_per_second": 55.953, "step": 1400000 }, { "epoch": 3.85, "learning_rate": 1.6946666666666668e-07, "loss": 2.2932, "step": 1408000 }, { "epoch": 3.85, "eval_loss": 2.225741386413574, "eval_runtime": 343.9791, "eval_samples_per_second": 896.177, "eval_steps_per_second": 56.012, "step": 1408000 }, { "epoch": 3.87, "eval_loss": 2.235980749130249, "eval_runtime": 344.5804, "eval_samples_per_second": 894.613, "eval_steps_per_second": 55.914, "step": 1416000 }, { "epoch": 3.89, "learning_rate": 1.6673333333333333e-07, "loss": 2.2899, "step": 1424000 }, { "epoch": 3.89, "eval_loss": 2.227717399597168, "eval_runtime": 345.0207, "eval_samples_per_second": 893.471, "eval_steps_per_second": 55.843, "step": 1424000 }, { "epoch": 3.91, "eval_loss": 2.227459192276001, "eval_runtime": 345.1331, "eval_samples_per_second": 893.18, "eval_steps_per_second": 55.825, "step": 1432000 }, { "epoch": 3.93, "learning_rate": 1.64e-07, "loss": 2.2859, "step": 1440000 }, { "epoch": 3.93, "eval_loss": 2.228656530380249, "eval_runtime": 345.5823, "eval_samples_per_second": 892.019, "eval_steps_per_second": 55.752, "step": 1440000 }, { "epoch": 3.96, "eval_loss": 2.2210566997528076, "eval_runtime": 345.2225, "eval_samples_per_second": 892.949, "eval_steps_per_second": 55.81, "step": 1448000 }, { "epoch": 3.98, "learning_rate": 1.6126666666666667e-07, "loss": 2.2876, "step": 1456000 }, { "epoch": 3.98, "eval_loss": 2.2235565185546875, "eval_runtime": 345.2552, "eval_samples_per_second": 892.864, "eval_steps_per_second": 55.805, "step": 1456000 }, { "epoch": 4.0, "eval_loss": 2.2287678718566895, "eval_runtime": 345.4865, "eval_samples_per_second": 892.266, "eval_steps_per_second": 55.768, "step": 1464000 }, { "epoch": 4.02, "learning_rate": 1.5853333333333332e-07, "loss": 2.2879, "step": 1472000 }, { "epoch": 4.02, "eval_loss": 2.2225778102874756, "eval_runtime": 346.081, "eval_samples_per_second": 890.734, "eval_steps_per_second": 55.672, "step": 1472000 }, { "epoch": 4.04, "eval_loss": 2.2241647243499756, "eval_runtime": 345.7366, "eval_samples_per_second": 891.621, "eval_steps_per_second": 55.727, "step": 1480000 }, { "epoch": 4.06, "learning_rate": 1.558e-07, "loss": 2.282, "step": 1488000 }, { "epoch": 4.06, "eval_loss": 2.2286031246185303, "eval_runtime": 345.8095, "eval_samples_per_second": 891.433, "eval_steps_per_second": 55.716, "step": 1488000 }, { "epoch": 4.09, "eval_loss": 2.220984697341919, "eval_runtime": 346.175, "eval_samples_per_second": 890.492, "eval_steps_per_second": 55.657, "step": 1496000 }, { "epoch": 4.11, "learning_rate": 1.5306666666666666e-07, "loss": 2.2828, "step": 1504000 }, { "epoch": 4.11, "eval_loss": 2.2303643226623535, "eval_runtime": 345.9183, "eval_samples_per_second": 891.153, "eval_steps_per_second": 55.698, "step": 1504000 }, { "epoch": 4.13, "eval_loss": 2.2310214042663574, "eval_runtime": 345.0795, "eval_samples_per_second": 893.319, "eval_steps_per_second": 55.834, "step": 1512000 }, { "epoch": 4.15, "learning_rate": 1.5033333333333332e-07, "loss": 2.2765, "step": 1520000 }, { "epoch": 4.15, "eval_loss": 2.229534387588501, "eval_runtime": 346.2314, "eval_samples_per_second": 890.347, "eval_steps_per_second": 55.648, "step": 1520000 }, { "epoch": 4.17, "eval_loss": 2.2276086807250977, "eval_runtime": 345.5194, "eval_samples_per_second": 892.181, "eval_steps_per_second": 55.762, "step": 1528000 }, { "epoch": 4.2, "learning_rate": 1.476e-07, "loss": 2.2839, "step": 1536000 }, { "epoch": 4.2, "eval_loss": 2.226030111312866, "eval_runtime": 345.3689, "eval_samples_per_second": 892.57, "eval_steps_per_second": 55.787, "step": 1536000 }, { "epoch": 4.22, "eval_loss": 2.225531578063965, "eval_runtime": 346.1485, "eval_samples_per_second": 890.56, "eval_steps_per_second": 55.661, "step": 1544000 }, { "epoch": 4.24, "learning_rate": 1.4486666666666665e-07, "loss": 2.2845, "step": 1552000 }, { "epoch": 4.24, "eval_loss": 2.2199981212615967, "eval_runtime": 345.8554, "eval_samples_per_second": 891.315, "eval_steps_per_second": 55.708, "step": 1552000 }, { "epoch": 4.26, "eval_loss": 2.222754955291748, "eval_runtime": 346.6039, "eval_samples_per_second": 889.39, "eval_steps_per_second": 55.588, "step": 1560000 }, { "epoch": 4.28, "learning_rate": 1.4213333333333334e-07, "loss": 2.2816, "step": 1568000 }, { "epoch": 4.28, "eval_loss": 2.2322280406951904, "eval_runtime": 346.0535, "eval_samples_per_second": 890.804, "eval_steps_per_second": 55.676, "step": 1568000 }, { "epoch": 4.31, "eval_loss": 2.225015163421631, "eval_runtime": 345.7846, "eval_samples_per_second": 891.497, "eval_steps_per_second": 55.72, "step": 1576000 }, { "epoch": 4.33, "learning_rate": 1.3940000000000002e-07, "loss": 2.2965, "step": 1584000 }, { "epoch": 4.33, "eval_loss": 2.2242190837860107, "eval_runtime": 348.9458, "eval_samples_per_second": 883.421, "eval_steps_per_second": 55.215, "step": 1584000 }, { "epoch": 4.35, "eval_loss": 2.22951340675354, "eval_runtime": 346.387, "eval_samples_per_second": 889.947, "eval_steps_per_second": 55.623, "step": 1592000 }, { "epoch": 4.37, "learning_rate": 1.3666666666666665e-07, "loss": 2.2806, "step": 1600000 }, { "epoch": 4.37, "eval_loss": 2.219784736633301, "eval_runtime": 346.2214, "eval_samples_per_second": 890.372, "eval_steps_per_second": 55.649, "step": 1600000 }, { "epoch": 4.39, "eval_loss": 2.230062961578369, "eval_runtime": 346.9441, "eval_samples_per_second": 888.518, "eval_steps_per_second": 55.533, "step": 1608000 }, { "epoch": 4.41, "learning_rate": 1.3393333333333333e-07, "loss": 2.2868, "step": 1616000 }, { "epoch": 4.41, "eval_loss": 2.2308502197265625, "eval_runtime": 348.3618, "eval_samples_per_second": 884.902, "eval_steps_per_second": 55.307, "step": 1616000 }, { "epoch": 4.44, "eval_loss": 2.226969003677368, "eval_runtime": 347.2742, "eval_samples_per_second": 887.673, "eval_steps_per_second": 55.481, "step": 1624000 }, { "epoch": 4.46, "learning_rate": 1.312e-07, "loss": 2.2907, "step": 1632000 }, { "epoch": 4.46, "eval_loss": 2.2290947437286377, "eval_runtime": 346.9393, "eval_samples_per_second": 888.53, "eval_steps_per_second": 55.534, "step": 1632000 }, { "epoch": 4.48, "eval_loss": 2.226861000061035, "eval_runtime": 347.5547, "eval_samples_per_second": 886.957, "eval_steps_per_second": 55.436, "step": 1640000 }, { "epoch": 4.5, "learning_rate": 1.2846666666666667e-07, "loss": 2.2809, "step": 1648000 }, { "epoch": 4.5, "eval_loss": 2.2260689735412598, "eval_runtime": 347.4948, "eval_samples_per_second": 887.11, "eval_steps_per_second": 55.445, "step": 1648000 }, { "epoch": 4.52, "eval_loss": 2.231820583343506, "eval_runtime": 348.114, "eval_samples_per_second": 885.532, "eval_steps_per_second": 55.347, "step": 1656000 }, { "epoch": 4.55, "learning_rate": 1.2573333333333332e-07, "loss": 2.2876, "step": 1664000 }, { "epoch": 4.55, "eval_loss": 2.22523832321167, "eval_runtime": 347.061, "eval_samples_per_second": 888.219, "eval_steps_per_second": 55.515, "step": 1664000 }, { "epoch": 4.57, "eval_loss": 2.2248425483703613, "eval_runtime": 347.102, "eval_samples_per_second": 888.114, "eval_steps_per_second": 55.508, "step": 1672000 }, { "epoch": 4.59, "learning_rate": 1.23e-07, "loss": 2.2844, "step": 1680000 }, { "epoch": 4.59, "eval_loss": 2.222309112548828, "eval_runtime": 347.6799, "eval_samples_per_second": 886.637, "eval_steps_per_second": 55.416, "step": 1680000 }, { "epoch": 4.61, "eval_loss": 2.2250306606292725, "eval_runtime": 348.9099, "eval_samples_per_second": 883.512, "eval_steps_per_second": 55.221, "step": 1688000 }, { "epoch": 4.63, "learning_rate": 1.2026666666666666e-07, "loss": 2.2841, "step": 1696000 }, { "epoch": 4.63, "eval_loss": 2.227815866470337, "eval_runtime": 347.3551, "eval_samples_per_second": 887.466, "eval_steps_per_second": 55.468, "step": 1696000 }, { "epoch": 4.65, "eval_loss": 2.222553014755249, "eval_runtime": 347.2892, "eval_samples_per_second": 887.635, "eval_steps_per_second": 55.478, "step": 1704000 }, { "epoch": 4.68, "learning_rate": 1.1753333333333334e-07, "loss": 2.2851, "step": 1712000 }, { "epoch": 4.68, "eval_loss": 2.2273736000061035, "eval_runtime": 347.2652, "eval_samples_per_second": 887.696, "eval_steps_per_second": 55.482, "step": 1712000 }, { "epoch": 4.7, "eval_loss": 2.2246508598327637, "eval_runtime": 348.4684, "eval_samples_per_second": 884.631, "eval_steps_per_second": 55.291, "step": 1720000 }, { "epoch": 4.72, "learning_rate": 1.1480000000000001e-07, "loss": 2.2863, "step": 1728000 }, { "epoch": 4.72, "eval_loss": 2.223870277404785, "eval_runtime": 347.9398, "eval_samples_per_second": 885.975, "eval_steps_per_second": 55.375, "step": 1728000 }, { "epoch": 4.74, "eval_loss": 2.2227413654327393, "eval_runtime": 350.0067, "eval_samples_per_second": 880.743, "eval_steps_per_second": 55.048, "step": 1736000 }, { "epoch": 4.76, "learning_rate": 1.1206666666666666e-07, "loss": 2.2788, "step": 1744000 }, { "epoch": 4.76, "eval_loss": 2.223409414291382, "eval_runtime": 350.9466, "eval_samples_per_second": 878.384, "eval_steps_per_second": 54.9, "step": 1744000 }, { "epoch": 4.79, "eval_loss": 2.2293310165405273, "eval_runtime": 350.7999, "eval_samples_per_second": 878.752, "eval_steps_per_second": 54.923, "step": 1752000 }, { "epoch": 4.81, "learning_rate": 1.0933333333333333e-07, "loss": 2.2849, "step": 1760000 }, { "epoch": 4.81, "eval_loss": 2.2198665142059326, "eval_runtime": 350.5794, "eval_samples_per_second": 879.304, "eval_steps_per_second": 54.958, "step": 1760000 }, { "epoch": 4.83, "eval_loss": 2.2308552265167236, "eval_runtime": 351.9489, "eval_samples_per_second": 875.883, "eval_steps_per_second": 54.744, "step": 1768000 }, { "epoch": 4.85, "learning_rate": 1.066e-07, "loss": 2.2826, "step": 1776000 }, { "epoch": 4.85, "eval_loss": 2.223459243774414, "eval_runtime": 352.3884, "eval_samples_per_second": 874.79, "eval_steps_per_second": 54.675, "step": 1776000 }, { "epoch": 4.87, "eval_loss": 2.2291903495788574, "eval_runtime": 350.0779, "eval_samples_per_second": 880.564, "eval_steps_per_second": 55.036, "step": 1784000 }, { "epoch": 4.9, "learning_rate": 1.0386666666666667e-07, "loss": 2.2809, "step": 1792000 }, { "epoch": 4.9, "eval_loss": 2.224785089492798, "eval_runtime": 351.7673, "eval_samples_per_second": 876.335, "eval_steps_per_second": 54.772, "step": 1792000 }, { "epoch": 4.92, "eval_loss": 2.218683958053589, "eval_runtime": 353.6242, "eval_samples_per_second": 871.733, "eval_steps_per_second": 54.484, "step": 1800000 }, { "epoch": 4.94, "learning_rate": 1.0113333333333334e-07, "loss": 2.2865, "step": 1808000 }, { "epoch": 4.94, "eval_loss": 2.233116388320923, "eval_runtime": 351.3999, "eval_samples_per_second": 877.251, "eval_steps_per_second": 54.829, "step": 1808000 }, { "epoch": 4.96, "eval_loss": 2.2243831157684326, "eval_runtime": 349.1059, "eval_samples_per_second": 883.016, "eval_steps_per_second": 55.19, "step": 1816000 }, { "epoch": 4.98, "learning_rate": 9.84e-08, "loss": 2.2773, "step": 1824000 }, { "epoch": 4.98, "eval_loss": 2.2246005535125732, "eval_runtime": 349.0697, "eval_samples_per_second": 883.107, "eval_steps_per_second": 55.195, "step": 1824000 }, { "epoch": 5.0, "eval_loss": 2.2314696311950684, "eval_runtime": 349.3246, "eval_samples_per_second": 882.463, "eval_steps_per_second": 55.155, "step": 1832000 }, { "epoch": 5.03, "learning_rate": 9.566666666666666e-08, "loss": 2.2738, "step": 1840000 }, { "epoch": 5.03, "eval_loss": 2.231853485107422, "eval_runtime": 349.0971, "eval_samples_per_second": 883.038, "eval_steps_per_second": 55.191, "step": 1840000 }, { "epoch": 5.05, "eval_loss": 2.225752115249634, "eval_runtime": 350.4379, "eval_samples_per_second": 879.659, "eval_steps_per_second": 54.98, "step": 1848000 }, { "epoch": 5.07, "learning_rate": 9.293333333333333e-08, "loss": 2.2806, "step": 1856000 }, { "epoch": 5.07, "eval_loss": 2.2240936756134033, "eval_runtime": 350.1455, "eval_samples_per_second": 880.394, "eval_steps_per_second": 55.026, "step": 1856000 }, { "epoch": 5.09, "eval_loss": 2.2228317260742188, "eval_runtime": 352.1603, "eval_samples_per_second": 875.357, "eval_steps_per_second": 54.711, "step": 1864000 }, { "epoch": 5.11, "learning_rate": 9.02e-08, "loss": 2.2822, "step": 1872000 }, { "epoch": 5.11, "eval_loss": 2.2218220233917236, "eval_runtime": 352.822, "eval_samples_per_second": 873.715, "eval_steps_per_second": 54.608, "step": 1872000 }, { "epoch": 5.14, "eval_loss": 2.227595329284668, "eval_runtime": 350.713, "eval_samples_per_second": 878.969, "eval_steps_per_second": 54.937, "step": 1880000 }, { "epoch": 5.16, "learning_rate": 8.746666666666667e-08, "loss": 2.2866, "step": 1888000 }, { "epoch": 5.16, "eval_loss": 2.2233176231384277, "eval_runtime": 351.7914, "eval_samples_per_second": 876.275, "eval_steps_per_second": 54.768, "step": 1888000 }, { "epoch": 5.18, "eval_loss": 2.2265591621398926, "eval_runtime": 350.4277, "eval_samples_per_second": 879.685, "eval_steps_per_second": 54.981, "step": 1896000 }, { "epoch": 5.2, "learning_rate": 8.473333333333334e-08, "loss": 2.2831, "step": 1904000 }, { "epoch": 5.2, "eval_loss": 2.2230618000030518, "eval_runtime": 353.0183, "eval_samples_per_second": 873.229, "eval_steps_per_second": 54.578, "step": 1904000 }, { "epoch": 5.22, "eval_loss": 2.224078416824341, "eval_runtime": 349.9623, "eval_samples_per_second": 880.855, "eval_steps_per_second": 55.055, "step": 1912000 }, { "epoch": 5.24, "learning_rate": 8.2e-08, "loss": 2.2875, "step": 1920000 }, { "epoch": 5.24, "eval_loss": 2.226329803466797, "eval_runtime": 352.5996, "eval_samples_per_second": 874.266, "eval_steps_per_second": 54.643, "step": 1920000 }, { "epoch": 5.27, "eval_loss": 2.22342586517334, "eval_runtime": 350.4146, "eval_samples_per_second": 879.718, "eval_steps_per_second": 54.983, "step": 1928000 }, { "epoch": 5.29, "learning_rate": 7.926666666666666e-08, "loss": 2.2802, "step": 1936000 }, { "epoch": 5.29, "eval_loss": 2.2269349098205566, "eval_runtime": 352.2475, "eval_samples_per_second": 875.14, "eval_steps_per_second": 54.697, "step": 1936000 }, { "epoch": 5.31, "eval_loss": 2.2252981662750244, "eval_runtime": 351.5457, "eval_samples_per_second": 876.887, "eval_steps_per_second": 54.807, "step": 1944000 }, { "epoch": 5.33, "learning_rate": 7.653333333333333e-08, "loss": 2.2905, "step": 1952000 }, { "epoch": 5.33, "eval_loss": 2.2190773487091064, "eval_runtime": 351.1477, "eval_samples_per_second": 877.881, "eval_steps_per_second": 54.869, "step": 1952000 }, { "epoch": 5.35, "eval_loss": 2.2216830253601074, "eval_runtime": 350.8504, "eval_samples_per_second": 878.625, "eval_steps_per_second": 54.915, "step": 1960000 }, { "epoch": 5.38, "learning_rate": 7.38e-08, "loss": 2.282, "step": 1968000 }, { "epoch": 5.38, "eval_loss": 2.221177577972412, "eval_runtime": 352.0571, "eval_samples_per_second": 875.614, "eval_steps_per_second": 54.727, "step": 1968000 }, { "epoch": 5.4, "eval_loss": 2.221277952194214, "eval_runtime": 350.8021, "eval_samples_per_second": 878.746, "eval_steps_per_second": 54.923, "step": 1976000 }, { "epoch": 5.42, "learning_rate": 7.106666666666667e-08, "loss": 2.2798, "step": 1984000 }, { "epoch": 5.42, "eval_loss": 2.2217955589294434, "eval_runtime": 352.0687, "eval_samples_per_second": 875.585, "eval_steps_per_second": 54.725, "step": 1984000 }, { "epoch": 5.44, "eval_loss": 2.222245216369629, "eval_runtime": 351.0214, "eval_samples_per_second": 878.197, "eval_steps_per_second": 54.888, "step": 1992000 }, { "epoch": 5.46, "learning_rate": 6.833333333333332e-08, "loss": 2.2864, "step": 2000000 }, { "epoch": 5.46, "eval_loss": 2.2212188243865967, "eval_runtime": 351.5119, "eval_samples_per_second": 876.972, "eval_steps_per_second": 54.812, "step": 2000000 }, { "epoch": 5.49, "eval_loss": 2.228152275085449, "eval_runtime": 351.2144, "eval_samples_per_second": 877.715, "eval_steps_per_second": 54.858, "step": 2008000 }, { "epoch": 5.51, "learning_rate": 6.56e-08, "loss": 2.2867, "step": 2016000 }, { "epoch": 5.51, "eval_loss": 2.2304341793060303, "eval_runtime": 352.0733, "eval_samples_per_second": 875.573, "eval_steps_per_second": 54.724, "step": 2016000 }, { "epoch": 5.53, "eval_loss": 2.2221643924713135, "eval_runtime": 353.329, "eval_samples_per_second": 872.462, "eval_steps_per_second": 54.53, "step": 2024000 }, { "epoch": 5.55, "learning_rate": 6.286666666666666e-08, "loss": 2.2834, "step": 2032000 }, { "epoch": 5.55, "eval_loss": 2.2284741401672363, "eval_runtime": 352.1812, "eval_samples_per_second": 875.305, "eval_steps_per_second": 54.708, "step": 2032000 }, { "epoch": 5.57, "eval_loss": 2.222963571548462, "eval_runtime": 351.3628, "eval_samples_per_second": 877.344, "eval_steps_per_second": 54.835, "step": 2040000 }, { "epoch": 5.59, "learning_rate": 6.013333333333333e-08, "loss": 2.2851, "step": 2048000 }, { "epoch": 5.59, "eval_loss": 2.223684072494507, "eval_runtime": 351.5337, "eval_samples_per_second": 876.917, "eval_steps_per_second": 54.808, "step": 2048000 }, { "epoch": 5.62, "eval_loss": 2.228254795074463, "eval_runtime": 352.7325, "eval_samples_per_second": 873.937, "eval_steps_per_second": 54.622, "step": 2056000 }, { "epoch": 5.64, "learning_rate": 5.7400000000000004e-08, "loss": 2.2774, "step": 2064000 }, { "epoch": 5.64, "eval_loss": 2.2232439517974854, "eval_runtime": 352.1455, "eval_samples_per_second": 875.394, "eval_steps_per_second": 54.713, "step": 2064000 }, { "epoch": 5.66, "eval_loss": 2.2282047271728516, "eval_runtime": 352.0221, "eval_samples_per_second": 875.701, "eval_steps_per_second": 54.732, "step": 2072000 }, { "epoch": 5.68, "learning_rate": 5.4666666666666666e-08, "loss": 2.277, "step": 2080000 }, { "epoch": 5.68, "eval_loss": 2.2271482944488525, "eval_runtime": 351.8672, "eval_samples_per_second": 876.086, "eval_steps_per_second": 54.756, "step": 2080000 }, { "epoch": 5.7, "eval_loss": 2.2255890369415283, "eval_runtime": 351.9475, "eval_samples_per_second": 875.886, "eval_steps_per_second": 54.744, "step": 2088000 }, { "epoch": 5.73, "learning_rate": 5.1933333333333335e-08, "loss": 2.2868, "step": 2096000 }, { "epoch": 5.73, "eval_loss": 2.2252378463745117, "eval_runtime": 352.5562, "eval_samples_per_second": 874.374, "eval_steps_per_second": 54.649, "step": 2096000 }, { "epoch": 5.75, "eval_loss": 2.228463888168335, "eval_runtime": 352.4037, "eval_samples_per_second": 874.753, "eval_steps_per_second": 54.673, "step": 2104000 }, { "epoch": 5.77, "learning_rate": 4.92e-08, "loss": 2.2727, "step": 2112000 }, { "epoch": 5.77, "eval_loss": 2.2250723838806152, "eval_runtime": 351.7999, "eval_samples_per_second": 876.254, "eval_steps_per_second": 54.767, "step": 2112000 }, { "epoch": 5.79, "eval_loss": 2.2239432334899902, "eval_runtime": 352.3889, "eval_samples_per_second": 874.789, "eval_steps_per_second": 54.675, "step": 2120000 }, { "epoch": 5.81, "learning_rate": 4.6466666666666666e-08, "loss": 2.2803, "step": 2128000 }, { "epoch": 5.81, "eval_loss": 2.228705883026123, "eval_runtime": 352.9086, "eval_samples_per_second": 873.501, "eval_steps_per_second": 54.595, "step": 2128000 }, { "epoch": 5.84, "eval_loss": 2.227353096008301, "eval_runtime": 353.6833, "eval_samples_per_second": 871.588, "eval_steps_per_second": 54.475, "step": 2136000 }, { "epoch": 5.86, "learning_rate": 4.3733333333333335e-08, "loss": 2.2785, "step": 2144000 }, { "epoch": 5.86, "eval_loss": 2.2227251529693604, "eval_runtime": 353.3218, "eval_samples_per_second": 872.479, "eval_steps_per_second": 54.531, "step": 2144000 }, { "epoch": 5.88, "eval_loss": 2.226724863052368, "eval_runtime": 352.6583, "eval_samples_per_second": 874.121, "eval_steps_per_second": 54.634, "step": 2152000 }, { "epoch": 5.9, "learning_rate": 4.1e-08, "loss": 2.2829, "step": 2160000 }, { "epoch": 5.9, "eval_loss": 2.225097894668579, "eval_runtime": 352.6716, "eval_samples_per_second": 874.088, "eval_steps_per_second": 54.632, "step": 2160000 }, { "epoch": 5.92, "eval_loss": 2.222792387008667, "eval_runtime": 353.112, "eval_samples_per_second": 872.998, "eval_steps_per_second": 54.563, "step": 2168000 }, { "epoch": 5.94, "learning_rate": 3.8266666666666665e-08, "loss": 2.2816, "step": 2176000 }, { "epoch": 5.94, "eval_loss": 2.22352933883667, "eval_runtime": 353.4605, "eval_samples_per_second": 872.137, "eval_steps_per_second": 54.51, "step": 2176000 }, { "epoch": 5.97, "eval_loss": 2.22891902923584, "eval_runtime": 352.6495, "eval_samples_per_second": 874.143, "eval_steps_per_second": 54.635, "step": 2184000 }, { "epoch": 5.99, "learning_rate": 3.5533333333333334e-08, "loss": 2.283, "step": 2192000 }, { "epoch": 5.99, "eval_loss": 2.2238047122955322, "eval_runtime": 353.765, "eval_samples_per_second": 871.386, "eval_steps_per_second": 54.463, "step": 2192000 }, { "epoch": 6.01, "eval_loss": 2.224536657333374, "eval_runtime": 353.3721, "eval_samples_per_second": 872.355, "eval_steps_per_second": 54.523, "step": 2200000 }, { "epoch": 6.03, "learning_rate": 3.28e-08, "loss": 2.2761, "step": 2208000 }, { "epoch": 6.03, "eval_loss": 2.2296528816223145, "eval_runtime": 353.2693, "eval_samples_per_second": 872.609, "eval_steps_per_second": 54.539, "step": 2208000 }, { "epoch": 6.05, "eval_loss": 2.230041265487671, "eval_runtime": 355.1155, "eval_samples_per_second": 868.073, "eval_steps_per_second": 54.256, "step": 2216000 }, { "epoch": 6.08, "learning_rate": 3.0066666666666665e-08, "loss": 2.2823, "step": 2224000 }, { "epoch": 6.08, "eval_loss": 2.22680401802063, "eval_runtime": 364.0284, "eval_samples_per_second": 846.818, "eval_steps_per_second": 52.927, "step": 2224000 }, { "epoch": 6.1, "eval_loss": 2.2252140045166016, "eval_runtime": 353.3986, "eval_samples_per_second": 872.29, "eval_steps_per_second": 54.519, "step": 2232000 }, { "epoch": 6.12, "learning_rate": 2.7333333333333333e-08, "loss": 2.2715, "step": 2240000 }, { "epoch": 6.12, "eval_loss": 2.2239723205566406, "eval_runtime": 353.6515, "eval_samples_per_second": 871.666, "eval_steps_per_second": 54.48, "step": 2240000 }, { "epoch": 6.14, "eval_loss": 2.2233335971832275, "eval_runtime": 353.2896, "eval_samples_per_second": 872.559, "eval_steps_per_second": 54.536, "step": 2248000 }, { "epoch": 6.16, "learning_rate": 2.46e-08, "loss": 2.2809, "step": 2256000 }, { "epoch": 6.16, "eval_loss": 2.223785161972046, "eval_runtime": 354.1994, "eval_samples_per_second": 870.318, "eval_steps_per_second": 54.396, "step": 2256000 }, { "epoch": 6.18, "eval_loss": 2.220431089401245, "eval_runtime": 353.1693, "eval_samples_per_second": 872.856, "eval_steps_per_second": 54.555, "step": 2264000 }, { "epoch": 6.21, "learning_rate": 2.1866666666666667e-08, "loss": 2.2823, "step": 2272000 }, { "epoch": 6.21, "eval_loss": 2.2218103408813477, "eval_runtime": 354.0959, "eval_samples_per_second": 870.572, "eval_steps_per_second": 54.412, "step": 2272000 }, { "epoch": 6.23, "eval_loss": 2.2294769287109375, "eval_runtime": 353.4329, "eval_samples_per_second": 872.205, "eval_steps_per_second": 54.514, "step": 2280000 }, { "epoch": 6.25, "learning_rate": 1.9133333333333333e-08, "loss": 2.2848, "step": 2288000 }, { "epoch": 6.25, "eval_loss": 2.2298202514648438, "eval_runtime": 353.5589, "eval_samples_per_second": 871.894, "eval_steps_per_second": 54.494, "step": 2288000 }, { "epoch": 6.27, "eval_loss": 2.2298853397369385, "eval_runtime": 354.6654, "eval_samples_per_second": 869.174, "eval_steps_per_second": 54.324, "step": 2296000 }, { "epoch": 6.29, "learning_rate": 1.64e-08, "loss": 2.2847, "step": 2304000 }, { "epoch": 6.29, "eval_loss": 2.224604606628418, "eval_runtime": 354.3384, "eval_samples_per_second": 869.976, "eval_steps_per_second": 54.375, "step": 2304000 }, { "epoch": 6.32, "eval_loss": 2.222991704940796, "eval_runtime": 353.8486, "eval_samples_per_second": 871.181, "eval_steps_per_second": 54.45, "step": 2312000 }, { "epoch": 6.34, "learning_rate": 1.3666666666666667e-08, "loss": 2.2783, "step": 2320000 }, { "epoch": 6.34, "eval_loss": 2.2260053157806396, "eval_runtime": 354.5313, "eval_samples_per_second": 869.503, "eval_steps_per_second": 54.345, "step": 2320000 }, { "epoch": 6.36, "eval_loss": 2.217644453048706, "eval_runtime": 355.0802, "eval_samples_per_second": 868.159, "eval_steps_per_second": 54.261, "step": 2328000 }, { "epoch": 6.38, "learning_rate": 1.0933333333333334e-08, "loss": 2.2791, "step": 2336000 }, { "epoch": 6.38, "eval_loss": 2.221074342727661, "eval_runtime": 353.9231, "eval_samples_per_second": 870.997, "eval_steps_per_second": 54.438, "step": 2336000 }, { "epoch": 6.4, "eval_loss": 2.2261996269226074, "eval_runtime": 355.1404, "eval_samples_per_second": 868.012, "eval_steps_per_second": 54.252, "step": 2344000 }, { "epoch": 6.43, "learning_rate": 8.2e-09, "loss": 2.2797, "step": 2352000 }, { "epoch": 6.43, "eval_loss": 2.2293312549591064, "eval_runtime": 354.4371, "eval_samples_per_second": 869.734, "eval_steps_per_second": 54.359, "step": 2352000 }, { "epoch": 6.45, "eval_loss": 2.221876859664917, "eval_runtime": 356.764, "eval_samples_per_second": 864.061, "eval_steps_per_second": 54.005, "step": 2360000 }, { "epoch": 6.47, "learning_rate": 5.466666666666667e-09, "loss": 2.2784, "step": 2368000 }, { "epoch": 6.47, "eval_loss": 2.2249085903167725, "eval_runtime": 354.8521, "eval_samples_per_second": 868.717, "eval_steps_per_second": 54.296, "step": 2368000 }, { "epoch": 6.49, "eval_loss": 2.2216453552246094, "eval_runtime": 355.228, "eval_samples_per_second": 867.798, "eval_steps_per_second": 54.238, "step": 2376000 }, { "epoch": 6.51, "learning_rate": 2.7333333333333334e-09, "loss": 2.271, "step": 2384000 }, { "epoch": 6.51, "eval_loss": 2.225550413131714, "eval_runtime": 355.1559, "eval_samples_per_second": 867.974, "eval_steps_per_second": 54.249, "step": 2384000 }, { "epoch": 6.53, "eval_loss": 2.2295968532562256, "eval_runtime": 355.7601, "eval_samples_per_second": 866.5, "eval_steps_per_second": 54.157, "step": 2392000 }, { "epoch": 6.56, "learning_rate": 0.0, "loss": 2.2787, "step": 2400000 }, { "epoch": 6.56, "eval_loss": 2.2274532318115234, "eval_runtime": 356.8124, "eval_samples_per_second": 863.944, "eval_steps_per_second": 53.998, "step": 2400000 }, { "epoch": 6.56, "step": 2400000, "total_flos": 7.335203818962209e+17, "train_loss": 2.304743234049479, "train_runtime": 256347.6755, "train_samples_per_second": 149.797, "train_steps_per_second": 9.362 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 7, "save_steps": 32000, "total_flos": 7.335203818962209e+17, "trial_name": null, "trial_params": null }