{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8311170212765957, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.5e-06, "loss": 0.999, "step": 25 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 0.7998, "step": 50 }, { "epoch": 0.01, "learning_rate": 7.500000000000001e-06, "loss": 0.6289, "step": 75 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 0.5845, "step": 100 }, { "epoch": 0.02, "learning_rate": 9.948979591836737e-06, "loss": 0.5564, "step": 125 }, { "epoch": 0.02, "learning_rate": 9.89795918367347e-06, "loss": 0.5448, "step": 150 }, { "epoch": 0.03, "learning_rate": 9.846938775510205e-06, "loss": 0.5098, "step": 175 }, { "epoch": 0.03, "learning_rate": 9.795918367346939e-06, "loss": 0.5203, "step": 200 }, { "epoch": 0.04, "learning_rate": 9.744897959183674e-06, "loss": 0.5095, "step": 225 }, { "epoch": 0.04, "learning_rate": 9.693877551020408e-06, "loss": 0.5024, "step": 250 }, { "epoch": 0.04, "eval_loss": 0.507320761680603, "eval_runtime": 183.288, "eval_samples_per_second": 5.412, "eval_steps_per_second": 0.087, "eval_wer": 0.2948083939915912, "step": 250 }, { "epoch": 0.05, "learning_rate": 9.642857142857144e-06, "loss": 0.4942, "step": 275 }, { "epoch": 0.05, "learning_rate": 9.591836734693878e-06, "loss": 0.4981, "step": 300 }, { "epoch": 0.05, "learning_rate": 9.540816326530612e-06, "loss": 0.4824, "step": 325 }, { "epoch": 0.06, "learning_rate": 9.489795918367348e-06, "loss": 0.48, "step": 350 }, { "epoch": 0.06, "learning_rate": 9.438775510204082e-06, "loss": 0.4599, "step": 375 }, { "epoch": 0.07, "learning_rate": 9.387755102040818e-06, "loss": 0.4482, "step": 400 }, { "epoch": 0.07, "learning_rate": 9.336734693877552e-06, "loss": 0.4674, "step": 425 }, { "epoch": 0.07, "learning_rate": 9.285714285714288e-06, "loss": 0.4648, "step": 450 }, { "epoch": 0.08, "learning_rate": 9.234693877551022e-06, "loss": 0.4611, "step": 475 }, { "epoch": 0.08, "learning_rate": 9.183673469387756e-06, "loss": 0.4684, "step": 500 }, { "epoch": 0.08, "eval_loss": 0.46387979388237, "eval_runtime": 182.1295, "eval_samples_per_second": 5.447, "eval_steps_per_second": 0.088, "eval_wer": 0.2783982515604453, "step": 500 }, { "epoch": 0.09, "learning_rate": 9.13265306122449e-06, "loss": 0.4524, "step": 525 }, { "epoch": 0.09, "learning_rate": 9.081632653061225e-06, "loss": 0.4511, "step": 550 }, { "epoch": 0.1, "learning_rate": 9.03061224489796e-06, "loss": 0.4479, "step": 575 }, { "epoch": 0.1, "learning_rate": 8.979591836734695e-06, "loss": 0.4444, "step": 600 }, { "epoch": 0.1, "learning_rate": 8.92857142857143e-06, "loss": 0.4537, "step": 625 }, { "epoch": 0.11, "learning_rate": 8.877551020408163e-06, "loss": 0.4349, "step": 650 }, { "epoch": 0.11, "learning_rate": 8.826530612244899e-06, "loss": 0.4395, "step": 675 }, { "epoch": 0.12, "learning_rate": 8.775510204081633e-06, "loss": 0.4236, "step": 700 }, { "epoch": 0.12, "learning_rate": 8.724489795918369e-06, "loss": 0.428, "step": 725 }, { "epoch": 0.12, "learning_rate": 8.673469387755103e-06, "loss": 0.4246, "step": 750 }, { "epoch": 0.12, "eval_loss": 0.43956783413887024, "eval_runtime": 182.7895, "eval_samples_per_second": 5.427, "eval_steps_per_second": 0.088, "eval_wer": 0.27576818358615324, "step": 750 }, { "epoch": 0.13, "learning_rate": 8.622448979591837e-06, "loss": 0.4315, "step": 775 }, { "epoch": 0.13, "learning_rate": 8.571428571428571e-06, "loss": 0.431, "step": 800 }, { "epoch": 0.14, "learning_rate": 8.520408163265307e-06, "loss": 0.4164, "step": 825 }, { "epoch": 0.14, "learning_rate": 8.469387755102042e-06, "loss": 0.4197, "step": 850 }, { "epoch": 0.15, "learning_rate": 8.418367346938776e-06, "loss": 0.4328, "step": 875 }, { "epoch": 0.15, "learning_rate": 8.36734693877551e-06, "loss": 0.4136, "step": 900 }, { "epoch": 0.15, "learning_rate": 8.316326530612246e-06, "loss": 0.4275, "step": 925 }, { "epoch": 0.16, "learning_rate": 8.26530612244898e-06, "loss": 0.4084, "step": 950 }, { "epoch": 0.16, "learning_rate": 8.214285714285714e-06, "loss": 0.4173, "step": 975 }, { "epoch": 0.17, "learning_rate": 8.16326530612245e-06, "loss": 0.4132, "step": 1000 }, { "epoch": 0.17, "eval_loss": 0.4221729636192322, "eval_runtime": 183.8667, "eval_samples_per_second": 5.395, "eval_steps_per_second": 0.087, "eval_wer": 0.26639625122705635, "step": 1000 }, { "epoch": 0.17, "learning_rate": 8.112244897959184e-06, "loss": 0.4238, "step": 1025 }, { "epoch": 0.17, "learning_rate": 8.06122448979592e-06, "loss": 0.4153, "step": 1050 }, { "epoch": 0.18, "learning_rate": 8.010204081632654e-06, "loss": 0.4125, "step": 1075 }, { "epoch": 0.18, "learning_rate": 7.959183673469388e-06, "loss": 0.4161, "step": 1100 }, { "epoch": 0.19, "learning_rate": 7.908163265306124e-06, "loss": 0.4055, "step": 1125 }, { "epoch": 0.19, "learning_rate": 7.857142857142858e-06, "loss": 0.4028, "step": 1150 }, { "epoch": 0.2, "learning_rate": 7.806122448979593e-06, "loss": 0.4051, "step": 1175 }, { "epoch": 0.2, "learning_rate": 7.755102040816327e-06, "loss": 0.4036, "step": 1200 }, { "epoch": 0.2, "learning_rate": 7.704081632653061e-06, "loss": 0.4018, "step": 1225 }, { "epoch": 0.21, "learning_rate": 7.653061224489796e-06, "loss": 0.4021, "step": 1250 }, { "epoch": 0.21, "eval_loss": 0.4100790321826935, "eval_runtime": 182.7493, "eval_samples_per_second": 5.428, "eval_steps_per_second": 0.088, "eval_wer": 0.26334018632735084, "step": 1250 }, { "epoch": 0.21, "learning_rate": 7.602040816326531e-06, "loss": 0.4036, "step": 1275 }, { "epoch": 0.22, "learning_rate": 7.551020408163265e-06, "loss": 0.3895, "step": 1300 }, { "epoch": 0.22, "learning_rate": 7.500000000000001e-06, "loss": 0.4034, "step": 1325 }, { "epoch": 0.22, "learning_rate": 7.448979591836736e-06, "loss": 0.4014, "step": 1350 }, { "epoch": 0.23, "learning_rate": 7.39795918367347e-06, "loss": 0.4036, "step": 1375 }, { "epoch": 0.23, "learning_rate": 7.346938775510205e-06, "loss": 0.3913, "step": 1400 }, { "epoch": 0.24, "learning_rate": 7.295918367346939e-06, "loss": 0.3865, "step": 1425 }, { "epoch": 0.24, "learning_rate": 7.244897959183675e-06, "loss": 0.3969, "step": 1450 }, { "epoch": 0.25, "learning_rate": 7.193877551020409e-06, "loss": 0.3928, "step": 1475 }, { "epoch": 0.25, "learning_rate": 7.1428571428571436e-06, "loss": 0.3871, "step": 1500 }, { "epoch": 0.25, "eval_loss": 0.39817574620246887, "eval_runtime": 183.3354, "eval_samples_per_second": 5.411, "eval_steps_per_second": 0.087, "eval_wer": 0.26191402270748826, "step": 1500 }, { "epoch": 0.25, "learning_rate": 7.091836734693878e-06, "loss": 0.4017, "step": 1525 }, { "epoch": 0.26, "learning_rate": 7.0408163265306125e-06, "loss": 0.4, "step": 1550 }, { "epoch": 0.26, "learning_rate": 6.989795918367348e-06, "loss": 0.3988, "step": 1575 }, { "epoch": 0.27, "learning_rate": 6.938775510204082e-06, "loss": 0.3863, "step": 1600 }, { "epoch": 0.27, "learning_rate": 6.887755102040817e-06, "loss": 0.381, "step": 1625 }, { "epoch": 0.27, "learning_rate": 6.836734693877551e-06, "loss": 0.373, "step": 1650 }, { "epoch": 0.28, "learning_rate": 6.785714285714287e-06, "loss": 0.3968, "step": 1675 }, { "epoch": 0.28, "learning_rate": 6.734693877551021e-06, "loss": 0.3876, "step": 1700 }, { "epoch": 0.29, "learning_rate": 6.683673469387756e-06, "loss": 0.3899, "step": 1725 }, { "epoch": 0.29, "learning_rate": 6.63265306122449e-06, "loss": 0.3813, "step": 1750 }, { "epoch": 0.29, "eval_loss": 0.3895135223865509, "eval_runtime": 182.7821, "eval_samples_per_second": 5.427, "eval_steps_per_second": 0.088, "eval_wer": 0.25772813987516435, "step": 1750 }, { "epoch": 0.3, "learning_rate": 6.581632653061225e-06, "loss": 0.3816, "step": 1775 }, { "epoch": 0.3, "learning_rate": 6.530612244897959e-06, "loss": 0.3805, "step": 1800 }, { "epoch": 0.3, "learning_rate": 6.4795918367346946e-06, "loss": 0.3901, "step": 1825 }, { "epoch": 0.31, "learning_rate": 6.4285714285714295e-06, "loss": 0.3785, "step": 1850 }, { "epoch": 0.31, "learning_rate": 6.3775510204081635e-06, "loss": 0.3803, "step": 1875 }, { "epoch": 0.32, "learning_rate": 6.326530612244899e-06, "loss": 0.3803, "step": 1900 }, { "epoch": 0.32, "learning_rate": 6.275510204081633e-06, "loss": 0.3767, "step": 1925 }, { "epoch": 0.32, "learning_rate": 6.224489795918368e-06, "loss": 0.3728, "step": 1950 }, { "epoch": 0.33, "learning_rate": 6.173469387755102e-06, "loss": 0.3747, "step": 1975 }, { "epoch": 0.33, "learning_rate": 6.122448979591837e-06, "loss": 0.3878, "step": 2000 }, { "epoch": 0.33, "eval_loss": 0.38268980383872986, "eval_runtime": 182.9118, "eval_samples_per_second": 5.423, "eval_steps_per_second": 0.087, "eval_wer": 0.2533014761719546, "step": 2000 }, { "epoch": 0.34, "learning_rate": 6.071428571428571e-06, "loss": 0.3725, "step": 2025 }, { "epoch": 0.34, "learning_rate": 6.020408163265307e-06, "loss": 0.3761, "step": 2050 }, { "epoch": 0.34, "learning_rate": 5.969387755102042e-06, "loss": 0.3851, "step": 2075 }, { "epoch": 0.35, "learning_rate": 5.918367346938776e-06, "loss": 0.3764, "step": 2100 }, { "epoch": 0.35, "learning_rate": 5.867346938775511e-06, "loss": 0.3737, "step": 2125 }, { "epoch": 0.36, "learning_rate": 5.816326530612246e-06, "loss": 0.3724, "step": 2150 }, { "epoch": 0.36, "learning_rate": 5.7653061224489805e-06, "loss": 0.3837, "step": 2175 }, { "epoch": 0.37, "learning_rate": 5.7142857142857145e-06, "loss": 0.3666, "step": 2200 }, { "epoch": 0.37, "learning_rate": 5.663265306122449e-06, "loss": 0.3748, "step": 2225 }, { "epoch": 0.37, "learning_rate": 5.6122448979591834e-06, "loss": 0.3704, "step": 2250 }, { "epoch": 0.37, "eval_loss": 0.3770383894443512, "eval_runtime": 183.5258, "eval_samples_per_second": 5.405, "eval_steps_per_second": 0.087, "eval_wer": 0.2533385193828601, "step": 2250 }, { "epoch": 0.38, "learning_rate": 5.561224489795919e-06, "loss": 0.3625, "step": 2275 }, { "epoch": 0.38, "learning_rate": 5.510204081632653e-06, "loss": 0.3609, "step": 2300 }, { "epoch": 0.39, "learning_rate": 5.459183673469388e-06, "loss": 0.3715, "step": 2325 }, { "epoch": 0.39, "learning_rate": 5.408163265306123e-06, "loss": 0.3676, "step": 2350 }, { "epoch": 0.39, "learning_rate": 5.357142857142857e-06, "loss": 0.3703, "step": 2375 }, { "epoch": 0.4, "learning_rate": 5.306122448979593e-06, "loss": 0.3606, "step": 2400 }, { "epoch": 0.4, "learning_rate": 5.255102040816327e-06, "loss": 0.3691, "step": 2425 }, { "epoch": 0.41, "learning_rate": 5.204081632653062e-06, "loss": 0.3776, "step": 2450 }, { "epoch": 0.41, "learning_rate": 5.153061224489796e-06, "loss": 0.3652, "step": 2475 }, { "epoch": 0.42, "learning_rate": 5.1020408163265315e-06, "loss": 0.3516, "step": 2500 }, { "epoch": 0.42, "eval_loss": 0.3713986277580261, "eval_runtime": 182.6295, "eval_samples_per_second": 5.432, "eval_steps_per_second": 0.088, "eval_wer": 0.25398677557370675, "step": 2500 }, { "epoch": 0.42, "learning_rate": 5.0510204081632655e-06, "loss": 0.3715, "step": 2525 }, { "epoch": 0.42, "learning_rate": 5e-06, "loss": 0.3557, "step": 2550 }, { "epoch": 0.43, "learning_rate": 4.948979591836735e-06, "loss": 0.3676, "step": 2575 }, { "epoch": 0.43, "learning_rate": 4.897959183673469e-06, "loss": 0.3661, "step": 2600 }, { "epoch": 0.44, "learning_rate": 4.846938775510204e-06, "loss": 0.3649, "step": 2625 }, { "epoch": 0.44, "learning_rate": 4.795918367346939e-06, "loss": 0.3525, "step": 2650 }, { "epoch": 0.44, "learning_rate": 4.744897959183674e-06, "loss": 0.3715, "step": 2675 }, { "epoch": 0.45, "learning_rate": 4.693877551020409e-06, "loss": 0.3639, "step": 2700 }, { "epoch": 0.45, "learning_rate": 4.642857142857144e-06, "loss": 0.3619, "step": 2725 }, { "epoch": 0.46, "learning_rate": 4.591836734693878e-06, "loss": 0.3792, "step": 2750 }, { "epoch": 0.46, "eval_loss": 0.3675037920475006, "eval_runtime": 183.0666, "eval_samples_per_second": 5.419, "eval_steps_per_second": 0.087, "eval_wer": 0.24952306865959142, "step": 2750 }, { "epoch": 0.46, "learning_rate": 4.540816326530613e-06, "loss": 0.3601, "step": 2775 }, { "epoch": 0.47, "learning_rate": 4.489795918367348e-06, "loss": 0.363, "step": 2800 }, { "epoch": 0.47, "learning_rate": 4.438775510204082e-06, "loss": 0.3729, "step": 2825 }, { "epoch": 0.47, "learning_rate": 4.3877551020408165e-06, "loss": 0.365, "step": 2850 }, { "epoch": 0.48, "learning_rate": 4.336734693877551e-06, "loss": 0.356, "step": 2875 }, { "epoch": 0.48, "learning_rate": 4.2857142857142855e-06, "loss": 0.3539, "step": 2900 }, { "epoch": 0.49, "learning_rate": 4.234693877551021e-06, "loss": 0.3608, "step": 2925 }, { "epoch": 0.49, "learning_rate": 4.183673469387755e-06, "loss": 0.361, "step": 2950 }, { "epoch": 0.49, "learning_rate": 4.13265306122449e-06, "loss": 0.3486, "step": 2975 }, { "epoch": 0.5, "learning_rate": 4.081632653061225e-06, "loss": 0.3476, "step": 3000 }, { "epoch": 0.5, "eval_loss": 0.36362290382385254, "eval_runtime": 182.4755, "eval_samples_per_second": 5.436, "eval_steps_per_second": 0.088, "eval_wer": 0.24563353151451167, "step": 3000 }, { "epoch": 0.5, "learning_rate": 4.03061224489796e-06, "loss": 0.3764, "step": 3025 }, { "epoch": 0.51, "learning_rate": 3.979591836734694e-06, "loss": 0.3542, "step": 3050 }, { "epoch": 0.51, "learning_rate": 3.928571428571429e-06, "loss": 0.3615, "step": 3075 }, { "epoch": 0.52, "learning_rate": 3.877551020408164e-06, "loss": 0.3655, "step": 3100 }, { "epoch": 0.52, "learning_rate": 3.826530612244898e-06, "loss": 0.3709, "step": 3125 }, { "epoch": 0.52, "learning_rate": 3.7755102040816327e-06, "loss": 0.3613, "step": 3150 }, { "epoch": 0.53, "learning_rate": 3.724489795918368e-06, "loss": 0.3608, "step": 3175 }, { "epoch": 0.53, "learning_rate": 3.6734693877551024e-06, "loss": 0.353, "step": 3200 }, { "epoch": 0.54, "learning_rate": 3.6224489795918373e-06, "loss": 0.354, "step": 3225 }, { "epoch": 0.54, "learning_rate": 3.5714285714285718e-06, "loss": 0.3522, "step": 3250 }, { "epoch": 0.54, "eval_loss": 0.36105236411094666, "eval_runtime": 182.9956, "eval_samples_per_second": 5.421, "eval_steps_per_second": 0.087, "eval_wer": 0.24617065807264174, "step": 3250 }, { "epoch": 0.54, "learning_rate": 3.5204081632653062e-06, "loss": 0.3526, "step": 3275 }, { "epoch": 0.55, "learning_rate": 3.469387755102041e-06, "loss": 0.3578, "step": 3300 }, { "epoch": 0.55, "learning_rate": 3.4183673469387756e-06, "loss": 0.3519, "step": 3325 }, { "epoch": 0.56, "learning_rate": 3.3673469387755105e-06, "loss": 0.3487, "step": 3350 }, { "epoch": 0.56, "learning_rate": 3.316326530612245e-06, "loss": 0.354, "step": 3375 }, { "epoch": 0.57, "learning_rate": 3.2653061224489794e-06, "loss": 0.3605, "step": 3400 }, { "epoch": 0.57, "learning_rate": 3.2142857142857147e-06, "loss": 0.3644, "step": 3425 }, { "epoch": 0.57, "learning_rate": 3.1632653061224496e-06, "loss": 0.3592, "step": 3450 }, { "epoch": 0.58, "learning_rate": 3.112244897959184e-06, "loss": 0.3502, "step": 3475 }, { "epoch": 0.58, "learning_rate": 3.0612244897959185e-06, "loss": 0.3545, "step": 3500 }, { "epoch": 0.58, "eval_loss": 0.35600587725639343, "eval_runtime": 183.8697, "eval_samples_per_second": 5.395, "eval_steps_per_second": 0.087, "eval_wer": 0.243985108629216, "step": 3500 }, { "epoch": 0.59, "learning_rate": 3.0102040816326534e-06, "loss": 0.3603, "step": 3525 }, { "epoch": 0.59, "learning_rate": 2.959183673469388e-06, "loss": 0.3518, "step": 3550 }, { "epoch": 0.59, "learning_rate": 2.908163265306123e-06, "loss": 0.3635, "step": 3575 }, { "epoch": 0.6, "learning_rate": 2.8571428571428573e-06, "loss": 0.352, "step": 3600 }, { "epoch": 0.6, "learning_rate": 2.8061224489795917e-06, "loss": 0.3506, "step": 3625 }, { "epoch": 0.61, "learning_rate": 2.7551020408163266e-06, "loss": 0.3449, "step": 3650 }, { "epoch": 0.61, "learning_rate": 2.7040816326530615e-06, "loss": 0.3591, "step": 3675 }, { "epoch": 0.62, "learning_rate": 2.6530612244897964e-06, "loss": 0.3499, "step": 3700 }, { "epoch": 0.62, "learning_rate": 2.602040816326531e-06, "loss": 0.3546, "step": 3725 }, { "epoch": 0.62, "learning_rate": 2.5510204081632657e-06, "loss": 0.3426, "step": 3750 }, { "epoch": 0.62, "eval_loss": 0.3543338179588318, "eval_runtime": 184.2454, "eval_samples_per_second": 5.384, "eval_steps_per_second": 0.087, "eval_wer": 0.24635587412716933, "step": 3750 }, { "epoch": 0.63, "learning_rate": 2.5e-06, "loss": 0.354, "step": 3775 }, { "epoch": 0.63, "learning_rate": 2.4489795918367347e-06, "loss": 0.3416, "step": 3800 }, { "epoch": 0.64, "learning_rate": 2.3979591836734696e-06, "loss": 0.355, "step": 3825 }, { "epoch": 0.64, "learning_rate": 2.3469387755102044e-06, "loss": 0.355, "step": 3850 }, { "epoch": 0.64, "learning_rate": 2.295918367346939e-06, "loss": 0.3428, "step": 3875 }, { "epoch": 0.65, "learning_rate": 2.244897959183674e-06, "loss": 0.35, "step": 3900 }, { "epoch": 0.65, "learning_rate": 2.1938775510204083e-06, "loss": 0.3511, "step": 3925 }, { "epoch": 0.66, "learning_rate": 2.1428571428571427e-06, "loss": 0.3477, "step": 3950 }, { "epoch": 0.66, "learning_rate": 2.0918367346938776e-06, "loss": 0.3494, "step": 3975 }, { "epoch": 0.66, "learning_rate": 2.0408163265306125e-06, "loss": 0.3437, "step": 4000 }, { "epoch": 0.66, "eval_loss": 0.3523597717285156, "eval_runtime": 184.0518, "eval_samples_per_second": 5.39, "eval_steps_per_second": 0.087, "eval_wer": 0.24635587412716933, "step": 4000 }, { "epoch": 0.67, "learning_rate": 1.989795918367347e-06, "loss": 0.35, "step": 4025 }, { "epoch": 0.67, "learning_rate": 1.938775510204082e-06, "loss": 0.339, "step": 4050 }, { "epoch": 0.68, "learning_rate": 1.8877551020408163e-06, "loss": 0.3504, "step": 4075 }, { "epoch": 0.68, "learning_rate": 1.8367346938775512e-06, "loss": 0.3401, "step": 4100 }, { "epoch": 0.69, "learning_rate": 1.7857142857142859e-06, "loss": 0.3424, "step": 4125 }, { "epoch": 0.69, "learning_rate": 1.7346938775510206e-06, "loss": 0.3567, "step": 4150 }, { "epoch": 0.69, "learning_rate": 1.6836734693877552e-06, "loss": 0.3622, "step": 4175 }, { "epoch": 0.7, "learning_rate": 1.6326530612244897e-06, "loss": 0.3425, "step": 4200 }, { "epoch": 0.7, "learning_rate": 1.5816326530612248e-06, "loss": 0.3546, "step": 4225 }, { "epoch": 0.71, "learning_rate": 1.5306122448979593e-06, "loss": 0.3562, "step": 4250 }, { "epoch": 0.71, "eval_loss": 0.3507314622402191, "eval_runtime": 182.7316, "eval_samples_per_second": 5.429, "eval_steps_per_second": 0.088, "eval_wer": 0.2451519697727399, "step": 4250 }, { "epoch": 0.71, "learning_rate": 1.479591836734694e-06, "loss": 0.3458, "step": 4275 }, { "epoch": 0.71, "learning_rate": 1.4285714285714286e-06, "loss": 0.3528, "step": 4300 }, { "epoch": 0.72, "learning_rate": 1.3775510204081633e-06, "loss": 0.3499, "step": 4325 }, { "epoch": 0.72, "learning_rate": 1.3265306122448982e-06, "loss": 0.3419, "step": 4350 }, { "epoch": 0.73, "learning_rate": 1.2755102040816329e-06, "loss": 0.3436, "step": 4375 }, { "epoch": 0.73, "learning_rate": 1.2244897959183673e-06, "loss": 0.3436, "step": 4400 }, { "epoch": 0.74, "learning_rate": 1.1734693877551022e-06, "loss": 0.3528, "step": 4425 }, { "epoch": 0.74, "learning_rate": 1.122448979591837e-06, "loss": 0.3397, "step": 4450 }, { "epoch": 0.74, "learning_rate": 1.0714285714285714e-06, "loss": 0.3449, "step": 4475 }, { "epoch": 0.75, "learning_rate": 1.0224489795918368e-06, "loss": 0.3555, "step": 4500 }, { "epoch": 0.75, "eval_loss": 0.34912917017936707, "eval_runtime": 183.2099, "eval_samples_per_second": 5.415, "eval_steps_per_second": 0.087, "eval_wer": 0.24263303143116446, "step": 4500 }, { "epoch": 0.75, "learning_rate": 9.714285714285715e-07, "loss": 0.3449, "step": 4525 }, { "epoch": 0.76, "learning_rate": 9.204081632653062e-07, "loss": 0.3393, "step": 4550 }, { "epoch": 0.76, "learning_rate": 8.693877551020409e-07, "loss": 0.3454, "step": 4575 }, { "epoch": 0.76, "learning_rate": 8.183673469387755e-07, "loss": 0.3411, "step": 4600 }, { "epoch": 0.77, "learning_rate": 7.673469387755103e-07, "loss": 0.3416, "step": 4625 }, { "epoch": 0.77, "learning_rate": 7.16326530612245e-07, "loss": 0.3298, "step": 4650 }, { "epoch": 0.78, "learning_rate": 6.653061224489797e-07, "loss": 0.3472, "step": 4675 }, { "epoch": 0.78, "learning_rate": 6.142857142857143e-07, "loss": 0.3519, "step": 4700 }, { "epoch": 0.79, "learning_rate": 5.632653061224491e-07, "loss": 0.3387, "step": 4725 }, { "epoch": 0.79, "learning_rate": 5.122448979591837e-07, "loss": 0.3397, "step": 4750 }, { "epoch": 0.79, "eval_loss": 0.34834620356559753, "eval_runtime": 183.7297, "eval_samples_per_second": 5.399, "eval_steps_per_second": 0.087, "eval_wer": 0.24191068881850677, "step": 4750 }, { "epoch": 0.79, "learning_rate": 4.612244897959184e-07, "loss": 0.3428, "step": 4775 }, { "epoch": 0.8, "learning_rate": 4.102040816326531e-07, "loss": 0.3493, "step": 4800 }, { "epoch": 0.8, "learning_rate": 3.591836734693878e-07, "loss": 0.3368, "step": 4825 }, { "epoch": 0.81, "learning_rate": 3.0816326530612243e-07, "loss": 0.3547, "step": 4850 }, { "epoch": 0.81, "learning_rate": 2.5714285714285716e-07, "loss": 0.3426, "step": 4875 }, { "epoch": 0.81, "learning_rate": 2.0612244897959186e-07, "loss": 0.3496, "step": 4900 }, { "epoch": 0.82, "learning_rate": 1.5510204081632654e-07, "loss": 0.3479, "step": 4925 }, { "epoch": 0.82, "learning_rate": 1.0408163265306123e-07, "loss": 0.3472, "step": 4950 }, { "epoch": 0.83, "learning_rate": 5.306122448979593e-08, "loss": 0.3397, "step": 4975 }, { "epoch": 0.83, "learning_rate": 2.0408163265306126e-09, "loss": 0.3516, "step": 5000 }, { "epoch": 0.83, "eval_loss": 0.34790730476379395, "eval_runtime": 184.5434, "eval_samples_per_second": 5.375, "eval_steps_per_second": 0.087, "eval_wer": 0.2426515530366172, "step": 5000 }, { "epoch": 0.83, "step": 5000, "total_flos": 9.23473281024e+19, "train_loss": 0.38958581829071043, "train_runtime": 37554.5281, "train_samples_per_second": 8.521, "train_steps_per_second": 0.133 } ], "max_steps": 5000, "num_train_epochs": 1, "total_flos": 9.23473281024e+19, "trial_name": null, "trial_params": null }