{ "best_metric": 15.246076710047603, "best_model_checkpoint": "./whisper-turbo/checkpoint-8000", "epoch": 3.4057045551298426, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010642826734780758, "grad_norm": 24.32292938232422, "learning_rate": 2.4000000000000003e-07, "loss": 2.3553, "step": 25 }, { "epoch": 0.021285653469561516, "grad_norm": 13.848426818847656, "learning_rate": 4.900000000000001e-07, "loss": 1.8027, "step": 50 }, { "epoch": 0.031928480204342274, "grad_norm": 12.110209465026855, "learning_rate": 7.4e-07, "loss": 1.43, "step": 75 }, { "epoch": 0.04257130693912303, "grad_norm": 11.48365306854248, "learning_rate": 9.9e-07, "loss": 1.2826, "step": 100 }, { "epoch": 0.05321413367390379, "grad_norm": 12.327783584594727, "learning_rate": 1.2400000000000002e-06, "loss": 1.0584, "step": 125 }, { "epoch": 0.06385696040868455, "grad_norm": 10.366923332214355, "learning_rate": 1.4900000000000001e-06, "loss": 0.9736, "step": 150 }, { "epoch": 0.07449978714346531, "grad_norm": 12.267701148986816, "learning_rate": 1.74e-06, "loss": 0.9528, "step": 175 }, { "epoch": 0.08514261387824607, "grad_norm": 10.672560691833496, "learning_rate": 1.9900000000000004e-06, "loss": 0.8758, "step": 200 }, { "epoch": 0.09578544061302682, "grad_norm": 11.000222206115723, "learning_rate": 2.24e-06, "loss": 0.7971, "step": 225 }, { "epoch": 0.10642826734780758, "grad_norm": 10.404646873474121, "learning_rate": 2.4900000000000003e-06, "loss": 0.7646, "step": 250 }, { "epoch": 0.11707109408258834, "grad_norm": 8.721226692199707, "learning_rate": 2.7400000000000004e-06, "loss": 0.7578, "step": 275 }, { "epoch": 0.1277139208173691, "grad_norm": 9.198404312133789, "learning_rate": 2.99e-06, "loss": 0.6623, "step": 300 }, { "epoch": 0.13835674755214986, "grad_norm": 9.394553184509277, "learning_rate": 3.2400000000000003e-06, "loss": 0.669, "step": 325 }, { "epoch": 0.14899957428693061, "grad_norm": 9.5685453414917, "learning_rate": 3.49e-06, "loss": 0.6431, "step": 350 }, { "epoch": 0.15964240102171137, "grad_norm": 10.325225830078125, "learning_rate": 3.74e-06, "loss": 0.6444, "step": 375 }, { "epoch": 0.17028522775649213, "grad_norm": 8.178572654724121, "learning_rate": 3.990000000000001e-06, "loss": 0.609, "step": 400 }, { "epoch": 0.1809280544912729, "grad_norm": 9.746500015258789, "learning_rate": 4.24e-06, "loss": 0.5755, "step": 425 }, { "epoch": 0.19157088122605365, "grad_norm": 8.11845874786377, "learning_rate": 4.49e-06, "loss": 0.5589, "step": 450 }, { "epoch": 0.2022137079608344, "grad_norm": 7.166477203369141, "learning_rate": 4.74e-06, "loss": 0.5578, "step": 475 }, { "epoch": 0.21285653469561516, "grad_norm": 7.681941032409668, "learning_rate": 4.9900000000000005e-06, "loss": 0.5076, "step": 500 }, { "epoch": 0.22349936143039592, "grad_norm": 8.438258171081543, "learning_rate": 5.240000000000001e-06, "loss": 0.5445, "step": 525 }, { "epoch": 0.23414218816517668, "grad_norm": 9.802384376525879, "learning_rate": 5.490000000000001e-06, "loss": 0.5098, "step": 550 }, { "epoch": 0.24478501489995744, "grad_norm": 7.197368144989014, "learning_rate": 5.74e-06, "loss": 0.4969, "step": 575 }, { "epoch": 0.2554278416347382, "grad_norm": 7.666371822357178, "learning_rate": 5.99e-06, "loss": 0.4861, "step": 600 }, { "epoch": 0.2660706683695189, "grad_norm": 7.324782371520996, "learning_rate": 6.24e-06, "loss": 0.481, "step": 625 }, { "epoch": 0.2767134951042997, "grad_norm": 8.649055480957031, "learning_rate": 6.4900000000000005e-06, "loss": 0.447, "step": 650 }, { "epoch": 0.28735632183908044, "grad_norm": 7.136589050292969, "learning_rate": 6.740000000000001e-06, "loss": 0.4778, "step": 675 }, { "epoch": 0.29799914857386123, "grad_norm": 7.153022289276123, "learning_rate": 6.99e-06, "loss": 0.4674, "step": 700 }, { "epoch": 0.30864197530864196, "grad_norm": 6.950058937072754, "learning_rate": 7.24e-06, "loss": 0.4434, "step": 725 }, { "epoch": 0.31928480204342274, "grad_norm": 7.339558124542236, "learning_rate": 7.49e-06, "loss": 0.4439, "step": 750 }, { "epoch": 0.3299276287782035, "grad_norm": 6.6849541664123535, "learning_rate": 7.74e-06, "loss": 0.442, "step": 775 }, { "epoch": 0.34057045551298426, "grad_norm": 7.065944194793701, "learning_rate": 7.990000000000001e-06, "loss": 0.4147, "step": 800 }, { "epoch": 0.351213282247765, "grad_norm": 6.242930889129639, "learning_rate": 8.24e-06, "loss": 0.3905, "step": 825 }, { "epoch": 0.3618561089825458, "grad_norm": 6.885308742523193, "learning_rate": 8.48e-06, "loss": 0.4077, "step": 850 }, { "epoch": 0.3724989357173265, "grad_norm": 5.589861869812012, "learning_rate": 8.730000000000001e-06, "loss": 0.4074, "step": 875 }, { "epoch": 0.3831417624521073, "grad_norm": 6.651442050933838, "learning_rate": 8.98e-06, "loss": 0.4013, "step": 900 }, { "epoch": 0.393784589186888, "grad_norm": 5.705496311187744, "learning_rate": 9.230000000000001e-06, "loss": 0.3897, "step": 925 }, { "epoch": 0.4044274159216688, "grad_norm": 6.6162333488464355, "learning_rate": 9.48e-06, "loss": 0.365, "step": 950 }, { "epoch": 0.41507024265644954, "grad_norm": 7.273537635803223, "learning_rate": 9.73e-06, "loss": 0.3924, "step": 975 }, { "epoch": 0.4257130693912303, "grad_norm": 6.101346969604492, "learning_rate": 9.980000000000001e-06, "loss": 0.3715, "step": 1000 }, { "epoch": 0.4257130693912303, "eval_loss": 0.3456858992576599, "eval_runtime": 2451.0815, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.061, "eval_wer": 40.46916574655637, "step": 1000 }, { "epoch": 0.43635589612601106, "grad_norm": 5.119203567504883, "learning_rate": 9.967142857142858e-06, "loss": 0.3875, "step": 1025 }, { "epoch": 0.44699872286079184, "grad_norm": 5.488610744476318, "learning_rate": 9.931428571428571e-06, "loss": 0.3741, "step": 1050 }, { "epoch": 0.45764154959557257, "grad_norm": 5.710753917694092, "learning_rate": 9.895714285714287e-06, "loss": 0.3622, "step": 1075 }, { "epoch": 0.46828437633035336, "grad_norm": 5.7168192863464355, "learning_rate": 9.86e-06, "loss": 0.3652, "step": 1100 }, { "epoch": 0.4789272030651341, "grad_norm": 5.204087734222412, "learning_rate": 9.824285714285716e-06, "loss": 0.3696, "step": 1125 }, { "epoch": 0.4895700297999149, "grad_norm": 5.014431476593018, "learning_rate": 9.78857142857143e-06, "loss": 0.347, "step": 1150 }, { "epoch": 0.5002128565346956, "grad_norm": 5.643191814422607, "learning_rate": 9.752857142857143e-06, "loss": 0.3681, "step": 1175 }, { "epoch": 0.5108556832694764, "grad_norm": 6.40764045715332, "learning_rate": 9.717142857142858e-06, "loss": 0.34, "step": 1200 }, { "epoch": 0.5214985100042572, "grad_norm": 5.89484167098999, "learning_rate": 9.681428571428572e-06, "loss": 0.3257, "step": 1225 }, { "epoch": 0.5321413367390379, "grad_norm": 6.474817752838135, "learning_rate": 9.645714285714286e-06, "loss": 0.3398, "step": 1250 }, { "epoch": 0.5427841634738186, "grad_norm": 6.272877216339111, "learning_rate": 9.610000000000001e-06, "loss": 0.3315, "step": 1275 }, { "epoch": 0.5534269902085994, "grad_norm": 4.669580936431885, "learning_rate": 9.574285714285715e-06, "loss": 0.3296, "step": 1300 }, { "epoch": 0.5640698169433802, "grad_norm": 4.576137065887451, "learning_rate": 9.538571428571428e-06, "loss": 0.3334, "step": 1325 }, { "epoch": 0.5747126436781609, "grad_norm": 4.5423665046691895, "learning_rate": 9.502857142857144e-06, "loss": 0.3087, "step": 1350 }, { "epoch": 0.5853554704129417, "grad_norm": 4.673890113830566, "learning_rate": 9.467142857142857e-06, "loss": 0.3286, "step": 1375 }, { "epoch": 0.5959982971477225, "grad_norm": 4.775241374969482, "learning_rate": 9.431428571428573e-06, "loss": 0.3258, "step": 1400 }, { "epoch": 0.6066411238825032, "grad_norm": 5.259005069732666, "learning_rate": 9.395714285714287e-06, "loss": 0.29, "step": 1425 }, { "epoch": 0.6172839506172839, "grad_norm": 4.7707014083862305, "learning_rate": 9.360000000000002e-06, "loss": 0.2879, "step": 1450 }, { "epoch": 0.6279267773520647, "grad_norm": 4.998105525970459, "learning_rate": 9.324285714285714e-06, "loss": 0.2957, "step": 1475 }, { "epoch": 0.6385696040868455, "grad_norm": 4.137457370758057, "learning_rate": 9.28857142857143e-06, "loss": 0.2897, "step": 1500 }, { "epoch": 0.6492124308216263, "grad_norm": 4.685913562774658, "learning_rate": 9.252857142857143e-06, "loss": 0.3282, "step": 1525 }, { "epoch": 0.659855257556407, "grad_norm": 5.36374044418335, "learning_rate": 9.217142857142858e-06, "loss": 0.2747, "step": 1550 }, { "epoch": 0.6704980842911877, "grad_norm": 4.616824150085449, "learning_rate": 9.181428571428572e-06, "loss": 0.285, "step": 1575 }, { "epoch": 0.6811409110259685, "grad_norm": 4.848719120025635, "learning_rate": 9.145714285714287e-06, "loss": 0.2971, "step": 1600 }, { "epoch": 0.6917837377607493, "grad_norm": 4.435796737670898, "learning_rate": 9.110000000000001e-06, "loss": 0.2993, "step": 1625 }, { "epoch": 0.70242656449553, "grad_norm": 4.055502414703369, "learning_rate": 9.074285714285716e-06, "loss": 0.2713, "step": 1650 }, { "epoch": 0.7130693912303108, "grad_norm": 5.476015090942383, "learning_rate": 9.038571428571428e-06, "loss": 0.2553, "step": 1675 }, { "epoch": 0.7237122179650916, "grad_norm": 4.443753242492676, "learning_rate": 9.002857142857144e-06, "loss": 0.2772, "step": 1700 }, { "epoch": 0.7343550446998723, "grad_norm": 4.617072105407715, "learning_rate": 8.967142857142857e-06, "loss": 0.2745, "step": 1725 }, { "epoch": 0.744997871434653, "grad_norm": 4.322467803955078, "learning_rate": 8.931428571428573e-06, "loss": 0.2756, "step": 1750 }, { "epoch": 0.7556406981694338, "grad_norm": 5.194156169891357, "learning_rate": 8.895714285714286e-06, "loss": 0.2571, "step": 1775 }, { "epoch": 0.7662835249042146, "grad_norm": 5.350680828094482, "learning_rate": 8.860000000000002e-06, "loss": 0.2705, "step": 1800 }, { "epoch": 0.7769263516389954, "grad_norm": 5.343641757965088, "learning_rate": 8.824285714285715e-06, "loss": 0.2499, "step": 1825 }, { "epoch": 0.787569178373776, "grad_norm": 4.356059551239014, "learning_rate": 8.788571428571429e-06, "loss": 0.2767, "step": 1850 }, { "epoch": 0.7982120051085568, "grad_norm": 4.316229820251465, "learning_rate": 8.752857142857144e-06, "loss": 0.2484, "step": 1875 }, { "epoch": 0.8088548318433376, "grad_norm": 4.627383232116699, "learning_rate": 8.717142857142858e-06, "loss": 0.2559, "step": 1900 }, { "epoch": 0.8194976585781183, "grad_norm": 4.916121006011963, "learning_rate": 8.681428571428572e-06, "loss": 0.2755, "step": 1925 }, { "epoch": 0.8301404853128991, "grad_norm": 5.244263172149658, "learning_rate": 8.645714285714287e-06, "loss": 0.2334, "step": 1950 }, { "epoch": 0.8407833120476799, "grad_norm": 4.568859100341797, "learning_rate": 8.61e-06, "loss": 0.2542, "step": 1975 }, { "epoch": 0.8514261387824607, "grad_norm": 3.6536848545074463, "learning_rate": 8.574285714285714e-06, "loss": 0.251, "step": 2000 }, { "epoch": 0.8514261387824607, "eval_loss": 0.21811740100383759, "eval_runtime": 2451.0584, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.061, "eval_wer": 27.706481799916737, "step": 2000 }, { "epoch": 0.8620689655172413, "grad_norm": 4.318572521209717, "learning_rate": 8.53857142857143e-06, "loss": 0.2319, "step": 2025 }, { "epoch": 0.8727117922520221, "grad_norm": 4.489058494567871, "learning_rate": 8.502857142857143e-06, "loss": 0.2531, "step": 2050 }, { "epoch": 0.8833546189868029, "grad_norm": 4.232712745666504, "learning_rate": 8.467142857142859e-06, "loss": 0.2491, "step": 2075 }, { "epoch": 0.8939974457215837, "grad_norm": 4.031393051147461, "learning_rate": 8.431428571428572e-06, "loss": 0.2485, "step": 2100 }, { "epoch": 0.9046402724563644, "grad_norm": 3.8136720657348633, "learning_rate": 8.395714285714286e-06, "loss": 0.2412, "step": 2125 }, { "epoch": 0.9152830991911451, "grad_norm": 4.3343505859375, "learning_rate": 8.36e-06, "loss": 0.2378, "step": 2150 }, { "epoch": 0.9259259259259259, "grad_norm": 3.6388914585113525, "learning_rate": 8.324285714285715e-06, "loss": 0.2409, "step": 2175 }, { "epoch": 0.9365687526607067, "grad_norm": 5.596227169036865, "learning_rate": 8.288571428571429e-06, "loss": 0.2363, "step": 2200 }, { "epoch": 0.9472115793954874, "grad_norm": 4.016772747039795, "learning_rate": 8.252857142857144e-06, "loss": 0.2281, "step": 2225 }, { "epoch": 0.9578544061302682, "grad_norm": 5.106402397155762, "learning_rate": 8.217142857142858e-06, "loss": 0.224, "step": 2250 }, { "epoch": 0.968497232865049, "grad_norm": 3.714061975479126, "learning_rate": 8.181428571428573e-06, "loss": 0.2306, "step": 2275 }, { "epoch": 0.9791400595998297, "grad_norm": 4.1780009269714355, "learning_rate": 8.145714285714287e-06, "loss": 0.2284, "step": 2300 }, { "epoch": 0.9897828863346104, "grad_norm": 4.007058143615723, "learning_rate": 8.110000000000002e-06, "loss": 0.237, "step": 2325 }, { "epoch": 1.0004257130693912, "grad_norm": 2.91274094581604, "learning_rate": 8.074285714285714e-06, "loss": 0.227, "step": 2350 }, { "epoch": 1.0110685398041719, "grad_norm": 4.321012496948242, "learning_rate": 8.03857142857143e-06, "loss": 0.1917, "step": 2375 }, { "epoch": 1.0217113665389528, "grad_norm": 4.613705635070801, "learning_rate": 8.002857142857143e-06, "loss": 0.1861, "step": 2400 }, { "epoch": 1.0323541932737335, "grad_norm": 3.9575023651123047, "learning_rate": 7.967142857142858e-06, "loss": 0.1931, "step": 2425 }, { "epoch": 1.0429970200085144, "grad_norm": 4.651571273803711, "learning_rate": 7.931428571428572e-06, "loss": 0.1784, "step": 2450 }, { "epoch": 1.053639846743295, "grad_norm": 4.0472412109375, "learning_rate": 7.895714285714287e-06, "loss": 0.2002, "step": 2475 }, { "epoch": 1.0642826734780757, "grad_norm": 3.4641237258911133, "learning_rate": 7.860000000000001e-06, "loss": 0.1885, "step": 2500 }, { "epoch": 1.0749255002128566, "grad_norm": 3.260540008544922, "learning_rate": 7.824285714285715e-06, "loss": 0.1924, "step": 2525 }, { "epoch": 1.0855683269476373, "grad_norm": 4.416691303253174, "learning_rate": 7.788571428571428e-06, "loss": 0.181, "step": 2550 }, { "epoch": 1.096211153682418, "grad_norm": 3.7334911823272705, "learning_rate": 7.752857142857144e-06, "loss": 0.1667, "step": 2575 }, { "epoch": 1.1068539804171988, "grad_norm": 4.4988555908203125, "learning_rate": 7.717142857142857e-06, "loss": 0.1864, "step": 2600 }, { "epoch": 1.1174968071519795, "grad_norm": 4.6382222175598145, "learning_rate": 7.681428571428573e-06, "loss": 0.1805, "step": 2625 }, { "epoch": 1.1281396338867604, "grad_norm": 4.512842178344727, "learning_rate": 7.645714285714286e-06, "loss": 0.1848, "step": 2650 }, { "epoch": 1.138782460621541, "grad_norm": 3.889390468597412, "learning_rate": 7.610000000000001e-06, "loss": 0.1846, "step": 2675 }, { "epoch": 1.1494252873563218, "grad_norm": 4.247312068939209, "learning_rate": 7.574285714285715e-06, "loss": 0.1799, "step": 2700 }, { "epoch": 1.1600681140911027, "grad_norm": 4.321536540985107, "learning_rate": 7.53857142857143e-06, "loss": 0.1764, "step": 2725 }, { "epoch": 1.1707109408258833, "grad_norm": 4.06414794921875, "learning_rate": 7.502857142857144e-06, "loss": 0.1903, "step": 2750 }, { "epoch": 1.181353767560664, "grad_norm": 3.314551591873169, "learning_rate": 7.467142857142857e-06, "loss": 0.1614, "step": 2775 }, { "epoch": 1.191996594295445, "grad_norm": 4.245212078094482, "learning_rate": 7.431428571428572e-06, "loss": 0.1867, "step": 2800 }, { "epoch": 1.2026394210302256, "grad_norm": 3.1465117931365967, "learning_rate": 7.395714285714286e-06, "loss": 0.1584, "step": 2825 }, { "epoch": 1.2132822477650063, "grad_norm": 4.1284637451171875, "learning_rate": 7.360000000000001e-06, "loss": 0.1856, "step": 2850 }, { "epoch": 1.2239250744997872, "grad_norm": 3.685889720916748, "learning_rate": 7.324285714285715e-06, "loss": 0.171, "step": 2875 }, { "epoch": 1.2345679012345678, "grad_norm": 4.70512580871582, "learning_rate": 7.28857142857143e-06, "loss": 0.1752, "step": 2900 }, { "epoch": 1.2452107279693487, "grad_norm": 3.846862316131592, "learning_rate": 7.252857142857143e-06, "loss": 0.1697, "step": 2925 }, { "epoch": 1.2558535547041294, "grad_norm": 3.7466206550598145, "learning_rate": 7.217142857142858e-06, "loss": 0.1796, "step": 2950 }, { "epoch": 1.2664963814389103, "grad_norm": 3.8162903785705566, "learning_rate": 7.182857142857144e-06, "loss": 0.1591, "step": 2975 }, { "epoch": 1.277139208173691, "grad_norm": 3.880910873413086, "learning_rate": 7.147142857142858e-06, "loss": 0.1569, "step": 3000 }, { "epoch": 1.277139208173691, "eval_loss": 0.1813717633485794, "eval_runtime": 2447.146, "eval_samples_per_second": 1.952, "eval_steps_per_second": 0.061, "eval_wer": 24.153347693087408, "step": 3000 }, { "epoch": 1.2877820349084717, "grad_norm": 3.98262882232666, "learning_rate": 7.111428571428572e-06, "loss": 0.1804, "step": 3025 }, { "epoch": 1.2984248616432525, "grad_norm": 3.6790521144866943, "learning_rate": 7.075714285714286e-06, "loss": 0.1647, "step": 3050 }, { "epoch": 1.3090676883780332, "grad_norm": 3.431762456893921, "learning_rate": 7.04e-06, "loss": 0.1662, "step": 3075 }, { "epoch": 1.319710515112814, "grad_norm": 4.0635247230529785, "learning_rate": 7.004285714285715e-06, "loss": 0.1726, "step": 3100 }, { "epoch": 1.3303533418475948, "grad_norm": 3.1607766151428223, "learning_rate": 6.968571428571429e-06, "loss": 0.1544, "step": 3125 }, { "epoch": 1.3409961685823755, "grad_norm": 4.5737385749816895, "learning_rate": 6.932857142857143e-06, "loss": 0.1644, "step": 3150 }, { "epoch": 1.3516389953171561, "grad_norm": 4.182763576507568, "learning_rate": 6.8971428571428575e-06, "loss": 0.167, "step": 3175 }, { "epoch": 1.362281822051937, "grad_norm": 3.3566439151763916, "learning_rate": 6.861428571428572e-06, "loss": 0.1631, "step": 3200 }, { "epoch": 1.3729246487867177, "grad_norm": 3.771667718887329, "learning_rate": 6.8257142857142866e-06, "loss": 0.1675, "step": 3225 }, { "epoch": 1.3835674755214984, "grad_norm": 4.14226770401001, "learning_rate": 6.790000000000001e-06, "loss": 0.1785, "step": 3250 }, { "epoch": 1.3942103022562793, "grad_norm": 4.599484443664551, "learning_rate": 6.754285714285715e-06, "loss": 0.1832, "step": 3275 }, { "epoch": 1.40485312899106, "grad_norm": 3.78108286857605, "learning_rate": 6.718571428571428e-06, "loss": 0.1765, "step": 3300 }, { "epoch": 1.4154959557258409, "grad_norm": 3.3249051570892334, "learning_rate": 6.682857142857143e-06, "loss": 0.1517, "step": 3325 }, { "epoch": 1.4261387824606215, "grad_norm": 3.299750804901123, "learning_rate": 6.647142857142857e-06, "loss": 0.1632, "step": 3350 }, { "epoch": 1.4367816091954024, "grad_norm": 4.0860066413879395, "learning_rate": 6.611428571428572e-06, "loss": 0.1457, "step": 3375 }, { "epoch": 1.447424435930183, "grad_norm": 4.305485725402832, "learning_rate": 6.575714285714286e-06, "loss": 0.1638, "step": 3400 }, { "epoch": 1.4580672626649638, "grad_norm": 3.656642436981201, "learning_rate": 6.540000000000001e-06, "loss": 0.1681, "step": 3425 }, { "epoch": 1.4687100893997447, "grad_norm": 3.596554756164551, "learning_rate": 6.504285714285715e-06, "loss": 0.1473, "step": 3450 }, { "epoch": 1.4793529161345254, "grad_norm": 3.35798716545105, "learning_rate": 6.46857142857143e-06, "loss": 0.1443, "step": 3475 }, { "epoch": 1.489995742869306, "grad_norm": 3.782789468765259, "learning_rate": 6.432857142857143e-06, "loss": 0.1399, "step": 3500 }, { "epoch": 1.500638569604087, "grad_norm": 3.556546926498413, "learning_rate": 6.397142857142857e-06, "loss": 0.1657, "step": 3525 }, { "epoch": 1.5112813963388676, "grad_norm": 4.0330657958984375, "learning_rate": 6.361428571428572e-06, "loss": 0.1455, "step": 3550 }, { "epoch": 1.5219242230736483, "grad_norm": 3.4194424152374268, "learning_rate": 6.325714285714286e-06, "loss": 0.1558, "step": 3575 }, { "epoch": 1.5325670498084292, "grad_norm": 3.4053897857666016, "learning_rate": 6.290000000000001e-06, "loss": 0.1667, "step": 3600 }, { "epoch": 1.5432098765432098, "grad_norm": 3.4398772716522217, "learning_rate": 6.254285714285715e-06, "loss": 0.1704, "step": 3625 }, { "epoch": 1.5538527032779905, "grad_norm": 3.950698137283325, "learning_rate": 6.21857142857143e-06, "loss": 0.1587, "step": 3650 }, { "epoch": 1.5644955300127714, "grad_norm": 3.5105514526367188, "learning_rate": 6.1828571428571434e-06, "loss": 0.1662, "step": 3675 }, { "epoch": 1.5751383567475523, "grad_norm": 3.1570792198181152, "learning_rate": 6.147142857142858e-06, "loss": 0.1542, "step": 3700 }, { "epoch": 1.5857811834823328, "grad_norm": 3.395730495452881, "learning_rate": 6.111428571428572e-06, "loss": 0.1419, "step": 3725 }, { "epoch": 1.5964240102171137, "grad_norm": 3.692760944366455, "learning_rate": 6.075714285714286e-06, "loss": 0.1515, "step": 3750 }, { "epoch": 1.6070668369518946, "grad_norm": 4.292817115783691, "learning_rate": 6.040000000000001e-06, "loss": 0.1558, "step": 3775 }, { "epoch": 1.617709663686675, "grad_norm": 2.7795393466949463, "learning_rate": 6.004285714285715e-06, "loss": 0.1603, "step": 3800 }, { "epoch": 1.628352490421456, "grad_norm": 3.6494193077087402, "learning_rate": 5.968571428571429e-06, "loss": 0.1527, "step": 3825 }, { "epoch": 1.6389953171562368, "grad_norm": 3.185007333755493, "learning_rate": 5.932857142857143e-06, "loss": 0.1415, "step": 3850 }, { "epoch": 1.6496381438910175, "grad_norm": 4.0278143882751465, "learning_rate": 5.897142857142858e-06, "loss": 0.1595, "step": 3875 }, { "epoch": 1.6602809706257982, "grad_norm": 3.8083670139312744, "learning_rate": 5.861428571428572e-06, "loss": 0.1596, "step": 3900 }, { "epoch": 1.670923797360579, "grad_norm": 5.412234783172607, "learning_rate": 5.825714285714286e-06, "loss": 0.1418, "step": 3925 }, { "epoch": 1.6815666240953597, "grad_norm": 3.8275325298309326, "learning_rate": 5.7900000000000005e-06, "loss": 0.1725, "step": 3950 }, { "epoch": 1.6922094508301404, "grad_norm": 3.4874017238616943, "learning_rate": 5.754285714285714e-06, "loss": 0.1334, "step": 3975 }, { "epoch": 1.7028522775649213, "grad_norm": 2.9034647941589355, "learning_rate": 5.718571428571429e-06, "loss": 0.1436, "step": 4000 }, { "epoch": 1.7028522775649213, "eval_loss": 0.1530725359916687, "eval_runtime": 2470.8785, "eval_samples_per_second": 1.933, "eval_steps_per_second": 0.061, "eval_wer": 20.381197169077055, "step": 4000 }, { "epoch": 1.713495104299702, "grad_norm": 3.192444086074829, "learning_rate": 5.682857142857143e-06, "loss": 0.1391, "step": 4025 }, { "epoch": 1.7241379310344827, "grad_norm": 3.376185655593872, "learning_rate": 5.647142857142858e-06, "loss": 0.1447, "step": 4050 }, { "epoch": 1.7347807577692635, "grad_norm": 3.2235193252563477, "learning_rate": 5.611428571428572e-06, "loss": 0.1473, "step": 4075 }, { "epoch": 1.7454235845040442, "grad_norm": 3.4376378059387207, "learning_rate": 5.575714285714287e-06, "loss": 0.1526, "step": 4100 }, { "epoch": 1.756066411238825, "grad_norm": 3.4150240421295166, "learning_rate": 5.540000000000001e-06, "loss": 0.1503, "step": 4125 }, { "epoch": 1.7667092379736058, "grad_norm": 3.757262706756592, "learning_rate": 5.504285714285714e-06, "loss": 0.1311, "step": 4150 }, { "epoch": 1.7773520647083867, "grad_norm": 3.725192070007324, "learning_rate": 5.4685714285714285e-06, "loss": 0.1506, "step": 4175 }, { "epoch": 1.7879948914431671, "grad_norm": 3.243486166000366, "learning_rate": 5.432857142857143e-06, "loss": 0.1529, "step": 4200 }, { "epoch": 1.798637718177948, "grad_norm": 3.1005189418792725, "learning_rate": 5.3971428571428575e-06, "loss": 0.1592, "step": 4225 }, { "epoch": 1.809280544912729, "grad_norm": 2.6923441886901855, "learning_rate": 5.361428571428572e-06, "loss": 0.1373, "step": 4250 }, { "epoch": 1.8199233716475096, "grad_norm": 3.4601283073425293, "learning_rate": 5.3257142857142865e-06, "loss": 0.1358, "step": 4275 }, { "epoch": 1.8305661983822903, "grad_norm": 4.46110200881958, "learning_rate": 5.290000000000001e-06, "loss": 0.1406, "step": 4300 }, { "epoch": 1.8412090251170712, "grad_norm": 3.4556360244750977, "learning_rate": 5.254285714285715e-06, "loss": 0.1314, "step": 4325 }, { "epoch": 1.8518518518518519, "grad_norm": 2.851836919784546, "learning_rate": 5.218571428571429e-06, "loss": 0.1263, "step": 4350 }, { "epoch": 1.8624946785866325, "grad_norm": 3.1507768630981445, "learning_rate": 5.182857142857143e-06, "loss": 0.1263, "step": 4375 }, { "epoch": 1.8731375053214134, "grad_norm": 3.7861220836639404, "learning_rate": 5.147142857142857e-06, "loss": 0.1423, "step": 4400 }, { "epoch": 1.883780332056194, "grad_norm": 2.670792818069458, "learning_rate": 5.111428571428572e-06, "loss": 0.1378, "step": 4425 }, { "epoch": 1.8944231587909748, "grad_norm": 3.21482515335083, "learning_rate": 5.075714285714286e-06, "loss": 0.1305, "step": 4450 }, { "epoch": 1.9050659855257557, "grad_norm": 3.0958456993103027, "learning_rate": 5.04e-06, "loss": 0.1312, "step": 4475 }, { "epoch": 1.9157088122605364, "grad_norm": 3.2010111808776855, "learning_rate": 5.0042857142857145e-06, "loss": 0.1358, "step": 4500 }, { "epoch": 1.926351638995317, "grad_norm": 4.211108684539795, "learning_rate": 4.968571428571429e-06, "loss": 0.149, "step": 4525 }, { "epoch": 1.936994465730098, "grad_norm": 3.6158218383789062, "learning_rate": 4.932857142857143e-06, "loss": 0.1456, "step": 4550 }, { "epoch": 1.9476372924648788, "grad_norm": 3.1304032802581787, "learning_rate": 4.897142857142857e-06, "loss": 0.1474, "step": 4575 }, { "epoch": 1.9582801191996593, "grad_norm": 3.7992565631866455, "learning_rate": 4.861428571428572e-06, "loss": 0.1252, "step": 4600 }, { "epoch": 1.9689229459344402, "grad_norm": 3.0859761238098145, "learning_rate": 4.825714285714286e-06, "loss": 0.1459, "step": 4625 }, { "epoch": 1.979565772669221, "grad_norm": 4.332040309906006, "learning_rate": 4.79e-06, "loss": 0.129, "step": 4650 }, { "epoch": 1.9902085994040017, "grad_norm": 4.2954816818237305, "learning_rate": 4.754285714285714e-06, "loss": 0.1566, "step": 4675 }, { "epoch": 2.0008514261387824, "grad_norm": 2.788947105407715, "learning_rate": 4.718571428571429e-06, "loss": 0.145, "step": 4700 }, { "epoch": 2.0114942528735633, "grad_norm": 3.2599875926971436, "learning_rate": 4.682857142857143e-06, "loss": 0.1063, "step": 4725 }, { "epoch": 2.0221370796083438, "grad_norm": 3.0225577354431152, "learning_rate": 4.647142857142857e-06, "loss": 0.0877, "step": 4750 }, { "epoch": 2.0327799063431247, "grad_norm": 3.564682960510254, "learning_rate": 4.6114285714285716e-06, "loss": 0.1014, "step": 4775 }, { "epoch": 2.0434227330779056, "grad_norm": 2.5339510440826416, "learning_rate": 4.575714285714286e-06, "loss": 0.0906, "step": 4800 }, { "epoch": 2.0540655598126865, "grad_norm": 2.7343597412109375, "learning_rate": 4.540000000000001e-06, "loss": 0.0994, "step": 4825 }, { "epoch": 2.064708386547467, "grad_norm": 2.6490981578826904, "learning_rate": 4.504285714285715e-06, "loss": 0.0979, "step": 4850 }, { "epoch": 2.075351213282248, "grad_norm": 1.9775068759918213, "learning_rate": 4.468571428571429e-06, "loss": 0.1023, "step": 4875 }, { "epoch": 2.0859940400170287, "grad_norm": 2.2302167415618896, "learning_rate": 4.432857142857143e-06, "loss": 0.101, "step": 4900 }, { "epoch": 2.096636866751809, "grad_norm": 2.7685494422912598, "learning_rate": 4.397142857142858e-06, "loss": 0.0934, "step": 4925 }, { "epoch": 2.10727969348659, "grad_norm": 2.8027827739715576, "learning_rate": 4.361428571428572e-06, "loss": 0.0962, "step": 4950 }, { "epoch": 2.117922520221371, "grad_norm": 2.9173505306243896, "learning_rate": 4.325714285714286e-06, "loss": 0.0934, "step": 4975 }, { "epoch": 2.1285653469561514, "grad_norm": 2.7315633296966553, "learning_rate": 4.2900000000000004e-06, "loss": 0.0931, "step": 5000 }, { "epoch": 2.1285653469561514, "eval_loss": 0.13744878768920898, "eval_runtime": 2441.1543, "eval_samples_per_second": 1.957, "eval_steps_per_second": 0.061, "eval_wer": 18.466161058519013, "step": 5000 }, { "epoch": 2.1392081736909323, "grad_norm": 2.411224126815796, "learning_rate": 4.254285714285715e-06, "loss": 0.1058, "step": 5025 }, { "epoch": 2.149851000425713, "grad_norm": 2.7599411010742188, "learning_rate": 4.2185714285714294e-06, "loss": 0.105, "step": 5050 }, { "epoch": 2.1604938271604937, "grad_norm": 2.873077392578125, "learning_rate": 4.182857142857143e-06, "loss": 0.1122, "step": 5075 }, { "epoch": 2.1711366538952745, "grad_norm": 2.4859185218811035, "learning_rate": 4.147142857142858e-06, "loss": 0.0956, "step": 5100 }, { "epoch": 2.1817794806300554, "grad_norm": 2.307053565979004, "learning_rate": 4.111428571428572e-06, "loss": 0.0936, "step": 5125 }, { "epoch": 2.192422307364836, "grad_norm": 2.692552328109741, "learning_rate": 4.075714285714286e-06, "loss": 0.0814, "step": 5150 }, { "epoch": 2.203065134099617, "grad_norm": 2.640380382537842, "learning_rate": 4.04e-06, "loss": 0.0961, "step": 5175 }, { "epoch": 2.2137079608343977, "grad_norm": 1.9715120792388916, "learning_rate": 4.004285714285715e-06, "loss": 0.0911, "step": 5200 }, { "epoch": 2.224350787569178, "grad_norm": 2.4855728149414062, "learning_rate": 3.9685714285714284e-06, "loss": 0.0871, "step": 5225 }, { "epoch": 2.234993614303959, "grad_norm": 2.190443992614746, "learning_rate": 3.932857142857143e-06, "loss": 0.0923, "step": 5250 }, { "epoch": 2.24563644103874, "grad_norm": 2.5768940448760986, "learning_rate": 3.8971428571428575e-06, "loss": 0.1033, "step": 5275 }, { "epoch": 2.256279267773521, "grad_norm": 2.527087926864624, "learning_rate": 3.861428571428571e-06, "loss": 0.081, "step": 5300 }, { "epoch": 2.2669220945083013, "grad_norm": 3.3411247730255127, "learning_rate": 3.825714285714286e-06, "loss": 0.0931, "step": 5325 }, { "epoch": 2.277564921243082, "grad_norm": 2.852933645248413, "learning_rate": 3.79e-06, "loss": 0.0736, "step": 5350 }, { "epoch": 2.288207747977863, "grad_norm": 3.596585512161255, "learning_rate": 3.7542857142857146e-06, "loss": 0.0892, "step": 5375 }, { "epoch": 2.2988505747126435, "grad_norm": 3.5326387882232666, "learning_rate": 3.7185714285714287e-06, "loss": 0.0827, "step": 5400 }, { "epoch": 2.3094934014474244, "grad_norm": 2.5857245922088623, "learning_rate": 3.682857142857143e-06, "loss": 0.0957, "step": 5425 }, { "epoch": 2.3201362281822053, "grad_norm": 2.7961575984954834, "learning_rate": 3.6471428571428573e-06, "loss": 0.0968, "step": 5450 }, { "epoch": 2.330779054916986, "grad_norm": 2.5830881595611572, "learning_rate": 3.611428571428572e-06, "loss": 0.086, "step": 5475 }, { "epoch": 2.3414218816517667, "grad_norm": 3.009079694747925, "learning_rate": 3.5757142857142863e-06, "loss": 0.0821, "step": 5500 }, { "epoch": 2.3520647083865476, "grad_norm": 3.2206666469573975, "learning_rate": 3.54e-06, "loss": 0.1, "step": 5525 }, { "epoch": 2.362707535121328, "grad_norm": 2.6536972522735596, "learning_rate": 3.5042857142857145e-06, "loss": 0.0911, "step": 5550 }, { "epoch": 2.373350361856109, "grad_norm": 2.0286781787872314, "learning_rate": 3.468571428571429e-06, "loss": 0.083, "step": 5575 }, { "epoch": 2.38399318859089, "grad_norm": 3.5354936122894287, "learning_rate": 3.4328571428571435e-06, "loss": 0.0994, "step": 5600 }, { "epoch": 2.3946360153256707, "grad_norm": 2.823812246322632, "learning_rate": 3.397142857142857e-06, "loss": 0.0921, "step": 5625 }, { "epoch": 2.405278842060451, "grad_norm": 3.5603067874908447, "learning_rate": 3.3614285714285717e-06, "loss": 0.1015, "step": 5650 }, { "epoch": 2.415921668795232, "grad_norm": 2.4219422340393066, "learning_rate": 3.325714285714286e-06, "loss": 0.098, "step": 5675 }, { "epoch": 2.4265644955300125, "grad_norm": 3.9650704860687256, "learning_rate": 3.2900000000000003e-06, "loss": 0.0914, "step": 5700 }, { "epoch": 2.4372073222647934, "grad_norm": 2.7661550045013428, "learning_rate": 3.2542857142857148e-06, "loss": 0.0733, "step": 5725 }, { "epoch": 2.4478501489995743, "grad_norm": 2.8396358489990234, "learning_rate": 3.218571428571429e-06, "loss": 0.0954, "step": 5750 }, { "epoch": 2.458492975734355, "grad_norm": 2.8353986740112305, "learning_rate": 3.182857142857143e-06, "loss": 0.0848, "step": 5775 }, { "epoch": 2.4691358024691357, "grad_norm": 2.9679837226867676, "learning_rate": 3.1471428571428574e-06, "loss": 0.084, "step": 5800 }, { "epoch": 2.4797786292039166, "grad_norm": 2.0554795265197754, "learning_rate": 3.111428571428572e-06, "loss": 0.0894, "step": 5825 }, { "epoch": 2.4904214559386975, "grad_norm": 2.5439860820770264, "learning_rate": 3.0757142857142856e-06, "loss": 0.0836, "step": 5850 }, { "epoch": 2.501064282673478, "grad_norm": 2.93955135345459, "learning_rate": 3.04e-06, "loss": 0.0915, "step": 5875 }, { "epoch": 2.511707109408259, "grad_norm": 2.3502097129821777, "learning_rate": 3.0042857142857146e-06, "loss": 0.0963, "step": 5900 }, { "epoch": 2.5223499361430397, "grad_norm": 2.289599895477295, "learning_rate": 2.968571428571429e-06, "loss": 0.0892, "step": 5925 }, { "epoch": 2.5329927628778206, "grad_norm": 4.718634128570557, "learning_rate": 2.932857142857143e-06, "loss": 0.089, "step": 5950 }, { "epoch": 2.543635589612601, "grad_norm": 2.9124553203582764, "learning_rate": 2.8971428571428573e-06, "loss": 0.0943, "step": 5975 }, { "epoch": 2.554278416347382, "grad_norm": 3.2406508922576904, "learning_rate": 2.861428571428572e-06, "loss": 0.0891, "step": 6000 }, { "epoch": 2.554278416347382, "eval_loss": 0.1251918077468872, "eval_runtime": 2435.0048, "eval_samples_per_second": 1.962, "eval_steps_per_second": 0.062, "eval_wer": 16.934856191286404, "step": 6000 }, { "epoch": 2.5649212430821624, "grad_norm": 2.5758533477783203, "learning_rate": 2.825714285714286e-06, "loss": 0.0909, "step": 6025 }, { "epoch": 2.5755640698169433, "grad_norm": 2.308535575866699, "learning_rate": 2.7900000000000004e-06, "loss": 0.0903, "step": 6050 }, { "epoch": 2.586206896551724, "grad_norm": 3.0140132904052734, "learning_rate": 2.7542857142857145e-06, "loss": 0.1005, "step": 6075 }, { "epoch": 2.596849723286505, "grad_norm": 3.0237767696380615, "learning_rate": 2.7185714285714286e-06, "loss": 0.1032, "step": 6100 }, { "epoch": 2.6074925500212855, "grad_norm": 2.413677930831909, "learning_rate": 2.682857142857143e-06, "loss": 0.0753, "step": 6125 }, { "epoch": 2.6181353767560664, "grad_norm": 2.406214475631714, "learning_rate": 2.6471428571428576e-06, "loss": 0.0744, "step": 6150 }, { "epoch": 2.628778203490847, "grad_norm": 2.9371650218963623, "learning_rate": 2.6114285714285712e-06, "loss": 0.0795, "step": 6175 }, { "epoch": 2.639421030225628, "grad_norm": 3.0647592544555664, "learning_rate": 2.5757142857142857e-06, "loss": 0.0885, "step": 6200 }, { "epoch": 2.6500638569604087, "grad_norm": 2.245195150375366, "learning_rate": 2.5400000000000002e-06, "loss": 0.0951, "step": 6225 }, { "epoch": 2.6607066836951896, "grad_norm": 3.212939977645874, "learning_rate": 2.5042857142857148e-06, "loss": 0.1081, "step": 6250 }, { "epoch": 2.67134951042997, "grad_norm": 2.987602949142456, "learning_rate": 2.468571428571429e-06, "loss": 0.0694, "step": 6275 }, { "epoch": 2.681992337164751, "grad_norm": 2.6746339797973633, "learning_rate": 2.4328571428571433e-06, "loss": 0.0879, "step": 6300 }, { "epoch": 2.692635163899532, "grad_norm": 2.3074121475219727, "learning_rate": 2.3971428571428574e-06, "loss": 0.0771, "step": 6325 }, { "epoch": 2.7032779906343123, "grad_norm": 2.62947940826416, "learning_rate": 2.361428571428572e-06, "loss": 0.0882, "step": 6350 }, { "epoch": 2.713920817369093, "grad_norm": 2.5452988147735596, "learning_rate": 2.325714285714286e-06, "loss": 0.081, "step": 6375 }, { "epoch": 2.724563644103874, "grad_norm": 1.9240838289260864, "learning_rate": 2.29e-06, "loss": 0.0672, "step": 6400 }, { "epoch": 2.735206470838655, "grad_norm": 2.3632349967956543, "learning_rate": 2.2542857142857146e-06, "loss": 0.0716, "step": 6425 }, { "epoch": 2.7458492975734354, "grad_norm": 1.9626713991165161, "learning_rate": 2.2185714285714287e-06, "loss": 0.0857, "step": 6450 }, { "epoch": 2.7564921243082163, "grad_norm": 1.8497956991195679, "learning_rate": 2.1828571428571428e-06, "loss": 0.0774, "step": 6475 }, { "epoch": 2.767134951042997, "grad_norm": 1.9737045764923096, "learning_rate": 2.1471428571428573e-06, "loss": 0.0884, "step": 6500 }, { "epoch": 2.7777777777777777, "grad_norm": 3.017702102661133, "learning_rate": 2.1114285714285714e-06, "loss": 0.0894, "step": 6525 }, { "epoch": 2.7884206045125586, "grad_norm": 2.41921067237854, "learning_rate": 2.075714285714286e-06, "loss": 0.0855, "step": 6550 }, { "epoch": 2.7990634312473395, "grad_norm": 2.0304954051971436, "learning_rate": 2.04e-06, "loss": 0.0802, "step": 6575 }, { "epoch": 2.80970625798212, "grad_norm": 2.724147319793701, "learning_rate": 2.0042857142857145e-06, "loss": 0.0892, "step": 6600 }, { "epoch": 2.820349084716901, "grad_norm": 1.7320371866226196, "learning_rate": 1.968571428571429e-06, "loss": 0.1036, "step": 6625 }, { "epoch": 2.8309919114516817, "grad_norm": 2.932657241821289, "learning_rate": 1.932857142857143e-06, "loss": 0.0902, "step": 6650 }, { "epoch": 2.841634738186462, "grad_norm": 2.653630256652832, "learning_rate": 1.8971428571428573e-06, "loss": 0.0807, "step": 6675 }, { "epoch": 2.852277564921243, "grad_norm": 2.851041078567505, "learning_rate": 1.8614285714285714e-06, "loss": 0.0908, "step": 6700 }, { "epoch": 2.862920391656024, "grad_norm": 3.30446720123291, "learning_rate": 1.825714285714286e-06, "loss": 0.1001, "step": 6725 }, { "epoch": 2.873563218390805, "grad_norm": 3.250701427459717, "learning_rate": 1.79e-06, "loss": 0.0825, "step": 6750 }, { "epoch": 2.8842060451255853, "grad_norm": 2.4845850467681885, "learning_rate": 1.7542857142857145e-06, "loss": 0.0705, "step": 6775 }, { "epoch": 2.894848871860366, "grad_norm": 2.6934683322906494, "learning_rate": 1.7185714285714286e-06, "loss": 0.0667, "step": 6800 }, { "epoch": 2.9054916985951467, "grad_norm": 2.785459518432617, "learning_rate": 1.6828571428571431e-06, "loss": 0.0746, "step": 6825 }, { "epoch": 2.9161345253299276, "grad_norm": 2.5107369422912598, "learning_rate": 1.6471428571428572e-06, "loss": 0.077, "step": 6850 }, { "epoch": 2.9267773520647085, "grad_norm": 3.4977328777313232, "learning_rate": 1.6114285714285715e-06, "loss": 0.0857, "step": 6875 }, { "epoch": 2.9374201787994894, "grad_norm": 2.6151537895202637, "learning_rate": 1.575714285714286e-06, "loss": 0.0729, "step": 6900 }, { "epoch": 2.94806300553427, "grad_norm": 2.975446939468384, "learning_rate": 1.54e-06, "loss": 0.0581, "step": 6925 }, { "epoch": 2.9587058322690507, "grad_norm": 2.03027606010437, "learning_rate": 1.5042857142857146e-06, "loss": 0.1017, "step": 6950 }, { "epoch": 2.969348659003831, "grad_norm": 3.547647476196289, "learning_rate": 1.4685714285714287e-06, "loss": 0.0603, "step": 6975 }, { "epoch": 2.979991485738612, "grad_norm": 1.8231449127197266, "learning_rate": 1.432857142857143e-06, "loss": 0.0738, "step": 7000 }, { "epoch": 2.979991485738612, "eval_loss": 0.11986401677131653, "eval_runtime": 2460.4728, "eval_samples_per_second": 1.941, "eval_steps_per_second": 0.061, "eval_wer": 15.561025938059986, "step": 7000 }, { "epoch": 2.990634312473393, "grad_norm": 2.170557737350464, "learning_rate": 1.3971428571428573e-06, "loss": 0.0847, "step": 7025 }, { "epoch": 3.001277139208174, "grad_norm": 1.225490689277649, "learning_rate": 1.3614285714285716e-06, "loss": 0.0737, "step": 7050 }, { "epoch": 3.0119199659429543, "grad_norm": 2.1241679191589355, "learning_rate": 1.3257142857142856e-06, "loss": 0.0595, "step": 7075 }, { "epoch": 3.022562792677735, "grad_norm": 2.3180058002471924, "learning_rate": 1.2900000000000001e-06, "loss": 0.0615, "step": 7100 }, { "epoch": 3.033205619412516, "grad_norm": 2.4434351921081543, "learning_rate": 1.2542857142857142e-06, "loss": 0.0536, "step": 7125 }, { "epoch": 3.0438484461472965, "grad_norm": 2.712207317352295, "learning_rate": 1.2185714285714287e-06, "loss": 0.0558, "step": 7150 }, { "epoch": 3.0544912728820774, "grad_norm": 2.7258520126342773, "learning_rate": 1.182857142857143e-06, "loss": 0.0727, "step": 7175 }, { "epoch": 3.0651340996168583, "grad_norm": 2.103072166442871, "learning_rate": 1.1471428571428573e-06, "loss": 0.0479, "step": 7200 }, { "epoch": 3.075776926351639, "grad_norm": 1.9003605842590332, "learning_rate": 1.1114285714285714e-06, "loss": 0.0554, "step": 7225 }, { "epoch": 3.0864197530864197, "grad_norm": 1.4967641830444336, "learning_rate": 1.0757142857142857e-06, "loss": 0.0409, "step": 7250 }, { "epoch": 3.0970625798212006, "grad_norm": 1.389493703842163, "learning_rate": 1.04e-06, "loss": 0.0474, "step": 7275 }, { "epoch": 3.107705406555981, "grad_norm": 1.4253233671188354, "learning_rate": 1.0042857142857143e-06, "loss": 0.0445, "step": 7300 }, { "epoch": 3.118348233290762, "grad_norm": 2.6737582683563232, "learning_rate": 9.685714285714288e-07, "loss": 0.0584, "step": 7325 }, { "epoch": 3.128991060025543, "grad_norm": 2.5511069297790527, "learning_rate": 9.32857142857143e-07, "loss": 0.0557, "step": 7350 }, { "epoch": 3.1396338867603237, "grad_norm": 2.139846086502075, "learning_rate": 8.971428571428573e-07, "loss": 0.0467, "step": 7375 }, { "epoch": 3.150276713495104, "grad_norm": 1.826206088066101, "learning_rate": 8.614285714285716e-07, "loss": 0.054, "step": 7400 }, { "epoch": 3.160919540229885, "grad_norm": 2.8576643466949463, "learning_rate": 8.257142857142858e-07, "loss": 0.0517, "step": 7425 }, { "epoch": 3.171562366964666, "grad_norm": 2.1208717823028564, "learning_rate": 7.900000000000001e-07, "loss": 0.0667, "step": 7450 }, { "epoch": 3.1822051936994464, "grad_norm": 1.534239649772644, "learning_rate": 7.542857142857144e-07, "loss": 0.0592, "step": 7475 }, { "epoch": 3.1928480204342273, "grad_norm": 2.3605740070343018, "learning_rate": 7.185714285714286e-07, "loss": 0.063, "step": 7500 }, { "epoch": 3.2034908471690082, "grad_norm": 2.1266493797302246, "learning_rate": 6.842857142857143e-07, "loss": 0.0567, "step": 7525 }, { "epoch": 3.2141336739037887, "grad_norm": 1.5303648710250854, "learning_rate": 6.485714285714287e-07, "loss": 0.0619, "step": 7550 }, { "epoch": 3.2247765006385696, "grad_norm": 2.740006446838379, "learning_rate": 6.128571428571429e-07, "loss": 0.0773, "step": 7575 }, { "epoch": 3.2354193273733505, "grad_norm": 1.5786134004592896, "learning_rate": 5.771428571428572e-07, "loss": 0.0629, "step": 7600 }, { "epoch": 3.246062154108131, "grad_norm": 1.3754280805587769, "learning_rate": 5.414285714285715e-07, "loss": 0.0614, "step": 7625 }, { "epoch": 3.256704980842912, "grad_norm": 0.8814867734909058, "learning_rate": 5.057142857142858e-07, "loss": 0.0574, "step": 7650 }, { "epoch": 3.2673478075776927, "grad_norm": 2.909646511077881, "learning_rate": 4.7000000000000005e-07, "loss": 0.0408, "step": 7675 }, { "epoch": 3.2779906343124736, "grad_norm": 2.272367238998413, "learning_rate": 4.342857142857143e-07, "loss": 0.0539, "step": 7700 }, { "epoch": 3.288633461047254, "grad_norm": 2.039271831512451, "learning_rate": 3.985714285714286e-07, "loss": 0.0688, "step": 7725 }, { "epoch": 3.299276287782035, "grad_norm": 2.0516164302825928, "learning_rate": 3.6285714285714283e-07, "loss": 0.0546, "step": 7750 }, { "epoch": 3.3099191145168154, "grad_norm": 1.9131453037261963, "learning_rate": 3.271428571428572e-07, "loss": 0.0532, "step": 7775 }, { "epoch": 3.3205619412515963, "grad_norm": 1.66374933719635, "learning_rate": 2.914285714285715e-07, "loss": 0.0536, "step": 7800 }, { "epoch": 3.331204767986377, "grad_norm": 1.596907615661621, "learning_rate": 2.557142857142857e-07, "loss": 0.0456, "step": 7825 }, { "epoch": 3.341847594721158, "grad_norm": 2.430992603302002, "learning_rate": 2.2e-07, "loss": 0.0635, "step": 7850 }, { "epoch": 3.3524904214559386, "grad_norm": 2.4150683879852295, "learning_rate": 1.842857142857143e-07, "loss": 0.0638, "step": 7875 }, { "epoch": 3.3631332481907195, "grad_norm": 1.5517698526382446, "learning_rate": 1.4857142857142857e-07, "loss": 0.0615, "step": 7900 }, { "epoch": 3.3737760749255004, "grad_norm": 1.8786826133728027, "learning_rate": 1.1285714285714287e-07, "loss": 0.0607, "step": 7925 }, { "epoch": 3.384418901660281, "grad_norm": 2.849170446395874, "learning_rate": 7.714285714285715e-08, "loss": 0.0567, "step": 7950 }, { "epoch": 3.3950617283950617, "grad_norm": 2.8803513050079346, "learning_rate": 4.1428571428571426e-08, "loss": 0.0507, "step": 7975 }, { "epoch": 3.4057045551298426, "grad_norm": 2.7549145221710205, "learning_rate": 5.714285714285715e-09, "loss": 0.0544, "step": 8000 }, { "epoch": 3.4057045551298426, "eval_loss": 0.1155887097120285, "eval_runtime": 2444.7471, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.061, "eval_wer": 15.246076710047603, "step": 8000 } ], "logging_steps": 25, "max_steps": 8000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.364112316878029e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }