{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 200, "global_step": 9160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2183406113537118, "eval_loss": 2.382188320159912, "eval_runtime": 74.685, "eval_samples_per_second": 13.617, "eval_steps_per_second": 1.714, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4366812227074236, "eval_loss": 0.27150478959083557, "eval_runtime": 75.1491, "eval_samples_per_second": 13.533, "eval_steps_per_second": 1.703, "eval_wer": 0.5093029350104822, "step": 400 }, { "epoch": 0.5458515283842795, "grad_norm": 0.9615474939346313, "learning_rate": 0.0002868211920529801, "loss": 2.7769, "step": 500 }, { "epoch": 0.6550218340611353, "eval_loss": 0.24891583621501923, "eval_runtime": 75.3374, "eval_samples_per_second": 13.499, "eval_steps_per_second": 1.699, "eval_wer": 0.4820492662473795, "step": 600 }, { "epoch": 0.8733624454148472, "eval_loss": 0.2296326756477356, "eval_runtime": 74.8314, "eval_samples_per_second": 13.591, "eval_steps_per_second": 1.711, "eval_wer": 0.46947064989517817, "step": 800 }, { "epoch": 1.091703056768559, "grad_norm": 0.819220244884491, "learning_rate": 0.00027026490066225163, "loss": 0.6809, "step": 1000 }, { "epoch": 1.091703056768559, "eval_loss": 0.22090712189674377, "eval_runtime": 75.0858, "eval_samples_per_second": 13.544, "eval_steps_per_second": 1.705, "eval_wer": 0.4638364779874214, "step": 1000 }, { "epoch": 1.3100436681222707, "eval_loss": 0.21630799770355225, "eval_runtime": 75.4504, "eval_samples_per_second": 13.479, "eval_steps_per_second": 1.696, "eval_wer": 0.44693396226415094, "step": 1200 }, { "epoch": 1.5283842794759825, "eval_loss": 0.20916949212551117, "eval_runtime": 75.3429, "eval_samples_per_second": 13.498, "eval_steps_per_second": 1.699, "eval_wer": 0.4399895178197065, "step": 1400 }, { "epoch": 1.6375545851528384, "grad_norm": 0.9643642902374268, "learning_rate": 0.00025370860927152316, "loss": 0.6113, "step": 1500 }, { "epoch": 1.7467248908296944, "eval_loss": 0.20470178127288818, "eval_runtime": 74.8687, "eval_samples_per_second": 13.584, "eval_steps_per_second": 1.71, "eval_wer": 0.4346174004192872, "step": 1600 }, { "epoch": 1.965065502183406, "eval_loss": 0.20738434791564941, "eval_runtime": 75.3186, "eval_samples_per_second": 13.503, "eval_steps_per_second": 1.699, "eval_wer": 0.4466719077568134, "step": 1800 }, { "epoch": 2.183406113537118, "grad_norm": 0.6018229126930237, "learning_rate": 0.00023715231788079468, "loss": 0.5974, "step": 2000 }, { "epoch": 2.183406113537118, "eval_loss": 0.20408853888511658, "eval_runtime": 75.0651, "eval_samples_per_second": 13.548, "eval_steps_per_second": 1.705, "eval_wer": 0.43042452830188677, "step": 2000 }, { "epoch": 2.4017467248908297, "eval_loss": 0.20539002120494843, "eval_runtime": 74.9069, "eval_samples_per_second": 13.577, "eval_steps_per_second": 1.709, "eval_wer": 0.4317348008385744, "step": 2200 }, { "epoch": 2.6200873362445414, "eval_loss": 0.19870109856128693, "eval_runtime": 75.2079, "eval_samples_per_second": 13.523, "eval_steps_per_second": 1.702, "eval_wer": 0.4240041928721174, "step": 2400 }, { "epoch": 2.7292576419213974, "grad_norm": 0.7469688653945923, "learning_rate": 0.0002205960264900662, "loss": 0.5636, "step": 2500 }, { "epoch": 2.8384279475982535, "eval_loss": 0.20032186806201935, "eval_runtime": 75.3206, "eval_samples_per_second": 13.502, "eval_steps_per_second": 1.699, "eval_wer": 0.42518343815513626, "step": 2600 }, { "epoch": 3.056768558951965, "eval_loss": 0.19965218007564545, "eval_runtime": 74.7464, "eval_samples_per_second": 13.606, "eval_steps_per_second": 1.712, "eval_wer": 0.42872117400419285, "step": 2800 }, { "epoch": 3.2751091703056767, "grad_norm": 0.8557049036026001, "learning_rate": 0.00020403973509933773, "loss": 0.5398, "step": 3000 }, { "epoch": 3.2751091703056767, "eval_loss": 0.20972934365272522, "eval_runtime": 74.9379, "eval_samples_per_second": 13.571, "eval_steps_per_second": 1.708, "eval_wer": 0.4399895178197065, "step": 3000 }, { "epoch": 3.493449781659389, "eval_loss": 0.19680900871753693, "eval_runtime": 74.7045, "eval_samples_per_second": 13.614, "eval_steps_per_second": 1.713, "eval_wer": 0.4165356394129979, "step": 3200 }, { "epoch": 3.7117903930131004, "eval_loss": 0.20125041902065277, "eval_runtime": 74.9279, "eval_samples_per_second": 13.573, "eval_steps_per_second": 1.708, "eval_wer": 0.42177672955974843, "step": 3400 }, { "epoch": 3.8209606986899565, "grad_norm": 0.7604547739028931, "learning_rate": 0.00018748344370860925, "loss": 0.5334, "step": 3500 }, { "epoch": 3.930131004366812, "eval_loss": 0.20029041171073914, "eval_runtime": 74.9149, "eval_samples_per_second": 13.575, "eval_steps_per_second": 1.709, "eval_wer": 0.4229559748427673, "step": 3600 }, { "epoch": 4.148471615720524, "eval_loss": 0.19763976335525513, "eval_runtime": 75.2231, "eval_samples_per_second": 13.52, "eval_steps_per_second": 1.702, "eval_wer": 0.4226939203354298, "step": 3800 }, { "epoch": 4.366812227074236, "grad_norm": 1.0875004529953003, "learning_rate": 0.00017092715231788077, "loss": 0.5123, "step": 4000 }, { "epoch": 4.366812227074236, "eval_loss": 0.19775792956352234, "eval_runtime": 75.5854, "eval_samples_per_second": 13.455, "eval_steps_per_second": 1.693, "eval_wer": 0.419811320754717, "step": 4000 }, { "epoch": 4.585152838427947, "eval_loss": 0.201936736702919, "eval_runtime": 74.8299, "eval_samples_per_second": 13.591, "eval_steps_per_second": 1.711, "eval_wer": 0.429769392033543, "step": 4200 }, { "epoch": 4.8034934497816595, "eval_loss": 0.19385558366775513, "eval_runtime": 75.489, "eval_samples_per_second": 13.472, "eval_steps_per_second": 1.696, "eval_wer": 0.41457023060796644, "step": 4400 }, { "epoch": 4.9126637554585155, "grad_norm": 1.0764209032058716, "learning_rate": 0.0001543708609271523, "loss": 0.5119, "step": 4500 }, { "epoch": 5.021834061135372, "eval_loss": 0.1989189237356186, "eval_runtime": 75.1857, "eval_samples_per_second": 13.527, "eval_steps_per_second": 1.702, "eval_wer": 0.4161425576519916, "step": 4600 }, { "epoch": 5.240174672489083, "eval_loss": 0.1901673972606659, "eval_runtime": 75.3755, "eval_samples_per_second": 13.492, "eval_steps_per_second": 1.698, "eval_wer": 0.407625786163522, "step": 4800 }, { "epoch": 5.458515283842795, "grad_norm": 0.5494298338890076, "learning_rate": 0.00013781456953642382, "loss": 0.4929, "step": 5000 }, { "epoch": 5.458515283842795, "eval_loss": 0.1928589940071106, "eval_runtime": 75.5607, "eval_samples_per_second": 13.459, "eval_steps_per_second": 1.694, "eval_wer": 0.4115566037735849, "step": 5000 }, { "epoch": 5.676855895196507, "eval_loss": 0.19432277977466583, "eval_runtime": 75.57, "eval_samples_per_second": 13.458, "eval_steps_per_second": 1.694, "eval_wer": 0.4144392033542977, "step": 5200 }, { "epoch": 5.895196506550218, "eval_loss": 0.19218797981739044, "eval_runtime": 75.4593, "eval_samples_per_second": 13.477, "eval_steps_per_second": 1.696, "eval_wer": 0.4106394129979036, "step": 5400 }, { "epoch": 6.004366812227074, "grad_norm": 0.40776267647743225, "learning_rate": 0.00012125827814569536, "loss": 0.4878, "step": 5500 }, { "epoch": 6.11353711790393, "eval_loss": 0.19328303635120392, "eval_runtime": 76.1865, "eval_samples_per_second": 13.349, "eval_steps_per_second": 1.68, "eval_wer": 0.41365303983228513, "step": 5600 }, { "epoch": 6.331877729257642, "eval_loss": 0.19196276366710663, "eval_runtime": 75.3609, "eval_samples_per_second": 13.495, "eval_steps_per_second": 1.698, "eval_wer": 0.40579140461215935, "step": 5800 }, { "epoch": 6.550218340611353, "grad_norm": 0.5017375946044922, "learning_rate": 0.00010470198675496688, "loss": 0.4755, "step": 6000 }, { "epoch": 6.550218340611353, "eval_loss": 0.19270355999469757, "eval_runtime": 75.2917, "eval_samples_per_second": 13.507, "eval_steps_per_second": 1.7, "eval_wer": 0.41705974842767296, "step": 6000 }, { "epoch": 6.7685589519650655, "eval_loss": 0.19197432696819305, "eval_runtime": 75.4413, "eval_samples_per_second": 13.481, "eval_steps_per_second": 1.697, "eval_wer": 0.41273584905660377, "step": 6200 }, { "epoch": 6.986899563318778, "eval_loss": 0.19249393045902252, "eval_runtime": 75.5242, "eval_samples_per_second": 13.466, "eval_steps_per_second": 1.695, "eval_wer": 0.40605345911949686, "step": 6400 }, { "epoch": 7.096069868995633, "grad_norm": 0.8388169407844543, "learning_rate": 8.81456953642384e-05, "loss": 0.475, "step": 6500 }, { "epoch": 7.205240174672489, "eval_loss": 0.18842807412147522, "eval_runtime": 75.7212, "eval_samples_per_second": 13.431, "eval_steps_per_second": 1.69, "eval_wer": 0.40579140461215935, "step": 6600 }, { "epoch": 7.423580786026201, "eval_loss": 0.19032706320285797, "eval_runtime": 75.2911, "eval_samples_per_second": 13.508, "eval_steps_per_second": 1.7, "eval_wer": 0.40697064989517817, "step": 6800 }, { "epoch": 7.641921397379913, "grad_norm": 1.298710584640503, "learning_rate": 7.158940397350993e-05, "loss": 0.4715, "step": 7000 }, { "epoch": 7.641921397379913, "eval_loss": 0.18821676075458527, "eval_runtime": 75.1357, "eval_samples_per_second": 13.536, "eval_steps_per_second": 1.704, "eval_wer": 0.3996331236897275, "step": 7000 }, { "epoch": 7.860262008733624, "eval_loss": 0.1881488859653473, "eval_runtime": 75.7296, "eval_samples_per_second": 13.429, "eval_steps_per_second": 1.69, "eval_wer": 0.4033018867924528, "step": 7200 }, { "epoch": 8.078602620087336, "eval_loss": 0.1884731650352478, "eval_runtime": 75.2978, "eval_samples_per_second": 13.506, "eval_steps_per_second": 1.7, "eval_wer": 0.40068134171907754, "step": 7400 }, { "epoch": 8.187772925764191, "grad_norm": 0.7714540362358093, "learning_rate": 5.5033112582781456e-05, "loss": 0.4575, "step": 7500 }, { "epoch": 8.296943231441048, "eval_loss": 0.1885315328836441, "eval_runtime": 76.1765, "eval_samples_per_second": 13.351, "eval_steps_per_second": 1.68, "eval_wer": 0.4015985324947589, "step": 7600 }, { "epoch": 8.51528384279476, "eval_loss": 0.18875299394130707, "eval_runtime": 75.8312, "eval_samples_per_second": 13.411, "eval_steps_per_second": 1.688, "eval_wer": 0.40500524109014674, "step": 7800 }, { "epoch": 8.733624454148472, "grad_norm": 0.7762609124183655, "learning_rate": 3.850993377483443e-05, "loss": 0.4611, "step": 8000 }, { "epoch": 8.733624454148472, "eval_loss": 0.18837310373783112, "eval_runtime": 76.1442, "eval_samples_per_second": 13.356, "eval_steps_per_second": 1.681, "eval_wer": 0.40461215932914046, "step": 8000 }, { "epoch": 8.951965065502183, "eval_loss": 0.1881391853094101, "eval_runtime": 76.0237, "eval_samples_per_second": 13.377, "eval_steps_per_second": 1.684, "eval_wer": 0.39740566037735847, "step": 8200 }, { "epoch": 9.170305676855895, "eval_loss": 0.1864539086818695, "eval_runtime": 75.5164, "eval_samples_per_second": 13.467, "eval_steps_per_second": 1.695, "eval_wer": 0.3955712788259958, "step": 8400 }, { "epoch": 9.279475982532752, "grad_norm": 0.7670572400093079, "learning_rate": 2.1953642384105956e-05, "loss": 0.4559, "step": 8500 }, { "epoch": 9.388646288209607, "eval_loss": 0.18750794231891632, "eval_runtime": 75.8042, "eval_samples_per_second": 13.416, "eval_steps_per_second": 1.689, "eval_wer": 0.39740566037735847, "step": 8600 }, { "epoch": 9.606986899563319, "eval_loss": 0.1871640682220459, "eval_runtime": 75.5158, "eval_samples_per_second": 13.467, "eval_steps_per_second": 1.695, "eval_wer": 0.3996331236897275, "step": 8800 }, { "epoch": 9.825327510917031, "grad_norm": 0.47464126348495483, "learning_rate": 5.3973509933774825e-06, "loss": 0.4536, "step": 9000 }, { "epoch": 9.825327510917031, "eval_loss": 0.18756870925426483, "eval_runtime": 75.5452, "eval_samples_per_second": 13.462, "eval_steps_per_second": 1.694, "eval_wer": 0.3953092243186583, "step": 9000 }, { "epoch": 10.0, "step": 9160, "total_flos": 1.4294624233263563e+19, "train_loss": 0.6386700151268572, "train_runtime": 13601.4767, "train_samples_per_second": 5.387, "train_steps_per_second": 0.673 } ], "logging_steps": 500, "max_steps": 9160, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4294624233263563e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }