{ "best_metric": 0.14731654524803162, "best_model_checkpoint": "./xls-r-1b-bem-sv-male/checkpoint-3500", "epoch": 4.997397188964081, "eval_steps": 500, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20822488287350338, "grad_norm": 6.451992988586426, "learning_rate": 1.9600000000000002e-05, "loss": 4.6771, "step": 200 }, { "epoch": 0.41644976574700676, "grad_norm": 3.6971700191497803, "learning_rate": 3.960000000000001e-05, "loss": 0.9808, "step": 400 }, { "epoch": 0.5205622071837585, "eval_loss": 0.2580466568470001, "eval_runtime": 117.6846, "eval_samples_per_second": 8.922, "eval_steps_per_second": 2.235, "eval_wer": 0.8361904761904762, "step": 500 }, { "epoch": 0.6246746486205101, "grad_norm": 3.8861448764801025, "learning_rate": 4.8883720930232564e-05, "loss": 0.5099, "step": 600 }, { "epoch": 0.8328995314940135, "grad_norm": 6.251056671142578, "learning_rate": 4.655813953488372e-05, "loss": 0.4325, "step": 800 }, { "epoch": 1.041124414367517, "grad_norm": 2.006500005722046, "learning_rate": 4.423255813953489e-05, "loss": 0.3877, "step": 1000 }, { "epoch": 1.041124414367517, "eval_loss": 0.20405621826648712, "eval_runtime": 117.4432, "eval_samples_per_second": 8.94, "eval_steps_per_second": 2.239, "eval_wer": 0.7790476190476191, "step": 1000 }, { "epoch": 1.2493492972410203, "grad_norm": 1.86452317237854, "learning_rate": 4.190697674418605e-05, "loss": 0.3249, "step": 1200 }, { "epoch": 1.4575741801145237, "grad_norm": 2.239264965057373, "learning_rate": 3.958139534883721e-05, "loss": 0.298, "step": 1400 }, { "epoch": 1.5616866215512752, "eval_loss": 0.1842915266752243, "eval_runtime": 118.4618, "eval_samples_per_second": 8.864, "eval_steps_per_second": 2.22, "eval_wer": 0.7619047619047619, "step": 1500 }, { "epoch": 1.665799062988027, "grad_norm": 0.9172901511192322, "learning_rate": 3.725581395348837e-05, "loss": 0.2978, "step": 1600 }, { "epoch": 1.8740239458615304, "grad_norm": 2.832592487335205, "learning_rate": 3.4930232558139534e-05, "loss": 0.2734, "step": 1800 }, { "epoch": 2.082248828735034, "grad_norm": 1.0995967388153076, "learning_rate": 3.26046511627907e-05, "loss": 0.2433, "step": 2000 }, { "epoch": 2.082248828735034, "eval_loss": 0.16212375462055206, "eval_runtime": 117.6082, "eval_samples_per_second": 8.928, "eval_steps_per_second": 2.236, "eval_wer": 0.700952380952381, "step": 2000 }, { "epoch": 2.2904737116085374, "grad_norm": 1.1086174249649048, "learning_rate": 3.0279069767441864e-05, "loss": 0.203, "step": 2200 }, { "epoch": 2.4986985944820406, "grad_norm": 1.091976523399353, "learning_rate": 2.7953488372093022e-05, "loss": 0.188, "step": 2400 }, { "epoch": 2.6028110359187924, "eval_loss": 0.1653192788362503, "eval_runtime": 118.9756, "eval_samples_per_second": 8.825, "eval_steps_per_second": 2.211, "eval_wer": 0.6838095238095238, "step": 2500 }, { "epoch": 2.706923477355544, "grad_norm": 0.8310242295265198, "learning_rate": 2.5627906976744187e-05, "loss": 0.1958, "step": 2600 }, { "epoch": 2.9151483602290473, "grad_norm": 1.4682559967041016, "learning_rate": 2.3302325581395352e-05, "loss": 0.2035, "step": 2800 }, { "epoch": 3.123373243102551, "grad_norm": 0.5501867532730103, "learning_rate": 2.0976744186046513e-05, "loss": 0.1521, "step": 3000 }, { "epoch": 3.123373243102551, "eval_loss": 0.14906759560108185, "eval_runtime": 118.0235, "eval_samples_per_second": 8.897, "eval_steps_per_second": 2.228, "eval_wer": 0.659047619047619, "step": 3000 }, { "epoch": 
3.331598125976054, "grad_norm": 0.9272790551185608, "learning_rate": 1.8651162790697675e-05, "loss": 0.1381, "step": 3200 }, { "epoch": 3.5398230088495577, "grad_norm": 0.9619794487953186, "learning_rate": 1.6325581395348837e-05, "loss": 0.1316, "step": 3400 }, { "epoch": 3.643935450286309, "eval_loss": 0.14731654524803162, "eval_runtime": 117.6012, "eval_samples_per_second": 8.928, "eval_steps_per_second": 2.236, "eval_wer": 0.6228571428571429, "step": 3500 }, { "epoch": 3.748047891723061, "grad_norm": 0.7800536155700684, "learning_rate": 1.4000000000000001e-05, "loss": 0.127, "step": 3600 }, { "epoch": 3.956272774596564, "grad_norm": 4.820044994354248, "learning_rate": 1.1674418604651163e-05, "loss": 0.128, "step": 3800 }, { "epoch": 4.164497657470068, "grad_norm": 0.6071628928184509, "learning_rate": 9.348837209302326e-06, "loss": 0.0925, "step": 4000 }, { "epoch": 4.164497657470068, "eval_loss": 0.15515577793121338, "eval_runtime": 118.2807, "eval_samples_per_second": 8.877, "eval_steps_per_second": 2.224, "eval_wer": 0.6447619047619048, "step": 4000 }, { "epoch": 4.372722540343571, "grad_norm": 0.38652822375297546, "learning_rate": 7.023255813953489e-06, "loss": 0.0777, "step": 4200 }, { "epoch": 4.580947423217075, "grad_norm": 0.60927414894104, "learning_rate": 4.697674418604651e-06, "loss": 0.0767, "step": 4400 }, { "epoch": 4.685059864653827, "eval_loss": 0.15233619511127472, "eval_runtime": 118.0689, "eval_samples_per_second": 8.893, "eval_steps_per_second": 2.228, "eval_wer": 0.6228571428571429, "step": 4500 }, { "epoch": 4.7891723060905775, "grad_norm": 0.5293630361557007, "learning_rate": 2.372093023255814e-06, "loss": 0.0731, "step": 4600 }, { "epoch": 4.997397188964081, "grad_norm": 0.3965218961238861, "learning_rate": 4.651162790697675e-08, "loss": 0.0804, "step": 4800 }, { "epoch": 4.997397188964081, "step": 4800, "total_flos": 2.172568340790461e+19, "train_loss": 0.42886508484681446, "train_runtime": 12219.0167, "train_samples_per_second": 3.144, "train_steps_per_second": 0.393 } ], "logging_steps": 200, "max_steps": 4800, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.172568340790461e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }