{ "best_metric": 0.0534161813557148, "best_model_checkpoint": "/storage/BioM-ELECTRA-Large-SQuAD2_carDB_5e_neg_lg_SGD/checkpoint-4238", "epoch": 5.0, "eval_steps": 500, "global_step": 10595, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23596035865974516, "grad_norm": 1.2747831344604492, "learning_rate": 1.9063709296838132e-05, "loss": 0.1626, "step": 500 }, { "epoch": 0.4719207173194903, "grad_norm": 0.4436088800430298, "learning_rate": 1.8119867862199152e-05, "loss": 0.0822, "step": 1000 }, { "epoch": 0.7078810759792354, "grad_norm": 1.6280406713485718, "learning_rate": 1.7176026427560172e-05, "loss": 0.0789, "step": 1500 }, { "epoch": 0.9438414346389806, "grad_norm": 2.336548089981079, "learning_rate": 1.6232184992921192e-05, "loss": 0.0706, "step": 2000 }, { "epoch": 1.0, "eval_accuracy_score": 0.9812832660700708, "eval_f1": 0.8218279984573852, "eval_loss": 0.05883064866065979, "eval_precision": 0.8056710775047259, "eval_recall": 0.8386462022825659, "eval_runtime": 5.2595, "eval_samples_per_second": 358.209, "eval_steps_per_second": 44.871, "step": 2119 }, { "epoch": 1.1798017932987257, "grad_norm": 2.0369768142700195, "learning_rate": 1.528834355828221e-05, "loss": 0.0496, "step": 2500 }, { "epoch": 1.415762151958471, "grad_norm": 0.6217797994613647, "learning_rate": 1.4344502123643229e-05, "loss": 0.0487, "step": 3000 }, { "epoch": 1.651722510618216, "grad_norm": 0.8187075257301331, "learning_rate": 1.3400660689004247e-05, "loss": 0.0455, "step": 3500 }, { "epoch": 1.8876828692779613, "grad_norm": 1.2321038246154785, "learning_rate": 1.2456819254365267e-05, "loss": 0.0439, "step": 4000 }, { "epoch": 2.0, "eval_accuracy_score": 0.9828429938975649, "eval_f1": 0.8423466462832028, "eval_loss": 0.0534161813557148, "eval_precision": 0.8187221396731055, "eval_recall": 0.867375049193231, "eval_runtime": 5.2808, "eval_samples_per_second": 356.764, "eval_steps_per_second": 44.69, "step": 4238 }, { "epoch": 2.1236432279377064, "grad_norm": 1.0647934675216675, "learning_rate": 1.1512977819726287e-05, "loss": 0.0337, "step": 4500 }, { "epoch": 2.3596035865974514, "grad_norm": 0.07234682887792587, "learning_rate": 1.0569136385087306e-05, "loss": 0.0247, "step": 5000 }, { "epoch": 2.595563945257197, "grad_norm": 2.0315685272216797, "learning_rate": 9.625294950448326e-06, "loss": 0.0263, "step": 5500 }, { "epoch": 2.831524303916942, "grad_norm": 0.034490641206502914, "learning_rate": 8.681453515809346e-06, "loss": 0.027, "step": 6000 }, { "epoch": 3.0, "eval_accuracy_score": 0.9833888986371878, "eval_f1": 0.853031465848043, "eval_loss": 0.06263311207294464, "eval_precision": 0.832272557094721, "eval_recall": 0.8748524203069658, "eval_runtime": 5.3057, "eval_samples_per_second": 355.091, "eval_steps_per_second": 44.481, "step": 6357 }, { "epoch": 3.067484662576687, "grad_norm": 0.1344643086194992, "learning_rate": 7.739499764039643e-06, "loss": 0.0254, "step": 6500 }, { "epoch": 3.303445021236432, "grad_norm": 0.07355394959449768, "learning_rate": 6.795658329400662e-06, "loss": 0.0163, "step": 7000 }, { "epoch": 3.5394053798961775, "grad_norm": 0.7838532328605652, "learning_rate": 5.85181689476168e-06, "loss": 0.016, "step": 7500 }, { "epoch": 3.7753657385559225, "grad_norm": 3.0708000659942627, "learning_rate": 4.9079754601227e-06, "loss": 0.0138, "step": 8000 }, { "epoch": 4.0, "eval_accuracy_score": 0.9838763135832798, "eval_f1": 0.8535911602209945, "eval_loss": 0.06983982026576996, "eval_precision": 0.8559556786703602, "eval_recall": 0.8512396694214877, "eval_runtime": 5.271, "eval_samples_per_second": 357.425, "eval_steps_per_second": 44.773, "step": 8476 }, { "epoch": 4.011326097215668, "grad_norm": 0.10571020096540451, "learning_rate": 3.966021708352997e-06, "loss": 0.0144, "step": 8500 }, { "epoch": 4.247286455875413, "grad_norm": 0.029509373009204865, "learning_rate": 3.0221802737140165e-06, "loss": 0.0086, "step": 9000 }, { "epoch": 4.483246814535158, "grad_norm": 1.1270127296447754, "learning_rate": 2.0783388390750357e-06, "loss": 0.0066, "step": 9500 }, { "epoch": 4.719207173194903, "grad_norm": 0.010910986922681332, "learning_rate": 1.134497404436055e-06, "loss": 0.0097, "step": 10000 }, { "epoch": 4.955167531854649, "grad_norm": 1.0645971298217773, "learning_rate": 1.906559697970741e-07, "loss": 0.0076, "step": 10500 }, { "epoch": 5.0, "eval_accuracy_score": 0.9837593339962176, "eval_f1": 0.8539891556932611, "eval_loss": 0.07769417762756348, "eval_precision": 0.8406404879908502, "eval_recall": 0.8677685950413223, "eval_runtime": 5.252, "eval_samples_per_second": 358.721, "eval_steps_per_second": 44.935, "step": 10595 } ], "logging_steps": 500, "max_steps": 10595, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.7692937221754256e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }