{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7473598700243704, "eval_steps": 500, "global_step": 460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016246953696181964, "grad_norm": 0.11087504774332047, "learning_rate": 0.0002, "loss": 1.2851, "step": 10 }, { "epoch": 0.03249390739236393, "grad_norm": 0.14227746427059174, "learning_rate": 0.0002, "loss": 1.1866, "step": 20 }, { "epoch": 0.048740861088545896, "grad_norm": 0.15468546748161316, "learning_rate": 0.0002, "loss": 1.3344, "step": 30 }, { "epoch": 0.06498781478472786, "grad_norm": 0.1867746114730835, "learning_rate": 0.0002, "loss": 1.4973, "step": 40 }, { "epoch": 0.08123476848090982, "grad_norm": 0.6773258447647095, "learning_rate": 0.0002, "loss": 1.7041, "step": 50 }, { "epoch": 0.09748172217709179, "grad_norm": 0.09346572309732437, "learning_rate": 0.0002, "loss": 1.239, "step": 60 }, { "epoch": 0.11372867587327376, "grad_norm": 0.11293257027864456, "learning_rate": 0.0002, "loss": 1.1884, "step": 70 }, { "epoch": 0.12997562956945571, "grad_norm": 0.1317419856786728, "learning_rate": 0.0002, "loss": 1.3503, "step": 80 }, { "epoch": 0.1462225832656377, "grad_norm": 0.20818021893501282, "learning_rate": 0.0002, "loss": 1.505, "step": 90 }, { "epoch": 0.16246953696181965, "grad_norm": 0.5995267629623413, "learning_rate": 0.0002, "loss": 1.7264, "step": 100 }, { "epoch": 0.17871649065800163, "grad_norm": 0.10515395551919937, "learning_rate": 0.0002, "loss": 1.1812, "step": 110 }, { "epoch": 0.19496344435418358, "grad_norm": 0.11150451004505157, "learning_rate": 0.0002, "loss": 1.287, "step": 120 }, { "epoch": 0.21121039805036557, "grad_norm": 0.1395130306482315, "learning_rate": 0.0002, "loss": 1.2411, "step": 130 }, { "epoch": 0.22745735174654752, "grad_norm": 0.15016046166419983, "learning_rate": 0.0002, "loss": 1.4182, "step": 140 }, { "epoch": 0.2437043054427295, "grad_norm": 0.4636495113372803, "learning_rate": 0.0002, "loss": 1.7269, "step": 150 }, { "epoch": 0.25995125913891143, "grad_norm": 0.0939592495560646, "learning_rate": 0.0002, "loss": 1.2139, "step": 160 }, { "epoch": 0.27619821283509344, "grad_norm": 0.09509933739900589, "learning_rate": 0.0002, "loss": 1.1963, "step": 170 }, { "epoch": 0.2924451665312754, "grad_norm": 0.1235380694270134, "learning_rate": 0.0002, "loss": 1.2873, "step": 180 }, { "epoch": 0.30869212022745735, "grad_norm": 0.1839320808649063, "learning_rate": 0.0002, "loss": 1.455, "step": 190 }, { "epoch": 0.3249390739236393, "grad_norm": 0.481478750705719, "learning_rate": 0.0002, "loss": 1.7412, "step": 200 }, { "epoch": 0.3411860276198213, "grad_norm": 0.08681885898113251, "learning_rate": 0.0002, "loss": 1.2474, "step": 210 }, { "epoch": 0.35743298131600326, "grad_norm": 0.09558644890785217, "learning_rate": 0.0002, "loss": 1.2158, "step": 220 }, { "epoch": 0.3736799350121852, "grad_norm": 0.12771648168563843, "learning_rate": 0.0002, "loss": 1.3005, "step": 230 }, { "epoch": 0.38992688870836717, "grad_norm": 0.17630507051944733, "learning_rate": 0.0002, "loss": 1.4772, "step": 240 }, { "epoch": 0.4061738424045491, "grad_norm": 0.44942063093185425, "learning_rate": 0.0002, "loss": 1.6645, "step": 250 }, { "epoch": 0.42242079610073113, "grad_norm": 0.09558656066656113, "learning_rate": 0.0002, "loss": 1.1758, "step": 260 }, { "epoch": 0.4386677497969131, "grad_norm": 0.09703896939754486, "learning_rate": 0.0002, "loss": 1.2423, "step": 270 }, { "epoch": 0.45491470349309504, "grad_norm": 0.12717948853969574, "learning_rate": 0.0002, "loss": 1.3671, "step": 280 }, { "epoch": 0.471161657189277, "grad_norm": 0.1827155202627182, "learning_rate": 0.0002, "loss": 1.5173, "step": 290 }, { "epoch": 0.487408610885459, "grad_norm": 0.4099660813808441, "learning_rate": 0.0002, "loss": 1.6514, "step": 300 }, { "epoch": 0.503655564581641, "grad_norm": 0.12756651639938354, "learning_rate": 0.0002, "loss": 1.1952, "step": 310 }, { "epoch": 0.5199025182778229, "grad_norm": 0.09647507965564728, "learning_rate": 0.0002, "loss": 1.1511, "step": 320 }, { "epoch": 0.5361494719740049, "grad_norm": 0.12492221593856812, "learning_rate": 0.0002, "loss": 1.1778, "step": 330 }, { "epoch": 0.5523964256701869, "grad_norm": 0.18637599050998688, "learning_rate": 0.0002, "loss": 1.4947, "step": 340 }, { "epoch": 0.5686433793663688, "grad_norm": 0.5491933226585388, "learning_rate": 0.0002, "loss": 1.6702, "step": 350 }, { "epoch": 0.5848903330625508, "grad_norm": 0.091646708548069, "learning_rate": 0.0002, "loss": 1.1607, "step": 360 }, { "epoch": 0.6011372867587328, "grad_norm": 0.10480837523937225, "learning_rate": 0.0002, "loss": 1.2112, "step": 370 }, { "epoch": 0.6173842404549147, "grad_norm": 0.12856683135032654, "learning_rate": 0.0002, "loss": 1.2448, "step": 380 }, { "epoch": 0.6336311941510967, "grad_norm": 0.221836119890213, "learning_rate": 0.0002, "loss": 1.471, "step": 390 }, { "epoch": 0.6498781478472786, "grad_norm": 0.442389577627182, "learning_rate": 0.0002, "loss": 1.7765, "step": 400 }, { "epoch": 0.6661251015434606, "grad_norm": 0.08752889186143875, "learning_rate": 0.0002, "loss": 1.2821, "step": 410 }, { "epoch": 0.6823720552396426, "grad_norm": 0.10227832943201065, "learning_rate": 0.0002, "loss": 1.1901, "step": 420 }, { "epoch": 0.6986190089358245, "grad_norm": 0.12081281840801239, "learning_rate": 0.0002, "loss": 1.302, "step": 430 }, { "epoch": 0.7148659626320065, "grad_norm": 0.1923910230398178, "learning_rate": 0.0002, "loss": 1.5397, "step": 440 }, { "epoch": 0.7311129163281884, "grad_norm": 0.5226483941078186, "learning_rate": 0.0002, "loss": 1.6591, "step": 450 }, { "epoch": 0.7473598700243704, "grad_norm": 0.09780491888523102, "learning_rate": 0.0002, "loss": 1.1514, "step": 460 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0671426142086758e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }