{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 612, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.032679738562091505, "grad_norm": 5.336216449737549, "learning_rate": 3.2520325203252037e-06, "loss": 1.4653, "step": 20 }, { "epoch": 0.06535947712418301, "grad_norm": 4.05853796005249, "learning_rate": 6.504065040650407e-06, "loss": 1.2502, "step": 40 }, { "epoch": 0.09803921568627451, "grad_norm": 4.095743656158447, "learning_rate": 9.756097560975611e-06, "loss": 1.1818, "step": 60 }, { "epoch": 0.13071895424836602, "grad_norm": 4.049984931945801, "learning_rate": 1.3008130081300815e-05, "loss": 1.1616, "step": 80 }, { "epoch": 0.16339869281045752, "grad_norm": 4.275440216064453, "learning_rate": 1.6260162601626018e-05, "loss": 1.1681, "step": 100 }, { "epoch": 0.19607843137254902, "grad_norm": 3.7174243927001953, "learning_rate": 1.9512195121951222e-05, "loss": 1.1263, "step": 120 }, { "epoch": 0.22875816993464052, "grad_norm": 3.68149471282959, "learning_rate": 1.9940417581113062e-05, "loss": 1.1764, "step": 140 }, { "epoch": 0.26143790849673204, "grad_norm": 3.664177656173706, "learning_rate": 1.9718803741191918e-05, "loss": 1.1296, "step": 160 }, { "epoch": 0.29411764705882354, "grad_norm": 3.219787120819092, "learning_rate": 1.9336954955188042e-05, "loss": 1.1026, "step": 180 }, { "epoch": 0.32679738562091504, "grad_norm": 3.5025317668914795, "learning_rate": 1.880116680445757e-05, "loss": 1.1542, "step": 200 }, { "epoch": 0.35947712418300654, "grad_norm": 3.2319493293762207, "learning_rate": 1.812027288495843e-05, "loss": 1.1492, "step": 220 }, { "epoch": 0.39215686274509803, "grad_norm": 2.933061122894287, "learning_rate": 1.730549916681868e-05, "loss": 1.1107, "step": 240 }, { "epoch": 0.42483660130718953, "grad_norm": 3.0482289791107178, "learning_rate": 1.6370278910578644e-05, "loss": 1.1077, "step": 260 }, { "epoch": 0.45751633986928103, "grad_norm": 3.3377017974853516, "learning_rate": 1.5330031191602395e-05, "loss": 1.1246, "step": 280 }, { "epoch": 0.49019607843137253, "grad_norm": 3.618112564086914, "learning_rate": 1.420190668415002e-05, "loss": 1.101, "step": 300 }, { "epoch": 0.5228758169934641, "grad_norm": 3.0649290084838867, "learning_rate": 1.3004504896395564e-05, "loss": 1.0556, "step": 320 }, { "epoch": 0.5555555555555556, "grad_norm": 3.0544955730438232, "learning_rate": 1.1757567518366883e-05, "loss": 1.0287, "step": 340 }, { "epoch": 0.5882352941176471, "grad_norm": 2.709897994995117, "learning_rate": 1.0481652938612374e-05, "loss": 1.0145, "step": 360 }, { "epoch": 0.6209150326797386, "grad_norm": 2.6121253967285156, "learning_rate": 9.197797295872709e-06, "loss": 1.0179, "step": 380 }, { "epoch": 0.6535947712418301, "grad_norm": 3.049081563949585, "learning_rate": 7.927167654034622e-06, "loss": 0.9818, "step": 400 }, { "epoch": 0.6862745098039216, "grad_norm": 3.008591413497925, "learning_rate": 6.690713018507917e-06, "loss": 0.9431, "step": 420 }, { "epoch": 0.7189542483660131, "grad_norm": 3.0657355785369873, "learning_rate": 5.508818947755687e-06, "loss": 0.9978, "step": 440 }, { "epoch": 0.7516339869281046, "grad_norm": 2.533656597137451, "learning_rate": 4.4009714544339755e-06, "loss": 0.9494, "step": 460 }, { "epoch": 0.7843137254901961, "grad_norm": 2.7470057010650635, "learning_rate": 3.3854357374383905e-06, "loss": 0.962, "step": 480 }, { "epoch": 0.8169934640522876, "grad_norm": 2.5096466541290283, "learning_rate": 2.478955041636435e-06, "loss": 0.9592, "step": 500 }, { "epoch": 0.8496732026143791, "grad_norm": 2.438676357269287, "learning_rate": 1.6964746102169582e-06, "loss": 0.9327, "step": 520 }, { "epoch": 0.8823529411764706, "grad_norm": 2.47017240524292, "learning_rate": 1.0508952808836682e-06, "loss": 0.9249, "step": 540 }, { "epoch": 0.9150326797385621, "grad_norm": 2.9075846672058105, "learning_rate": 5.528607883782599e-07, "loss": 0.9466, "step": 560 }, { "epoch": 0.9477124183006536, "grad_norm": 2.7440707683563232, "learning_rate": 2.1058228009902094e-07, "loss": 0.9481, "step": 580 }, { "epoch": 0.9803921568627451, "grad_norm": 2.9985830783843994, "learning_rate": 2.9702938044468e-08, "loss": 0.951, "step": 600 }, { "epoch": 1.0, "step": 612, "total_flos": 16017543659520.0, "train_loss": 1.0669627922033174, "train_runtime": 18486.1158, "train_samples_per_second": 0.132, "train_steps_per_second": 0.033 } ], "logging_steps": 20, "max_steps": 612, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 16017543659520.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }