{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 1830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.273224043715847, "grad_norm": 1.0141575336456299, "learning_rate": 0.00019995065603657316, "loss": 1.8945, "step": 50 }, { "epoch": 0.546448087431694, "grad_norm": 0.9003917574882507, "learning_rate": 0.00019980267284282717, "loss": 1.3737, "step": 100 }, { "epoch": 0.819672131147541, "grad_norm": 0.855492889881134, "learning_rate": 0.00019955619646030802, "loss": 1.3506, "step": 150 }, { "epoch": 1.092896174863388, "grad_norm": 1.1789681911468506, "learning_rate": 0.0001992114701314478, "loss": 1.2946, "step": 200 }, { "epoch": 1.366120218579235, "grad_norm": 0.9413456320762634, "learning_rate": 0.00019876883405951377, "loss": 1.2914, "step": 250 }, { "epoch": 1.639344262295082, "grad_norm": 0.8401021957397461, "learning_rate": 0.0001982287250728689, "loss": 1.2756, "step": 300 }, { "epoch": 1.9125683060109289, "grad_norm": 0.9392536878585815, "learning_rate": 0.00019759167619387476, "loss": 1.2785, "step": 350 }, { "epoch": 2.185792349726776, "grad_norm": 0.918136477470398, "learning_rate": 0.0001968583161128631, "loss": 1.2199, "step": 400 }, { "epoch": 2.459016393442623, "grad_norm": 0.9809663891792297, "learning_rate": 0.0001960293685676943, "loss": 1.201, "step": 450 }, { "epoch": 2.73224043715847, "grad_norm": 1.0254710912704468, "learning_rate": 0.00019510565162951537, "loss": 1.1842, "step": 500 }, { "epoch": 3.0054644808743167, "grad_norm": 1.1089431047439575, "learning_rate": 0.00019408807689542257, "loss": 1.1819, "step": 550 }, { "epoch": 3.278688524590164, "grad_norm": 1.2321062088012695, "learning_rate": 0.00019297764858882514, "loss": 1.1113, "step": 600 }, { "epoch": 3.551912568306011, "grad_norm": 1.0911256074905396, "learning_rate": 0.00019177546256839812, "loss": 1.1212, "step": 650 }, { "epoch": 3.8251366120218577, "grad_norm": 1.1500061750411987, "learning_rate": 0.00019048270524660196, "loss": 1.1247, "step": 700 }, { "epoch": 4.098360655737705, "grad_norm": 1.259513258934021, "learning_rate": 0.0001891006524188368, "loss": 1.0826, "step": 750 }, { "epoch": 4.371584699453552, "grad_norm": 1.377414345741272, "learning_rate": 0.00018763066800438636, "loss": 1.0593, "step": 800 }, { "epoch": 4.644808743169399, "grad_norm": 1.2397098541259766, "learning_rate": 0.0001860742027003944, "loss": 1.0414, "step": 850 }, { "epoch": 4.918032786885246, "grad_norm": 1.2820392847061157, "learning_rate": 0.00018443279255020152, "loss": 1.0601, "step": 900 }, { "epoch": 5.191256830601093, "grad_norm": 1.6708155870437622, "learning_rate": 0.00018270805742745617, "loss": 0.973, "step": 950 }, { "epoch": 5.46448087431694, "grad_norm": 1.546794056892395, "learning_rate": 0.00018090169943749476, "loss": 0.9904, "step": 1000 }, { "epoch": 5.737704918032787, "grad_norm": 1.437908411026001, "learning_rate": 0.00017901550123756906, "loss": 0.9863, "step": 1050 }, { "epoch": 6.0109289617486334, "grad_norm": 1.4555143117904663, "learning_rate": 0.00017705132427757895, "loss": 0.9768, "step": 1100 }, { "epoch": 6.284153005464481, "grad_norm": 1.494957447052002, "learning_rate": 0.00017501110696304596, "loss": 0.8969, "step": 1150 }, { "epoch": 6.557377049180328, "grad_norm": 1.4257054328918457, "learning_rate": 0.00017289686274214118, "loss": 0.9207, "step": 1200 }, { "epoch": 6.830601092896175, "grad_norm": 1.6431266069412231, "learning_rate": 0.00017071067811865476, 
"loss": 0.9116, "step": 1250 }, { "epoch": 7.103825136612022, "grad_norm": 1.4786570072174072, "learning_rate": 0.00016845471059286887, "loss": 0.8975, "step": 1300 }, { "epoch": 7.377049180327869, "grad_norm": 1.5059996843338013, "learning_rate": 0.00016613118653236518, "loss": 0.8519, "step": 1350 }, { "epoch": 7.6502732240437155, "grad_norm": 1.5110268592834473, "learning_rate": 0.000163742398974869, "loss": 0.8471, "step": 1400 }, { "epoch": 7.923497267759563, "grad_norm": 1.6930420398712158, "learning_rate": 0.00016129070536529766, "loss": 0.8544, "step": 1450 }, { "epoch": 8.19672131147541, "grad_norm": 1.8286707401275635, "learning_rate": 0.00015877852522924732, "loss": 0.8102, "step": 1500 }, { "epoch": 8.469945355191257, "grad_norm": 1.4673559665679932, "learning_rate": 0.00015620833778521307, "loss": 0.7986, "step": 1550 }, { "epoch": 8.743169398907105, "grad_norm": 1.6546106338500977, "learning_rate": 0.00015358267949789966, "loss": 0.7985, "step": 1600 }, { "epoch": 9.01639344262295, "grad_norm": 1.7138121128082275, "learning_rate": 0.00015090414157503714, "loss": 0.8194, "step": 1650 }, { "epoch": 9.289617486338798, "grad_norm": 1.5631183385849, "learning_rate": 0.00014817536741017152, "loss": 0.7317, "step": 1700 }, { "epoch": 9.562841530054644, "grad_norm": 1.936880111694336, "learning_rate": 0.00014539904997395468, "loss": 0.7479, "step": 1750 }, { "epoch": 9.836065573770492, "grad_norm": 1.6515196561813354, "learning_rate": 0.00014257792915650728, "loss": 0.7435, "step": 1800 } ], "logging_steps": 50, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 28, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.968783346244608e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }