{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.32,
  "eval_steps": 500,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0064,
      "grad_norm": 2.568751573562622,
      "learning_rate": 2e-05,
      "loss": 2.1237,
      "step": 1
    },
    {
      "epoch": 0.0128,
      "grad_norm": 2.210172414779663,
      "learning_rate": 4e-05,
      "loss": 1.9939,
      "step": 2
    },
    {
      "epoch": 0.0192,
      "grad_norm": 2.24556040763855,
      "learning_rate": 6e-05,
      "loss": 1.9278,
      "step": 3
    },
    {
      "epoch": 0.0256,
      "grad_norm": 1.4301820993423462,
      "learning_rate": 8e-05,
      "loss": 1.4586,
      "step": 4
    },
    {
      "epoch": 0.032,
      "grad_norm": 1.2345417737960815,
      "learning_rate": 0.0001,
      "loss": 1.2717,
      "step": 5
    },
    {
      "epoch": 0.0384,
      "grad_norm": 1.1178274154663086,
      "learning_rate": 0.00012,
      "loss": 0.973,
      "step": 6
    },
    {
      "epoch": 0.0448,
      "grad_norm": 0.8991392850875854,
      "learning_rate": 0.00014,
      "loss": 0.6798,
      "step": 7
    },
    {
      "epoch": 0.0512,
      "grad_norm": 1.3577145338058472,
      "learning_rate": 0.00016,
      "loss": 0.7061,
      "step": 8
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.7793099284172058,
      "learning_rate": 0.00018,
      "loss": 0.4772,
      "step": 9
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.4328145682811737,
      "learning_rate": 0.0002,
      "loss": 0.4894,
      "step": 10
    },
    {
      "epoch": 0.0704,
      "grad_norm": 0.33954325318336487,
      "learning_rate": 0.0001999390827019096,
      "loss": 0.4181,
      "step": 11
    },
    {
      "epoch": 0.0768,
      "grad_norm": 0.26666146516799927,
      "learning_rate": 0.00019975640502598244,
      "loss": 0.3721,
      "step": 12
    },
    {
      "epoch": 0.0832,
      "grad_norm": 0.2060694545507431,
      "learning_rate": 0.00019945218953682734,
      "loss": 0.4174,
      "step": 13
    },
    {
      "epoch": 0.0896,
      "grad_norm": 0.2562263607978821,
      "learning_rate": 0.00019902680687415705,
      "loss": 0.4412,
      "step": 14
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.22718767821788788,
      "learning_rate": 0.00019848077530122083,
      "loss": 0.3749,
      "step": 15
    },
    {
      "epoch": 0.1024,
      "grad_norm": 0.3134597837924957,
      "learning_rate": 0.00019781476007338058,
      "loss": 0.3291,
      "step": 16
    },
    {
      "epoch": 0.1088,
      "grad_norm": 0.24586430191993713,
      "learning_rate": 0.00019702957262759965,
      "loss": 0.2975,
      "step": 17
    },
    {
      "epoch": 0.1152,
      "grad_norm": 0.33068349957466125,
      "learning_rate": 0.0001961261695938319,
      "loss": 0.3777,
      "step": 18
    },
    {
      "epoch": 0.1216,
      "grad_norm": 0.18534499406814575,
      "learning_rate": 0.00019510565162951537,
      "loss": 0.302,
      "step": 19
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.21168865263462067,
      "learning_rate": 0.00019396926207859084,
      "loss": 0.3034,
      "step": 20
    },
    {
      "epoch": 0.1344,
      "grad_norm": 0.1708526760339737,
      "learning_rate": 0.00019271838545667876,
      "loss": 0.331,
      "step": 21
    },
    {
      "epoch": 0.1408,
      "grad_norm": 0.1510276347398758,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.3169,
      "step": 22
    },
    {
      "epoch": 0.1472,
      "grad_norm": 0.16446498036384583,
      "learning_rate": 0.0001898794046299167,
      "loss": 0.319,
      "step": 23
    },
    {
      "epoch": 0.1536,
      "grad_norm": 0.29390275478363037,
      "learning_rate": 0.00018829475928589271,
      "loss": 0.2821,
      "step": 24
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.13030974566936493,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.256,
      "step": 25
    },
    {
      "epoch": 0.1664,
      "grad_norm": 0.12436527013778687,
      "learning_rate": 0.0001848048096156426,
      "loss": 0.3123,
      "step": 26
    },
    {
      "epoch": 0.1728,
      "grad_norm": 0.16936242580413818,
      "learning_rate": 0.00018290375725550417,
      "loss": 0.262,
      "step": 27
    },
    {
      "epoch": 0.1792,
      "grad_norm": 0.14882561564445496,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.2825,
      "step": 28
    },
    {
      "epoch": 0.1856,
      "grad_norm": 0.12063440680503845,
      "learning_rate": 0.00017880107536067218,
      "loss": 0.2362,
      "step": 29
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.12142356485128403,
      "learning_rate": 0.0001766044443118978,
      "loss": 0.2745,
      "step": 30
    },
    {
      "epoch": 0.1984,
      "grad_norm": 0.115916408598423,
      "learning_rate": 0.00017431448254773944,
      "loss": 0.2727,
      "step": 31
    },
    {
      "epoch": 0.2048,
      "grad_norm": 0.11815212666988373,
      "learning_rate": 0.0001719339800338651,
      "loss": 0.2276,
      "step": 32
    },
    {
      "epoch": 0.2112,
      "grad_norm": 0.10431115329265594,
      "learning_rate": 0.00016946583704589973,
      "loss": 0.2662,
      "step": 33
    },
    {
      "epoch": 0.2176,
      "grad_norm": 0.0971333459019661,
      "learning_rate": 0.00016691306063588583,
      "loss": 0.251,
      "step": 34
    },
    {
      "epoch": 0.224,
      "grad_norm": 0.11603528261184692,
      "learning_rate": 0.00016427876096865394,
      "loss": 0.2405,
      "step": 35
    },
    {
      "epoch": 0.2304,
      "grad_norm": 0.10909801721572876,
      "learning_rate": 0.0001615661475325658,
      "loss": 0.2515,
      "step": 36
    },
    {
      "epoch": 0.2368,
      "grad_norm": 0.08744729310274124,
      "learning_rate": 0.00015877852522924732,
      "loss": 0.2748,
      "step": 37
    },
    {
      "epoch": 0.2432,
      "grad_norm": 0.10513755679130554,
      "learning_rate": 0.0001559192903470747,
      "loss": 0.2292,
      "step": 38
    },
    {
      "epoch": 0.2496,
      "grad_norm": 0.10443545877933502,
      "learning_rate": 0.0001529919264233205,
      "loss": 0.2688,
      "step": 39
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.10798890888690948,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.2578,
      "step": 40
    },
    {
      "epoch": 0.2624,
      "grad_norm": 0.10528898984193802,
      "learning_rate": 0.00014694715627858908,
      "loss": 0.266,
      "step": 41
    },
    {
      "epoch": 0.2688,
      "grad_norm": 0.09310728311538696,
      "learning_rate": 0.00014383711467890774,
      "loss": 0.2059,
      "step": 42
    },
    {
      "epoch": 0.2752,
      "grad_norm": 0.09954522550106049,
      "learning_rate": 0.00014067366430758004,
      "loss": 0.2992,
      "step": 43
    },
    {
      "epoch": 0.2816,
      "grad_norm": 0.10648441314697266,
      "learning_rate": 0.00013746065934159123,
      "loss": 0.259,
      "step": 44
    },
    {
      "epoch": 0.288,
      "grad_norm": 0.12005575746297836,
      "learning_rate": 0.00013420201433256689,
      "loss": 0.2654,
      "step": 45
    },
    {
      "epoch": 0.2944,
      "grad_norm": 0.09197583049535751,
      "learning_rate": 0.00013090169943749476,
      "loss": 0.2088,
      "step": 46
    },
    {
      "epoch": 0.3008,
      "grad_norm": 0.0905015841126442,
      "learning_rate": 0.0001275637355816999,
      "loss": 0.2659,
      "step": 47
    },
    {
      "epoch": 0.3072,
      "grad_norm": 0.10011676698923111,
      "learning_rate": 0.00012419218955996676,
      "loss": 0.2573,
      "step": 48
    },
    {
      "epoch": 0.3136,
      "grad_norm": 0.11158254742622375,
      "learning_rate": 0.00012079116908177593,
      "loss": 0.2734,
      "step": 49
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.10176081955432892,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.2386,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.458014435582157e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}