{ "best_metric": 0.5911664962768555, "best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e3l37-l/checkpoint-19500", "epoch": 2.251491613193741, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056287290329843524, "grad_norm": 96.92656707763672, "learning_rate": 2.9437127096701565e-07, "loss": 0.3748, "step": 500 }, { "epoch": 0.056287290329843524, "eval_loss": 1.2226382493972778, "eval_runtime": 131.2779, "eval_samples_per_second": 15.037, "eval_steps_per_second": 1.882, "step": 500 }, { "epoch": 0.11257458065968705, "grad_norm": 402.31158447265625, "learning_rate": 2.887425419340313e-07, "loss": 0.3057, "step": 1000 }, { "epoch": 0.11257458065968705, "eval_loss": 1.0702259540557861, "eval_runtime": 131.0109, "eval_samples_per_second": 15.067, "eval_steps_per_second": 1.885, "step": 1000 }, { "epoch": 0.16886187098953057, "grad_norm": 515.0911865234375, "learning_rate": 2.831138129010469e-07, "loss": 0.2239, "step": 1500 }, { "epoch": 0.16886187098953057, "eval_loss": 0.9957238435745239, "eval_runtime": 134.8377, "eval_samples_per_second": 14.64, "eval_steps_per_second": 1.832, "step": 1500 }, { "epoch": 0.2251491613193741, "grad_norm": 3.1967105865478516, "learning_rate": 2.774850838680626e-07, "loss": 0.2229, "step": 2000 }, { "epoch": 0.2251491613193741, "eval_loss": 0.9504629373550415, "eval_runtime": 129.0169, "eval_samples_per_second": 15.3, "eval_steps_per_second": 1.914, "step": 2000 }, { "epoch": 0.2814364516492176, "grad_norm": 276.22998046875, "learning_rate": 2.718563548350782e-07, "loss": 0.2098, "step": 2500 }, { "epoch": 0.2814364516492176, "eval_loss": 0.9006705284118652, "eval_runtime": 129.1816, "eval_samples_per_second": 15.281, "eval_steps_per_second": 1.912, "step": 2500 }, { "epoch": 0.33772374197906113, "grad_norm": 1105.317138671875, "learning_rate": 2.6622762580209386e-07, "loss": 0.1938, "step": 3000 }, { "epoch": 0.33772374197906113, "eval_loss": 0.8782906532287598, "eval_runtime": 128.6387, "eval_samples_per_second": 15.345, "eval_steps_per_second": 1.92, "step": 3000 }, { "epoch": 0.39401103230890466, "grad_norm": 0.00043465400813147426, "learning_rate": 2.605988967691095e-07, "loss": 0.1688, "step": 3500 }, { "epoch": 0.39401103230890466, "eval_loss": 0.8405746221542358, "eval_runtime": 128.6989, "eval_samples_per_second": 15.338, "eval_steps_per_second": 1.919, "step": 3500 }, { "epoch": 0.4502983226387482, "grad_norm": 0.027426382526755333, "learning_rate": 2.549701677361252e-07, "loss": 0.1457, "step": 4000 }, { "epoch": 0.4502983226387482, "eval_loss": 0.813768208026886, "eval_runtime": 128.7209, "eval_samples_per_second": 15.336, "eval_steps_per_second": 1.919, "step": 4000 }, { "epoch": 0.5065856129685917, "grad_norm": 0.0012935090344399214, "learning_rate": 2.4934143870314085e-07, "loss": 0.179, "step": 4500 }, { "epoch": 0.5065856129685917, "eval_loss": 0.7965527176856995, "eval_runtime": 128.3969, "eval_samples_per_second": 15.374, "eval_steps_per_second": 1.924, "step": 4500 }, { "epoch": 0.5628729032984352, "grad_norm": 0.0017458726651966572, "learning_rate": 2.4371270967015646e-07, "loss": 0.1224, "step": 5000 }, { "epoch": 0.5628729032984352, "eval_loss": 0.7788484692573547, "eval_runtime": 128.4939, "eval_samples_per_second": 15.363, "eval_steps_per_second": 1.922, "step": 5000 }, { "epoch": 0.6191601936282788, "grad_norm": 65.76844787597656, "learning_rate": 2.380839806371721e-07, "loss": 0.1551, "step": 5500 }, { "epoch": 0.6191601936282788, "eval_loss": 0.7626588344573975, "eval_runtime": 128.7302, "eval_samples_per_second": 15.334, "eval_steps_per_second": 1.919, "step": 5500 }, { "epoch": 0.6754474839581223, "grad_norm": 5.014094829559326, "learning_rate": 2.3245525160418776e-07, "loss": 0.1721, "step": 6000 }, { "epoch": 0.6754474839581223, "eval_loss": 0.7464810609817505, "eval_runtime": 128.749, "eval_samples_per_second": 15.332, "eval_steps_per_second": 1.918, "step": 6000 }, { "epoch": 0.7317347742879657, "grad_norm": 0.0022696161177009344, "learning_rate": 2.268265225712034e-07, "loss": 0.1532, "step": 6500 }, { "epoch": 0.7317347742879657, "eval_loss": 0.7335842251777649, "eval_runtime": 128.8165, "eval_samples_per_second": 15.324, "eval_steps_per_second": 1.917, "step": 6500 }, { "epoch": 0.7880220646178093, "grad_norm": 5.508715730684344e-06, "learning_rate": 2.2119779353821906e-07, "loss": 0.1991, "step": 7000 }, { "epoch": 0.7880220646178093, "eval_loss": 0.7244272232055664, "eval_runtime": 128.9211, "eval_samples_per_second": 15.312, "eval_steps_per_second": 1.916, "step": 7000 }, { "epoch": 0.8443093549476528, "grad_norm": 0.30320531129837036, "learning_rate": 2.155690645052347e-07, "loss": 0.1551, "step": 7500 }, { "epoch": 0.8443093549476528, "eval_loss": 0.718368411064148, "eval_runtime": 128.8149, "eval_samples_per_second": 15.324, "eval_steps_per_second": 1.917, "step": 7500 }, { "epoch": 0.9005966452774964, "grad_norm": 1.9788849385804497e-05, "learning_rate": 2.0994033547225037e-07, "loss": 0.1439, "step": 8000 }, { "epoch": 0.9005966452774964, "eval_loss": 0.7040167450904846, "eval_runtime": 128.4921, "eval_samples_per_second": 15.363, "eval_steps_per_second": 1.922, "step": 8000 }, { "epoch": 0.9568839356073399, "grad_norm": 1.0750063665909693e-06, "learning_rate": 2.04311606439266e-07, "loss": 0.1361, "step": 8500 }, { "epoch": 0.9568839356073399, "eval_loss": 0.6983720660209656, "eval_runtime": 128.5929, "eval_samples_per_second": 15.351, "eval_steps_per_second": 1.921, "step": 8500 }, { "epoch": 1.0131712259371835, "grad_norm": 187.1415557861328, "learning_rate": 1.9868287740628167e-07, "loss": 0.1144, "step": 9000 }, { "epoch": 1.0131712259371835, "eval_loss": 0.6901102066040039, "eval_runtime": 128.5322, "eval_samples_per_second": 15.358, "eval_steps_per_second": 1.922, "step": 9000 }, { "epoch": 1.069458516267027, "grad_norm": 0.007205183617770672, "learning_rate": 1.9305414837329728e-07, "loss": 0.0643, "step": 9500 }, { "epoch": 1.069458516267027, "eval_loss": 0.6831753253936768, "eval_runtime": 128.59, "eval_samples_per_second": 15.351, "eval_steps_per_second": 1.921, "step": 9500 }, { "epoch": 1.1257458065968704, "grad_norm": 0.4961595833301544, "learning_rate": 1.8742541934031294e-07, "loss": 0.0773, "step": 10000 }, { "epoch": 1.1257458065968704, "eval_loss": 0.6732445955276489, "eval_runtime": 128.732, "eval_samples_per_second": 15.334, "eval_steps_per_second": 1.919, "step": 10000 }, { "epoch": 1.1820330969267139, "grad_norm": 0.0009373470675200224, "learning_rate": 1.817966903073286e-07, "loss": 0.0814, "step": 10500 }, { "epoch": 1.1820330969267139, "eval_loss": 0.6723716259002686, "eval_runtime": 126.1133, "eval_samples_per_second": 15.653, "eval_steps_per_second": 1.959, "step": 10500 }, { "epoch": 1.2383203872565574, "grad_norm": 2.7500348096509697e-06, "learning_rate": 1.7616796127434424e-07, "loss": 0.0924, "step": 11000 }, { "epoch": 1.2383203872565574, "eval_loss": 0.6679245233535767, "eval_runtime": 126.0452, "eval_samples_per_second": 15.661, "eval_steps_per_second": 1.96, "step": 11000 }, { "epoch": 1.294607677586401, "grad_norm": 12.024052619934082, "learning_rate": 1.705392322413599e-07, "loss": 0.091, "step": 11500 }, { "epoch": 1.294607677586401, "eval_loss": 0.6659140586853027, "eval_runtime": 129.0611, "eval_samples_per_second": 15.295, "eval_steps_per_second": 1.914, "step": 11500 }, { "epoch": 1.3508949679162445, "grad_norm": 0.0008855258929543197, "learning_rate": 1.6491050320837554e-07, "loss": 0.0978, "step": 12000 }, { "epoch": 1.3508949679162445, "eval_loss": 0.6577230095863342, "eval_runtime": 126.2858, "eval_samples_per_second": 15.631, "eval_steps_per_second": 1.956, "step": 12000 }, { "epoch": 1.407182258246088, "grad_norm": 0.0003772446943912655, "learning_rate": 1.592817741753912e-07, "loss": 0.0598, "step": 12500 }, { "epoch": 1.407182258246088, "eval_loss": 0.6527238488197327, "eval_runtime": 125.8972, "eval_samples_per_second": 15.679, "eval_steps_per_second": 1.962, "step": 12500 }, { "epoch": 1.4634695485759315, "grad_norm": 0.00032130314502865076, "learning_rate": 1.5365304514240682e-07, "loss": 0.0713, "step": 13000 }, { "epoch": 1.4634695485759315, "eval_loss": 0.6472681760787964, "eval_runtime": 128.5052, "eval_samples_per_second": 15.361, "eval_steps_per_second": 1.922, "step": 13000 }, { "epoch": 1.5197568389057752, "grad_norm": 0.0006748048472218215, "learning_rate": 1.4802431610942248e-07, "loss": 0.0759, "step": 13500 }, { "epoch": 1.5197568389057752, "eval_loss": 0.637607991695404, "eval_runtime": 125.8757, "eval_samples_per_second": 15.682, "eval_steps_per_second": 1.962, "step": 13500 }, { "epoch": 1.5760441292356187, "grad_norm": 4.276073184428242e-08, "learning_rate": 1.4239558707643812e-07, "loss": 0.093, "step": 14000 }, { "epoch": 1.5760441292356187, "eval_loss": 0.6378567218780518, "eval_runtime": 126.214, "eval_samples_per_second": 15.64, "eval_steps_per_second": 1.957, "step": 14000 }, { "epoch": 1.6323314195654621, "grad_norm": 0.0019163701217621565, "learning_rate": 1.3676685804345378e-07, "loss": 0.061, "step": 14500 }, { "epoch": 1.6323314195654621, "eval_loss": 0.6364826560020447, "eval_runtime": 128.5083, "eval_samples_per_second": 15.361, "eval_steps_per_second": 1.922, "step": 14500 }, { "epoch": 1.6886187098953056, "grad_norm": 10.94436264038086, "learning_rate": 1.3113812901046944e-07, "loss": 0.077, "step": 15000 }, { "epoch": 1.6886187098953056, "eval_loss": 0.6351856589317322, "eval_runtime": 126.3016, "eval_samples_per_second": 15.629, "eval_steps_per_second": 1.956, "step": 15000 }, { "epoch": 1.744906000225149, "grad_norm": 245.41305541992188, "learning_rate": 1.2550939997748508e-07, "loss": 0.0798, "step": 15500 }, { "epoch": 1.744906000225149, "eval_loss": 0.6253550052642822, "eval_runtime": 126.2592, "eval_samples_per_second": 15.635, "eval_steps_per_second": 1.956, "step": 15500 }, { "epoch": 1.8011932905549926, "grad_norm": 5.819400783479978e-08, "learning_rate": 1.1988067094450072e-07, "loss": 0.0846, "step": 16000 }, { "epoch": 1.8011932905549926, "eval_loss": 0.6173177361488342, "eval_runtime": 128.7312, "eval_samples_per_second": 15.334, "eval_steps_per_second": 1.919, "step": 16000 }, { "epoch": 1.8574805808848363, "grad_norm": 4.076898676430574e-06, "learning_rate": 1.1425194191151638e-07, "loss": 0.1003, "step": 16500 }, { "epoch": 1.8574805808848363, "eval_loss": 0.6105911135673523, "eval_runtime": 126.0429, "eval_samples_per_second": 15.661, "eval_steps_per_second": 1.96, "step": 16500 }, { "epoch": 1.9137678712146797, "grad_norm": 2.8583364486694336, "learning_rate": 1.0862321287853203e-07, "loss": 0.0874, "step": 17000 }, { "epoch": 1.9137678712146797, "eval_loss": 0.6095408797264099, "eval_runtime": 125.7222, "eval_samples_per_second": 15.701, "eval_steps_per_second": 1.965, "step": 17000 }, { "epoch": 1.9700551615445232, "grad_norm": 0.047957953065633774, "learning_rate": 1.0299448384554767e-07, "loss": 0.0513, "step": 17500 }, { "epoch": 1.9700551615445232, "eval_loss": 0.6062661409378052, "eval_runtime": 128.3312, "eval_samples_per_second": 15.382, "eval_steps_per_second": 1.925, "step": 17500 }, { "epoch": 2.026342451874367, "grad_norm": 1.2455217301976518e-06, "learning_rate": 9.736575481256332e-08, "loss": 0.0743, "step": 18000 }, { "epoch": 2.026342451874367, "eval_loss": 0.6002796292304993, "eval_runtime": 126.2895, "eval_samples_per_second": 15.631, "eval_steps_per_second": 1.956, "step": 18000 }, { "epoch": 2.0826297422042104, "grad_norm": 12.021537780761719, "learning_rate": 9.173702577957897e-08, "loss": 0.0704, "step": 18500 }, { "epoch": 2.0826297422042104, "eval_loss": 0.5955923199653625, "eval_runtime": 126.1104, "eval_samples_per_second": 15.653, "eval_steps_per_second": 1.959, "step": 18500 }, { "epoch": 2.138917032534054, "grad_norm": 3.156046152114868, "learning_rate": 8.610829674659462e-08, "loss": 0.0368, "step": 19000 }, { "epoch": 2.138917032534054, "eval_loss": 0.5925185084342957, "eval_runtime": 128.6739, "eval_samples_per_second": 15.341, "eval_steps_per_second": 1.92, "step": 19000 }, { "epoch": 2.1952043228638973, "grad_norm": 0.0006427310290746391, "learning_rate": 8.047956771361026e-08, "loss": 0.0636, "step": 19500 }, { "epoch": 2.1952043228638973, "eval_loss": 0.5911664962768555, "eval_runtime": 126.3814, "eval_samples_per_second": 15.619, "eval_steps_per_second": 1.954, "step": 19500 }, { "epoch": 2.251491613193741, "grad_norm": 5.489724117069272e-06, "learning_rate": 7.485083868062591e-08, "loss": 0.0588, "step": 20000 }, { "epoch": 2.251491613193741, "eval_loss": 0.5912911295890808, "eval_runtime": 126.306, "eval_samples_per_second": 15.629, "eval_steps_per_second": 1.956, "step": 20000 } ], "logging_steps": 500, "max_steps": 26649, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7195526783551260.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }