{ "best_metric": 0.6019417475728155, "best_model_checkpoint": "videomae-base-finetuned-kinetics-finetuned-conflab-traj-direction-rh-v10/checkpoint-819", "epoch": 6.135531135531136, "eval_steps": 500, "global_step": 819, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01221001221001221, "grad_norm": 8.122791290283203, "learning_rate": 6.0975609756097564e-06, "loss": 2.1095, "step": 10 }, { "epoch": 0.02442002442002442, "grad_norm": 6.661695957183838, "learning_rate": 1.2195121951219513e-05, "loss": 2.0298, "step": 20 }, { "epoch": 0.03663003663003663, "grad_norm": 7.086618900299072, "learning_rate": 1.8292682926829268e-05, "loss": 2.0189, "step": 30 }, { "epoch": 0.04884004884004884, "grad_norm": 8.178045272827148, "learning_rate": 2.4390243902439026e-05, "loss": 2.0099, "step": 40 }, { "epoch": 0.06105006105006105, "grad_norm": 7.585304260253906, "learning_rate": 3.048780487804878e-05, "loss": 1.9196, "step": 50 }, { "epoch": 0.07326007326007326, "grad_norm": 7.4838409423828125, "learning_rate": 3.6585365853658535e-05, "loss": 1.9165, "step": 60 }, { "epoch": 0.08547008547008547, "grad_norm": 6.174314498901367, "learning_rate": 4.26829268292683e-05, "loss": 1.8358, "step": 70 }, { "epoch": 0.09768009768009768, "grad_norm": 5.699541091918945, "learning_rate": 4.878048780487805e-05, "loss": 1.8794, "step": 80 }, { "epoch": 0.10989010989010989, "grad_norm": 6.571558475494385, "learning_rate": 4.94572591587517e-05, "loss": 1.8931, "step": 90 }, { "epoch": 0.1221001221001221, "grad_norm": 7.583756446838379, "learning_rate": 4.877883310719132e-05, "loss": 1.8477, "step": 100 }, { "epoch": 0.1343101343101343, "grad_norm": 6.113456726074219, "learning_rate": 4.810040705563094e-05, "loss": 1.989, "step": 110 }, { "epoch": 0.14407814407814407, "eval_accuracy": 0.2669902912621359, "eval_loss": 1.8786594867706299, "eval_runtime": 28.8858, "eval_samples_per_second": 7.132, "eval_steps_per_second": 0.9, "step": 118 }, { "epoch": 1.0024420024420024, "grad_norm": 6.775885105133057, "learning_rate": 4.742198100407056e-05, "loss": 1.7872, "step": 120 }, { "epoch": 1.0146520146520146, "grad_norm": 5.531322956085205, "learning_rate": 4.674355495251018e-05, "loss": 1.8331, "step": 130 }, { "epoch": 1.0268620268620268, "grad_norm": 7.1956353187561035, "learning_rate": 4.60651289009498e-05, "loss": 1.7703, "step": 140 }, { "epoch": 1.0390720390720392, "grad_norm": 8.779479026794434, "learning_rate": 4.5386702849389416e-05, "loss": 1.6198, "step": 150 }, { "epoch": 1.0512820512820513, "grad_norm": 10.621439933776855, "learning_rate": 4.470827679782904e-05, "loss": 1.6431, "step": 160 }, { "epoch": 1.0634920634920635, "grad_norm": 7.896602630615234, "learning_rate": 4.402985074626866e-05, "loss": 1.6686, "step": 170 }, { "epoch": 1.0757020757020757, "grad_norm": 6.063083171844482, "learning_rate": 4.335142469470828e-05, "loss": 1.4762, "step": 180 }, { "epoch": 1.0879120879120878, "grad_norm": 10.578692436218262, "learning_rate": 4.26729986431479e-05, "loss": 1.6948, "step": 190 }, { "epoch": 1.1001221001221002, "grad_norm": 8.612629890441895, "learning_rate": 4.199457259158752e-05, "loss": 1.5687, "step": 200 }, { "epoch": 1.1123321123321124, "grad_norm": 8.634360313415527, "learning_rate": 4.131614654002714e-05, "loss": 1.4138, "step": 210 }, { "epoch": 1.1245421245421245, "grad_norm": 8.842486381530762, "learning_rate": 4.063772048846676e-05, "loss": 1.4202, "step": 220 }, { "epoch": 1.1367521367521367, "grad_norm": 8.472060203552246, "learning_rate": 3.995929443690638e-05, "loss": 1.3092, "step": 230 }, { "epoch": 1.144078144078144, "eval_accuracy": 0.42718446601941745, "eval_loss": 1.6427617073059082, "eval_runtime": 21.3576, "eval_samples_per_second": 9.645, "eval_steps_per_second": 1.217, "step": 236 }, { "epoch": 2.004884004884005, "grad_norm": 9.463373184204102, "learning_rate": 3.9280868385345995e-05, "loss": 1.5065, "step": 240 }, { "epoch": 2.017094017094017, "grad_norm": 8.447351455688477, "learning_rate": 3.860244233378562e-05, "loss": 1.3116, "step": 250 }, { "epoch": 2.029304029304029, "grad_norm": 9.61158561706543, "learning_rate": 3.792401628222524e-05, "loss": 1.0903, "step": 260 }, { "epoch": 2.0415140415140414, "grad_norm": 10.502912521362305, "learning_rate": 3.724559023066486e-05, "loss": 1.1713, "step": 270 }, { "epoch": 2.0537240537240535, "grad_norm": 11.798907279968262, "learning_rate": 3.656716417910448e-05, "loss": 1.1561, "step": 280 }, { "epoch": 2.065934065934066, "grad_norm": 16.395263671875, "learning_rate": 3.58887381275441e-05, "loss": 1.2809, "step": 290 }, { "epoch": 2.0781440781440783, "grad_norm": 11.183971405029297, "learning_rate": 3.521031207598372e-05, "loss": 1.1276, "step": 300 }, { "epoch": 2.0903540903540905, "grad_norm": 10.743521690368652, "learning_rate": 3.453188602442334e-05, "loss": 1.3756, "step": 310 }, { "epoch": 2.1025641025641026, "grad_norm": 8.513188362121582, "learning_rate": 3.385345997286296e-05, "loss": 0.9218, "step": 320 }, { "epoch": 2.114774114774115, "grad_norm": 12.773633003234863, "learning_rate": 3.3175033921302575e-05, "loss": 1.2335, "step": 330 }, { "epoch": 2.126984126984127, "grad_norm": 11.963506698608398, "learning_rate": 3.24966078697422e-05, "loss": 1.1391, "step": 340 }, { "epoch": 2.139194139194139, "grad_norm": 10.355384826660156, "learning_rate": 3.181818181818182e-05, "loss": 1.0096, "step": 350 }, { "epoch": 2.144078144078144, "eval_accuracy": 0.47572815533980584, "eval_loss": 1.4351158142089844, "eval_runtime": 27.857, "eval_samples_per_second": 7.395, "eval_steps_per_second": 0.933, "step": 354 }, { "epoch": 3.0073260073260073, "grad_norm": 10.925836563110352, "learning_rate": 3.113975576662144e-05, "loss": 0.9783, "step": 360 }, { "epoch": 3.0195360195360195, "grad_norm": 6.559803009033203, "learning_rate": 3.046132971506106e-05, "loss": 0.8608, "step": 370 }, { "epoch": 3.0317460317460316, "grad_norm": 10.63405990600586, "learning_rate": 2.9782903663500678e-05, "loss": 0.7684, "step": 380 }, { "epoch": 3.043956043956044, "grad_norm": 11.322102546691895, "learning_rate": 2.91044776119403e-05, "loss": 0.8308, "step": 390 }, { "epoch": 3.056166056166056, "grad_norm": 5.719593524932861, "learning_rate": 2.842605156037992e-05, "loss": 0.7868, "step": 400 }, { "epoch": 3.0683760683760686, "grad_norm": 13.967921257019043, "learning_rate": 2.7747625508819542e-05, "loss": 0.7707, "step": 410 }, { "epoch": 3.0805860805860807, "grad_norm": 9.134116172790527, "learning_rate": 2.7069199457259158e-05, "loss": 0.5804, "step": 420 }, { "epoch": 3.092796092796093, "grad_norm": 14.580177307128906, "learning_rate": 2.639077340569878e-05, "loss": 0.7846, "step": 430 }, { "epoch": 3.105006105006105, "grad_norm": 12.265727043151855, "learning_rate": 2.57123473541384e-05, "loss": 0.6807, "step": 440 }, { "epoch": 3.1172161172161172, "grad_norm": 15.816527366638184, "learning_rate": 2.5033921302578023e-05, "loss": 0.6878, "step": 450 }, { "epoch": 3.1294261294261294, "grad_norm": 9.39810848236084, "learning_rate": 2.4355495251017642e-05, "loss": 0.5625, "step": 460 }, { "epoch": 3.1416361416361416, "grad_norm": 7.950680732727051, "learning_rate": 2.367706919945726e-05, "loss": 0.604, "step": 470 }, { "epoch": 3.144078144078144, "eval_accuracy": 0.5, "eval_loss": 1.3919281959533691, "eval_runtime": 21.6391, "eval_samples_per_second": 9.52, "eval_steps_per_second": 1.202, "step": 472 }, { "epoch": 4.00976800976801, "grad_norm": 3.859422206878662, "learning_rate": 2.299864314789688e-05, "loss": 0.3818, "step": 480 }, { "epoch": 4.021978021978022, "grad_norm": 4.586574077606201, "learning_rate": 2.2320217096336503e-05, "loss": 0.3743, "step": 490 }, { "epoch": 4.034188034188034, "grad_norm": 11.923030853271484, "learning_rate": 2.164179104477612e-05, "loss": 0.4857, "step": 500 }, { "epoch": 4.046398046398046, "grad_norm": 8.866025924682617, "learning_rate": 2.0963364993215738e-05, "loss": 0.5601, "step": 510 }, { "epoch": 4.058608058608058, "grad_norm": 8.028688430786133, "learning_rate": 2.028493894165536e-05, "loss": 0.4649, "step": 520 }, { "epoch": 4.070818070818071, "grad_norm": 8.852441787719727, "learning_rate": 1.960651289009498e-05, "loss": 0.3592, "step": 530 }, { "epoch": 4.083028083028083, "grad_norm": 22.12917137145996, "learning_rate": 1.89280868385346e-05, "loss": 0.4787, "step": 540 }, { "epoch": 4.095238095238095, "grad_norm": 9.262681007385254, "learning_rate": 1.824966078697422e-05, "loss": 0.4364, "step": 550 }, { "epoch": 4.107448107448107, "grad_norm": 5.102321624755859, "learning_rate": 1.757123473541384e-05, "loss": 0.3868, "step": 560 }, { "epoch": 4.119658119658119, "grad_norm": 13.144558906555176, "learning_rate": 1.689280868385346e-05, "loss": 0.4023, "step": 570 }, { "epoch": 4.131868131868132, "grad_norm": 16.35342788696289, "learning_rate": 1.6214382632293083e-05, "loss": 0.4355, "step": 580 }, { "epoch": 4.1440781440781445, "grad_norm": 12.760746955871582, "learning_rate": 1.55359565807327e-05, "loss": 0.2381, "step": 590 }, { "epoch": 4.1440781440781445, "eval_accuracy": 0.5436893203883495, "eval_loss": 1.355545163154602, "eval_runtime": 21.2157, "eval_samples_per_second": 9.71, "eval_steps_per_second": 1.226, "step": 590 }, { "epoch": 5.012210012210012, "grad_norm": 7.593497276306152, "learning_rate": 1.485753052917232e-05, "loss": 0.2075, "step": 600 }, { "epoch": 5.024420024420024, "grad_norm": 4.697848320007324, "learning_rate": 1.417910447761194e-05, "loss": 0.2499, "step": 610 }, { "epoch": 5.0366300366300365, "grad_norm": 5.646294116973877, "learning_rate": 1.3500678426051561e-05, "loss": 0.1955, "step": 620 }, { "epoch": 5.048840048840049, "grad_norm": 1.646572232246399, "learning_rate": 1.282225237449118e-05, "loss": 0.2132, "step": 630 }, { "epoch": 5.061050061050061, "grad_norm": 13.153250694274902, "learning_rate": 1.2143826322930801e-05, "loss": 0.2291, "step": 640 }, { "epoch": 5.073260073260073, "grad_norm": 3.749263286590576, "learning_rate": 1.1465400271370422e-05, "loss": 0.2316, "step": 650 }, { "epoch": 5.085470085470085, "grad_norm": 1.2367647886276245, "learning_rate": 1.0786974219810041e-05, "loss": 0.1593, "step": 660 }, { "epoch": 5.097680097680097, "grad_norm": 14.28999137878418, "learning_rate": 1.010854816824966e-05, "loss": 0.1864, "step": 670 }, { "epoch": 5.1098901098901095, "grad_norm": 4.065025329589844, "learning_rate": 9.430122116689281e-06, "loss": 0.261, "step": 680 }, { "epoch": 5.122100122100122, "grad_norm": 5.471700668334961, "learning_rate": 8.751696065128902e-06, "loss": 0.1759, "step": 690 }, { "epoch": 5.134310134310135, "grad_norm": 1.2977887392044067, "learning_rate": 8.073270013568522e-06, "loss": 0.2201, "step": 700 }, { "epoch": 5.1440781440781445, "eval_accuracy": 0.5776699029126213, "eval_loss": 1.3875343799591064, "eval_runtime": 33.2439, "eval_samples_per_second": 6.197, "eval_steps_per_second": 0.782, "step": 708 }, { "epoch": 6.002442002442002, "grad_norm": 5.552870750427246, "learning_rate": 7.394843962008141e-06, "loss": 0.2692, "step": 710 }, { "epoch": 6.014652014652015, "grad_norm": 20.285839080810547, "learning_rate": 6.716417910447762e-06, "loss": 0.1318, "step": 720 }, { "epoch": 6.026862026862027, "grad_norm": 1.199399471282959, "learning_rate": 6.037991858887382e-06, "loss": 0.0786, "step": 730 }, { "epoch": 6.039072039072039, "grad_norm": 0.7117233872413635, "learning_rate": 5.359565807327002e-06, "loss": 0.0873, "step": 740 }, { "epoch": 6.051282051282051, "grad_norm": 1.9136446714401245, "learning_rate": 4.681139755766622e-06, "loss": 0.1112, "step": 750 }, { "epoch": 6.063492063492063, "grad_norm": 3.076906204223633, "learning_rate": 4.002713704206242e-06, "loss": 0.1026, "step": 760 }, { "epoch": 6.075702075702075, "grad_norm": 26.244754791259766, "learning_rate": 3.324287652645862e-06, "loss": 0.1529, "step": 770 }, { "epoch": 6.087912087912088, "grad_norm": 0.7138678431510925, "learning_rate": 2.645861601085482e-06, "loss": 0.1352, "step": 780 }, { "epoch": 6.1001221001221, "grad_norm": 10.830814361572266, "learning_rate": 1.967435549525102e-06, "loss": 0.0969, "step": 790 }, { "epoch": 6.112332112332112, "grad_norm": 5.247445106506348, "learning_rate": 1.289009497964722e-06, "loss": 0.1702, "step": 800 }, { "epoch": 6.124542124542124, "grad_norm": 1.4733346700668335, "learning_rate": 6.10583446404342e-07, "loss": 0.1171, "step": 810 }, { "epoch": 6.135531135531136, "eval_accuracy": 0.6019417475728155, "eval_loss": 1.3527742624282837, "eval_runtime": 23.7516, "eval_samples_per_second": 8.673, "eval_steps_per_second": 1.095, "step": 819 }, { "epoch": 6.135531135531136, "step": 819, "total_flos": 8.149698472747991e+18, "train_loss": 0.8887154050216861, "train_runtime": 1543.5432, "train_samples_per_second": 4.245, "train_steps_per_second": 0.531 }, { "epoch": 6.135531135531136, "eval_accuracy": 0.5756097560975609, "eval_loss": 1.497787594795227, "eval_runtime": 33.2888, "eval_samples_per_second": 6.158, "eval_steps_per_second": 0.781, "step": 819 }, { "epoch": 6.135531135531136, "eval_accuracy": 0.5756097560975609, "eval_loss": 1.497787356376648, "eval_runtime": 33.1995, "eval_samples_per_second": 6.175, "eval_steps_per_second": 0.783, "step": 819 } ], "logging_steps": 10, "max_steps": 819, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.149698472747991e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }