{ "best_metric": 0.8249799337347952, "best_model_checkpoint": "./CARES/checkpoints/bert-ba-stratified/run-9/checkpoint-3976", "epoch": 56.0, "global_step": 3976, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0, "eval_loss": 0.24943208694458008, "eval_macro_f1": 0.08911588063155029, "eval_macro_precision": 0.1020147123407993, "eval_macro_recall": 0.08017933766090879, "eval_micro_f1": 0.27627302275189597, "eval_micro_precision": 0.7750759878419453, "eval_micro_recall": 0.16809492419248517, "eval_runtime": 2.9775, "eval_samples_per_second": 324.431, "eval_steps_per_second": 20.487, "step": 142 }, { "epoch": 4.0, "eval_loss": 0.1698431819677353, "eval_macro_f1": 0.30238618603356054, "eval_macro_precision": 0.4320649543604988, "eval_macro_recall": 0.2644492382273683, "eval_micro_f1": 0.6521739130434782, "eval_micro_precision": 0.8376421923474664, "eval_micro_recall": 0.5339485827290705, "eval_runtime": 2.979, "eval_samples_per_second": 324.273, "eval_steps_per_second": 20.477, "step": 284 }, { "epoch": 6.0, "eval_loss": 0.14047418534755707, "eval_macro_f1": 0.40223830727079396, "eval_macro_precision": 0.4784721691006365, "eval_macro_recall": 0.3687052580973402, "eval_micro_f1": 0.739880059970015, "eval_micro_precision": 0.8575152041702867, "eval_micro_recall": 0.6506262359920897, "eval_runtime": 2.9805, "eval_samples_per_second": 324.11, "eval_steps_per_second": 20.467, "step": 426 }, { "epoch": 7.04, "learning_rate": 3.279287453609026e-05, "loss": 0.2244, "step": 500 }, { "epoch": 8.0, "eval_loss": 0.1187577173113823, "eval_macro_f1": 0.5347249220382584, "eval_macro_precision": 0.7327975628305986, "eval_macro_recall": 0.4754068941604508, "eval_micro_f1": 0.7880299251870324, "eval_micro_precision": 0.8573643410852713, "eval_micro_recall": 0.7290705339485827, "eval_runtime": 2.9805, "eval_samples_per_second": 324.108, "eval_steps_per_second": 20.466, "step": 568 }, { "epoch": 10.0, "eval_loss": 0.11074026674032211, "eval_macro_f1": 0.5991127961103198, "eval_macro_precision": 0.6991665233729463, "eval_macro_recall": 0.5596673950826421, "eval_micro_f1": 0.8128196385952949, "eval_micro_precision": 0.8418079096045198, "eval_micro_recall": 0.7857613711272248, "eval_runtime": 2.9796, "eval_samples_per_second": 324.206, "eval_steps_per_second": 20.473, "step": 710 }, { "epoch": 12.0, "eval_loss": 0.10078531503677368, "eval_macro_f1": 0.6568773778388772, "eval_macro_precision": 0.700413763066982, "eval_macro_recall": 0.6283428000904666, "eval_micro_f1": 0.8347529812606473, "eval_micro_precision": 0.8638928067700987, "eval_micro_recall": 0.8075148319050758, "eval_runtime": 2.979, "eval_samples_per_second": 324.274, "eval_steps_per_second": 20.477, "step": 852 }, { "epoch": 14.0, "eval_loss": 0.10221733897924423, "eval_macro_f1": 0.6758575377881516, "eval_macro_precision": 0.838910780569426, "eval_macro_recall": 0.6238243641780066, "eval_micro_f1": 0.8337912087912088, "eval_micro_precision": 0.8702508960573476, "eval_micro_recall": 0.8002636783124588, "eval_runtime": 2.9795, "eval_samples_per_second": 324.21, "eval_steps_per_second": 20.473, "step": 994 }, { "epoch": 14.08, "learning_rate": 3.0308565859113728e-05, "loss": 0.0513, "step": 1000 }, { "epoch": 16.0, "eval_loss": 0.10313227772712708, "eval_macro_f1": 0.7298003500123689, "eval_macro_precision": 0.8118751777216693, "eval_macro_recall": 0.6944187826598622, "eval_micro_f1": 0.8354006034193765, "eval_micro_precision": 0.849931787175989, "eval_micro_recall": 0.8213579433091628, "eval_runtime": 2.9811, "eval_samples_per_second": 324.042, "eval_steps_per_second": 20.462, "step": 1136 }, { "epoch": 18.0, "eval_loss": 0.10032625496387482, "eval_macro_f1": 0.7681297243773157, "eval_macro_precision": 0.8958394795684446, "eval_macro_recall": 0.7124194145895655, "eval_micro_f1": 0.8443093549476527, "eval_micro_precision": 0.8656509695290858, "eval_micro_recall": 0.8239947264337508, "eval_runtime": 2.9813, "eval_samples_per_second": 324.018, "eval_steps_per_second": 20.461, "step": 1278 }, { "epoch": 20.0, "eval_loss": 0.1044757142663002, "eval_macro_f1": 0.7829375079467304, "eval_macro_precision": 0.8858384241353942, "eval_macro_recall": 0.7367403001149204, "eval_micro_f1": 0.8420698924731183, "eval_micro_precision": 0.8588074023303632, "eval_micro_recall": 0.8259723137771918, "eval_runtime": 2.9861, "eval_samples_per_second": 323.499, "eval_steps_per_second": 20.428, "step": 1420 }, { "epoch": 21.13, "learning_rate": 2.7824257182137193e-05, "loss": 0.0183, "step": 1500 }, { "epoch": 22.0, "eval_loss": 0.1039622500538826, "eval_macro_f1": 0.7902798824417182, "eval_macro_precision": 0.89276275853935, "eval_macro_recall": 0.7403232660636272, "eval_micro_f1": 0.8479512360311547, "eval_micro_precision": 0.871866295264624, "eval_micro_recall": 0.8253131179960448, "eval_runtime": 2.977, "eval_samples_per_second": 324.483, "eval_steps_per_second": 20.49, "step": 1562 }, { "epoch": 24.0, "eval_loss": 0.10449391603469849, "eval_macro_f1": 0.7973957256263516, "eval_macro_precision": 0.8897797609904068, "eval_macro_recall": 0.7482788788828134, "eval_micro_f1": 0.8530297957817209, "eval_micro_precision": 0.8666666666666667, "eval_micro_recall": 0.8398154251812788, "eval_runtime": 2.982, "eval_samples_per_second": 323.947, "eval_steps_per_second": 20.456, "step": 1704 }, { "epoch": 26.0, "eval_loss": 0.10751193016767502, "eval_macro_f1": 0.815099576727704, "eval_macro_precision": 0.8596170620799346, "eval_macro_recall": 0.7893940744657464, "eval_micro_f1": 0.8517287234042553, "eval_micro_precision": 0.8591549295774648, "eval_micro_recall": 0.8444297956493079, "eval_runtime": 2.9874, "eval_samples_per_second": 323.362, "eval_steps_per_second": 20.419, "step": 1846 }, { "epoch": 28.0, "eval_loss": 0.10826986283063889, "eval_macro_f1": 0.8122978188941863, "eval_macro_precision": 0.8818206343623207, "eval_macro_recall": 0.7768327155632533, "eval_micro_f1": 0.8507362784471219, "eval_micro_precision": 0.8640380693405847, "eval_micro_recall": 0.8378378378378378, "eval_runtime": 2.9805, "eval_samples_per_second": 324.106, "eval_steps_per_second": 20.466, "step": 1988 }, { "epoch": 28.17, "learning_rate": 2.5339948505160657e-05, "loss": 0.0094, "step": 2000 }, { "epoch": 30.0, "eval_loss": 0.11172767728567123, "eval_macro_f1": 0.7952069303877679, "eval_macro_precision": 0.8834731091800803, "eval_macro_recall": 0.7589521997225694, "eval_micro_f1": 0.8510210913960494, "eval_micro_precision": 0.8646258503401361, "eval_micro_recall": 0.8378378378378378, "eval_runtime": 2.9811, "eval_samples_per_second": 324.039, "eval_steps_per_second": 20.462, "step": 2130 }, { "epoch": 32.0, "eval_loss": 0.12214264273643494, "eval_macro_f1": 0.784963786280585, "eval_macro_precision": 0.8399655989881138, "eval_macro_recall": 0.7539663033116887, "eval_micro_f1": 0.8395881766854865, "eval_micro_precision": 0.8460508701472557, "eval_micro_recall": 0.8332234673698088, "eval_runtime": 2.9802, "eval_samples_per_second": 324.138, "eval_steps_per_second": 20.468, "step": 2272 }, { "epoch": 34.0, "eval_loss": 0.10992709547281265, "eval_macro_f1": 0.818123937895116, "eval_macro_precision": 0.846307748666965, "eval_macro_recall": 0.7959472696549352, "eval_micro_f1": 0.8540829986613119, "eval_micro_precision": 0.86743711760707, "eval_micro_recall": 0.8411338167435728, "eval_runtime": 2.9817, "eval_samples_per_second": 323.976, "eval_steps_per_second": 20.458, "step": 2414 }, { "epoch": 35.21, "learning_rate": 2.2855639828184125e-05, "loss": 0.0088, "step": 2500 }, { "epoch": 36.0, "eval_loss": 0.11282340437173843, "eval_macro_f1": 0.810588172484549, "eval_macro_precision": 0.8881718400115531, "eval_macro_recall": 0.7660828954423657, "eval_micro_f1": 0.8533783783783784, "eval_micro_precision": 0.8752598752598753, "eval_micro_recall": 0.8325642715886619, "eval_runtime": 2.9823, "eval_samples_per_second": 323.913, "eval_steps_per_second": 20.454, "step": 2556 }, { "epoch": 38.0, "eval_loss": 0.11364943534135818, "eval_macro_f1": 0.8086979072156089, "eval_macro_precision": 0.874192444614366, "eval_macro_recall": 0.7687841160082525, "eval_micro_f1": 0.855510752688172, "eval_micro_precision": 0.8725154215215901, "eval_micro_recall": 0.8391562294001318, "eval_runtime": 2.9856, "eval_samples_per_second": 323.552, "eval_steps_per_second": 20.431, "step": 2698 }, { "epoch": 40.0, "eval_loss": 0.11377756297588348, "eval_macro_f1": 0.8074550758048753, "eval_macro_precision": 0.8601506214626871, "eval_macro_recall": 0.7772364003410493, "eval_micro_f1": 0.8550483172275909, "eval_micro_precision": 0.8645552560646901, "eval_micro_recall": 0.8457481872116018, "eval_runtime": 2.987, "eval_samples_per_second": 323.399, "eval_steps_per_second": 20.422, "step": 2840 }, { "epoch": 42.0, "eval_loss": 0.11456111818552017, "eval_macro_f1": 0.8197602680311404, "eval_macro_precision": 0.8652772327919502, "eval_macro_recall": 0.7923572373451824, "eval_micro_f1": 0.8584748584748586, "eval_micro_precision": 0.8674293405114402, "eval_micro_recall": 0.8497033618984838, "eval_runtime": 2.9793, "eval_samples_per_second": 324.237, "eval_steps_per_second": 20.475, "step": 2982 }, { "epoch": 42.25, "learning_rate": 2.0371331151207586e-05, "loss": 0.0049, "step": 3000 }, { "epoch": 44.0, "eval_loss": 0.11773423105478287, "eval_macro_f1": 0.8086306686692242, "eval_macro_precision": 0.8642281309223148, "eval_macro_recall": 0.7764573931394076, "eval_micro_f1": 0.8553291012362179, "eval_micro_precision": 0.8672086720867209, "eval_micro_recall": 0.8437705998681608, "eval_runtime": 2.9813, "eval_samples_per_second": 324.023, "eval_steps_per_second": 20.461, "step": 3124 }, { "epoch": 46.0, "eval_loss": 0.11780666559934616, "eval_macro_f1": 0.8101599105843645, "eval_macro_precision": 0.8691080726361069, "eval_macro_recall": 0.7762766074908475, "eval_micro_f1": 0.8565723793677205, "eval_micro_precision": 0.8649193548387096, "eval_micro_recall": 0.8483849703361899, "eval_runtime": 2.9804, "eval_samples_per_second": 324.123, "eval_steps_per_second": 20.467, "step": 3266 }, { "epoch": 48.0, "eval_loss": 0.11932362616062164, "eval_macro_f1": 0.8165194442314286, "eval_macro_precision": 0.8648115942931565, "eval_macro_recall": 0.7864788866212832, "eval_micro_f1": 0.8535201868535202, "eval_micro_precision": 0.8641891891891892, "eval_micro_recall": 0.8431114040870138, "eval_runtime": 2.985, "eval_samples_per_second": 323.616, "eval_steps_per_second": 20.435, "step": 3408 }, { "epoch": 49.3, "learning_rate": 1.7887022474231054e-05, "loss": 0.0034, "step": 3500 }, { "epoch": 50.0, "eval_loss": 0.12221735715866089, "eval_macro_f1": 0.8141089592504215, "eval_macro_precision": 0.8615361853719665, "eval_macro_recall": 0.7844581807191786, "eval_micro_f1": 0.8523714094856378, "eval_micro_precision": 0.8639133378469871, "eval_micro_recall": 0.8411338167435728, "eval_runtime": 2.9803, "eval_samples_per_second": 324.125, "eval_steps_per_second": 20.468, "step": 3550 }, { "epoch": 52.0, "eval_loss": 0.12201466411352158, "eval_macro_f1": 0.8215839258353351, "eval_macro_precision": 0.8618774470352207, "eval_macro_recall": 0.795741448912531, "eval_micro_f1": 0.8534223706176962, "eval_micro_precision": 0.86468200270636, "eval_micro_recall": 0.8424522083058669, "eval_runtime": 2.9796, "eval_samples_per_second": 324.208, "eval_steps_per_second": 20.473, "step": 3692 }, { "epoch": 54.0, "eval_loss": 0.12538054585456848, "eval_macro_f1": 0.8134875725433206, "eval_macro_precision": 0.8710944940417797, "eval_macro_recall": 0.7763605326378977, "eval_micro_f1": 0.85425782564793, "eval_micro_precision": 0.8727647867950481, "eval_micro_recall": 0.8365194462755439, "eval_runtime": 2.9841, "eval_samples_per_second": 323.717, "eval_steps_per_second": 20.442, "step": 3834 }, { "epoch": 56.0, "eval_loss": 0.12492760270833969, "eval_macro_f1": 0.8249799337347952, "eval_macro_precision": 0.8672879358583392, "eval_macro_recall": 0.7974535529440212, "eval_micro_f1": 0.8574290484140235, "eval_micro_precision": 0.8687415426251691, "eval_micro_recall": 0.8464073829927489, "eval_runtime": 2.9821, "eval_samples_per_second": 323.938, "eval_steps_per_second": 20.456, "step": 3976 } ], "max_steps": 7100, "num_train_epochs": 100, "total_flos": 3.752304368656253e+16, "trial_name": null, "trial_params": { "adam_epsilon": 2.4799103776060603e-09, "learning_rate": 3.4268553890214325e-05, "per_device_eval_batch_size": 16, "per_device_train_batch_size": 32, "seed": 326, "warmup_steps": 203, "weight_decay": 2.8436289860950645e-08 } }