{ "best_metric": 2.203572988510132, "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-250", "epoch": 0.03607243344636029, "eval_steps": 25, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.076544761657715, "learning_rate": 4.807692307692308e-06, "loss": 6.0722, "step": 25 }, { "epoch": 0.0, "eval_loss": 6.239687919616699, "eval_runtime": 258.5805, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "step": 25 }, { "epoch": 0.01, "grad_norm": 2.1901891231536865, "learning_rate": 9.615384615384616e-06, "loss": 6.1105, "step": 50 }, { "epoch": 0.01, "eval_loss": 5.878845691680908, "eval_runtime": 259.0078, "eval_samples_per_second": 1.085, "eval_steps_per_second": 1.085, "step": 50 }, { "epoch": 0.01, "grad_norm": 7.468161582946777, "learning_rate": 1.4423076923076923e-05, "loss": 5.1608, "step": 75 }, { "epoch": 0.01, "eval_loss": 4.886234283447266, "eval_runtime": 259.2966, "eval_samples_per_second": 1.084, "eval_steps_per_second": 1.084, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.9563682079315186, "learning_rate": 1.923076923076923e-05, "loss": 4.5709, "step": 100 }, { "epoch": 0.01, "eval_loss": 3.820255994796753, "eval_runtime": 258.4491, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "step": 100 }, { "epoch": 0.02, "grad_norm": 1.1275033950805664, "learning_rate": 2.4038461538461542e-05, "loss": 3.7854, "step": 125 }, { "epoch": 0.02, "eval_loss": 3.2866950035095215, "eval_runtime": 258.8408, "eval_samples_per_second": 1.086, "eval_steps_per_second": 1.086, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.8067069053649902, "learning_rate": 2.8846153846153845e-05, "loss": 2.8926, "step": 150 }, { "epoch": 0.02, "eval_loss": 2.8484082221984863, "eval_runtime": 259.0414, "eval_samples_per_second": 1.085, "eval_steps_per_second": 1.085, "step": 150 }, { "epoch": 0.03, "grad_norm": 0.6189056038856506, "learning_rate": 3.365384615384616e-05, "loss": 2.6005, "step": 175 }, { "epoch": 0.03, "eval_loss": 2.602226495742798, "eval_runtime": 258.7432, "eval_samples_per_second": 1.086, "eval_steps_per_second": 1.086, "step": 175 }, { "epoch": 0.03, "grad_norm": 1.2638813257217407, "learning_rate": 3.846153846153846e-05, "loss": 2.3939, "step": 200 }, { "epoch": 0.03, "eval_loss": 2.4451844692230225, "eval_runtime": 258.1713, "eval_samples_per_second": 1.088, "eval_steps_per_second": 1.088, "step": 200 }, { "epoch": 0.03, "grad_norm": 1.101535439491272, "learning_rate": 4.326923076923077e-05, "loss": 2.3008, "step": 225 }, { "epoch": 0.03, "eval_loss": 2.311918258666992, "eval_runtime": 259.1913, "eval_samples_per_second": 1.084, "eval_steps_per_second": 1.084, "step": 225 }, { "epoch": 0.04, "grad_norm": 1.461435079574585, "learning_rate": 4.8076923076923084e-05, "loss": 1.9883, "step": 250 }, { "epoch": 0.04, "eval_loss": 2.203572988510132, "eval_runtime": 258.6057, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "step": 250 } ], "logging_steps": 25, "max_steps": 34650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 25, "total_flos": 6028950357319680.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }