{ "best_metric": 4.886234283447266, "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-75", "epoch": 0.010821730033908088, "eval_steps": 25, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.076544761657715, "learning_rate": 4.807692307692308e-06, "loss": 6.0722, "step": 25 }, { "epoch": 0.0, "eval_loss": 6.239687919616699, "eval_runtime": 258.5805, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "step": 25 }, { "epoch": 0.01, "grad_norm": 2.1901891231536865, "learning_rate": 9.615384615384616e-06, "loss": 6.1105, "step": 50 }, { "epoch": 0.01, "eval_loss": 5.878845691680908, "eval_runtime": 259.0078, "eval_samples_per_second": 1.085, "eval_steps_per_second": 1.085, "step": 50 }, { "epoch": 0.01, "grad_norm": 7.468161582946777, "learning_rate": 1.4423076923076923e-05, "loss": 5.1608, "step": 75 }, { "epoch": 0.01, "eval_loss": 4.886234283447266, "eval_runtime": 259.2966, "eval_samples_per_second": 1.084, "eval_steps_per_second": 1.084, "step": 75 } ], "logging_steps": 25, "max_steps": 34650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 25, "total_flos": 1867509460915200.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }