Existance committed on
Commit
743b49e
1 Parent(s): 6629342

Training in progress, step 25, checkpoint

Browse files
checkpoint-25/adapter_config.json CHANGED
@@ -20,8 +20,8 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
- "q_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "q_proj",
24
+ "k_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
checkpoint-25/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1563bb2f99164590d2dbdc515e408979c62a8093aeff08436576e9c26413bc8
3
  size 26235864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe4050447deec581bd3ff93dfbe198cc18566d7b2d38d43238acf2738c2abdd
3
  size 26235864
checkpoint-25/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbe492804c82bf09b809e818917727b1e18838b0bc10272e3390e307247a6458
3
  size 52523386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4cb47b2b9565fe2265c4b343bef6711b4cb97a25458bf2ab7f0af03098f83fb
3
  size 52523386
checkpoint-25/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fa8d5f3e9c6b5abca54e31be902d76e695522f08d66d4ead12cfcc409589fb2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abd83e428a64cc7191cf368e64a77290525a3cadd3b210219a78a857e2fdb8ae
3
  size 14244
checkpoint-25/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:641e38a7f663447384b1de7ed580f2bead07e3637186dcf8d53646516defeff0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b5ec54b43ed8952919b704047aa882c4dd3048daa7c693f2bdcae16f7aa402
3
  size 1064
checkpoint-25/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 6.239687919616699,
3
  "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-25",
4
- "epoch": 0.0036072433446360293,
5
  "eval_steps": 25,
6
  "global_step": 25,
7
  "is_hyper_param_search": false,
@@ -10,26 +10,26 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "grad_norm": 4.076544761657715,
14
- "learning_rate": 4.807692307692308e-06,
15
- "loss": 6.0722,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.0,
20
- "eval_loss": 6.239687919616699,
21
- "eval_runtime": 258.5805,
22
- "eval_samples_per_second": 1.087,
23
- "eval_steps_per_second": 1.087,
24
  "step": 25
25
  }
26
  ],
27
  "logging_steps": 25,
28
- "max_steps": 34650,
29
  "num_input_tokens_seen": 0,
30
- "num_train_epochs": 5,
31
  "save_steps": 25,
32
- "total_flos": 626456460764160.0,
33
  "train_batch_size": 1,
34
  "trial_name": null,
35
  "trial_params": null
 
1
  {
2
+ "best_metric": 5.88370418548584,
3
  "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-25",
4
+ "epoch": 0.003574747980267391,
5
  "eval_steps": 25,
6
  "global_step": 25,
7
  "is_hyper_param_search": false,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "grad_norm": 5.597507476806641,
14
+ "learning_rate": 7.936507936507936e-06,
15
+ "loss": 6.5107,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.0,
20
+ "eval_loss": 5.88370418548584,
21
+ "eval_runtime": 27.753,
22
+ "eval_samples_per_second": 1.045,
23
+ "eval_steps_per_second": 1.045,
24
  "step": 25
25
  }
26
  ],
27
  "logging_steps": 25,
28
+ "max_steps": 20979,
29
  "num_input_tokens_seen": 0,
30
+ "num_train_epochs": 3,
31
  "save_steps": 25,
32
+ "total_flos": 500129031075840.0,
33
  "train_batch_size": 1,
34
  "trial_name": null,
35
  "trial_params": null
checkpoint-25/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81d460fd33adef439c648b974d0d6b51823bfbb80f3fee5d7110d1f149b86cd6
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9388d54f20b310f46b4eefba4bae25cec3658ca4bd837cd5c0be8d9c2b1f2f7
3
  size 5048