Nadav committed on
Commit
cb6f055
1 Parent(s): 60c51e5

Training in progress, step 25000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49067541189efe886db6963256073252d295ca9bdc1760d32149b3d192043cff
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8ac0528ae96ac37b3b802dfcf67b3e3287a79699902d5ea869bb7ecd76a86d
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b40587551c6359d244432c6aa9fe4499a1dd75262644d31886b6f012e695e7e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f550aaa679be49bdef1e79d5263b22969a6f34b532e760e534bc4ff1c616cbeb
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b9ef78655e6bb3bfb1e8e0536d4b8d1bed93f33651cb43025c88490b2ea3396
3
- size 15459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc9dc36bcc19ec50bad9e258fedb0a58fc5090294d119fe4f13612d58ae4fd2
3
+ size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a41408f29059abc49e36ed4313f4edd9d8787eb8db25f44722ca806febce9e97
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50ef36672b7ac14c121f179c123c02c01d1b8c2cb6f2f99d3d54a1ecdd9a6777
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6df48bd3acfa1b41b6ce6c0df78ab20393bdc886bb13843361b0b4ecc2d1016d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdbfcefcd9e1661e5368ed0a7d4915dce2e8bd6a92fd4d33ed45da578d95703
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7410986332375729,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -278,11 +278,79 @@
278
  "eval_samples_per_second": 87.452,
279
  "eval_steps_per_second": 1.382,
280
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  }
282
  ],
283
  "max_steps": 100000,
284
  "num_train_epochs": 9,
285
- "total_flos": 9.419678881926768e+20,
286
  "trial_name": null,
287
  "trial_params": null
288
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4353125544140692,
5
+ "global_step": 25000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
278
  "eval_samples_per_second": 87.452,
279
  "eval_steps_per_second": 1.382,
280
  "step": 20000
281
+ },
282
+ {
283
+ "epoch": 1.04,
284
+ "learning_rate": 9.169565984405851e-05,
285
+ "loss": 0.4442,
286
+ "step": 20500
287
+ },
288
+ {
289
+ "epoch": 1.09,
290
+ "learning_rate": 9.126411941080688e-05,
291
+ "loss": 0.4404,
292
+ "step": 21000
293
+ },
294
+ {
295
+ "epoch": 1.13,
296
+ "learning_rate": 9.082376309845848e-05,
297
+ "loss": 0.4418,
298
+ "step": 21500
299
+ },
300
+ {
301
+ "epoch": 1.17,
302
+ "learning_rate": 9.037379052736644e-05,
303
+ "loss": 0.44,
304
+ "step": 22000
305
+ },
306
+ {
307
+ "epoch": 1.22,
308
+ "learning_rate": 8.991518638364486e-05,
309
+ "loss": 0.4388,
310
+ "step": 22500
311
+ },
312
+ {
313
+ "epoch": 1.26,
314
+ "learning_rate": 8.944806382100394e-05,
315
+ "loss": 0.4389,
316
+ "step": 23000
317
+ },
318
+ {
319
+ "epoch": 1.3,
320
+ "learning_rate": 8.897253809494634e-05,
321
+ "loss": 0.4386,
322
+ "step": 23500
323
+ },
324
+ {
325
+ "epoch": 1.35,
326
+ "learning_rate": 8.848872653432954e-05,
327
+ "loss": 0.4373,
328
+ "step": 24000
329
+ },
330
+ {
331
+ "epoch": 1.39,
332
+ "learning_rate": 8.799674851241666e-05,
333
+ "loss": 0.438,
334
+ "step": 24500
335
+ },
336
+ {
337
+ "epoch": 1.44,
338
+ "learning_rate": 8.749672541742298e-05,
339
+ "loss": 0.4379,
340
+ "step": 25000
341
+ },
342
+ {
343
+ "epoch": 1.44,
344
+ "eval_loss": 0.415912002325058,
345
+ "eval_runtime": 93.156,
346
+ "eval_samples_per_second": 53.673,
347
+ "eval_steps_per_second": 0.848,
348
+ "step": 25000
349
  }
350
  ],
351
  "max_steps": 100000,
352
  "num_train_epochs": 9,
353
+ "total_flos": 1.1774609555574384e+21,
354
  "trial_name": null,
355
  "trial_params": null
356
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30c25b619285837a9cbe79c92a5e0fc44806703a40caa54c45909f86e5afdcbb
3
  size 5679
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0091be33616f449526c621b5dc7d5430418f8d9b3aec1ceac81b29b107da733
3
  size 5679
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b40587551c6359d244432c6aa9fe4499a1dd75262644d31886b6f012e695e7e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f550aaa679be49bdef1e79d5263b22969a6f34b532e760e534bc4ff1c616cbeb
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30c25b619285837a9cbe79c92a5e0fc44806703a40caa54c45909f86e5afdcbb
3
  size 5679
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0091be33616f449526c621b5dc7d5430418f8d9b3aec1ceac81b29b107da733
3
  size 5679