vsvasconcelos committed on
Commit
864a1fd
1 Parent(s): fe551f2

Upload 14 files

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f7526ee08f438d7808d9785137c18cbf8654a6472bcacb5c4de6092ac3fcb0c
3
  size 21005448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b77ce3eabe98b3dd5d9af3d242aa7afa8f33882d1f56f65c58998bc9825b2a3
3
  size 21005448
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd94af4f394684f24cd699ee85a06451d1f8c42005cba02e137e0162177758d6
3
  size 42094394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df45b609c722c27fd6af31839d2e4766641be7a59015121be37decee3b15076
3
  size 42094394
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dd884ff74300335908cb0ce933135e61390658c94c2bd331d51f8cec6d63fa1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fef053b98b59c364725345dad3e53b155f73438c8248fe2f9a1da5c24f99b84
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f373e8fb20037e2635440e7a6a44223f19af178ddd45fe7d18a8d131ea1297e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65388ae503753e7077fd19c66a72087713b4e5c41500c9577a18b19abc7e4d66
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0940170940170941,
5
  "eval_steps": 100,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -217,6 +217,216 @@
217
  "eval_samples_per_second": 0.75,
218
  "eval_steps_per_second": 0.094,
219
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  }
221
  ],
222
  "logging_steps": 100,
@@ -224,7 +434,7 @@
224
  "num_input_tokens_seen": 0,
225
  "num_train_epochs": 5,
226
  "save_steps": 100,
227
- "total_flos": 5.70672180486144e+17,
228
  "train_batch_size": 2,
229
  "trial_name": null,
230
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.1880341880341883,
5
  "eval_steps": 100,
6
+ "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
217
  "eval_samples_per_second": 0.75,
218
  "eval_steps_per_second": 0.094,
219
  "step": 1400
220
+ },
221
+ {
222
+ "epoch": 1.1721611721611722,
223
+ "grad_norm": 0.3037821054458618,
224
+ "learning_rate": 1.8067819073052813e-05,
225
+ "loss": 1.7148,
226
+ "step": 1500
227
+ },
228
+ {
229
+ "epoch": 1.1721611721611722,
230
+ "eval_loss": 1.612409234046936,
231
+ "eval_runtime": 3364.8773,
232
+ "eval_samples_per_second": 0.751,
233
+ "eval_steps_per_second": 0.094,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 1.2503052503052503,
238
+ "grad_norm": 0.3078468441963196,
239
+ "learning_rate": 1.7678472543402166e-05,
240
+ "loss": 1.7003,
241
+ "step": 1600
242
+ },
243
+ {
244
+ "epoch": 1.2503052503052503,
245
+ "eval_loss": 1.607498049736023,
246
+ "eval_runtime": 3366.2319,
247
+ "eval_samples_per_second": 0.75,
248
+ "eval_steps_per_second": 0.094,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 1.3284493284493284,
253
+ "grad_norm": 0.3602350056171417,
254
+ "learning_rate": 1.7258455772543573e-05,
255
+ "loss": 1.6981,
256
+ "step": 1700
257
+ },
258
+ {
259
+ "epoch": 1.3284493284493284,
260
+ "eval_loss": 1.6040035486221313,
261
+ "eval_runtime": 3369.5225,
262
+ "eval_samples_per_second": 0.75,
263
+ "eval_steps_per_second": 0.094,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 1.4065934065934065,
268
+ "grad_norm": 0.34948959946632385,
269
+ "learning_rate": 1.6809446439937472e-05,
270
+ "loss": 1.703,
271
+ "step": 1800
272
+ },
273
+ {
274
+ "epoch": 1.4065934065934065,
275
+ "eval_loss": 1.6002304553985596,
276
+ "eval_runtime": 3359.2322,
277
+ "eval_samples_per_second": 0.752,
278
+ "eval_steps_per_second": 0.094,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 1.4847374847374848,
283
+ "grad_norm": 0.33941686153411865,
284
+ "learning_rate": 1.6333238030480473e-05,
285
+ "loss": 1.6842,
286
+ "step": 1900
287
+ },
288
+ {
289
+ "epoch": 1.4847374847374848,
290
+ "eval_loss": 1.5976512432098389,
291
+ "eval_runtime": 3363.9968,
292
+ "eval_samples_per_second": 0.751,
293
+ "eval_steps_per_second": 0.094,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 1.5628815628815629,
298
+ "grad_norm": 0.3594741225242615,
299
+ "learning_rate": 1.5831732670761e-05,
300
+ "loss": 1.6813,
301
+ "step": 2000
302
+ },
303
+ {
304
+ "epoch": 1.5628815628815629,
305
+ "eval_loss": 1.5942325592041016,
306
+ "eval_runtime": 3356.6247,
307
+ "eval_samples_per_second": 0.753,
308
+ "eval_steps_per_second": 0.094,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 1.641025641025641,
313
+ "grad_norm": 0.3513031303882599,
314
+ "learning_rate": 1.5306933531365748e-05,
315
+ "loss": 1.6747,
316
+ "step": 2100
317
+ },
318
+ {
319
+ "epoch": 1.641025641025641,
320
+ "eval_loss": 1.591470718383789,
321
+ "eval_runtime": 3360.2103,
322
+ "eval_samples_per_second": 0.752,
323
+ "eval_steps_per_second": 0.094,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 1.7191697191697193,
328
+ "grad_norm": 0.3548774719238281,
329
+ "learning_rate": 1.4760936825584535e-05,
330
+ "loss": 1.6821,
331
+ "step": 2200
332
+ },
333
+ {
334
+ "epoch": 1.7191697191697193,
335
+ "eval_loss": 1.5890487432479858,
336
+ "eval_runtime": 3360.7245,
337
+ "eval_samples_per_second": 0.752,
338
+ "eval_steps_per_second": 0.094,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 1.7973137973137974,
343
+ "grad_norm": 0.37473219633102417,
344
+ "learning_rate": 1.4195923436473257e-05,
345
+ "loss": 1.685,
346
+ "step": 2300
347
+ },
348
+ {
349
+ "epoch": 1.7973137973137974,
350
+ "eval_loss": 1.586594820022583,
351
+ "eval_runtime": 3365.8879,
352
+ "eval_samples_per_second": 0.75,
353
+ "eval_steps_per_second": 0.094,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 1.8754578754578755,
358
+ "grad_norm": 0.37071430683135986,
359
+ "learning_rate": 1.3614150205719086e-05,
360
+ "loss": 1.6602,
361
+ "step": 2400
362
+ },
363
+ {
364
+ "epoch": 1.8754578754578755,
365
+ "eval_loss": 1.5838119983673096,
366
+ "eval_runtime": 3363.0708,
367
+ "eval_samples_per_second": 0.751,
368
+ "eval_steps_per_second": 0.094,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 1.9536019536019538,
373
+ "grad_norm": 0.4014996588230133,
374
+ "learning_rate": 1.3017940919102943e-05,
375
+ "loss": 1.682,
376
+ "step": 2500
377
+ },
378
+ {
379
+ "epoch": 1.9536019536019538,
380
+ "eval_loss": 1.5820069313049316,
381
+ "eval_runtime": 3367.2545,
382
+ "eval_samples_per_second": 0.75,
383
+ "eval_steps_per_second": 0.094,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 2.0317460317460316,
388
+ "grad_norm": 0.3767947256565094,
389
+ "learning_rate": 1.2409677024566145e-05,
390
+ "loss": 1.6595,
391
+ "step": 2600
392
+ },
393
+ {
394
+ "epoch": 2.0317460317460316,
395
+ "eval_loss": 1.5797280073165894,
396
+ "eval_runtime": 3364.0566,
397
+ "eval_samples_per_second": 0.751,
398
+ "eval_steps_per_second": 0.094,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 2.10989010989011,
403
+ "grad_norm": 0.3852591812610626,
404
+ "learning_rate": 1.1791788119956191e-05,
405
+ "loss": 1.6606,
406
+ "step": 2700
407
+ },
408
+ {
409
+ "epoch": 2.10989010989011,
410
+ "eval_loss": 1.578330636024475,
411
+ "eval_runtime": 3359.3638,
412
+ "eval_samples_per_second": 0.752,
413
+ "eval_steps_per_second": 0.094,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 2.1880341880341883,
418
+ "grad_norm": 0.3954846262931824,
419
+ "learning_rate": 1.116674224844664e-05,
420
+ "loss": 1.6611,
421
+ "step": 2800
422
+ },
423
+ {
424
+ "epoch": 2.1880341880341883,
425
+ "eval_loss": 1.5769294500350952,
426
+ "eval_runtime": 3365.2078,
427
+ "eval_samples_per_second": 0.751,
428
+ "eval_steps_per_second": 0.094,
429
+ "step": 2800
430
  }
431
  ],
432
  "logging_steps": 100,
 
434
  "num_input_tokens_seen": 0,
435
  "num_train_epochs": 5,
436
  "save_steps": 100,
437
+ "total_flos": 1.141344360972288e+18,
438
  "train_batch_size": 2,
439
  "trial_name": null,
440
  "trial_params": null