supermy committed on
Commit
8aceac5
1 Parent(s): 938a7ad

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +35 -34
README.md CHANGED
@@ -65,44 +65,45 @@ model = AutoModelForCausalLM.from_pretrained("supermy/couplet")
65
 
66
  bpe分词:"vocab_size"=50000
67
  ```
68
- [INFO|trainer.py:1608] 2022-11-29 16:00:16,391 >> ***** Running training *****
69
- [INFO|trainer.py:1609] 2022-11-29 16:00:16,391 >> Num examples = 249327
70
- [INFO|trainer.py:1610] 2022-11-29 16:00:16,391 >> Num Epochs = 38
71
- [INFO|trainer.py:1611] 2022-11-29 16:00:16,391 >> Instantaneous batch size per device = 96
72
- [INFO|trainer.py:1612] 2022-11-29 16:00:16,391 >> Total train batch size (w. parallel, distributed & accumulation) = 96
73
- [INFO|trainer.py:1613] 2022-11-29 16:00:16,391 >> Gradient Accumulation steps = 1
74
- [INFO|trainer.py:1614] 2022-11-29 16:00:16,391 >> Total optimization steps = 98724
75
- [INFO|trainer.py:1616] 2022-11-29 16:00:16,392 >> Number of trainable parameters = 124439808
76
-
77
- {'loss': 6.4109, 'learning_rate': 4.975031400672582e-05, 'epoch': 0.19}
78
- {'loss': 5.8476, 'learning_rate': 4.9497082776224627e-05, 'epoch': 0.38}
 
79
  ......
80
  ......
81
  ......
82
- {'loss': 3.4331, 'learning_rate': 1.3573193954864066e-07, 'epoch': 37.91}
83
- {'train_runtime': 65776.233, 'train_samples_per_second': 144.04, 'train_steps_per_second': 1.501, 'train_loss': 3.74187503763847, 'epoch': 38.0}
84
  ***** train metrics *****
85
- epoch = 38.0
86
- train_loss = 3.7419
87
- train_runtime = 18:16:16.23
88
- train_samples = 249327
89
- train_samples_per_second = 144.04
90
- train_steps_per_second = 1.501
91
- 11/30/2022 10:16:35 - INFO - __main__ - *** Evaluate ***
92
- [INFO|trainer.py:2929] 2022-11-30 10:16:35,902 >> ***** Running Evaluation *****
93
- [INFO|trainer.py:2931] 2022-11-30 10:16:35,902 >> Num examples = 1290
94
- [INFO|trainer.py:2934] 2022-11-30 10:16:35,902 >> Batch size = 96
95
- 100%|██████████| 14/14 [00:03<00:00, 4.13it/s]
96
- [INFO|modelcard.py:449] 2022-11-30 10:16:40,821 >> Dropping the following result as it does not have all the necessary fields:
97
- {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.39426602682416634}]}
98
  ***** eval metrics *****
99
- epoch = 38.0
100
- eval_accuracy = 0.3943
101
- eval_loss = 3.546
102
- eval_runtime = 0:00:03.67
103
- eval_samples = 1290
104
- eval_samples_per_second = 351.199
105
- eval_steps_per_second = 3.811
106
- perplexity = 34.6733
107
 
108
  ```
 
65
 
66
  bpe分词:"vocab_size"=50000
67
  ```
68
+ [INFO|trainer.py:1608] 2022-11-30 12:51:36,357 >> ***** Running training *****
69
+ [INFO|trainer.py:1609] 2022-11-30 12:51:36,357 >> Num examples = 260926
70
+ [INFO|trainer.py:1610] 2022-11-30 12:51:36,357 >> Num Epochs = 81
71
+ [INFO|trainer.py:1611] 2022-11-30 12:51:36,357 >> Instantaneous batch size per device = 96
72
+ [INFO|trainer.py:1612] 2022-11-30 12:51:36,357 >> Total train batch size (w. parallel, distributed & accumulation) = 96
73
+ [INFO|trainer.py:1613] 2022-11-30 12:51:36,357 >> Gradient Accumulation steps = 1
74
+ [INFO|trainer.py:1614] 2022-11-30 12:51:36,357 >> Total optimization steps = 220158
75
+ [INFO|trainer.py:1616] 2022-11-30 12:51:36,358 >> Number of trainable parameters = 124439808
76
+
77
+ {'loss': 6.1104, 'learning_rate': 4.9888034956712906e-05, 'epoch': 0.18}
78
+ {'loss': 5.5855, 'learning_rate': 4.977448014607691e-05, 'epoch': 0.37}
79
+ {'loss': 5.3264, 'learning_rate': 4.966092533544091e-05, 'epoch': 0.55}
80
  ......
81
  ......
82
  ......
83
+ {'loss': 2.8539, 'learning_rate': 5.677740531799889e-08, 'epoch': 80.94}
84
+ {'train_runtime': 146835.0563, 'train_samples_per_second': 143.937, 'train_steps_per_second': 1.499, 'train_loss': 3.1762605669072217, 'epoch': 81.0}
85
  ***** train metrics *****
86
+ epoch = 81.0
87
+ train_loss = 3.1763
88
+ train_runtime = 1 day, 16:47:15.05
89
+ train_samples = 260926
90
+ train_samples_per_second = 143.937
91
+ train_steps_per_second = 1.499
92
+ 12/02/2022 05:38:54 - INFO - __main__ - *** Evaluate ***
93
+ [INFO|trainer.py:2929] 2022-12-02 05:38:54,688 >> ***** Running Evaluation *****
94
+ [INFO|trainer.py:2931] 2022-12-02 05:38:54,688 >> Num examples = 1350
95
+ [INFO|trainer.py:2934] 2022-12-02 05:38:54,688 >> Batch size = 96
96
+ 100%|██████████| 15/15 [00:03<00:00, 4.20it/s]
97
+ [INFO|modelcard.py:449] 2022-12-02 05:38:59,875 >> Dropping the following result as it does not have all the necessary fields:
98
+ {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.4447501469723692}]}
99
  ***** eval metrics *****
100
+ epoch = 81.0
101
+ eval_accuracy = 0.4448
102
+ eval_loss = 3.2813
103
+ eval_runtime = 0:00:03.86
104
+ eval_samples = 1350
105
+ eval_samples_per_second = 349.505
106
+ eval_steps_per_second = 3.883
107
+ perplexity = 26.6108
108
 
109
  ```