aaabiao committed on
Commit
b015ed2
1 Parent(s): ea00e3d

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ tags:
4
+ - llama-factory
5
+ - full
6
+ - generated_from_trainer
7
+ model-index:
8
+ - name: scaleup_STEM_merged_10M_MOE_sft_0428_256
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ ## Model description
16
+
17
+ More information needed
18
+
19
+ ## Intended uses & limitations
20
+
21
+ More information needed
22
+
23
+ ## Training and evaluation data
24
+
25
+ More information needed
26
+
27
+ ## Training procedure
28
+
29
+ ### Training hyperparameters
30
+
31
+ The following hyperparameters were used during training:
32
+ - learning_rate: 5e-06
33
+ - train_batch_size: 4
34
+ - eval_batch_size: 8
35
+ - seed: 42
36
+ - distributed_type: multi-GPU
37
+ - num_devices: 256
38
+ - total_train_batch_size: 1024
39
+ - total_eval_batch_size: 2048
40
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
+ - lr_scheduler_type: cosine
42
+ - lr_scheduler_warmup_ratio: 0.05
43
+ - num_epochs: 3.0
44
+
45
+ ### Training results
46
+
47
+
48
+
49
+ ### Framework versions
50
+
51
+ - Transformers 4.40.0
52
+ - PyTorch 2.3.0a0+40ec155e58.nv24.03
53
+ - Datasets 2.18.0
54
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4.271915416263066e+16,
4
+ "train_loss": 0.3954192769085303,
5
+ "train_runtime": 93126.1025,
6
+ "train_samples_per_second": 70.83,
7
+ "train_steps_per_second": 0.069
8
+ }
model-00001-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5e52a34a2bbaf221fa35a10e5b52ef022fe32d45373d0664347ddd2fa221836
3
+ size 4892809584
model-00002-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d33ea27253324c37d83c13a84779bfdbb78f08676c53e7af6fe1daef0a2820
3
+ size 4983004016
model-00003-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c659d265df682f0c04f74417b454577055a96408dd2c299e8ff8ebef9e8fe814
3
+ size 4983004016
model-00004-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c3cf01845abde048c2e09f3f58993b472b4dc6fe6b13e0657ad438e0d04fb41
3
+ size 4899035200
model-00005-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d4de45bf4528839cfce148cc578f9f5f5f3cf627e29ee0060c9102a73ddecec
3
+ size 4983004016
model-00006-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8705e4049624af211290d6d92870409fe9dc8524f97b480eafd234437cc5c99
3
+ size 4983004016
model-00007-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbc56f0029e4a923e7cd23377073b5f7dae62a3e65b536736a77cb05e8a77cce
3
+ size 4899035248
model-00008-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0a037d215e9403d731182400eee370c48379cba2313b0c2798f374aed6bb76
3
+ size 4983004072
model-00009-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:749a46ea190874a99bfdb88abb59a05ba567046adbd66540ac24a699b4c88e5e
3
+ size 4983004072
model-00010-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8945749230c93f506d22dcf0c27b25976346eb143ebe9071acffdcc5dc5257e1
3
+ size 4899035248
model-00011-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01069c00a98ad1cdc711e823b961471584c3fbaf5b5c867fcc7be7f79b6f3f0e
3
+ size 4983004072
model-00012-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c88b407b327438b6bfa7489ec33102d7988b4c03935033b6ff2bcc001a10089
3
+ size 4983004072
model-00013-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7da155beb3cc6a8d12b2ad8c5988f84d2afc62ab29f940e6d702a59dbbfd84
3
+ size 4983004072
model-00014-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d81012ed05e2945ca6519eed4da07f39869b0d8c6a41da49dbd24a5522fd552
3
+ size 4899035248
model-00015-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f6a0e1b984d69d25823dab2fcff806d2a99787c4ffeb22e3d2dd05bfe923b6
3
+ size 4983004072
model-00016-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b762c941e33ad82c04e0b14e4ecf5249f05e9d9323d56e058f3cd25651741b2a
3
+ size 4983004072
model-00017-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de252cd0286e7882d9bd3e737373dc00baad61309786dd43b5f2cf2867a6c6d6
3
+ size 4899035248
model-00018-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e224c6ad506c91a2b4e5752c04334a17f7a4eb17703adea2a0ce2c7c52bc0d
3
+ size 4983004072
model-00019-of-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad58f83d0dd4d67228c85cbe1f354cb0c2b7a91b7e80a3c375c4356b0eb73c70
3
+ size 4221679088
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4.271915416263066e+16,
4
+ "train_loss": 0.3954192769085303,
5
+ "train_runtime": 93126.1025,
6
+ "train_samples_per_second": 70.83,
7
+ "train_steps_per_second": 0.069
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2bbf7579ab389ada54152ffdc3c45fda213695f93e668f3c5f733a613f44767
3
+ size 6904