sjdata committed on
Commit
38bfa2c
1 Parent(s): ba1d577

🍻 cheers

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +46 -46
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "total_flos": 3.233737255326843e+17,
4
- "train_loss": 0.21213595934140567,
5
- "train_runtime": 75.7831,
6
- "train_samples_per_second": 54.577,
7
- "train_steps_per_second": 3.431
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "total_flos": 3.205097416476426e+17,
4
+ "train_loss": 0.13431097194552422,
5
+ "train_runtime": 68.8344,
6
+ "train_samples_per_second": 60.086,
7
+ "train_steps_per_second": 3.777
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "total_flos": 3.233737255326843e+17,
4
- "train_loss": 0.21213595934140567,
5
- "train_runtime": 75.7831,
6
- "train_samples_per_second": 54.577,
7
- "train_steps_per_second": 3.431
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "total_flos": 3.205097416476426e+17,
4
+ "train_loss": 0.13431097194552422,
5
+ "train_runtime": 68.8344,
6
+ "train_samples_per_second": 60.086,
7
+ "train_steps_per_second": 3.777
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.006127053406089544,
3
  "best_model_checkpoint": "./vit-base-beans/checkpoint-200",
4
  "epoch": 4.0,
5
  "global_step": 260,
@@ -9,191 +9,191 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.15,
12
- "learning_rate": 0.0001930769230769231,
13
- "loss": 2.9271,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.31,
18
- "learning_rate": 0.0001853846153846154,
19
- "loss": 0.4676,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.46,
24
- "learning_rate": 0.0001776923076923077,
25
- "loss": 0.3661,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.62,
30
- "learning_rate": 0.00017,
31
- "loss": 0.2549,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.77,
36
  "learning_rate": 0.0001623076923076923,
37
- "loss": 0.229,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.92,
42
  "learning_rate": 0.00015461538461538464,
43
- "loss": 0.2858,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 1.08,
48
  "learning_rate": 0.00014692307692307693,
49
- "loss": 0.1084,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 1.23,
54
  "learning_rate": 0.00013923076923076923,
55
- "loss": 0.1062,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 1.38,
60
  "learning_rate": 0.00013153846153846156,
61
- "loss": 0.1174,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 1.54,
66
  "learning_rate": 0.00012384615384615385,
67
- "loss": 0.0994,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 1.54,
72
- "eval_accuracy": 0.9849624060150376,
73
- "eval_loss": 0.05971372127532959,
74
- "eval_runtime": 1.0813,
75
- "eval_samples_per_second": 122.997,
76
- "eval_steps_per_second": 15.721,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.69,
81
  "learning_rate": 0.00011615384615384617,
82
- "loss": 0.1219,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.85,
87
  "learning_rate": 0.00010846153846153846,
88
- "loss": 0.0793,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 2.0,
93
  "learning_rate": 0.00010076923076923077,
94
- "loss": 0.0741,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 2.15,
99
  "learning_rate": 9.307692307692309e-05,
100
- "loss": 0.0281,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 2.31,
105
  "learning_rate": 8.538461538461538e-05,
106
- "loss": 0.0309,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 2.46,
111
  "learning_rate": 7.76923076923077e-05,
112
- "loss": 0.1636,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 2.62,
117
  "learning_rate": 7e-05,
118
- "loss": 0.0244,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 2.77,
123
  "learning_rate": 6.23076923076923e-05,
124
- "loss": 0.0115,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 2.92,
129
  "learning_rate": 5.461538461538461e-05,
130
- "loss": 0.0046,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 3.08,
135
  "learning_rate": 4.692307692307693e-05,
136
- "loss": 0.001,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 3.08,
141
  "eval_accuracy": 1.0,
142
- "eval_loss": 0.006127053406089544,
143
- "eval_runtime": 1.5958,
144
- "eval_samples_per_second": 83.342,
145
- "eval_steps_per_second": 10.653,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 3.23,
150
  "learning_rate": 3.923076923076923e-05,
151
- "loss": 0.0081,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 3.38,
156
  "learning_rate": 3.153846153846154e-05,
157
- "loss": 0.0004,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 3.54,
162
  "learning_rate": 2.384615384615385e-05,
163
- "loss": 0.0009,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 3.69,
168
  "learning_rate": 1.6153846153846154e-05,
169
- "loss": 0.0004,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 3.85,
174
  "learning_rate": 8.461538461538462e-06,
175
- "loss": 0.0019,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 4.0,
180
  "learning_rate": 7.692307692307694e-07,
181
- "loss": 0.0026,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 4.0,
186
  "step": 260,
187
- "total_flos": 3.233737255326843e+17,
188
- "train_loss": 0.21213595934140567,
189
- "train_runtime": 75.7831,
190
- "train_samples_per_second": 54.577,
191
- "train_steps_per_second": 3.431
192
  }
193
  ],
194
  "max_steps": 260,
195
  "num_train_epochs": 4,
196
- "total_flos": 3.233737255326843e+17,
197
  "trial_name": null,
198
  "trial_params": null
199
  }
 
1
  {
2
+ "best_metric": 0.013820217922329903,
3
  "best_model_checkpoint": "./vit-base-beans/checkpoint-200",
4
  "epoch": 4.0,
5
  "global_step": 260,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.15,
12
+ "learning_rate": 0.00019230769230769233,
13
+ "loss": 0.8816,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.31,
18
+ "learning_rate": 0.00018461538461538463,
19
+ "loss": 0.4369,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.46,
24
+ "learning_rate": 0.00017692307692307693,
25
+ "loss": 0.2885,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.62,
30
+ "learning_rate": 0.00016923076923076923,
31
+ "loss": 0.1892,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.77,
36
  "learning_rate": 0.0001623076923076923,
37
+ "loss": 0.2412,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.92,
42
  "learning_rate": 0.00015461538461538464,
43
+ "loss": 0.265,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 1.08,
48
  "learning_rate": 0.00014692307692307693,
49
+ "loss": 0.1797,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 1.23,
54
  "learning_rate": 0.00013923076923076923,
55
+ "loss": 0.0923,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 1.38,
60
  "learning_rate": 0.00013153846153846156,
61
+ "loss": 0.0401,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 1.54,
66
  "learning_rate": 0.00012384615384615385,
67
+ "loss": 0.1595,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 1.54,
72
+ "eval_accuracy": 0.9699248120300752,
73
+ "eval_loss": 0.1211724504828453,
74
+ "eval_runtime": 1.0092,
75
+ "eval_samples_per_second": 131.786,
76
+ "eval_steps_per_second": 16.845,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.69,
81
  "learning_rate": 0.00011615384615384617,
82
+ "loss": 0.0987,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.85,
87
  "learning_rate": 0.00010846153846153846,
88
+ "loss": 0.1505,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 2.0,
93
  "learning_rate": 0.00010076923076923077,
94
+ "loss": 0.0768,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 2.15,
99
  "learning_rate": 9.307692307692309e-05,
100
+ "loss": 0.0241,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 2.31,
105
  "learning_rate": 8.538461538461538e-05,
106
+ "loss": 0.0415,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 2.46,
111
  "learning_rate": 7.76923076923077e-05,
112
+ "loss": 0.07,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 2.62,
117
  "learning_rate": 7e-05,
118
+ "loss": 0.0488,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 2.77,
123
  "learning_rate": 6.23076923076923e-05,
124
+ "loss": 0.0738,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 2.92,
129
  "learning_rate": 5.461538461538461e-05,
130
+ "loss": 0.0162,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 3.08,
135
  "learning_rate": 4.692307692307693e-05,
136
+ "loss": 0.014,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 3.08,
141
  "eval_accuracy": 1.0,
142
+ "eval_loss": 0.013820217922329903,
143
+ "eval_runtime": 1.0516,
144
+ "eval_samples_per_second": 126.478,
145
+ "eval_steps_per_second": 16.166,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 3.23,
150
  "learning_rate": 3.923076923076923e-05,
151
+ "loss": 0.0131,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 3.38,
156
  "learning_rate": 3.153846153846154e-05,
157
+ "loss": 0.0421,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 3.54,
162
  "learning_rate": 2.384615384615385e-05,
163
+ "loss": 0.0122,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 3.69,
168
  "learning_rate": 1.6153846153846154e-05,
169
+ "loss": 0.0123,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 3.85,
174
  "learning_rate": 8.461538461538462e-06,
175
+ "loss": 0.0121,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 4.0,
180
  "learning_rate": 7.692307692307694e-07,
181
+ "loss": 0.012,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 4.0,
186
  "step": 260,
187
+ "total_flos": 3.205097416476426e+17,
188
+ "train_loss": 0.13431097194552422,
189
+ "train_runtime": 68.8344,
190
+ "train_samples_per_second": 60.086,
191
+ "train_steps_per_second": 3.777
192
  }
193
  ],
194
  "max_steps": 260,
195
  "num_train_epochs": 4,
196
+ "total_flos": 3.205097416476426e+17,
197
  "trial_name": null,
198
  "trial_params": null
199
  }