douy commited on
Commit
15b7835
1 Parent(s): 5c6d65b

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -264
trainer_state.json DELETED
@@ -1,264 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "eval_steps": 10,
6
- "global_step": 36,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.08,
13
- "learning_rate": 0.0,
14
- "loss": 1.796,
15
- "step": 1
16
- },
17
- {
18
- "epoch": 0.17,
19
- "learning_rate": 0.0001,
20
- "loss": 1.0996,
21
- "step": 2
22
- },
23
- {
24
- "epoch": 0.25,
25
- "learning_rate": 0.0001,
26
- "loss": 0.9834,
27
- "step": 3
28
- },
29
- {
30
- "epoch": 0.33,
31
- "learning_rate": 0.0001,
32
- "loss": 0.558,
33
- "step": 4
34
- },
35
- {
36
- "epoch": 0.42,
37
- "learning_rate": 0.0001,
38
- "loss": 0.3067,
39
- "step": 5
40
- },
41
- {
42
- "epoch": 0.5,
43
- "learning_rate": 0.0001,
44
- "loss": 0.4554,
45
- "step": 6
46
- },
47
- {
48
- "epoch": 0.58,
49
- "learning_rate": 0.0001,
50
- "loss": 0.4328,
51
- "step": 7
52
- },
53
- {
54
- "epoch": 0.67,
55
- "learning_rate": 0.0001,
56
- "loss": 0.2377,
57
- "step": 8
58
- },
59
- {
60
- "epoch": 0.75,
61
- "learning_rate": 0.0001,
62
- "loss": 0.3933,
63
- "step": 9
64
- },
65
- {
66
- "epoch": 0.83,
67
- "learning_rate": 0.0001,
68
- "loss": 0.3539,
69
- "step": 10
70
- },
71
- {
72
- "epoch": 0.83,
73
- "eval_accuracy": 0.01065855981246795,
74
- "eval_loss": 0.3053041696548462,
75
- "eval_runtime": 2.9764,
76
- "eval_samples_per_second": 7.391,
77
- "eval_steps_per_second": 0.336,
78
- "step": 10
79
- },
80
- {
81
- "epoch": 0.92,
82
- "learning_rate": 0.0001,
83
- "loss": 0.3407,
84
- "step": 11
85
- },
86
- {
87
- "epoch": 1.0,
88
- "learning_rate": 0.0001,
89
- "loss": 0.3791,
90
- "step": 12
91
- },
92
- {
93
- "epoch": 1.08,
94
- "learning_rate": 0.0001,
95
- "loss": 0.2567,
96
- "step": 13
97
- },
98
- {
99
- "epoch": 1.17,
100
- "learning_rate": 0.0001,
101
- "loss": 0.3081,
102
- "step": 14
103
- },
104
- {
105
- "epoch": 1.25,
106
- "learning_rate": 0.0001,
107
- "loss": 0.2464,
108
- "step": 15
109
- },
110
- {
111
- "epoch": 1.33,
112
- "learning_rate": 0.0001,
113
- "loss": 0.295,
114
- "step": 16
115
- },
116
- {
117
- "epoch": 1.42,
118
- "learning_rate": 0.0001,
119
- "loss": 0.243,
120
- "step": 17
121
- },
122
- {
123
- "epoch": 1.5,
124
- "learning_rate": 0.0001,
125
- "loss": 0.2889,
126
- "step": 18
127
- },
128
- {
129
- "epoch": 1.58,
130
- "learning_rate": 0.0001,
131
- "loss": 0.2491,
132
- "step": 19
133
- },
134
- {
135
- "epoch": 1.67,
136
- "learning_rate": 0.0001,
137
- "loss": 0.1398,
138
- "step": 20
139
- },
140
- {
141
- "epoch": 1.67,
142
- "eval_accuracy": 0.0107684418723903,
143
- "eval_loss": 0.2902178466320038,
144
- "eval_runtime": 2.52,
145
- "eval_samples_per_second": 8.73,
146
- "eval_steps_per_second": 0.397,
147
- "step": 20
148
- },
149
- {
150
- "epoch": 1.75,
151
- "learning_rate": 0.0001,
152
- "loss": 0.255,
153
- "step": 21
154
- },
155
- {
156
- "epoch": 1.83,
157
- "learning_rate": 0.0001,
158
- "loss": 0.233,
159
- "step": 22
160
- },
161
- {
162
- "epoch": 1.92,
163
- "learning_rate": 0.0001,
164
- "loss": 0.2733,
165
- "step": 23
166
- },
167
- {
168
- "epoch": 2.0,
169
- "learning_rate": 0.0001,
170
- "loss": 0.2045,
171
- "step": 24
172
- },
173
- {
174
- "epoch": 2.08,
175
- "learning_rate": 0.0001,
176
- "loss": 0.1885,
177
- "step": 25
178
- },
179
- {
180
- "epoch": 2.17,
181
- "learning_rate": 0.0001,
182
- "loss": 0.2345,
183
- "step": 26
184
- },
185
- {
186
- "epoch": 2.25,
187
- "learning_rate": 0.0001,
188
- "loss": 0.2209,
189
- "step": 27
190
- },
191
- {
192
- "epoch": 2.33,
193
- "learning_rate": 0.0001,
194
- "loss": 0.2146,
195
- "step": 28
196
- },
197
- {
198
- "epoch": 2.42,
199
- "learning_rate": 0.0001,
200
- "loss": 0.1555,
201
- "step": 29
202
- },
203
- {
204
- "epoch": 2.5,
205
- "learning_rate": 0.0001,
206
- "loss": 0.2355,
207
- "step": 30
208
- },
209
- {
210
- "epoch": 2.5,
211
- "eval_accuracy": 0.0107684418723903,
212
- "eval_loss": 0.27231642603874207,
213
- "eval_runtime": 2.1494,
214
- "eval_samples_per_second": 10.235,
215
- "eval_steps_per_second": 0.465,
216
- "step": 30
217
- },
218
- {
219
- "epoch": 2.58,
220
- "learning_rate": 0.0001,
221
- "loss": 0.1539,
222
- "step": 31
223
- },
224
- {
225
- "epoch": 2.67,
226
- "learning_rate": 0.0001,
227
- "loss": 0.119,
228
- "step": 32
229
- },
230
- {
231
- "epoch": 2.75,
232
- "learning_rate": 0.0001,
233
- "loss": 0.2046,
234
- "step": 33
235
- },
236
- {
237
- "epoch": 2.83,
238
- "learning_rate": 0.0001,
239
- "loss": 0.1672,
240
- "step": 34
241
- },
242
- {
243
- "epoch": 2.92,
244
- "learning_rate": 0.0001,
245
- "loss": 0.1556,
246
- "step": 35
247
- },
248
- {
249
- "epoch": 3.0,
250
- "learning_rate": 0.0001,
251
- "loss": 0.161,
252
- "step": 36
253
- }
254
- ],
255
- "logging_steps": 1.0,
256
- "max_steps": 60,
257
- "num_input_tokens_seen": 0,
258
- "num_train_epochs": 5,
259
- "save_steps": 500,
260
- "total_flos": 7706157613056.0,
261
- "train_batch_size": 4,
262
- "trial_name": null,
263
- "trial_params": null
264
- }