lewtun HF staff committed on
Commit
0828815
1 Parent(s): 62fb2cc

Model save

Browse files
README.md CHANGED
@@ -60,12 +60,12 @@ The following hyperparameters were used during training:
60
 
61
  ### Training results
62
 
63
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
- | 0.5606 | 0.21 | 100 | 0.5444 | -0.6103 | -1.2336 | 0.7539 | 0.6232 | -380.4666 | -337.5370 | -2.9992 | -2.9936 |
66
- | 0.5399 | 0.42 | 200 | 0.5208 | -0.8033 | -1.6027 | 0.7773 | 0.7994 | -417.3810 | -356.8376 | -2.7196 | -2.7673 |
67
- | 0.5025 | 0.63 | 300 | 0.5035 | -0.9722 | -1.9486 | 0.7852 | 0.9764 | -451.9680 | -373.7211 | -2.1705 | -2.3401 |
68
- | 0.5005 | 0.84 | 400 | 0.4964 | -1.0116 | -2.0114 | 0.7969 | 0.9998 | -458.2532 | -377.6674 | -2.3544 | -2.4795 |
69
 
70
 
71
  ### Framework versions
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
64
+ |:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
65
+ | 0.5606 | 0.21 | 100 | -2.9936 | -2.9992 | -337.5370 | -380.4666 | 0.5444 | 0.7539 | -0.6103 | 0.6232 | -1.2336 |
66
+ | 0.5399 | 0.42 | 200 | -2.7673 | -2.7196 | -356.8376 | -417.3810 | 0.5208 | 0.7773 | -0.8033 | 0.7994 | -1.6027 |
67
+ | 0.5025 | 0.63 | 300 | -2.3401 | -2.1705 | -373.7211 | -451.9680 | 0.5035 | 0.7852 | -0.9722 | 0.9764 | -1.9486 |
68
+ | 0.5005 | 0.84 | 400 | -2.4795 | -2.3544 | -377.6674 | -458.2532 | 0.4964 | 0.7969 | -1.0116 | 0.9998 | -2.0114 |
69
 
70
 
71
  ### Framework versions
all_results.json CHANGED
@@ -9,13 +9,13 @@
9
  "eval_rewards/chosen": -0.9225484132766724,
10
  "eval_rewards/margins": 1.0126549005508423,
11
  "eval_rewards/rejected": -1.9352033138275146,
12
- "eval_runtime": 89.0705,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 22.454,
15
- "eval_steps_per_second": 0.359,
16
- "train_loss": 0.5373976499964502,
17
- "train_runtime": 5297.1261,
18
  "train_samples": 61155,
19
- "train_samples_per_second": 11.545,
20
- "train_steps_per_second": 0.09
21
  }
 
9
  "eval_rewards/chosen": -0.9225484132766724,
10
  "eval_rewards/margins": 1.0126549005508423,
11
  "eval_rewards/rejected": -1.9352033138275146,
12
+ "eval_runtime": 86.9848,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 22.993,
15
+ "eval_steps_per_second": 0.368,
16
+ "train_loss": 0.08028295149863016,
17
+ "train_runtime": 773.4064,
18
  "train_samples": 61155,
19
+ "train_samples_per_second": 79.072,
20
+ "train_steps_per_second": 0.618
21
  }
eval_results.json CHANGED
@@ -9,8 +9,8 @@
9
  "eval_rewards/chosen": -0.9225484132766724,
10
  "eval_rewards/margins": 1.0126549005508423,
11
  "eval_rewards/rejected": -1.9352033138275146,
12
- "eval_runtime": 89.0705,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 22.454,
15
- "eval_steps_per_second": 0.359
16
  }
 
9
  "eval_rewards/chosen": -0.9225484132766724,
10
  "eval_rewards/margins": 1.0126549005508423,
11
  "eval_rewards/rejected": -1.9352033138275146,
12
+ "eval_runtime": 86.9848,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 22.993,
15
+ "eval_steps_per_second": 0.368
16
  }
runs/Jan04_15-24-24_ip-26-0-172-73/events.out.tfevents.1704381944.ip-26-0-172-73.2335235.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778321c06a661ef16a7dc9f3f9747bc041b58741a7cf7833b784efefb450e4ca
3
+ size 9264
runs/Jan04_15-24-24_ip-26-0-172-73/events.out.tfevents.1704382805.ip-26-0-172-73.2335235.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe41322f218fef66fe99848864bfb75653d8fc2930784cf2fac3bdc7813ae2c
3
+ size 828
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5373976499964502,
4
- "train_runtime": 5297.1261,
5
  "train_samples": 61155,
6
- "train_samples_per_second": 11.545,
7
- "train_steps_per_second": 0.09
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.08028295149863016,
4
+ "train_runtime": 773.4064,
5
  "train_samples": 61155,
6
+ "train_samples_per_second": 79.072,
7
+ "train_steps_per_second": 0.618
8
  }
trainer_state.json CHANGED
@@ -748,10 +748,10 @@
748
  "epoch": 1.0,
749
  "step": 478,
750
  "total_flos": 0.0,
751
- "train_loss": 0.5373976499964502,
752
- "train_runtime": 5297.1261,
753
- "train_samples_per_second": 11.545,
754
- "train_steps_per_second": 0.09
755
  }
756
  ],
757
  "logging_steps": 10,
 
748
  "epoch": 1.0,
749
  "step": 478,
750
  "total_flos": 0.0,
751
+ "train_loss": 0.08028295149863016,
752
+ "train_runtime": 773.4064,
753
+ "train_samples_per_second": 79.072,
754
+ "train_steps_per_second": 0.618
755
  }
756
  ],
757
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f027d41f29be82338938a0b16542f35f8042f6098a904bca214b45fed5284b2c
3
  size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426b16da5d6c49955d8ee8f36eaf09fdcabfc25874f0ff18a47e145cecb00c63
3
  size 5944