jonathanagustin committed on
Commit
5af950e
1 Parent(s): 31e0135

Model save

Browse files
Files changed (5) hide show
  1. README.md +65 -28
  2. repo_card.md +31 -0
  3. tokenizer.json +3 -5
  4. trainer_state.json +6 -6
  5. training_args.bin +1 -1
README.md CHANGED
@@ -1,31 +1,68 @@
1
  ---
2
- {}
 
 
 
 
 
 
3
  ---
4
 
5
- # distilbert-finetuned-uncased Model
6
-
7
- This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
8
-
9
- ## Training Procedure
10
- - Number of Epochs: 2
11
- - Learning Rate: 2e-05
12
- - Batch Size: 128 (per device)
13
- - Evaluation Strategy: Every 100 steps
14
- - Save Strategy: Every 100 steps
15
- - FP16 Training: Yes
16
-
17
- ## Evaluation Results
18
- - Exact Match: 23.347090036216628
19
- - F1 Score: 26.869992349988973
20
- - Total: 11873
21
- - Has Answer Exact: 38.630229419703106
22
- - Has Answer F1: 45.686136837283904
23
- - Has Answer Total: 5928
24
- - No Answer Exact: 8.107653490328007
25
- - No Answer F1: 8.107653490328007
26
- - No Answer Total: 5945
27
- - Best Exact: 50.11370336056599
28
- - Best Exact Threshold: 0.0
29
- - Best F1: 50.11370336056599
30
- - Best F1 Threshold: 0.0
31
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - generated_from_trainer
4
+ datasets:
5
+ - squad_v2
6
+ model-index:
7
+ - name: distilbert-finetuned-uncased-squad_v2
8
+ results: []
9
  ---
10
 
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # distilbert-finetuned-uncased-squad_v2
15
+
16
+ This model was trained from scratch on the squad_v2 dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 1.3930
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 2e-05
38
+ - train_batch_size: 128
39
+ - eval_batch_size: 128
40
+ - seed: 42
41
+ - gradient_accumulation_steps: 4
42
+ - total_train_batch_size: 512
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 4
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss |
50
+ |:-------------:|:-----:|:----:|:---------------:|
51
+ | 3.6437 | 0.39 | 100 | 2.1780 |
52
+ | 2.1596 | 0.78 | 200 | 1.6557 |
53
+ | 1.8138 | 1.18 | 300 | 1.5683 |
54
+ | 1.6987 | 1.57 | 400 | 1.5076 |
55
+ | 1.6586 | 1.96 | 500 | 1.5350 |
56
+ | 1.5957 | 1.18 | 600 | 1.4431 |
57
+ | 1.5825 | 1.37 | 700 | 1.4955 |
58
+ | 1.5523 | 1.57 | 800 | 1.4444 |
59
+ | 1.5346 | 1.76 | 900 | 1.3930 |
60
+ | 1.5098 | 1.96 | 1000 | 1.4285 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.34.1
66
+ - Pytorch 2.1.0+cu118
67
+ - Datasets 2.14.5
68
+ - Tokenizers 0.14.1
repo_card.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ {}
3
+ ---
4
+
5
+ # distilbert-finetuned-uncased Model
6
+
7
+ This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
8
+
9
+ ## Training Procedure
10
+ - Number of Epochs: 2
11
+ - Learning Rate: 2e-05
12
+ - Batch Size: 128 (per device)
13
+ - Evaluation Strategy: Every 100 steps
14
+ - Save Strategy: Every 100 steps
15
+ - FP16 Training: Yes
16
+
17
+ ## Evaluation Results
18
+ - Exact Match: 23.347090036216628
19
+ - F1 Score: 26.869992349988973
20
+ - Total: 11873
21
+ - Has Answer Exact: 38.630229419703106
22
+ - Has Answer F1: 45.686136837283904
23
+ - Has Answer Total: 5928
24
+ - No Answer Exact: 8.107653490328007
25
+ - No Answer F1: 8.107653490328007
26
+ - No Answer Total: 5945
27
+ - Best Exact: 50.11370336056599
28
+ - Best Exact Threshold: 0.0
29
+ - Best F1: 50.11370336056599
30
+ - Best F1 Threshold: 0.0
31
+
tokenizer.json CHANGED
@@ -3,13 +3,11 @@
3
  "truncation": {
4
  "direction": "Right",
5
  "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
  },
9
  "padding": {
10
- "strategy": {
11
- "Fixed": 512
12
- },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
15
  "pad_id": 0,
 
3
  "truncation": {
4
  "direction": "Right",
5
  "max_length": 512,
6
+ "strategy": "OnlySecond",
7
+ "stride": 128
8
  },
9
  "padding": {
10
+ "strategy": "BatchLongest",
 
 
11
  "direction": "Right",
12
  "pad_to_multiple_of": null,
13
  "pad_id": 0,
trainer_state.json CHANGED
@@ -153,16 +153,16 @@
153
  "step": 1000,
154
  "total_flos": 5.015589595888435e+16,
155
  "train_loss": 0.0,
156
- "train_runtime": 0.354,
157
- "train_samples_per_second": 737393.791,
158
- "train_steps_per_second": 1440.851
159
  },
160
  {
161
  "epoch": 1.96,
162
  "eval_loss": 1.3930128812789917,
163
- "eval_runtime": 8.2561,
164
- "eval_samples_per_second": 1449.724,
165
- "eval_steps_per_second": 11.386,
166
  "step": 1000
167
  }
168
  ],
 
153
  "step": 1000,
154
  "total_flos": 5.015589595888435e+16,
155
  "train_loss": 0.0,
156
+ "train_runtime": 0.3572,
157
+ "train_samples_per_second": 730670.816,
158
+ "train_steps_per_second": 1427.715
159
  },
160
  {
161
  "epoch": 1.96,
162
  "eval_loss": 1.3930128812789917,
163
+ "eval_runtime": 8.2864,
164
+ "eval_samples_per_second": 1444.423,
165
+ "eval_steps_per_second": 11.344,
166
  "step": 1000
167
  }
168
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b43115521096779b46ae6f7b82e2bd13f806b3fd3b5fbd681a6c2ae5c41ef56
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c2218707d8f17a87a80bd2f04a5dd940a8048c67f7e922aee33e6506357a060
3
  size 4664