jonathanagustin committed on
Commit
79c410d
1 Parent(s): f7ee333

Model save

Browse files
Files changed (4) hide show
  1. README.md +65 -62
  2. tokenizer.json +3 -5
  3. trainer_state.json +6 -6
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,65 +1,68 @@
1
  ---
2
- {}
 
 
 
 
 
 
3
  ---
4
 
5
- ---
6
- language:
7
- - en
8
- license: mit
9
- tags:
10
- - question-answering
11
- - SQuAD
12
- - BERT
13
- datasets:
14
- - squad
15
- metrics:
16
- - f1
17
- - em
18
-
19
- model-index:
20
- - name: squad-v2-bert-base-finetuned
21
- results:
22
- - task:
23
- type: question-answering
24
- name: SQuAD Question Answering
25
- dataset:
26
- type: squad_v2
27
- name: SQuAD v2
28
- split: validation
29
- metrics:
30
- - type: f1
31
- value: 26.869992349988973
32
- name: F1 Score
33
- - type: em
34
- value: 23.347090036216628
35
- name: Exact Match
36
- verified: true
37
- ---
38
-
39
- # distilbert-finetuned-uncased
40
-
41
- This model is fine-tuned on SQuAD v2 for question answering tasks.
42
-
43
- ## Training Procedure
44
- - Number of Epochs: 4
45
- - Learning Rate: 2e-05
46
- - Batch Size: 128 (per device)
47
- - Evaluation Strategy: Every 100 steps
48
- - Save Strategy: Every 100 steps
49
- - FP16 Training: Yes
50
-
51
- ## Evaluation Results
52
- - Exact Match: 23.347090036216628
53
- - F1 Score: 26.869992349988973
54
- - Total: 11873
55
- - Has Answer Exact: 38.630229419703106
56
- - Has Answer F1: 45.686136837283904
57
- - Has Answer Total: 5928
58
- - No Answer Exact: 8.107653490328007
59
- - No Answer F1: 8.107653490328007
60
- - No Answer Total: 5945
61
- - Best Exact: 50.11370336056599
62
- - Best Exact Threshold: 0.0
63
- - Best F1: 50.11370336056599
64
- - Best F1 Threshold: 0.0
65
-
 
1
  ---
2
+ tags:
3
+ - generated_from_trainer
4
+ datasets:
5
+ - squad_v2
6
+ model-index:
7
+ - name: distilbert-finetuned-uncased-squad_v2
8
+ results: []
9
  ---
10
 
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # distilbert-finetuned-uncased-squad_v2
15
+
16
+ This model was fine-tuned on the squad_v2 dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 1.3930
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 2e-05
38
+ - train_batch_size: 128
39
+ - eval_batch_size: 128
40
+ - seed: 42
41
+ - gradient_accumulation_steps: 4
42
+ - total_train_batch_size: 512
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 4
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss |
50
+ |:-------------:|:-----:|:----:|:---------------:|
51
+ | 3.6437 | 0.39 | 100 | 2.1780 |
52
+ | 2.1596 | 0.78 | 200 | 1.6557 |
53
+ | 1.8138 | 1.18 | 300 | 1.5683 |
54
+ | 1.6987 | 1.57 | 400 | 1.5076 |
55
+ | 1.6586 | 1.96 | 500 | 1.5350 |
56
+ | 1.5957 | 2.35 | 600 | 1.4431 |
57
+ | 1.5825 | 2.75 | 700 | 1.4955 |
58
+ | 1.5523 | 3.14 | 800 | 1.4444 |
59
+ | 1.5346 | 3.53 | 900 | 1.3930 |
60
+ | 1.5098 | 3.92 | 1000 | 1.4285 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.34.1
66
+ - Pytorch 2.1.0+cu118
67
+ - Datasets 2.14.5
68
+ - Tokenizers 0.14.1
 
 
 
tokenizer.json CHANGED
@@ -3,13 +3,11 @@
3
  "truncation": {
4
  "direction": "Right",
5
  "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
  },
9
  "padding": {
10
- "strategy": {
11
- "Fixed": 512
12
- },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
15
  "pad_id": 0,
 
3
  "truncation": {
4
  "direction": "Right",
5
  "max_length": 512,
6
+ "strategy": "OnlySecond",
7
+ "stride": 128
8
  },
9
  "padding": {
10
+ "strategy": "BatchLongest",
 
 
11
  "direction": "Right",
12
  "pad_to_multiple_of": null,
13
  "pad_id": 0,
trainer_state.json CHANGED
@@ -153,16 +153,16 @@
153
  "step": 1020,
154
  "total_flos": 5.148633647651021e+16,
155
  "train_loss": 0.028946983113008386,
156
- "train_runtime": 26.8068,
157
- "train_samples_per_second": 19473.121,
158
- "train_steps_per_second": 38.05
159
  },
160
  {
161
  "epoch": 4.0,
162
  "eval_loss": 1.3930128812789917,
163
- "eval_runtime": 8.2969,
164
- "eval_samples_per_second": 1442.592,
165
- "eval_steps_per_second": 11.33,
166
  "step": 1020
167
  }
168
  ],
 
153
  "step": 1020,
154
  "total_flos": 5.148633647651021e+16,
155
  "train_loss": 0.028946983113008386,
156
+ "train_runtime": 26.811,
157
+ "train_samples_per_second": 19470.051,
158
+ "train_steps_per_second": 38.044
159
  },
160
  {
161
  "epoch": 4.0,
162
  "eval_loss": 1.3930128812789917,
163
+ "eval_runtime": 8.3512,
164
+ "eval_samples_per_second": 1433.204,
165
+ "eval_steps_per_second": 11.256,
166
  "step": 1020
167
  }
168
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c7ae8b9eb8e6eebcf1c334060dd09414a7a9d6f7ee56c74412375c0cdf85353
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d8a461b292d1c7d936212be6c7619b9698a1a89d0479a510f55cfd2a15f38a
3
  size 4664