ceyda committed
Commit 3dee02c
1 Parent(s): ab8048e

fix attention flag
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
   metrics:
   - name: Test WER
     type: wer
-    value: 24.91
+    value: 22.60
 ---
 
 # Wav2Vec2-Base-760-Turkish
@@ -102,11 +102,13 @@ test_dataset = test_dataset.map(speech_file_to_array_fn)
 
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
+
+# Attention mask is not used because the base model was not trained with it. Reference: https://github.com/huggingface/transformers/blob/403d530eec105c0e229fc2b754afdf77a4439def/src/transformers/models/wav2vec2/tokenization_wav2vec2.py#L305
 def evaluate(batch):
     inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
 
     with torch.no_grad():
-        logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
+        logits = model(inputs.input_values.to("cuda")).logits
 
     pred_ids = torch.argmax(logits, dim=-1)
     batch["pred_strings"] = processor.batch_decode(pred_ids, skip_special_tokens=True)
@@ -117,7 +119,9 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:.2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
 
-**Test Result**: 24.91 % (in progress)
+**Test Results**:
+- WER: 22.602390
+- CER: 6.054137
 
 
 ## Training
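For readers skimming the diff, the sketch below shows the evaluation step as it reads after this commit, with the `attention_mask` argument removed. It is a minimal reconstruction, not the full README script: the repo id `ceyda/wav2vec2-base-760-turkish` and the `Wav2Vec2ForCTC`/`Wav2Vec2Processor` classes are assumptions inferred from the card title, and the dataset loading is omitted.

```python
# Minimal sketch of the post-commit evaluation step.
# Assumptions: repo id and model/processor classes; dataset loading omitted.
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

model_id = "ceyda/wav2vec2-base-760-turkish"  # assumed repo id, not stated in the diff
processor = Wav2Vec2Processor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id).to("cuda")

def evaluate(batch):
    inputs = processor(batch["speech"], sampling_rate=16_000,
                       return_tensors="pt", padding=True)
    with torch.no_grad():
        # attention_mask is deliberately not passed: the base model was
        # pretrained without one (see the commit message and README comment).
        logits = model(inputs.input_values.to("cuda")).logits
    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_strings"] = processor.batch_decode(pred_ids, skip_special_tokens=True)
    return batch
```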
preprocessor_config.json CHANGED
@@ -2,7 +2,7 @@
   "do_normalize": true,
   "feature_size": 1,
   "padding_side": "right",
-  "padding_value": 0.0,
+  "padding_value": 0,
   "return_attention_mask": true,
   "sampling_rate": 16000
 }
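The `padding_value` edit appears cosmetic: JSON `0` and `0.0` deserialize to the same number, so padded frames are still filled with zeros. Note that `return_attention_mask` stays `true`; the README simply stops forwarding the mask to the model. A small sketch, assuming these exact config values:

```python
# Sketch: both spellings of padding_value load to the same feature extractor.
from transformers import Wav2Vec2FeatureExtractor

extractor = Wav2Vec2FeatureExtractor(
    feature_size=1,
    sampling_rate=16000,
    padding_value=0,             # was 0.0 before this commit; numerically identical
    do_normalize=True,
    return_attention_mask=True,  # still produced, just no longer passed to the model
)
print(float(extractor.padding_value) == 0.0)  # True either way
```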
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ac7eaad990b15315d1772928ea15b9c77d2e259311b5189f9772b04da157294
+oid sha256:adeefd83b89a25212c0d6c74b43b28e367e54cc7fbce63599927f7bc6d2b8ae9
 size 377691502
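`pytorch_model.bin` is stored with Git LFS, so the diff only shows the pointer file: the `oid` (the SHA-256 of the blob) changes while the byte size happens to stay the same. A hedged sketch for verifying a downloaded checkpoint against the new pointer:

```python
# Sketch: check a downloaded pytorch_model.bin against the new pointer's oid.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()

expected = "adeefd83b89a25212c0d6c74b43b28e367e54cc7fbce63599927f7bc6d2b8ae9"
print(sha256_of("pytorch_model.bin") == expected)
```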
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": "/home/ceyda/workspace/libs/fairseq/hf_finetuned_output/special_tokens_map.json", "tokenizer_file": null}
+{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|"}
vocab.json CHANGED
@@ -1,39 +1 @@
-{"|": 4,
- "p": 5,
- "i": 6,
- "r": 7,
- "n": 8,
- "s": 9,
- "ö": 10,
- "z": 11,
- "l": 12,
- "e": 13,
- "h": 14,
- "â": 15,
- "y": 16,
- "a": 17,
- "k": 18,
- "ı": 19,
- "o": 20,
- "m": 21,
- "ü": 22,
- "g": 23,
- "c": 24,
- "b": 25,
- "ş": 26,
- "d": 27,
- "u": 28,
- "t": 29,
- "ç": 30,
- "ğ": 31,
- "v": 32,
- "f": 33,
- "j": 34,
- "x": 35,
- "w": 36,
- "q": 37,
- "î": 38,
- "<s>": 0,
- "<pad>": 1,
- "</s>": 2,
- "<unk>": 3}
+{"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "|": 4, "p": 5, "i": 6, "r": 7, "n": 8, "s": 9, "ö": 10, "z": 11, "l": 12, "e": 13, "h": 14, "â": 15, "y": 16, "a": 17, "k": 18, "ı": 19, "o": 20, "m": 21, "ü": 22, "g": 23, "c": 24, "b": 25, "ş": 26, "d": 27, "u": 28, "t": 29, "ç": 30, "ğ": 31, "v": 32, "f": 33, "j": 34, "x": 35, "w": 36, "q": 37, "î": 38}