Ocelotr committed on
Commit
22270ba
1 Parent(s): 7986803

Update hyperparams.yaml

Files changed (1):
  1. hyperparams.yaml +47 -161
hyperparams.yaml CHANGED
@@ -1,174 +1,60 @@
- # Generated 2023-10-18 from:
- # /home/wakeb/Abdulrahman-tts/speechbrain/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
- # yamllint disable
- # ################################
- # Model: Speaker identification with ECAPA
- # Authors: Hwidong Na & Mirco Ravanelli
- # ################################
-
- # Basic parameters
- seed: 651
- __set_seed: !apply:torch.manual_seed [651]
- output_folder: /media/wakeb/T7 Touch/speechbrain651
- save_folder: /media/wakeb/T7 Touch/speechbrain651/save
- train_log: /media/wakeb/T7 Touch/speechbrain651/train_log.txt
-
- # Data files
- data_folder: /media/wakeb/T7 Touch/data_qasr/ # e.g. /path/to/Voxceleb
- train_annotation: /media/wakeb/T7 Touch/data_qasr/train_reworked.csv
- valid_annotation: /media/wakeb/T7 Touch/data_qasr/dev_reworked.csv
-
- # Folder to extract data augmentation files
- rir_folder: /media/wakeb/T7 Touch/data_qasr/ # Change it if needed
-
- # Use the following links for the official voxceleb splits:
- # VoxCeleb1 (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
- # VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
- # VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt.
- # VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training set.
- # Therefore you cannot use any files in VoxCeleb1 for training if you are using these lists for testing.
- verification_file: /media/wakeb/T7 Touch/data_qasr/testing.txt
-
- split_ratio: [90, 10]
- skip_prep: true
- ckpt_interval_minutes: 15 # save checkpoint every N min
-
- # Training parameters
- number_of_epochs: 30
- batch_size: 140
- lr: 0.001
- lr_final: 0.000001
-
- sample_rate: 16000
- sentence_len: 3 # seconds
- shuffle: false
- random_chunk: true
-
  # Feature parameters
  n_mels: 24
- left_frames: 0
- right_frames: 0
- deltas: false
-
- # Number of speakers
- out_n_neurons: 2917
- emb_dim: 512
-
- dataloader_options:
-     batch_size: 140
-     shuffle: false
-     num_workers: 0

- # Functions
- compute_features: &id006 !new:speechbrain.lobes.features.Fbank
-     n_mels: 24
-     left_frames: 0
-     right_frames: 0
-     deltas: false

- embedding_model: &id007 !new:speechbrain.lobes.models.Xvector.Xvector
-     in_channels: 24
-     activation: !name:torch.nn.LeakyReLU
-     tdnn_blocks: 5
-     tdnn_channels: [512, 512, 512, 512, 1500]
-     tdnn_kernel_sizes: [5, 3, 3, 1, 1]
-     tdnn_dilations: [1, 2, 3, 1, 1]
-     lin_neurons: 512
-
- classifier: &id008 !new:speechbrain.lobes.models.Xvector.Classifier
-     input_shape: [null, null, 512]
-     activation: !name:torch.nn.LeakyReLU
-     lin_blocks: 1
-     lin_neurons: 512
-     out_neurons: 2917
-
- epoch_counter: &id010 !new:speechbrain.utils.epoch_loop.EpochCounter
-     limit: 30
-
-
- augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
-     sample_rate: 16000
-     speeds: [100]
-
- augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
-     sample_rate: 16000
-     speeds: [95, 100, 105]

- add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
-     openrir_folder: /media/wakeb/T7 Touch/data_qasr/
-     openrir_max_noise_len: 3.0 # seconds
-     reverb_prob: 1.0
-     noise_prob: 0.0
-     noise_snr_low: 0
-     noise_snr_high: 15
-     rir_scale_factor: 1.0

- add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
-     openrir_folder: /media/wakeb/T7 Touch/data_qasr/
-     openrir_max_noise_len: 3.0 # seconds
-     reverb_prob: 0.0
-     noise_prob: 1.0
-     noise_snr_low: 0
-     noise_snr_high: 15
-     rir_scale_factor: 1.0

- add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
-     openrir_folder: /media/wakeb/T7 Touch/data_qasr/
-     openrir_max_noise_len: 3.0 # seconds
-     reverb_prob: 1.0
-     noise_prob: 1.0
-     noise_snr_low: 0
-     noise_snr_high: 15
-     rir_scale_factor: 1.0

- # Definition of the augmentation pipeline.
- # If concat_augment = False, the augmentation techniques are applied
- # in sequence. If concat_augment = True, all the augmented signals
- # are concatenated in a single big batch.
- augment_pipeline: [*id001, *id002, *id003, *id004, *id005]
- concat_augment: true

- mean_var_norm: &id009 !new:speechbrain.processing.features.InputNormalization

- # Cost + optimization
-     norm_type: sentence
-     std_norm: false

  modules:
-     compute_features: *id006
-     augment_wavedrop: *id001
-     augment_speed: *id002
-     add_rev: *id003
-     add_noise: *id004
-     add_rev_noise: *id005
-     embedding_model: *id007
-     classifier: *id008
-     mean_var_norm: *id009
- compute_cost: !name:speechbrain.nnet.losses.nll_loss
- # compute_error: !name:speechbrain.nnet.losses.classification_error
-
- opt_class: !name:torch.optim.Adam
-     lr: 0.001
-     weight_decay: 0.000002
-
- lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
-     initial_value: 0.001
-     final_value: 0.000001
-     epoch_count: 30
-
- # Logging + checkpoints
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
-     save_file: /media/wakeb/T7 Touch/speechbrain651/train_log.txt
-
- error_stats: !name:speechbrain.utils.metric_stats.MetricStats
-     metric: !name:speechbrain.nnet.losses.classification_error
-         reduction: batch
-
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
-     checkpoints_dir: /media/wakeb/T7 Touch/speechbrain651/save
-     recoverables:
-         embedding_model: *id007
-         classifier: *id008
-         normalizer: *id009
-         counter: *id010

  # Feature parameters
  n_mels: 24

+ # Pretrain folder (HuggingFace)
+ pretrained_path: Ocelotr/xvecver2

+ # Output parameters
+ out_n_neurons: 2917

+ # Model params
+ compute_features: !new:speechbrain.lobes.features.Fbank
+     n_mels: !ref <n_mels>

+ mean_var_norm: !new:speechbrain.processing.features.InputNormalization
+     norm_type: sentence
+     std_norm: False

+ embedding_model: !new:speechbrain.lobes.models.Xvector.Xvector
+     in_channels: !ref <n_mels>
+     activation: !name:torch.nn.LeakyReLU
+     tdnn_blocks: 5
+     tdnn_channels: [512, 512, 512, 512, 1500]
+     tdnn_kernel_sizes: [5, 3, 3, 1, 1]
+     tdnn_dilations: [1, 2, 3, 1, 1]
+     lin_neurons: 512

+ classifier: !new:speechbrain.lobes.models.Xvector.Classifier
+     input_shape: [null, null, 512]
+     activation: !name:torch.nn.LeakyReLU
+     lin_blocks: 1
+     lin_neurons: 512
+     out_neurons: !ref <out_n_neurons>

+ mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
+     norm_type: global
+     std_norm: False

  modules:
+     compute_features: !ref <compute_features>
+     mean_var_norm: !ref <mean_var_norm>
+     embedding_model: !ref <embedding_model>
+     mean_var_norm_emb: !ref <mean_var_norm_emb>
+     classifier: !ref <classifier>
+
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+     loadables:
+         embedding_model: !ref <embedding_model>
+         mean_var_norm_emb: !ref <mean_var_norm_emb>
+         classifier: !ref <classifier>
+         label_encoder: !ref <label_encoder>
+     paths:
+         embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+         mean_var_norm_emb: !ref <pretrained_path>/mean_var_norm_emb.ckpt
+         classifier: !ref <pretrained_path>/classifier.ckpt
+         label_encoder: !ref <pretrained_path>/label_encoder.txt
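
The update strips everything training-related (data paths, augmentation, optimizer, scheduler, checkpointing) and leaves a lean inference config: feature extraction, the x-vector model, and a Pretrainer that fetches the trained checkpoints from the Ocelotr/xvecver2 repo via the paths above. Below is a minimal sketch, not part of the commit, of how a file like this is typically consumed; it assumes SpeechBrain 0.5.x and its HyperPyYAML dependency, and "audio.wav" is a placeholder for a 16 kHz mono input.

import torch
import torchaudio
from hyperpyyaml import load_hyperpyyaml

# Instantiate every object declared in the YAML.
with open("hyperparams.yaml") as fin:
    hparams = load_hyperpyyaml(fin)

# Fetch the files listed under `paths:` (here from the HuggingFace repo named
# in `pretrained_path`) and load them into the modules under `loadables:`.
hparams["pretrainer"].collect_files()
hparams["pretrainer"].load_collected()

# Extract one x-vector from one utterance.
signal, fs = torchaudio.load("audio.wav")  # placeholder input file
with torch.no_grad():
    feats = hparams["compute_features"](signal)             # 24-dim fbanks
    feats = hparams["mean_var_norm"](feats, torch.ones(1))  # sentence CMVN
    embedding = hparams["embedding_model"](feats)           # -> (1, 1, 512)
print(embedding.shape)

If the repo follows the standard layout for SpeechBrain pretrained models, the higher-level wrapper speechbrain.pretrained.EncoderClassifier.from_hparams(source="Ocelotr/xvecver2") performs the same download-and-load in one call, with encode_batch returning the embeddings and classify_batch using the classifier and label encoder restored above.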