Ocelotr committed on
Commit d5a768b
1 Parent(s): 76e4014

Update hyperparams.yaml

Files changed (1)
  1. hyperparams.yaml +47 -160
hyperparams.yaml CHANGED
@@ -1,174 +1,61 @@
- # Generated 2023-10-04 from:
- # /home/wakeb/Abdulrahman-tts/speechbrain/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
- # yamllint disable
- # ################################
- # Model: Speaker identification with ECAPA
- # Authors: Hwidong Na & Mirco Ravanelli
- # ################################
-
- # Basic parameters
- seed: 10
- __set_seed: !apply:torch.manual_seed [10]
- output_folder: /media/wakeb/T7 Touch/speechbrain10
- save_folder: /media/wakeb/T7 Touch/speechbrain10/save
- train_log: /media/wakeb/T7 Touch/speechbrain10/train_log.txt
-
- # Data files
- data_folder: /media/wakeb/T7 Touch/data_qasr/ # e.g. /path/to/Voxceleb
- train_annotation: /media/wakeb/T7 Touch/data_qasr/train.csv
- valid_annotation: /media/wakeb/T7 Touch/data_qasr/dev.csv
-
- # Folder to extract data augmentation files
- rir_folder: /media/wakeb/T7 Touch/data_qasr/ # Change it if needed
-
- # Use the following links for the official voxceleb splits:
- # VoxCeleb1 (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
- # VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
- # VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt.
- # VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training set.
- # Therefore you cannot use any files in VoxCeleb1 for training if you are using these lists for testing.
- verification_file: /home/wakeb/Abdulrahman-tts/clovaai/voxceleb_trainer/data/test_list.txt
-
- split_ratio: [90, 10]
- skip_prep: false
- ckpt_interval_minutes: 15 # save checkpoint every N min
-
- # Training parameters
- number_of_epochs: 20
- batch_size: 140
- lr: 0.0008
- lr_final: 0.0001
-
- sample_rate: 16000
- sentence_len: 3 # seconds
- shuffle: false
- random_chunk: true

  # Feature parameters
  n_mels: 24
- left_frames: 0
- right_frames: 0
- deltas: false
-
- # Number of speakers
- out_n_neurons: 4112
- emb_dim: 512
-
- dataloader_options:
-     batch_size: 140
-     shuffle: false
-     num_workers: 0
-
- # Functions
- compute_features: &id006 !new:speechbrain.lobes.features.Fbank
-     n_mels: 24
-     left_frames: 0
-     right_frames: 0
-     deltas: false

- embedding_model: &id007 !new:speechbrain.lobes.models.Xvector.Xvector
-     in_channels: 24
-     activation: !name:torch.nn.LeakyReLU
-     tdnn_blocks: 5
-     tdnn_channels: [512, 512, 512, 512, 1500]
-     tdnn_kernel_sizes: [5, 3, 3, 1, 1]
-     tdnn_dilations: [1, 2, 3, 1, 1]
-     lin_neurons: 512
+ # Pretrain folder (HuggingFace)
+ pretrained_path: Ocelotr/xvec-qasr

- classifier: &id008 !new:speechbrain.lobes.models.Xvector.Classifier
-     input_shape: [null, null, 512]
-     activation: !name:torch.nn.LeakyReLU
-     lin_blocks: 1
-     lin_neurons: 512
-     out_neurons: 4112
+ # Output parameters
+ out_n_neurons: 1734

- epoch_counter: &id010 !new:speechbrain.utils.epoch_loop.EpochCounter
-     limit: 20


- augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
-     sample_rate: 16000
-     speeds: [100]
+ # Model params
+ compute_features: !new:speechbrain.lobes.features.Fbank
+     n_mels: !ref <n_mels>

- augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
-     sample_rate: 16000
-     speeds: [95, 100, 105]
+ mean_var_norm: !new:speechbrain.processing.features.InputNormalization
+     norm_type: sentence
+     std_norm: False

- add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
-     openrir_folder: /media/wakeb/T7 Touch/data_qasr/
-     openrir_max_noise_len: 3.0 # seconds
-     reverb_prob: 1.0
-     noise_prob: 0.0
-     noise_snr_low: 0
-     noise_snr_high: 15
-     rir_scale_factor: 1.0
+ embedding_model: !new:speechbrain.lobes.models.Xvector.Xvector
+     in_channels: !ref <n_mels>
+     activation: !name:torch.nn.LeakyReLU
+     tdnn_blocks: 5
+     tdnn_channels: [512, 512, 512, 512, 1500]
+     tdnn_kernel_sizes: [5, 3, 3, 1, 1]
+     tdnn_dilations: [1, 2, 3, 1, 1]
+     lin_neurons: 512

- add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
-     openrir_folder: /media/wakeb/T7 Touch/data_qasr/
-     openrir_max_noise_len: 3.0 # seconds
-     reverb_prob: 0.0
-     noise_prob: 1.0
-     noise_snr_low: 0
-     noise_snr_high: 15
-     rir_scale_factor: 1.0
+ classifier: !new:speechbrain.lobes.models.Xvector.Classifier
+     input_shape: [null, null, 512]
+     activation: !name:torch.nn.LeakyReLU
+     lin_blocks: 1
+     lin_neurons: 512
+     out_neurons: !ref <out_n_neurons>

- add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
-     openrir_folder: /media/wakeb/T7 Touch/data_qasr/
-     openrir_max_noise_len: 3.0 # seconds
-     reverb_prob: 1.0
-     noise_prob: 1.0
-     noise_snr_low: 0
-     noise_snr_high: 15
-     rir_scale_factor: 1.0
-
-
- # Definition of the augmentation pipeline.
- # If concat_augment = False, the augmentation techniques are applied
- # in sequence. If concat_augment = True, all the augmented signals
- # are concatenated in a single big batch.
- augment_pipeline: [*id001, *id002, *id003, *id004, *id005]
- concat_augment: true
-
- mean_var_norm: &id009 !new:speechbrain.processing.features.InputNormalization
-
- # Cost + optimization
-     norm_type: sentence
-     std_norm: false
+ mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
+     norm_type: global
+     std_norm: False

  modules:
-     compute_features: *id006
-     augment_wavedrop: *id001
-     augment_speed: *id002
-     add_rev: *id003
-     add_noise: *id004
-     add_rev_noise: *id005
-     embedding_model: *id007
-     classifier: *id008
-     mean_var_norm: *id009
- compute_cost: !name:speechbrain.nnet.losses.nll_loss
- # compute_error: !name:speechbrain.nnet.losses.classification_error
-
- opt_class: !name:torch.optim.Adam
-     lr: 0.0008
-     weight_decay: 0.000002
-
- lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
-     initial_value: 0.0008
-     final_value: 0.0001
-     epoch_count: 20
-
- # Logging + checkpoints
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
-     save_file: /media/wakeb/T7 Touch/speechbrain10/train_log.txt
-
- error_stats: !name:speechbrain.utils.metric_stats.MetricStats
-     metric: !name:speechbrain.nnet.losses.classification_error
-     reduction: batch
-
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
-     checkpoints_dir: /media/wakeb/T7 Touch/speechbrain10/save
-     recoverables:
-         embedding_model: *id007
-         classifier: *id008
-         normalizer: *id009
-         counter: *id010
+     compute_features: !ref <compute_features>
+     mean_var_norm: !ref <mean_var_norm>
+     embedding_model: !ref <embedding_model>
+     mean_var_norm_emb: !ref <mean_var_norm_emb>
+     classifier: !ref <classifier>
+
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
+
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+     loadables:
+         embedding_model: !ref <embedding_model>
+         mean_var_norm_emb: !ref <mean_var_norm_emb>
+         classifier: !ref <classifier>
+         label_encoder: !ref <label_encoder>
+     paths:
+         embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+         mean_var_norm_emb: !ref <pretrained_path>/mean_var_norm_emb.ckpt
+         classifier: !ref <pretrained_path>/classifier.ckpt
+         label_encoder: !ref <pretrained_path>/label_encoder.txt
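With the training-only sections removed and a pretrainer block added, this hyperparams.yaml now follows the layout SpeechBrain's pretrained-model interfaces expect (modules, label_encoder, pretrainer), so the repo should be loadable straight from the Hub. A minimal usage sketch, assuming the file is meant to be consumed through speechbrain.pretrained.EncoderClassifier; the interface choice and the sample.wav path are illustrative assumptions, not part of this commit:

import torchaudio
from speechbrain.pretrained import EncoderClassifier  # SpeechBrain 0.5.x namespace

# from_hparams fetches hyperparams.yaml plus the files listed under `paths:`
# (embedding_model.ckpt, mean_var_norm_emb.ckpt, classifier.ckpt,
# label_encoder.txt) and runs the Pretrainer to populate the modules.
classifier = EncoderClassifier.from_hparams(
    source="Ocelotr/xvec-qasr",
    savedir="pretrained_models/xvec-qasr",
)

signal, fs = torchaudio.load("sample.wav")  # hypothetical 16 kHz mono clip

# 512-dim x-vector from `embedding_model` (lin_neurons: 512).
embeddings = classifier.encode_batch(signal)

# Closed-set speaker ID over the 1734 classes declared by `out_n_neurons`,
# decoded to string labels via label_encoder.txt.
out_prob, score, index, text_lab = classifier.classify_batch(signal)
print(text_lab)

Note the classifier head only yields meaningful labels for speakers seen in training; for open-set verification, compare `encode_batch` embeddings (e.g. with cosine similarity) instead.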