# --- Web-page header captured together with the file (not part of the config) ---
# xvecver2 / hyperparams.yaml
# Ocelotr's picture
# Upload 10 files
# 0dcab9a
# raw
# history blame
# No virus
# 5.18 kB
# Generated 2023-10-18 from:
# /home/wakeb/Abdulrahman-tts/speechbrain/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
# yamllint disable
# ################################
# Model: Speaker identification with x-vectors
# Authors: Hwidong Na & Mirco Ravanelli
# ################################
# Basic parameters: random seed and the locations where results are written.
seed: 651
# Calls torch.manual_seed with the same value as `seed` when the file is loaded.
__set_seed: !apply:torch.manual_seed
  - 651
# Paths contain a space ("T7 Touch"), so they are quoted to keep them unambiguous.
output_folder: '/media/wakeb/T7 Touch/speechbrain651'
save_folder: '/media/wakeb/T7 Touch/speechbrain651/save'
train_log: '/media/wakeb/T7 Touch/speechbrain651/train_log.txt'
# Data files
# Dataset root (e.g. /path/to/Voxceleb) and the train/validation annotation CSVs.
data_folder: '/media/wakeb/T7 Touch/data_qasr/'
train_annotation: '/media/wakeb/T7 Touch/data_qasr/train_reworked.csv'
valid_annotation: '/media/wakeb/T7 Touch/data_qasr/dev_reworked.csv'

# Folder to extract data augmentation files into (change it if needed).
rir_folder: '/media/wakeb/T7 Touch/data_qasr/'

# Official VoxCeleb verification splits, for reference:
#   VoxCeleb1 (cleaned):   https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
#   VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
#   VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt
# The VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training set,
# so no VoxCeleb1 file may be used for training when testing on those lists.
verification_file: '/media/wakeb/T7 Touch/data_qasr/testing.txt'

split_ratio:
  - 90
  - 10
skip_prep: true
ckpt_interval_minutes: 15  # save a checkpoint every N minutes
# Training parameters
# These scalars are repeated verbatim further down in this generated file
# (epoch limit / epoch_count, dataloader batch_size and shuffle, optimizer lr,
# scheduler initial/final values, augmentation sample_rate); keep them in sync
# when editing by hand.
number_of_epochs: 30
batch_size: 140
# Initial and final learning rate of the linear schedule defined below.
lr: 0.001
lr_final: 0.000001
sample_rate: 16000
sentence_len: 3 # seconds
shuffle: false
# NOTE(review): presumably selects random chunks of `sentence_len` seconds
# per utterance in the recipe script; confirm against the training code.
random_chunk: true
# Feature parameters
# The same four values are repeated as the arguments of `compute_features`
# below, and n_mels equals the embedding model's `in_channels` (24).
n_mels: 24
left_frames: 0
right_frames: 0
deltas: false
# Number of speakers
# out_n_neurons equals the classifier's `out_neurons` (2917); emb_dim equals
# the `lin_neurons` / classifier input size (512) used further down.
out_n_neurons: 2917
emb_dim: 512
# Data loader options. The entries must be nested under `dataloader_options`;
# at column 0 they would be duplicate top-level `batch_size` / `shuffle` keys
# and leave this mapping empty.
# NOTE(review): presumably forwarded to the DataLoader by the recipe script; confirm.
dataloader_options:
  batch_size: 140  # same value as the top-level batch_size
  shuffle: false  # same value as the top-level shuffle
  num_workers: 0
# Functions
# Feature extractor object (anchor id006, aliased in `modules`).
# Its arguments are nested under the `!new:` tag so they are passed to Fbank
# instead of redefining the top-level feature parameters.
compute_features: &id006 !new:speechbrain.lobes.features.Fbank
  n_mels: 24  # same value as the top-level n_mels
  left_frames: 0
  right_frames: 0
  deltas: false
# X-vector embedding network (anchor id007, aliased in `modules` and in the
# checkpointer's recoverables). Arguments are nested under the `!new:` tag.
embedding_model: &id007 !new:speechbrain.lobes.models.Xvector.Xvector
  in_channels: 24  # equals n_mels of the feature extractor
  activation: !name:torch.nn.LeakyReLU
  tdnn_blocks: 5
  # One entry per TDNN block (5 entries each).
  tdnn_channels: [512, 512, 512, 512, 1500]
  tdnn_kernel_sizes: [5, 3, 3, 1, 1]
  tdnn_dilations: [1, 2, 3, 1, 1]
  lin_neurons: 512  # equals emb_dim
# Speaker classifier on top of the embeddings (anchor id008, aliased in
# `modules` and in the checkpointer's recoverables). Arguments are nested under
# the `!new:` tag; at column 0 `activation` and `lin_neurons` would duplicate
# the embedding model's keys.
classifier: &id008 !new:speechbrain.lobes.models.Xvector.Classifier
  input_shape: [null, null, 512]  # last dimension equals emb_dim
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: 512
  out_neurons: 2917  # equals out_n_neurons (number of speakers)
# Epoch counter (anchor id010, saved by the checkpointer as `counter`).
# `limit` is an argument of EpochCounter and must be nested under it.
epoch_counter: &id010 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 30  # equals number_of_epochs
# Time-domain augmenters (anchors id001 / id002, used in `augment_pipeline`
# and `modules`). Each object's arguments are nested under its own `!new:` tag;
# at column 0 the two `sample_rate` / `speeds` pairs would overwrite each other.
augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [100]  # single entry of 100: only one speed option

augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [95, 100, 105]
# Environmental corruption augmenters (anchors id003-id005, used in
# `augment_pipeline` and `modules`). The three objects differ only in
# reverb_prob / noise_prob. Their arguments are nested under each `!new:` tag;
# at column 0 every key would appear three times and collide.

# Reverberation only.
add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /media/wakeb/T7 Touch/data_qasr/
  openrir_max_noise_len: 3.0  # seconds
  reverb_prob: 1.0
  noise_prob: 0.0
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0

# Additive noise only.
add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /media/wakeb/T7 Touch/data_qasr/
  openrir_max_noise_len: 3.0  # seconds
  reverb_prob: 0.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0

# Reverberation and additive noise together.
add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /media/wakeb/T7 Touch/data_qasr/
  openrir_max_noise_len: 3.0  # seconds
  reverb_prob: 1.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0
# Augmentation pipeline.
# concat_augment = False: the augmentation techniques are applied in sequence.
# concat_augment = True:  all the augmented signals are concatenated into a
#                         single big batch.
augment_pipeline:
  - *id001  # augment_wavedrop
  - *id002  # augment_speed
  - *id003  # add_rev
  - *id004  # add_noise
  - *id005  # add_rev_noise
concat_augment: true
# Input feature normalizer (anchor id009, aliased in `modules` and saved by
# the checkpointer as `normalizer`). Configured with norm_type "sentence" and
# without std normalization. Arguments are nested under the `!new:` tag.
mean_var_norm: &id009 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false
# Name -> object mapping of the modules used by the recipe. Every value is an
# alias to an object defined earlier in the file. The entries are nested under
# `modules`; at column 0 they would overwrite the top-level object definitions
# of the same name and leave `modules` empty.
modules:
  compute_features: *id006
  augment_wavedrop: *id001
  augment_speed: *id002
  add_rev: *id003
  add_noise: *id004
  add_rev_noise: *id005
  embedding_model: *id007
  classifier: *id008
  mean_var_norm: *id009
# Cost + optimization
compute_cost: !name:speechbrain.nnet.losses.nll_loss
# compute_error: !name:speechbrain.nnet.losses.classification_error

# Optimizer factory: `lr` and `weight_decay` are keyword arguments bound to
# torch.optim.Adam, so they are nested under the `!name:` tag (at column 0
# `lr` would merely redefine the top-level lr).
opt_class: !name:torch.optim.Adam
  lr: 0.001  # same value as the top-level lr
  weight_decay: 0.000002

# Linear learning-rate schedule from lr to lr_final over the training epochs.
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
  initial_value: 0.001  # same value as the top-level lr
  final_value: 0.000001  # same value as the top-level lr_final
  epoch_count: 30  # equals number_of_epochs
# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: /media/wakeb/T7 Touch/speechbrain651/train_log.txt  # same path as train_log

# Metric tracker factory. `metric` is itself a callable with a bound keyword
# argument, so `reduction` is nested one level deeper, under `metric`.
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch

# Checkpointer: `recoverables` maps a checkpoint name to each object whose
# state is saved and restored (model, classifier, normalizer, epoch counter).
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: /media/wakeb/T7 Touch/speechbrain651/save  # same path as save_folder
  recoverables:
    embedding_model: *id007
    classifier: *id008
    normalizer: *id009
    counter: *id010