Ocelotr
/

xvecver2

Model card Files Files and versions Community

xvecver2 / hyperparams.yaml

Ocelotr

Upload 10 files

0dcab9a 12 months ago

raw

history blame

No virus

5.18 kB

	# Generated 2023-10-18 from:
	# /home/wakeb/Abdulrahman-tts/speechbrain/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
	# yamllint disable
	# ################################
	# Model: Speaker identification with x-vectors
	# Authors: Hwidong Na & Mirco Ravanelli
	# ################################

	# Basic parameters
	seed: 651
	__set_seed: !apply:torch.manual_seed [651]
	output_folder: /media/wakeb/T7 Touch/speechbrain651
	save_folder: /media/wakeb/T7 Touch/speechbrain651/save
	train_log: /media/wakeb/T7 Touch/speechbrain651/train_log.txt

	# Data files
	data_folder: /media/wakeb/T7 Touch/data_qasr/ # e.g. /path/to/Voxceleb
	train_annotation: /media/wakeb/T7 Touch/data_qasr/train_reworked.csv
	valid_annotation: /media/wakeb/T7 Touch/data_qasr/dev_reworked.csv

	# Folder to extract data augmentation files
	rir_folder: /media/wakeb/T7 Touch/data_qasr/ # Change it if needed

	# Use the following links for the official voxceleb splits:
	# VoxCeleb1 (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
	# VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
	# VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt.
	# VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training set.
	# Therefore you cannot use any files in VoxCeleb1 for training if you are using these lists for testing.
	verification_file: /media/wakeb/T7 Touch/data_qasr/testing.txt

	split_ratio: [90, 10]
	skip_prep: true
	ckpt_interval_minutes: 15 # save checkpoint every N min

	# Training parameters
	number_of_epochs: 30
	batch_size: 140
	lr: 0.001
	lr_final: 0.000001

	sample_rate: 16000
	sentence_len: 3 # seconds
	shuffle: false
	random_chunk: true

	# Feature parameters
	n_mels: 24
	left_frames: 0
	right_frames: 0
	deltas: false

	# Number of speakers
	out_n_neurons: 2917
	emb_dim: 512

	# Data loader options. batch_size and shuffle repeat the values declared
	# under "Training parameters"; the three keys are nested so they no longer
	# collide with those top-level keys.
	dataloader_options:
	    batch_size: 140
	    shuffle: false
	    num_workers: 0

	# Functions
	# Filterbank feature extractor (anchored as id006). The arguments repeat
	# the values declared under "Feature parameters".
	compute_features: &id006 !new:speechbrain.lobes.features.Fbank
	    n_mels: 24
	    left_frames: 0
	    right_frames: 0
	    deltas: false

	# X-vector embedding network (anchored as id007). in_channels equals
	# n_mels (24) and lin_neurons equals emb_dim (512). The three tdnn_* lists
	# each have tdnn_blocks (5) entries.
	embedding_model: &id007 !new:speechbrain.lobes.models.Xvector.Xvector
	    in_channels: 24
	    activation: !name:torch.nn.LeakyReLU
	    tdnn_blocks: 5
	    tdnn_channels: [512, 512, 512, 512, 1500]
	    tdnn_kernel_sizes: [5, 3, 3, 1, 1]
	    tdnn_dilations: [1, 2, 3, 1, 1]
	    lin_neurons: 512

	# Speaker classifier on top of the embeddings (anchored as id008).
	# The last input dimension equals emb_dim (512) and out_neurons equals
	# out_n_neurons (2917).
	classifier: &id008 !new:speechbrain.lobes.models.Xvector.Classifier
	    input_shape: [null, null, 512]
	    activation: !name:torch.nn.LeakyReLU
	    lin_blocks: 1
	    lin_neurons: 512
	    out_neurons: 2917

	# Epoch counter (anchored as id010); limit equals number_of_epochs (30).
	epoch_counter: &id010 !new:speechbrain.utils.epoch_loop.EpochCounter
	    limit: 30


	# Time-domain augmentation with a single speed entry of 100
	# (anchored as id001).
	augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
	    sample_rate: 16000
	    speeds: [100]

	# Time-domain augmentation with speed entries 95, 100 and 105
	# (anchored as id002).
	augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
	    sample_rate: 16000
	    speeds: [95, 100, 105]

	# Environmental corruption, reverberation only: reverb_prob 1.0,
	# noise_prob 0.0 (anchored as id003).
	add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
	    openrir_folder: /media/wakeb/T7 Touch/data_qasr/
	    openrir_max_noise_len: 3.0 # seconds
	    reverb_prob: 1.0
	    noise_prob: 0.0
	    noise_snr_low: 0
	    noise_snr_high: 15
	    rir_scale_factor: 1.0

	# Environmental corruption, additive noise only: reverb_prob 0.0,
	# noise_prob 1.0 (anchored as id004).
	add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
	    openrir_folder: /media/wakeb/T7 Touch/data_qasr/
	    openrir_max_noise_len: 3.0 # seconds
	    reverb_prob: 0.0
	    noise_prob: 1.0
	    noise_snr_low: 0
	    noise_snr_high: 15
	    rir_scale_factor: 1.0

	# Environmental corruption, reverberation plus noise: both probabilities
	# are 1.0 (anchored as id005).
	add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
	    openrir_folder: /media/wakeb/T7 Touch/data_qasr/
	    openrir_max_noise_len: 3.0 # seconds
	    reverb_prob: 1.0
	    noise_prob: 1.0
	    noise_snr_low: 0
	    noise_snr_high: 15
	    rir_scale_factor: 1.0


	# Definition of the augmentation pipeline.
	# If concat_augment = False, the augmentation techniques are applied
	# in sequence. If concat_augment = True, all the augmented signals
	# are concatenated in a single big batch.
	# Every entry must be a YAML alias (leading "*") to one of the anchored
	# augmentation objects id001..id005; without the "*" the entry is only a
	# plain string.
	augment_pipeline: [*id001, *id002, *id003, *id004, *id005]
	concat_augment: true

	# Input normalization (anchored as id009).
	mean_var_norm: &id009 !new:speechbrain.processing.features.InputNormalization
	    norm_type: sentence
	    std_norm: false

	# Mapping of module names to the anchored objects defined in this file.
	modules:
	    compute_features: *id006
	    augment_wavedrop: *id001
	    augment_speed: *id002
	    add_rev: *id003
	    add_noise: *id004
	    add_rev_noise: *id005
	    embedding_model: *id007
	    classifier: *id008
	    mean_var_norm: *id009

	# Cost + optimization
	# NOTE(review): compute_cost is a function reference (!name:), so it is
	# kept as a top-level key rather than an entry of `modules` - confirm
	# against the training script.
	compute_cost: !name:speechbrain.nnet.losses.nll_loss
	# compute_error: !name:speechbrain.nnet.losses.classification_error

	# Optimizer factory: Adam with lr equal to the top-level lr (0.001).
	opt_class: !name:torch.optim.Adam
	    lr: 0.001
	    weight_decay: 0.000002

	# Linear learning-rate schedule from lr (0.001) to lr_final (0.000001)
	# over number_of_epochs (30) epochs.
	lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
	    initial_value: 0.001
	    final_value: 0.000001
	    epoch_count: 30

	# Logging + checkpoints
	# File logger writing to the same path as the top-level train_log.
	train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
	    save_file: /media/wakeb/T7 Touch/speechbrain651/train_log.txt

	# Metric tracker factory that uses classification error as its metric.
	# NOTE(review): `reduction` is nested under `metric` on the assumption
	# that it is an argument of classification_error rather than of
	# MetricStats - confirm against the SpeechBrain API.
	error_stats: !name:speechbrain.utils.metric_stats.MetricStats
	    metric: !name:speechbrain.nnet.losses.classification_error
	        reduction: batch

	# Checkpointer writing to the same path as the top-level save_folder.
	# `recoverables` maps names to the anchored embedding model, classifier,
	# normalizer and epoch counter.
	checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
	    checkpoints_dir: /media/wakeb/T7 Touch/speechbrain651/save
	    recoverables:
	        embedding_model: *id007
	        classifier: *id008
	        normalizer: *id009
	        counter: *id010