soumi-maiti committed on
Commit
1d8e087
1 Parent(s): 164c86c

Update model

Browse files
exp/diar_enh_stats_8k/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9cec0a1b3324f2eaa6cbad59122b5d908db5cad3ac0b1f58a9c1c4f863ab554
3
+ size 778
exp/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/16epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07c01b1e6ecde2bfd9dce6d0646972821933bd3f293ccebd9d88bb64f49dc1b
3
+ size 38983318
exp/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/DIAR_RESULTS.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: humanfriendly in /ocean/projects/cis210027p/smaiti/espnet_diar_enh/tools/venv/lib/python3.9/site-packages (10.0)
2
+
3
+ [notice] A new release of pip available: 22.1.2 -> 23.1.1
4
+ [notice] To update, run: pip install --upgrade pip
5
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
6
+ # RESULTS
7
+ ## Environments
8
+ - date: `Wed Apr 26 13:02:10 EDT 2023`
9
+ - python version: `3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:39:03) [GCC 11.3.0]`
10
+ - espnet version: `espnet 202205`
11
+ - pytorch version: `pytorch 1.8.1+cu102`
12
+ - Git hash: `d837c97c88f13ffe655a30bcff93d814f212b225`
13
+ - Commit date: `Wed Jun 29 12:04:57 2022 -0700`
14
+
15
+ ## diar_enh_train_diar_enh_convtasnet_concat_feats_adapt
16
+ ### DER
17
+ diarized_enhanced_test_decode_diar_enh_adapt
18
+ |threshold_median_collar|DER|
19
+ |---|---|
20
+ |result_th0.3_med11_collar0.0|6.50|
21
+ |result_th0.3_med1_collar0.0|6.52|
22
+ |result_th0.4_med11_collar0.0|6.20|
23
+ |result_th0.4_med1_collar0.0|6.24|
24
+ |result_th0.5_med11_collar0.0|6.04|
25
+ |result_th0.5_med1_collar0.0|6.10|
26
+ |result_th0.6_med11_collar0.0|6.03|
27
+ |result_th0.6_med1_collar0.0|6.12|
28
+ |result_th0.7_med11_collar0.0|6.25|
29
+ |result_th0.7_med1_collar0.0|6.36|
exp/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/ENH_RESULTS.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: humanfriendly in /ocean/projects/cis210027p/smaiti/espnet_diar_enh/tools/venv/lib/python3.9/site-packages (10.0)
2
+
3
+ [notice] A new release of pip available: 22.1.2 -> 23.1.2
4
+ [notice] To update, run: pip install --upgrade pip
5
+ <!-- Generated by scripts/utils/show_enh_score.sh -->
6
+ # RESULTS
7
+ ## Environments
8
+ - date: `Thu May 4 02:08:47 EDT 2023`
9
+ - python version: `3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:39:03) [GCC 11.3.0]`
10
+ - espnet version: `espnet 202205`
11
+ - pytorch version: `pytorch 1.8.1+cu102`
12
+ - Git hash: `d837c97c88f13ffe655a30bcff93d814f212b225`
13
+ - Commit date: `Wed Jun 29 12:04:57 2022 -0700`
14
+
15
+
16
+ ## diar_enh_train_diar_enh_convtasnet_concat_feats_adapt
17
+
18
+ config: conf/tuning/train_diar_enh_convtasnet_concat_feats_adapt.yaml
19
+
20
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
21
+ |---|---|---|---|---|---|
22
+ |diarized_enhanced_test_decode_diar_enh_adapt|77.3102|8.3970|6.9224|15.6728|5.3881|
23
+
exp/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/config.yaml ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_diar_enh_convtasnet_concat_feats_adapt.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: 4
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss_enh
39
+ - min
40
+ keep_nbest_models: 1
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 16
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ wandb_entity: null
57
+ wandb_name: null
58
+ wandb_model_log_interval: -1
59
+ detect_anomaly: false
60
+ pretrain_path: null
61
+ init_param:
62
+ - ../enh_diar1/exp/diar_enh_train_diar_enh_convtasnet_concat_feats_raw/valid.loss_enh.best.pth
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 1
67
+ valid_batch_size: null
68
+ batch_bins: 1000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/diar_enh_stats_8k/train/speech_shape
72
+ - exp/diar_enh_stats_8k/train/text_shape
73
+ - exp/diar_enh_stats_8k/train/speech_ref1_shape
74
+ - exp/diar_enh_stats_8k/train/speech_ref2_shape
75
+ - exp/diar_enh_stats_8k/train/speech_ref3_shape
76
+ - exp/diar_enh_stats_8k/train/noise_ref1_shape
77
+ valid_shape_file:
78
+ - exp/diar_enh_stats_8k/valid/speech_shape
79
+ - exp/diar_enh_stats_8k/valid/text_shape
80
+ - exp/diar_enh_stats_8k/valid/speech_ref1_shape
81
+ - exp/diar_enh_stats_8k/valid/speech_ref2_shape
82
+ - exp/diar_enh_stats_8k/valid/speech_ref3_shape
83
+ - exp/diar_enh_stats_8k/valid/noise_ref1_shape
84
+ batch_type: folded
85
+ valid_batch_type: null
86
+ fold_length:
87
+ - 800
88
+ - 80000
89
+ - 80000
90
+ - 80000
91
+ - 80000
92
+ - 80000
93
+ sort_in_batch: descending
94
+ sort_batch: descending
95
+ multiple_iterator: false
96
+ chunk_length: 24000
97
+ chunk_shift_ratio: 0.5
98
+ num_cache_chunks: 1024
99
+ train_data_path_and_name_and_type:
100
+ - - dump/raw/train/wav.scp
101
+ - speech
102
+ - sound
103
+ - - dump/raw/train/espnet_rttm
104
+ - text
105
+ - rttm
106
+ - - dump/raw/train/spk1.scp
107
+ - speech_ref1
108
+ - sound
109
+ - - dump/raw/train/spk2.scp
110
+ - speech_ref2
111
+ - sound
112
+ - - dump/raw/train/spk3.scp
113
+ - speech_ref3
114
+ - sound
115
+ - - dump/raw/train/noise1.scp
116
+ - noise_ref1
117
+ - sound
118
+ valid_data_path_and_name_and_type:
119
+ - - dump/raw/dev/wav.scp
120
+ - speech
121
+ - sound
122
+ - - dump/raw/dev/espnet_rttm
123
+ - text
124
+ - rttm
125
+ - - dump/raw/dev/spk1.scp
126
+ - speech_ref1
127
+ - sound
128
+ - - dump/raw/dev/spk2.scp
129
+ - speech_ref2
130
+ - sound
131
+ - - dump/raw/dev/spk3.scp
132
+ - speech_ref3
133
+ - sound
134
+ - - dump/raw/dev/noise1.scp
135
+ - noise_ref1
136
+ - sound
137
+ allow_variable_data_keys: false
138
+ max_cache_size: 0.0
139
+ max_cache_fd: 32
140
+ valid_max_cache_size: null
141
+ optim: adam
142
+ optim_conf:
143
+ lr: 0.001
144
+ eps: 1.0e-07
145
+ weight_decay: 0
146
+ scheduler: reducelronplateau
147
+ scheduler_conf:
148
+ mode: min
149
+ factor: 0.5
150
+ patience: 1
151
+ token_list: null
152
+ src_token_list: null
153
+ init: xavier_uniform
154
+ input_size: null
155
+ ctc_conf:
156
+ dropout_rate: 0.0
157
+ ctc_type: builtin
158
+ reduce: true
159
+ ignore_nan_grad: null
160
+ zero_infinity: true
161
+ enh_criterions:
162
+ - name: si_snr
163
+ conf:
164
+ eps: 1.0e-07
165
+ wrapper: pit
166
+ wrapper_conf:
167
+ weight: 1.0
168
+ independent_perm: true
169
+ flexible_numspk: true
170
+ diar_num_spk: 3
171
+ diar_input_size: 128
172
+ enh_model_conf:
173
+ loss_type: si_snr
174
+ asr_model_conf:
175
+ ctc_weight: 0.5
176
+ interctc_weight: 0.0
177
+ ignore_id: -1
178
+ lsm_weight: 0.0
179
+ length_normalized_loss: false
180
+ report_cer: true
181
+ report_wer: true
182
+ sym_space: <space>
183
+ sym_blank: <blank>
184
+ extract_feats_in_collect_stats: true
185
+ st_model_conf:
186
+ stft_consistency: false
187
+ loss_type: mask_mse
188
+ mask_type: null
189
+ diar_model_conf:
190
+ diar_weight: 0.2
191
+ attractor_weight: 0.2
192
+ subtask_series:
193
+ - enh
194
+ - diar
195
+ model_conf:
196
+ calc_enh_loss: true
197
+ bypass_enh_prob: 0
198
+ use_preprocessor: true
199
+ token_type: bpe
200
+ bpemodel: null
201
+ src_token_type: bpe
202
+ src_bpemodel: null
203
+ non_linguistic_symbols: null
204
+ cleaner: null
205
+ g2p: null
206
+ enh_encoder: conv
207
+ enh_encoder_conf:
208
+ channel: 512
209
+ kernel_size: 16
210
+ stride: 8
211
+ enh_separator: tcn_nomask
212
+ enh_separator_conf:
213
+ layer: 8
214
+ stack: 3
215
+ bottleneck_dim: 128
216
+ hidden_dim: 512
217
+ kernel: 3
218
+ causal: false
219
+ norm_type: gLN
220
+ enh_decoder: conv
221
+ enh_decoder_conf:
222
+ channel: 512
223
+ kernel_size: 16
224
+ stride: 8
225
+ enh_mask_module: multi_mask
226
+ enh_mask_module_conf:
227
+ max_num_spk: 3
228
+ mask_nonlinear: relu
229
+ bottleneck_dim: 128
230
+ frontend: default
231
+ frontend_conf: {}
232
+ specaug: null
233
+ specaug_conf: {}
234
+ normalize: utterance_mvn
235
+ normalize_conf: {}
236
+ asr_preencoder: null
237
+ asr_preencoder_conf: {}
238
+ asr_encoder: rnn
239
+ asr_encoder_conf: {}
240
+ asr_postencoder: null
241
+ asr_postencoder_conf: {}
242
+ asr_decoder: rnn
243
+ asr_decoder_conf: {}
244
+ st_preencoder: null
245
+ st_preencoder_conf: {}
246
+ st_encoder: rnn
247
+ st_encoder_conf: {}
248
+ st_postencoder: null
249
+ st_postencoder_conf: {}
250
+ st_decoder: rnn
251
+ st_decoder_conf: {}
252
+ st_extra_asr_decoder: rnn
253
+ st_extra_asr_decoder_conf: {}
254
+ st_extra_mt_decoder: rnn
255
+ st_extra_mt_decoder_conf: {}
256
+ diar_frontend: default
257
+ diar_frontend_conf:
258
+ hop_length: 64
259
+ fs: 8000
260
+ diar_specaug: null
261
+ diar_specaug_conf: {}
262
+ diar_normalize: utterance_mvn
263
+ diar_normalize_conf: {}
264
+ diar_encoder: transformer
265
+ diar_encoder_conf:
266
+ input_layer: conv2d8
267
+ num_blocks: 4
268
+ linear_units: 512
269
+ dropout_rate: 0.1
270
+ output_size: 256
271
+ attention_heads: 4
272
+ attention_dropout_rate: 0.1
273
+ diar_decoder: linear
274
+ diar_decoder_conf: {}
275
+ label_aggregator: label_aggregator
276
+ label_aggregator_conf:
277
+ win_length: 256
278
+ hop_length: 64
279
+ diar_attractor: rnn
280
+ diar_attractor_conf:
281
+ unit: 256
282
+ layer: 1
283
+ dropout: 0.0
284
+ attractor_grad: true
285
+ required:
286
+ - output_dir
287
+ version: '202205'
288
+ distributed: false
meta.yaml CHANGED
@@ -2,7 +2,7 @@ espnet: '202205'
2
  files:
3
  model_file: exp_bk/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/16epoch.pth
4
  python: "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:39:03) \n[GCC 11.3.0]"
5
- timestamp: 1683182053.532115
6
  torch: 1.8.1+cu102
7
  yaml_files:
8
  train_config: exp_bk/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/config.yaml
 
2
  files:
3
  model_file: exp_bk/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/16epoch.pth
4
  python: "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:39:03) \n[GCC 11.3.0]"
5
+ timestamp: 1683182262.975525
6
  torch: 1.8.1+cu102
7
  yaml_files:
8
  train_config: exp_bk/diar_enh_train_diar_enh_convtasnet_concat_feats_adapt/config.yaml