File size: 1,353 Bytes
b93970c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Parent config files that this experiment config builds on.
# NOTE(review): presumably loaded in the listed order, with keys in this file
# overriding them — confirm against the config loader.
base_config:
  - configs/singing/fs2.yaml
  - usr/configs/midi/cascade/opencs/opencpop_statis.yaml

# Audio analysis settings.
# NOTE(review): units are not stated in this file — the sample rate and
# fmin/fmax are presumably Hz and the sizes presumably samples; confirm.
audio_sample_rate: 24000
hop_size: 128            # Hop size (a quarter of win_size).
fft_size: 512           # FFT size.
win_size: 512           # Window size (same value as fft_size).
fmin: 30
fmax: 12000  # Equals audio_sample_rate / 2.
min_level_db: -120

# Dataset binarization settings.
binarization_args:
  with_wav: true
  with_spk_embed: false
  with_align: true
# The keys below are top-level siblings of binarization_args, not nested in it.
raw_data_dir: 'data/raw/opencpop/segments'
# NOTE(review): 'xxx' looks like a placeholder — confirm whether this key is
# actually read for this dataset before relying on it.
processed_data_dir: 'xxx'
# Dotted path naming the binarizer class.
binarizer_cls: data_gen.singing.binarize.OpencpopBinarizer


# Directory for the binarized dataset.
# NOTE(review): presumably written by the binarizer and read at training
# time — confirm.
binary_data_dir: 'data/binary/opencpop-midi-dp'
# MIDI experiment switches: MIDI input on, ground-truth f0 and durations off.
use_midi: true  #  for midi exp
use_gt_f0: false  #  for midi exp
use_gt_dur: false  # for further midi exp
# lambda_* values — presumably loss-term weights (verify in the task code).
# Every active one is 1.0; the energy term is commented out.
lambda_f0: 1.0
lambda_uv: 1.0
#lambda_energy: 0.1
lambda_ph_dur: 1.0
lambda_sent_dur: 1.0
lambda_word_dur: 1.0
predictor_grad: 0.1
# pe_enable is off and pe_ckpt is left as an empty string.
pe_enable: false
pe_ckpt: ''

# One speaker declared; the speaker-embedding switches in this file are false.
num_spk: 1
# Prefixes identifying the held-out test items. Kept quoted so they stay
# strings rather than being parsed as integers.
# NOTE(review): presumably matched against item names during binarization —
# confirm against the binarizer.
test_prefixes:
  - '2044'
  - '2086'
  - '2092'
  - '2093'
  - '2100'

# Dotted path of the task class used for this experiment.
task_cls: usr.diffsinger_task.AuxDecoderMIDITask
# Alternative vocoder, left disabled:
#vocoder: usr.singingvocoder.highgan.HighGAN
#vocoder_ckpt: checkpoints/h_2_model/checkpoint-530000steps.pkl
# Active vocoder class and its checkpoint path.
# NOTE(review): the checkpoint name ends in 'hop128', which matches
# hop_size: 128 in this file — presumably the two must agree; confirm before
# changing either.
vocoder: vocoders.hifigan.HifiGAN
vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128

use_nsf: true

# Experiment-specific settings: batching limits, predictor depth, and the
# training / validation schedule.
max_frames: 5000
max_tokens: 40000
predictor_layers: 5
rel_pos: true
# NOTE(review): the trailing '*' marker below is not explained anywhere in
# this file — confirm its meaning or remove it.
dur_predictor_layers: 5  # *

# Speaker embedding stays off, consistent with with_spk_embed: false and
# num_spk: 1 in this file.
use_spk_embed: false
num_valid_plots: 10
max_updates: 160000
save_gt: true