File size: 2,739 Bytes
b93970c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
base_config:
  - configs/tts/fs2.yaml
  - configs/singing/base.yaml
  - ./base.yaml

audio_sample_rate: 24000
hop_size: 128            # Hop size.
fft_size: 512           # FFT size.
win_size: 512           # FFT size.
fmin: 30
fmax: 12000
min_level_db: -120

binarization_args:
  with_wav: true
  with_spk_embed: false
  with_align: true
raw_data_dir: 'data/raw/popcs'
processed_data_dir: 'data/processed/popcs'
binary_data_dir: 'data/binary/popcs-pmf0'
num_spk: 1
datasets: [
  'popcs',
]
test_prefixes: [
  'popcs-说散就散',
  'popcs-隐形的翅膀',
]

spec_min: [-6.8276, -7.0270, -6.8142, -7.1429, -7.6669, -7.6000, -7.1148, -6.9640,
           -6.8414, -6.6596, -6.6880, -6.7439, -6.7986, -7.4940, -7.7845, -7.6586,
           -6.9288, -6.7639, -6.9118, -6.8246, -6.7183, -7.1769, -6.9794, -7.4513,
           -7.3422, -7.5623, -6.9610, -6.8158, -6.9595, -6.8403, -6.5688, -6.6356,
           -7.0209, -6.5002, -6.7819, -6.5232, -6.6927, -6.5701, -6.5531, -6.7069,
           -6.6462, -6.4523, -6.5954, -6.4264, -6.4487, -6.7070, -6.4025, -6.3042,
           -6.4008, -6.3857, -6.3903, -6.3094, -6.2491, -6.3518, -6.3566, -6.4168,
           -6.2481, -6.3624, -6.2858, -6.2575, -6.3638, -6.4520, -6.1835, -6.2754,
           -6.1253, -6.1645, -6.0638, -6.1262, -6.0710, -6.1039, -6.4428, -6.1363,
           -6.1054, -6.1252, -6.1797, -6.0235, -6.0758, -5.9453, -6.0213, -6.0446]
spec_max: [ 0.2645,  0.0583, -0.2344, -0.0184,  0.1227,  0.1533,  0.1103,  0.1212,
            0.2421,  0.1809,  0.2134,  0.3161,  0.3301,  0.3289,  0.2667,  0.2421,
            0.2581,  0.2600,  0.1394,  0.1907,  0.1082,  0.1474,  0.1680,  0.2550,
            0.1057,  0.0826,  0.0423,  0.1203, -0.0701, -0.0056,  0.0477, -0.0639,
            -0.0272, -0.0728, -0.1648, -0.0855, -0.2652, -0.1998, -0.1547, -0.2167,
            -0.4181, -0.5463, -0.4161, -0.4733, -0.6518, -0.5387, -0.4290, -0.4191,
            -0.4151, -0.3042, -0.3810, -0.4160, -0.4496, -0.2847, -0.4676, -0.4658,
            -0.4931, -0.4885, -0.5547, -0.5481, -0.6948, -0.7968, -0.8455, -0.8392,
            -0.8770, -0.9520, -0.8749, -0.7297, -0.8374, -0.8667, -0.7157, -0.9035,
            -0.9219, -0.8801, -0.9298, -0.9009, -0.9604, -1.0537, -1.0781, -1.3766]

task_cls: usr.diffsinger_task.DiffSingerTask
#vocoder: usr.singingvocoder.highgan.HighGAN
#vocoder_ckpt: checkpoints/h_2_model/checkpoint-530000steps.pkl
vocoder: vocoders.hifigan.HifiGAN
vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128

pitch_extractor: 'parselmouth'
# config for experiments
use_spk_embed: false
num_valid_plots: 10
max_updates: 160000
lr: 0.001
timesteps: 100
K_step: 51
diff_loss_type: l1
diff_decoder_type: 'wavenet'
schedule_type: 'linear'
max_beta: 0.06
fs2_ckpt: ''
use_nsf: true