ecker committed
Commit c377f65
1 parent: 15258a4

Upload 2 files

Files changed (2):
  1. ckpt/ar+nar-retnet-8/fp32.pth +3 -0
  2. config.ar_nar.yaml +46 -28
ckpt/ar+nar-retnet-8/fp32.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecd8279a7e8658230b743ed66dac025059cfe292f869cd34257b3b568528767a
+size 441054655
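The added fp32.pth is a Git LFS pointer, so the repository itself only stores the hash and size above; the ~441 MB checkpoint is fetched from LFS storage. A minimal sketch for checking a locally downloaded copy against the pointer (the local path is an assumption, not part of this commit):

import hashlib
from pathlib import Path

# Values copied from the LFS pointer in this commit.
EXPECTED_OID = "ecd8279a7e8658230b743ed66dac025059cfe292f869cd34257b3b568528767a"
EXPECTED_SIZE = 441054655

# Hypothetical local path; adjust to wherever the checkpoint was downloaded.
path = Path("ckpt/ar+nar-retnet-8/fp32.pth")

# Cheap check first: byte size recorded in the pointer.
assert path.stat().st_size == EXPECTED_SIZE, "size does not match the LFS pointer"

# Then the SHA-256 digest, streamed in 1 MiB chunks.
sha = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "sha256 does not match the LFS pointer"
print("checkpoint matches the LFS pointer")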
config.ar_nar.yaml CHANGED
@@ -1,10 +1,7 @@
 dataset:
-  training: [
-  ]
-  validation: [
-  ]
-  noise: [
-  ]
+  training: []
+  validation: []
+  noise: []
 
   speaker_name_getter: "lambda p: f'{p.parts[-3]}_{p.parts[-2]}'"
 
@@ -13,7 +10,7 @@ dataset:
   hdf5_flag: r
   validate: True
 
-  workers: 4
+  workers: 2
   cache: True
 
   phones_range: [4, 256]
@@ -21,41 +18,60 @@ dataset:
 
   random_utterance: 1.0
   max_prompts: 3
-  prompt_duration: 3.0
+  prompt_duration: 6.0
 
   sample_type: speaker
 
-  tasks_list: ["tts"] # , "ns", "sr", "tse", "cse", "nse", "tts"]
+  tasks_list: [ "tts" ] # , [ "tts", "tts-c", "ns", "sr", "tse", "cse", "nse", "tts"]
 
 models:
-  _prom_levels: 4
+  _prom_levels: 8
   _max_levels: 8
 
   _models:
   - name: "ar+nar"
     size: "full"
-    resp_levels: 4
-    prom_levels: 4
+    resp_levels: 8
+    prom_levels: 8
     tasks: 8
     arch_type: "retnet"
     training: True
+    version: 2
 
 hyperparameters:
   batch_size: 8
-  gradient_accumulation_steps: 1
+  gradient_accumulation_steps: 32
   gradient_clipping: 100
 
-  optimizer: AdamW
-  learning_rate: 1.0e-5
+  optimizer: Prodigy
+  torch_optimizer: True
+  learning_rate: 1.0
 
   scheduler_type: ""
+  #scheduler_type: OneCycle
+  #scheduler_params:
+  # cycle_first_step_size: 10_000
+  # cycle_first_stair_count: 10_000
+
+  # cycle_second_step_size: 15_000
+  # cycle_second_stair_count: 15_000
+
+  # decay_step_size: 5_000
+
+  # cycle_min_lr: 2.5e-4 # 1.0e-5
+  # cycle_max_lr: 2.5e-4 # 1.0e-4
+  # decay_lr_rate: 0.0
+
+  # cycle_min_mom: 0.90
+  # cycle_max_mom: 0.99
+  # decay_mom_rate: 0.0
 
 evaluation:
   batch_size: 16
-  frequency: 500
+  frequency: 250
   size: 16
 
-  steps: 300
+  steps: 450
   ar_temperature: 0.95
   nar_temperature: 0.25
   load_disabled_engines: True
@@ -66,7 +82,7 @@ trainer:
   save_tag: step
   save_on_oom: True
   save_on_quit: True
-  save_frequency: 500
+  save_frequency: 100
   export_on_save: True
 
   keep_last_checkpoints: 4
@@ -74,32 +90,34 @@ trainer:
   aggressive_optimizations: False
   load_disabled_engines: False
 
-  load_state_dict: True
+  #load_state_dict: True
+  #strict_loading: False
+  #load_tag: "9500"
+  #load_states: False
+  #restart_step_count: True
 
   gc_mode: None # "global_step"
 
-  weight_dtype: float32
+  weight_dtype: bfloat16
   amp: False
 
-  backend: local
+  backend: deepspeed
   deepspeed:
     zero_optimization_level: 0
     use_compression_training: True
 
-inference:
-  weight_dtype: float32
-  amp: False
+  activation_checkpointing: True
 
+inference:
   use_vocos: True
   normalize: False
 
-  recurrent_chunk_size: 0
-  recurrent_forward: False
+  weight_dtype: bfloat16
+  amp: False
 
 bitsandbytes:
   enabled: False
   injects: True
   linear: True
   embedding: True
-
-device: cpu
+
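For reference, the retuned hyperparameters can be read straight from the committed YAML. A minimal sketch using PyYAML (a raw yaml.safe_load of a local copy of the file, not the project's own config loader; the filename and working directory are assumptions):

import yaml

# Hypothetical local copy of the file as committed.
with open("config.ar_nar.yaml") as f:
    cfg = yaml.safe_load(f)

hp = cfg["hyperparameters"]
# Prodigy adapts its own step size, so the learning_rate is left at 1.0.
print(hp["optimizer"], hp["learning_rate"])
# Effective samples per optimizer step: batch_size 8 * gradient_accumulation_steps 32 = 256.
print(hp["batch_size"] * hp["gradient_accumulation_steps"])

model = cfg["models"]["_models"][0]
# Both prompt and response quantizer levels were raised from 4 to 8 in this commit.
print(model["prom_levels"], model["resp_levels"])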