mrq committed on
Commit
56a60c3
1 Parent(s): d890d15
models/ckpt/ar+nar-retnet-4/fp32.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:91781616d713a424cab977abb718888323d1a26461bef78c8065ac30d1258d2a
3
- size 424338659
 
 
 
 
models/ckpt/ar-retnet-4/fp32.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e042d05f14f21a166cd5f5c16b9c9c4ac9ce18af2a4c285c7f0d3ef3ea6729bf
3
- size 418040575
 
 
 
 
models/ckpt/nar-retnet-4/fp32.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:18027cafe3c077cb8786a5665f04f732f4e3fcacff17844182f9383a1dca640f
3
- size 422230719
 
 
 
 
models/config.yaml DELETED
@@ -1,104 +0,0 @@
1
- dataset:
2
- training: [
3
- ]
4
- validation: [
5
- ]
6
- noise: [
7
- ]
8
-
9
- speaker_name_getter: "lambda p: f'{p.parts[-3]}_{p.parts[-2]}'"
10
-
11
- use_hdf5: True
12
- use_metadata: True
13
- hdf5_flag: r
14
- validate: True
15
-
16
- workers: 4
17
- cache: True
18
-
19
- phones_range: [4, 256]
20
- duration_range: [1.0, 16.0]
21
-
22
- random_utterance: 1.0
23
- max_prompts: 3
24
- prompt_duration: 3.0
25
-
26
- sample_type: speaker
27
-
28
- tasks_list: ["tts"] # , "ns", "sr", "tse", "cse", "nse", "tts"]
29
-
30
- models:
31
- _prom_levels: 4
32
- _max_levels: 4
33
-
34
- _models:
35
- - name: "ar"
36
- size: "full"
37
- resp_levels: 1
38
- prom_levels: 2
39
- tasks: 8
40
- arch_type: "retnet"
41
- training: True
42
- - name: "nar"
43
- size: "full"
44
- resp_levels: 3
45
- prom_levels: 4
46
- tasks: 8
47
- arch_type: "retnet"
48
- training: True
49
-
50
-
51
- hyperparameters:
52
- batch_size: 8
53
- gradient_accumulation_steps: 1
54
- gradient_clipping: 100
55
-
56
- optimizer: AdamW
57
- learning_rate: 1.0e-5
58
-
59
- scheduler_type: ""
60
-
61
- evaluation:
62
- batch_size: 16
63
- frequency: 500
64
- size: 16
65
-
66
- steps: 300
67
- ar_temperature: 0.95
68
- nar_temperature: 0.25
69
- load_disabled_engines: True
70
-
71
- trainer:
72
- iterations: 1_000_000
73
-
74
- save_tag: step
75
- save_on_oom: True
76
- save_on_quit: True
77
- save_frequency: 100
78
- export_on_save: True
79
-
80
- keep_last_checkpoints: 4
81
-
82
- load_state_dict: True
83
-
84
- gc_mode: None # "global_step"
85
-
86
- weight_dtype: bfloat16
87
- amp: False
88
-
89
- backend: deepspeed
90
- deepspeed:
91
- zero_optimization_level: 0
92
- use_compression_training: True
93
-
94
- activation_checkpointing: True
95
-
96
- inference:
97
- use_vocos: True
98
- normalize: False
99
-
100
- bitsandbytes:
101
- enabled: False
102
- injects: False
103
- linear: False
104
- embedding: False