Moldwebs committed on
Commit
3d659e7
1 Parent(s): d41d75b

Create svd_xt_1_1.yaml

Browse files
scripts/sampling/configs/svd_xt_1_1.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: sgm.models.diffusion.DiffusionEngine
3
+ params:
4
+ scale_factor: 0.18215
5
+ disable_first_stage_autocast: True
6
+ ckpt_path: /workspace/checkpoints/svd_xt_1_1.safetensors
7
+
8
+ denoiser_config:
9
+ target: sgm.modules.diffusionmodules.denoiser.Denoiser
10
+ params:
11
+ scaling_config:
12
+ target: sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise
13
+
14
+ network_config:
15
+ target: sgm.modules.diffusionmodules.video_model.VideoUNet
16
+ params:
17
+ adm_in_channels: 768
18
+ num_classes: sequential
19
+ use_checkpoint: True
20
+ in_channels: 8
21
+ out_channels: 4
22
+ model_channels: 320
23
+ attention_resolutions: [4, 2, 1]
24
+ num_res_blocks: 2
25
+ channel_mult: [1, 2, 4, 4]
26
+ num_head_channels: 64
27
+ use_linear_in_transformer: True
28
+ transformer_depth: 1
29
+ context_dim: 1024
30
+ spatial_transformer_attn_type: softmax-xformers
31
+ extra_ff_mix_layer: True
32
+ use_spatial_context: True
33
+ merge_strategy: learned_with_images
34
+ video_kernel_size: [3, 1, 1]
35
+
36
+ conditioner_config:
37
+ target: sgm.modules.GeneralConditioner
38
+ params:
39
+ emb_models:
40
+ - is_trainable: False
41
+ input_key: cond_frames_without_noise
42
+ target: sgm.modules.encoders.modules.FrozenOpenCLIPImagePredictionEmbedder
43
+ params:
44
+ n_cond_frames: 1
45
+ n_copies: 1
46
+ open_clip_embedding_config:
47
+ target: sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder
48
+ params:
49
+ freeze: True
50
+
51
+ - input_key: fps_id
52
+ is_trainable: False
53
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
54
+ params:
55
+ outdim: 256
56
+
57
+ - input_key: motion_bucket_id
58
+ is_trainable: False
59
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
60
+ params:
61
+ outdim: 256
62
+
63
+ - input_key: cond_frames
64
+ is_trainable: False
65
+ target: sgm.modules.encoders.modules.VideoPredictionEmbedderWithEncoder
66
+ params:
67
+ disable_encoder_autocast: True
68
+ n_cond_frames: 1
69
+ n_copies: 1
70
+ is_ae: True
71
+ encoder_config:
72
+ target: sgm.models.autoencoder.AutoencoderKLModeOnly
73
+ params:
74
+ embed_dim: 4
75
+ monitor: val/rec_loss
76
+ ddconfig:
77
+ attn_type: vanilla-xformers
78
+ double_z: True
79
+ z_channels: 4
80
+ resolution: 256
81
+ in_channels: 3
82
+ out_ch: 3
83
+ ch: 128
84
+ ch_mult: [1, 2, 4, 4]
85
+ num_res_blocks: 2
86
+ attn_resolutions: []
87
+ dropout: 0.0
88
+ lossconfig:
89
+ target: torch.nn.Identity
90
+
91
+ - input_key: cond_aug
92
+ is_trainable: False
93
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
94
+ params:
95
+ outdim: 256
96
+
97
+ first_stage_config:
98
+ target: sgm.models.autoencoder.AutoencodingEngine
99
+ params:
100
+ loss_config:
101
+ target: torch.nn.Identity
102
+ regularizer_config:
103
+ target: sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
104
+ encoder_config:
105
+ target: sgm.modules.diffusionmodules.model.Encoder
106
+ params:
107
+ attn_type: vanilla
108
+ double_z: True
109
+ z_channels: 4
110
+ resolution: 256
111
+ in_channels: 3
112
+ out_ch: 3
113
+ ch: 128
114
+ ch_mult: [1, 2, 4, 4]
115
+ num_res_blocks: 2
116
+ attn_resolutions: []
117
+ dropout: 0.0
118
+ decoder_config:
119
+ target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
120
+ params:
121
+ attn_type: vanilla
122
+ double_z: True
123
+ z_channels: 4
124
+ resolution: 256
125
+ in_channels: 3
126
+ out_ch: 3
127
+ ch: 128
128
+ ch_mult: [1, 2, 4, 4]
129
+ num_res_blocks: 2
130
+ attn_resolutions: []
131
+ dropout: 0.0
132
+ video_kernel_size: [3, 1, 1]
133
+
134
+ sampler_config:
135
+ target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
136
+ params:
137
+ discretization_config:
138
+ target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
139
+ params:
140
+ sigma_max: 700.0
141
+
142
+ guider_config:
143
+ target: sgm.modules.diffusionmodules.guiders.LinearPredictionGuider
144
+ params:
145
+ max_scale: 3.0
146
+ min_scale: 1.5