File size: 5,501 Bytes
f2ab299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Top-level model definition. `target` is a dotted class path and `params`
# carries its settings (presumably passed to that class by the loader --
# confirm against the code that reads this file).
model:
  target: SUPIR.models.SUPIR_model.SUPIRModel
  params:
    # Precision names for the `ae` and `diffusion` parts of the model.
    ae_dtype: bf16
    diffusion_dtype: fp16
    scale_factor: 0.13025
    # Canonical lowercase boolean (same parsed value as `True`).
    disable_first_stage_autocast: true
    network_wrapper: sgm.modules.diffusionmodules.wrappers.ControlWrapper

    # Denoiser: one target class plus three nested sub-configs
    # (discretization, scaling, weighting), each naming its own target.
    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiserWithControl
      params:
        num_idx: 1000
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting

    # Control-stage network (class given by `target`). Most params use the
    # same values as `network_config` further down in this file.
    control_stage_config:
      target: SUPIR.modules.SUPIR_v0.GLVControl
      params:
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        # note: the first is unused (due to attn_res starting at 2)
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        # transformer_depth: [1, 1, 4]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: false
        input_upscale: 1

    # Main network (class given by `target`). The first three params
    # (`mode`, `project_type`, `project_channel_scale`) appear only here;
    # the rest use the same values as `control_stage_config`.
    network_config:
      target: SUPIR.modules.SUPIR_v0.LightGLVUNet
      params:
        mode: XL-base
        project_type: ZeroSFT
        project_channel_scale: 2
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        # note: the first is unused (due to attn_res starting at 2)
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: false

    # Conditioner: an ordered list of embedders. Each entry names the batch
    # key it reads (`input_key`) and the embedder class (`target`). Entry
    # order is kept exactly as in the original list.
    conditioner_config:
      target: sgm.modules.GeneralConditionerWithControl
      params:
        emb_models:
          # crossattn cond
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
            params:
              layer: hidden
              layer_idx: 11
          # crossattn and vector cond
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
            params:
              arch: ViT-bigG-14
              version: laion2b_s39b_b160k
              freeze: true
              layer: penultimate
              always_return_pooled: true
              legacy: false
          # vector cond
          - is_trainable: false
            input_key: original_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: false
            input_key: crop_coords_top_left
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: false
            input_key: target_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two

    # First-stage autoencoder (class given by `target`).
    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        # Explicit null: no checkpoint path is set in this file.
        ckpt_path: null
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          # Empty list (not null).
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    # Sampler (class given by `target`) with its nested discretization and
    # guider sub-configs.
    sampler_config:
      target: sgm.modules.diffusionmodules.sampling.TiledRestoreEDMSampler
      params:
        # Step count and the s_* / restore settings.
        num_steps: 100
        s_churn: 0
        s_noise: 1.003
        restore_cfg: 4.0
        # Tiling: stride is half the tile size.
        tile_size: 128
        tile_stride: 64
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        guider_config:
          target: sgm.modules.diffusionmodules.guiders.LinearCFG
          params:
            scale_min: 4.0
            scale: 7.5

    # Two free-text prompt strings (presumably the default positive and
    # negative prompts -- confirm against the code that reads `p_p` / `n_p`).
    # Folded style (`>-`): the lines below are joined with single spaces and
    # no trailing newline is added, so each value is one single-line string.
    p_p: >-
      Cinematic, High Contrast, highly detailed, taken using a Canon EOS R
      camera, hyper detailed photo - realistic maximum detail, 32k,
      Color Grading, ultra HD, extreme meticulous detailing,
      skin pore detailing, hyper sharpness, perfect without deformations.
    n_p: >-
      painting, oil painting, illustration, drawing, art, sketch, oil painting,
      cartoon, CG Style, 3D render, unreal engine, blurring, dirty, messy,
      worst quality, low quality, frames, watermark, signature, jpeg artifacts,
      deformed, lowres, over-smooth

# Checkpoint locations. SUPIR_CKPT is explicitly null (no value set here).
SDXL_CKPT: yushan777/SUPIR/sd_xl_base_1.0_0.9vae.safetensors
SUPIR_CKPT_F: yushan777/SUPIR/SUPIR-v0F.ckpt
SUPIR_CKPT_Q: yushan777/SUPIR/SUPIR-v0Q.ckpt
SUPIR_CKPT: null