remove deprecated LoRA blocks

#1
Opened by sayakpaul (HF staff)
Files changed (2)
  1. my_pipeline.py +4 -13
  2. unet/my_unet_model.py +3 -3
my_pipeline.py CHANGED
@@ -25,12 +25,7 @@ from diffusers.loaders import (
25
  TextualInversionLoaderMixin,
26
  )
27
  from diffusers.models import AutoencoderKL, UNet2DConditionModel
28
- from diffusers.models.attention_processor import (
29
- AttnProcessor2_0,
30
- LoRAAttnProcessor2_0,
31
- LoRAXFormersAttnProcessor,
32
- XFormersAttnProcessor,
33
- )
34
  from diffusers.models.lora import adjust_lora_scale_text_encoder
35
  from diffusers.schedulers import KarrasDiffusionSchedulers
36
  from diffusers.utils import (
@@ -135,6 +130,7 @@ class MyPipeline(
135
  watermark output images. If not defined, it will default to True if the package is installed, otherwise no
136
  watermarker will be used.
137
  """
 
138
  model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
139
  _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
140
 
@@ -571,12 +567,7 @@ class MyPipeline(
571
  self.vae.to(dtype=torch.float32)
572
  use_torch_2_0_or_xformers = isinstance(
573
  self.vae.decoder.mid_block.attentions[0].processor,
574
- (
575
- AttnProcessor2_0,
576
- XFormersAttnProcessor,
577
- LoRAXFormersAttnProcessor,
578
- LoRAAttnProcessor2_0,
579
- ),
580
  )
581
  # if xformers or torch_2_0 is used attention block does not need
582
  # to be in float32 which can save lots of memory
@@ -971,4 +962,4 @@ class MyPipeline(
971
  # Offload all models
972
  self.maybe_free_model_hooks()
973
 
974
- return (image,)
 
25
  TextualInversionLoaderMixin,
26
  )
27
  from diffusers.models import AutoencoderKL, UNet2DConditionModel
28
+ from diffusers.models.attention_processor import AttnProcessor2_0, XFormersAttnProcessor
 
 
 
 
 
29
  from diffusers.models.lora import adjust_lora_scale_text_encoder
30
  from diffusers.schedulers import KarrasDiffusionSchedulers
31
  from diffusers.utils import (
 
130
  watermark output images. If not defined, it will default to True if the package is installed, otherwise no
131
  watermarker will be used.
132
  """
133
+
134
  model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
135
  _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
136
 
 
567
  self.vae.to(dtype=torch.float32)
568
  use_torch_2_0_or_xformers = isinstance(
569
  self.vae.decoder.mid_block.attentions[0].processor,
570
+ (AttnProcessor2_0, XFormersAttnProcessor),
 
 
 
 
 
571
  )
572
  # if xformers or torch_2_0 is used attention block does not need
573
  # to be in float32 which can save lots of memory
 
962
  # Offload all models
963
  self.maybe_free_model_hooks()
964
 
965
+ return (image,)
unet/my_unet_model.py CHANGED
@@ -34,7 +34,7 @@ from diffusers.models.embeddings import (
34
  ImageHintTimeEmbedding,
35
  ImageProjection,
36
  ImageTimeEmbedding,
37
- PositionNet,
38
  TextImageProjection,
39
  TextImageTimeEmbedding,
40
  TextTimeEmbedding,
@@ -42,7 +42,7 @@ from diffusers.models.embeddings import (
42
  Timesteps,
43
  )
44
  from diffusers.models.modeling_utils import ModelMixin
45
- from diffusers.models.unet_2d_blocks import (
46
  UNetMidBlock2DCrossAttn,
47
  UNetMidBlock2DSimpleCrossAttn,
48
  get_down_block,
@@ -586,7 +586,7 @@ class MyUNetModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
586
  positive_len = cross_attention_dim[0]
587
 
588
  feature_type = "text-only" if attention_type == "gated" else "text-image"
589
- self.position_net = PositionNet(
590
  positive_len=positive_len, out_dim=cross_attention_dim, feature_type=feature_type
591
  )
592
 
 
34
  ImageHintTimeEmbedding,
35
  ImageProjection,
36
  ImageTimeEmbedding,
37
+ GLIGENTextBoundingboxProjection,
38
  TextImageProjection,
39
  TextImageTimeEmbedding,
40
  TextTimeEmbedding,
 
42
  Timesteps,
43
  )
44
  from diffusers.models.modeling_utils import ModelMixin
45
+ from diffusers.models.unets.unet_2d_blocks import (
46
  UNetMidBlock2DCrossAttn,
47
  UNetMidBlock2DSimpleCrossAttn,
48
  get_down_block,
 
586
  positive_len = cross_attention_dim[0]
587
 
588
  feature_type = "text-only" if attention_type == "gated" else "text-image"
589
+ self.position_net = GLIGENTextBoundingboxProjection(
590
  positive_len=positive_len, out_dim=cross_attention_dim, feature_type=feature_type
591
  )
592