jadechoghari committed on
Commit 0c1fd6d • 1 Parent(s): faf4bac

Update app.py

Files changed (1)
  1. app.py +7 -54
app.py CHANGED
@@ -1,65 +1,18 @@
  import gradio as gr
  import torch
  import spaces
- from diffusers import FluxPipeline, DiffusionPipeline
+ from diffusers import FluxPipeline
-
-
-
-
- # # # normal FluxPipeline
- # pipeline_normal = FluxPipeline.from_pretrained(
- #     "sayakpaul/FLUX.1-merged",
- #     torch_dtype=torch.bfloat16
- # ).to("cuda")
- # pipeline_normal.transformer.to(memory_format=torch.channels_last)
- # pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)
-
- torch.backends.cuda.matmul.allow_tf32 = True  # Enable TensorFloat32 for faster matrix operations
- torch.backends.cudnn.benchmark = True  # Optimizes for GPU by enabling auto-tuning
-
- # Compile the model with maximum optimizations
- torch.compile(backend="inductor", mode="max-autotune")

  pipe = FluxPipeline.from_pretrained(
      "sayakpaul/FLUX.1-merged",
      torch_dtype=torch.bfloat16
  )
-
- # Offload to CPU if necessary
- pipe.enable_model_cpu_offload()
-
- # Use xformers for memory-efficient attention
- pipe.enable_xformers_memory_efficient_attention()
-
- # Apply dynamic quantization for even faster inference
- pipe = torch.quantization.quantize_dynamic(pipe, {torch.nn.Linear}, dtype=torch.qint8)
- # # optimized FluxPipeline
- # pipeline_optimized = FluxPipeline.from_pretrained(
- #     "camenduru/FLUX.1-dev-diffusers",
- #     torch_dtype=torch.bfloat16
- # ).to("cuda")
- # pipeline_optimized.transformer.to(memory_format=torch.channels_last)
- # pipeline_optimized.transformer = torch.compile(
- #     pipeline_optimized.transformer,
- #     mode="max-autotune",
- #     fullgraph=True
- # )
- # # wrap the autoquant call in a try-except block to handle unsupported layers
- # for name, layer in pipeline_optimized.transformer.named_children():
- #     try:
- #         # apply autoquant to each layer
- #         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
- #         print(f"Successfully quantized {name}")
- #     except AttributeError as e:
- #         print(f"Skipping layer {name} due to error: {e}")
- #     except Exception as e:
- #         print(f"Unexpected error while quantizing {name}: {e}")
-
- # pipeline_optimized.transformer = autoquant(
- #     pipeline_optimized.transformer,
- #     error_on_unseen=False
- # )
- pipeline_optimized = pipe
+ pipe.transformer.to(memory_format=torch.channels_last)
+ pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
+ pipe.transformer = autoquant(
+     pipe.transformer,
+     error_on_unseen=False
+ )

  @spaces.GPU(duration=120)
  def generate_images(prompt, guidance_scale, num_inference_steps):
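
Note on the new code: the added lines replace the eager-mode tweaks (CPU offload, xformers attention, dynamic quantization, and a stray torch.compile(backend="inductor", mode="max-autotune") call that compiled nothing, since no model was passed to it) with compilation and auto-quantization of the transformer itself. The added lines call autoquant without importing it, so the app will raise a NameError unless the import exists elsewhere in the file. Assuming torchao's autoquant is the intended one, the resulting setup would read roughly as follows; this is a sketch, and the torchao import is an assumption rather than part of the commit:

import gradio as gr
import torch
import spaces
from diffusers import FluxPipeline
from torchao.quantization import autoquant  # assumed import; the committed lines call autoquant without one

# Load the merged FLUX.1 checkpoint in bfloat16
pipe = FluxPipeline.from_pretrained(
    "sayakpaul/FLUX.1-merged",
    torch_dtype=torch.bfloat16
)

# Optimize the transformer, the hot path of the pipeline:
# channels-last memory layout, then full-graph compilation with autotuning
pipe.transformer.to(memory_format=torch.channels_last)
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)

# torchao's autoquant profiles layers at first run and swaps in faster
# quantized kernels; error_on_unseen=False is carried over from the commit
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)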
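
For context, the compiled pipeline is invoked with the usual diffusers call convention; the first call pays the torch.compile warm-up and autoquant profiling cost, so only later calls run at full speed. A minimal sketch with illustrative values:

# Illustrative call; prompt and parameter values are examples, not from the commit
image = pipe(
    prompt="a tiny astronaut hatching from an egg on the moon",
    guidance_scale=3.5,
    num_inference_steps=8,
).images[0]
image.save("flux_output.png")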