jadechoghari committed
Commit 8ae18c8 • 1 Parent(s): f329ba7

Update app.py

Replace the torchao/torch.compile FLUX.1-merged pipeline with the PrunaAI 4-bit quantized FLUX.1-schnell checkpoints: the quantized text_encoder_2 and transformer are downloaded with huggingface-cli and loaded from local .pt files, the remaining components come from black-forest-labs/FLUX.1-schnell, and the Gradio interface is rebuilt with gr.Blocks.
Files changed (1):
  app.py  +162 -65
app.py CHANGED
@@ -1,79 +1,176 @@
  import gradio as gr
  import torch
  import spaces
- from diffusers import FluxPipeline, DiffusionPipeline
- from torchao.quantization import autoquant



- # # # normal FluxPipeline
- pipeline_normal = FluxPipeline.from_pretrained(
-     "sayakpaul/FLUX.1-merged",
-     torch_dtype=torch.bfloat16
- ).to("cuda")
- pipeline_normal.transformer.to(memory_format=torch.channels_last)
- pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)


- # # optimized FluxPipeline
- # pipeline_optimized = FluxPipeline.from_pretrained(
- #     "camenduru/FLUX.1-dev-diffusers",
- #     torch_dtype=torch.bfloat16
- # ).to("cuda")
- # pipeline_optimized.transformer.to(memory_format=torch.channels_last)
- # pipeline_optimized.transformer = torch.compile(
- #     pipeline_optimized.transformer,
- #     mode="max-autotune",
- #     fullgraph=True
- # )
- # # wrap the autoquant call in a try-except block to handle unsupported layers
- # for name, layer in pipeline_optimized.transformer.named_children():
- #     try:
- #         # apply autoquant to each layer
- #         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
- #         print(f"Successfully quantized {name}")
- #     except AttributeError as e:
- #         print(f"Skipping layer {name} due to error: {e}")
- #     except Exception as e:
- #         print(f"Unexpected error while quantizing {name}: {e}")
-
- # pipeline_optimized.transformer = autoquant(
- #     pipeline_optimized.transformer,
- #     error_on_unseen=False
- # )
- pipeline_optimized = pipeline_normal

  @spaces.GPU(duration=120)
- def generate_images(prompt, guidance_scale, num_inference_steps):
-     # # generate image with normal pipeline
-     # image_normal = pipeline_normal(
-     #     prompt=prompt,
-     #     guidance_scale=guidance_scale,
-     #     num_inference_steps=int(num_inference_steps)
-     # ).images[0]
-
-     # generate image with optimized pipeline
-     image_optimized = pipeline_optimized(
-         prompt=prompt,
          guidance_scale=guidance_scale,
-         num_inference_steps=int(num_inference_steps)
      ).images[0]
-
-     return image_optimized
-
- # set up Gradio interface
- demo = gr.Interface(
-     fn=generate_images,
-     inputs=[
-         gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
-         gr.Slider(1.0, 10.0, step=0.5, value=3.5, label="Guidance Scale"),
-         gr.Slider(10, 100, step=1, value=50, label="Number of Inference Steps")
-     ],
-     outputs=[
-         gr.Image(type="pil", label="Optimized FluxPipeline")
-     ],
-     title="FluxPipeline Comparison",
-     description="Compare images generated by the normal FluxPipeline and the optimized one using torchao and torch.compile()."
- )

  demo.launch()
+ # import gradio as gr
+ # import torch
+ # import spaces
+ # from diffusers import FluxPipeline, DiffusionPipeline
+ # from torchao.quantization import autoquant
+
+
+
+ # # # # normal FluxPipeline
+ # pipeline_normal = FluxPipeline.from_pretrained(
+ #     "sayakpaul/FLUX.1-merged",
+ #     torch_dtype=torch.bfloat16
+ # ).to("cuda")
+ # pipeline_normal.transformer.to(memory_format=torch.channels_last)
+ # pipeline_normal.transformer = torch.compile(pipeline_normal.transformer, mode="max-autotune", fullgraph=True)
+
+
+ # # # optimized FluxPipeline
+ # # pipeline_optimized = FluxPipeline.from_pretrained(
+ # #     "camenduru/FLUX.1-dev-diffusers",
+ # #     torch_dtype=torch.bfloat16
+ # # ).to("cuda")
+ # # pipeline_optimized.transformer.to(memory_format=torch.channels_last)
+ # # pipeline_optimized.transformer = torch.compile(
+ # #     pipeline_optimized.transformer,
+ # #     mode="max-autotune",
+ # #     fullgraph=True
+ # # )
+ # # # wrap the autoquant call in a try-except block to handle unsupported layers
+ # # for name, layer in pipeline_optimized.transformer.named_children():
+ # #     try:
+ # #         # apply autoquant to each layer
+ # #         pipeline_optimized.transformer._modules[name] = autoquant(layer, error_on_unseen=False)
+ # #         print(f"Successfully quantized {name}")
+ # #     except AttributeError as e:
+ # #         print(f"Skipping layer {name} due to error: {e}")
+ # #     except Exception as e:
+ # #         print(f"Unexpected error while quantizing {name}: {e}")
+
+ # # pipeline_optimized.transformer = autoquant(
+ # #     pipeline_optimized.transformer,
+ # #     error_on_unseen=False
+ # # )
+ # pipeline_optimized = pipeline_normal
+
+ # @spaces.GPU(duration=120)
+ # def generate_images(prompt, guidance_scale, num_inference_steps):
+ #     # # generate image with normal pipeline
+ #     # image_normal = pipeline_normal(
+ #     #     prompt=prompt,
+ #     #     guidance_scale=guidance_scale,
+ #     #     num_inference_steps=int(num_inference_steps)
+ #     # ).images[0]
+
+ #     # generate image with optimized pipeline
+ #     image_optimized = pipeline_optimized(
+ #         prompt=prompt,
+ #         guidance_scale=guidance_scale,
+ #         num_inference_steps=int(num_inference_steps)
+ #     ).images[0]
+
+ #     return image_optimized
+
+ # # set up Gradio interface
+ # demo = gr.Interface(
+ #     fn=generate_images,
+ #     inputs=[
+ #         gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
+ #         gr.Slider(1.0, 10.0, step=0.5, value=3.5, label="Guidance Scale"),
+ #         gr.Slider(10, 100, step=1, value=50, label="Number of Inference Steps")
+ #     ],
+ #     outputs=[
+ #         gr.Image(type="pil", label="Optimized FluxPipeline")
+ #     ],
+ #     title="FluxPipeline Comparison",
+ #     description="Compare images generated by the normal FluxPipeline and the optimized one using torchao and torch.compile()."
+ # )
+
+ # demo.launch()
  import gradio as gr
  import torch
+ from optimum.quanto import quantize
+ from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
+ from transformers import CLIPTextModel, CLIPTokenizer, T5TokenizerFast
+ from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
+ import subprocess
  import spaces
+ import os

+ # Set the data type for inference
+ dtype = torch.bfloat16

+ # Hugging Face repository and revision settings
+ repo_name = "FLUX.1-schnell-4bit"
+ bfl_repo = "black-forest-labs/FLUX.1-schnell"
+ revision = "refs/pr/1"

+ # Ensure local directory exists and download model files
+ subprocess.run(["mkdir", "-p", repo_name])
+ subprocess.run([
+     "huggingface-cli", "download", "PrunaAI/" + repo_name,
+     "--local-dir", repo_name,
+     "--local-dir-use-symlinks", "False"
+ ])

+ # Load scheduler, tokenizer, and VAE from the pre-trained repo
+ scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler", revision=revision)
+ text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
+ tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
+ vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype, revision=revision)

+ # Load text_encoder_2 and tokenizer_2 locally
+ text_encoder_2 = torch.load(repo_name + '/text_encoder_2.pt')
+ tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2", torch_dtype=dtype, revision=revision)
+
+ # Load transformer locally (quantized model)
+ transformer = torch.load(repo_name + '/transformer.pt')

+ # Create the pipeline using the pre-trained models
+ pipe = FluxPipeline(
+     scheduler=scheduler,
+     text_encoder=text_encoder,
+     tokenizer=tokenizer,
+     text_encoder_2=text_encoder_2,
+     tokenizer_2=tokenizer_2,
+     vae=vae,
+     transformer=transformer,
+ )
+
+ # Enable model CPU offload to save memory
+ pipe.enable_model_cpu_offload()
+
+ # Define the image generation function
  @spaces.GPU(duration=120)
+ def generate_image(prompt, guidance_scale, num_inference_steps):
+     generator = torch.Generator().manual_seed(12345)
+     image = pipe(
+         prompt,
          guidance_scale=guidance_scale,
+         num_inference_steps=int(num_inference_steps),
+         max_sequence_length=256,
+         generator=generator
      ).images[0]
+     return image
+
+ # Set up Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# FLUX.1-schnell 4-bit Quantized Model")
+
+     # Input for text prompt
+     prompt_input = gr.Textbox(lines=2, label="Prompt", placeholder="Enter your prompt here...")
+
+     # Slider for guidance scale
+     guidance_scale_input = gr.Slider(0.0, 10.0, step=0.1, value=7.5, label="Guidance Scale")

+     # Slider for number of inference steps
+     inference_steps_input = gr.Slider(4, 50, step=1, value=25, label="Number of Inference Steps")
+
+     # Button to trigger generation
+     generate_button = gr.Button("Generate Image")
+
+     # Output image
+     output_image = gr.Image(label="Generated Image", type="pil")
+
+     # Connect button to the image generation function
+     generate_button.click(fn=generate_image,
+                           inputs=[prompt_input, guidance_scale_input, inference_steps_input],
+                           outputs=[output_image])
+
+ # Launch the Gradio app
  demo.launch()
+
+
+
+
+
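
For reference, a minimal smoke test of the new pipeline outside the Gradio UI. This is a sketch, not part of the commit: it assumes the setup section of the updated app.py has already run (so the PrunaAI/FLUX.1-schnell-4bit files are downloaded and `pipe` exists), and the prompt, seed, and output filename below are illustrative only.

    # Hypothetical smoke test: call the quantized pipeline directly, mirroring
    # generate_image() in the updated app.py. `pipe` is the FluxPipeline built above.
    import torch

    prompt = "a photo of a corgi wearing sunglasses"   # illustrative prompt
    generator = torch.Generator().manual_seed(12345)   # same fixed seed as the app

    image = pipe(
        prompt,
        guidance_scale=7.5,            # matches the app's slider default
        num_inference_steps=25,        # matches the app's slider default
        max_sequence_length=256,
        generator=generator,
    ).images[0]

    image.save("flux_schnell_4bit_sample.png")          # the pipeline returns a PIL image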