import gradio as gr
from diffusers import DiffusionPipeline
import spaces
import torch
from concurrent.futures import ProcessPoolExecutor
from huggingface_hub import hf_hub_download

dev_model = "black-forest-labs/FLUX.1-dev"
schnell_model = "black-forest-labs/FLUX.1-schnell"

device = "cuda" if torch.cuda.is_available() else "cpu"

repo_name = "ByteDance/Hyper-SD"
ckpt_name = "Hyper-FLUX.1-dev-8steps-lora.safetensors"
hyper_lora = hf_hub_download(repo_name, ckpt_name)

repo_name = "alimama-creative/FLUX.1-Turbo-Alpha"
ckpt_name = "diffusion_pytorch_model.safetensors"
turbo_lora = hf_hub_download(repo_name, ckpt_name)

pipe_dev = DiffusionPipeline.from_pretrained(dev_model, torch_dtype=torch.bfloat16)
pipe_schnell = DiffusionPipeline.from_pretrained(
    schnell_model,
    text_encoder=pipe_dev.text_encoder,
    text_encoder_2=pipe_dev.text_encoder_2,
    tokenizer=pipe_dev.tokenizer,
    tokenizer_2=pipe_dev.tokenizer_2,
    torch_dtype=torch.bfloat16
)

@spaces.GPU
def run_dev_hyper(prompt):
    print("dev_hyper")
    pipe_dev.to("cuda")
    print(hyper_lora)
    pipe_dev.load_lora_weights(hyper_lora)
    print("Loaded hyper lora!")
    image = pipe_dev(prompt, num_inference_steps=8, joint_attention_kwargs={"scale": 0.125}).images[0]
    print("Ran!")
    pipe_dev.unload_lora_weights()
    return image

@spaces.GPU
def run_dev_turbo(prompt):
    print("dev_turbo")
    pipe_dev.to("cuda")
    print(turbo_lora)
    pipe_dev.load_lora_weights(turbo_lora)
    print("Loaded turbo lora!")
    image = pipe_dev(prompt, num_inference_steps=8).images[0]
    print("Ran!")
    pipe_dev.unload_lora_weights()
    return image

@spaces.GPU
def run_schnell(prompt):
    print("schnell")
    pipe_schnell.to("cuda")
    print("schnell on gpu")
    image = pipe_schnell(prompt, num_inference_steps=4).images[0]
    print("Ran!")
    return image

def run_parallel_models(prompt):
    print(prompt)
    with ProcessPoolExecutor(max_workers=3) as executor:
        future_dev_hyper = executor.submit(run_dev_hyper, prompt)
        future_dev_turbo = executor.submit(run_dev_turbo, prompt)
        future_schnell = executor.submit(run_schnell, prompt)
        
        res_dev_hyper = future_dev_hyper.result()
        res_dev_turbo = future_dev_turbo.result()
        res_schnell = future_schnell.result()
    return res_dev_hyper, res_dev_turbo, res_schnell

run_parallel_models.zerogpu = True

with gr.Blocks() as demo:
    gr.Markdown("# Low Step Flux Comparison")
    gr.Markdown("Compare the quality (not the speed) of FLUX Schnell (4 steps), FLUX.1[dev] HyperFLUX (8 steps), FLUX.1[dev]-Turbo-Alpha (8 steps)")
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt")
        with gr.Column(scale=1, min_width=120):
            submit = gr.Button("Run")
    with gr.Row():
        schnell = gr.Image(label="FLUX Schnell (4 steps)")
        hyper = gr.Image(label="FLUX.1[dev] HyperFLUX (8 steps)")
        turbo = gr.Image(label="FLUX.1[dev]-Turbo-Alpha (8 steps)")
    
    gr.Examples(
        examples=[["the spirit of a Tamagotchi wandering in the city of Vienna"], ["a photo of a lavender cat"], ["a tiny astronaut hatching from an egg on the moon"], ["A delicious ceviche cheesecake slice"], ["an insect robot preparing a delicious meal"], ["A Charmander fine dining with a view to la Sagrada Família"]],
        fn=run_parallel_models,
        inputs=[prompt],
        outputs=[schnell, hyper, turbo],
        cache_examples="lazy"
    )
    
    submit.click(
        fn=run_parallel_models,
        inputs=[prompt],
        outputs=[schnell, hyper, turbo]
    )
demo.launch()