import gradio as gr
import numpy as np
import random
from diffusers import DiffusionPipeline
from optimum.intel.openvino.modeling_diffusion import OVModelVaeDecoder, OVBaseModel, OVStableDiffusionPipeline
import torch
from huggingface_hub import snapshot_download
import openvino.runtime as ov
from typing import Optional, Dict

# Hugging Face model repository handed to the OpenVINO Stable Diffusion
# pipeline below.
model_id = "Disty0/SoteMixV3"

# Alternative checkpoint that was tried and did not work:
#model_id = "Disty0/sotediffusion-v2"

# Output image size in pixels. HIGH is the image *height* (see the
# commented-out `pipe.reshape(height=HIGH, width=WIDTH)` call below).
# Author's note: 1024*512 ran out of memory (1024x1536 is also mentioned).
HIGH=512
WIDTH=512


# NOTE(review): not referenced by any live code visible in this file; the
# commented-out reshape call below uses a literal -1 instead. Presumably -1
# means "dynamic batch dimension" -- TODO confirm before relying on it.
batch_size = -1

# Disabled experiment: a VAE-decoder subclass that was needed to plug in the
# TAESD decoder (see the disabled snapshot_download block further down).
#class CustomOVModelVaeDecoder(OVModelVaeDecoder):
#    def __init__(
#        self, model: ov.Model, parent_model: OVBaseModel, ov_config: Optional[Dict[str, str]] = None, model_dir: str = None,
#    ):
#        super(OVModelVaeDecoder, self).__init__(model, parent_model, ov_config, "vae_decoder", model_dir)


# Build the pipeline once, at import time; `infer` reuses this single
# module-level instance for every request.
pipe = OVStableDiffusionPipeline.from_pretrained(model_id)
# Disabled variant: load without compiling and with the model cache turned off.
#pipe = OVStableDiffusionPipeline.from_pretrained(model_id, compile = False, ov_config = {"CACHE_DIR":""})

# Disabled: swapping in the TAESD decoder made the results look ugly.
#taesd_dir = snapshot_download(repo_id="deinferno/taesd-openvino")
#pipe.vae_decoder = CustomOVModelVaeDecoder(model = OVBaseModel.load_model(f"{taesd_dir}/vae_decoder/openvino_model.xml"), parent_model = pipe, model_dir = taesd_dir)

# Disabled: reshape the pipeline to fixed image dimensions.
#pipe.reshape( batch_size=-1, height=HIGH, width=WIDTH, num_images_per_prompt=1)

# Disabled: optional textual-inversion embeddings (local files and
# sd-concepts-library repositories).
#pipe.load_textual_inversion("./badhandv4.pt", "badhandv4")
#pipe.load_textual_inversion("./Konpeto.pt", "Konpeto")
# Trigger token for the embedding below: <shigure-ui-style>
#pipe.load_textual_inversion("sd-concepts-library/shigure-ui-style")
#pipe.load_textual_inversion("sd-concepts-library/ruan-jia")
#pipe.load_textual_inversion("sd-concepts-library/agm-style-nao")


# Disabled: explicit compile step (pairs with the `compile = False` load
# variant above).
#pipe.compile()

# Module-level prompt defaults. Note that the name `prompt` is rebound to the
# Gradio text input once the UI is built further down in this file.
prompt = ""
negative_prompt = "(worst quality, low quality, lowres), zombie, interlocked fingers,"


def infer(prompt: str, negative_prompt: str = negative_prompt):
    """Generate a single image for ``prompt`` with the module-level pipeline.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text describing what the image should steer away
            from. Optional, so a caller that supplies only the prompt (as the
            Run button in this file does) no longer fails with a missing
            argument. The default is the module-level ``negative_prompt``
            string, captured once when this function is defined.

    Returns:
        The first entry of the pipeline result's ``images`` (presumably a
        PIL image, the usual pipeline default -- TODO confirm).
    """
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        # HIGH holds the image height and WIDTH the width. The two used to be
        # passed crossed over, which only went unnoticed because both are 512.
        width=WIDTH,
        height=HIGH,
        guidance_scale=7.5,
        num_inference_steps=30,
        num_images_per_prompt=1,
    )
    return output.images[0]


# Sample prompts offered as one-click examples in the UI.
examples = [
    "A cute kitten, Japanese cartoon style.",
    "A sweet family, dad stands next to mom, mom holds baby girl.",
    (
        # One long tag-style prompt, wrapped only for readability; the pieces
        # concatenate into a single string with no added separators.
        "(illustration, 8k CG, extremely detailed),(whimsical),catgirl,teenage girl,"
        "playing in the snow,winter wonderland,snow-covered trees,soft pastel colors,"
        "gentle lighting,sparkling snow,joyful,magical atmosphere,highly detailed,"
        "fluffy cat ears and tail,intricate winter clothing,shallow depth of field,"
        "watercolor techniques,close-up shot,slightly tilted angle,"
        "fairy tale architecture,nostalgic,playful,winter magic,(masterpiece:2),"
        "best quality,ultra highres,original,extremely detailed,perfect lighting,"
    ),
]

# Stylesheet handed to gr.Blocks: centers the main column and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 520px;\n"
    "}\n"
)

# Device name shown in the page header (display text only).
power_device = "CPU"

# Build the single-page UI: a header, a prompt box with a Run button, the
# generated image, and clickable example prompts.
with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
        # Disty0/SoteMixV3 {HIGH}x{WIDTH}
        Currently running on {power_device}.
        """)

        with gr.Row():
            # Rebinds the module-level name `prompt` to the input component.
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        # Clicking an example fills the prompt box.
        gr.Examples(
            examples=examples,
            inputs=[prompt],
        )

    # `infer` takes (prompt, negative_prompt), but only the prompt box used to
    # be wired in, so a click could not supply the second argument. Feed the
    # module-level default negative prompt through a hidden State input so
    # the number of inputs matches the function's parameters.
    negative_prompt_state = gr.State(value=negative_prompt)

    run_button.click(
        fn=infer,
        inputs=[prompt, negative_prompt_state],
        outputs=[result],
    )

# Enable request queuing, then start the web server.
demo.queue().launch()