from typing import Any, Dict

import torch
from huggingface_hub import hf_hub_download
from diffusers import DiffusionPipeline
from safetensors.torch import load_file
from transformers import pipeline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device ~>", device)


class EndpointHandler:
    def __init__(self, path=""):
        print("path ~>", path)
        # Load the SDXL base pipeline; fp16 weights only make sense on GPU.
        self.pipe = DiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
            torch_dtype=torch.float16 if device.type == "cuda" else None,
            variant="fp16",
        ).to(device)

        # Fuse the emoji LoRA into the base weights at a fixed scale.
        self.pipe.load_lora_weights("SvenN/sdxl-emoji", weight_name="lora.safetensors")
        self.pipe.fuse_lora(lora_scale=0.6)

        # The model was trained with pivotal tuning: load the learned
        # <s0>/<s1> token embeddings into both SDXL text encoders.
        embedding_path = hf_hub_download(
            repo_id="SvenN/sdxl-emoji", filename="embeddings.pti", repo_type="model"
        )
        state_dict = load_file(embedding_path)
        self.pipe.load_textual_inversion(
            state_dict["text_encoders_0"],
            token=["<s0>", "<s1>"],
            text_encoder=self.pipe.text_encoder,
            tokenizer=self.pipe.tokenizer,
        )
        self.pipe.load_textual_inversion(
            state_dict["text_encoders_1"],
            token=["<s0>", "<s1>"],
            text_encoder=self.pipe.text_encoder_2,
            tokenizer=self.pipe.tokenizer_2,
        )

        # Background-removal model, pinned to a specific revision because it
        # executes remote code.
        self.remove_bg = pipeline(
            "image-segmentation",
            model="briaai/RMBG-1.4",
            device=device,
            revision="22532afbdabdc36b2d30a334076720ac72a06f83",
            trust_remote_code=True,
        )

    def __call__(self, data: Dict[str, Any]) -> Any:
        """
        Args:
            data (:obj:`dict`): the request payload, containing an ``inputs``
                prompt string and optional ``parameters`` forwarded to the pipeline.
        Return:
            A :obj:`PIL.Image.Image`: the generated image with its background removed.
        """
        inputs = data.pop("inputs", data)
        # Automatically add the trigger tokens to the beginning of the prompt
        prompt = f"A <s0><s1> {inputs}"
        images = self.pipe(prompt, **data.get("parameters", {})).images
        image = images[0]
        image_no_bg = self.remove_bg(image)
        return image_no_bg


if __name__ == "__main__":
    handler = EndpointHandler()
    print(handler)
    output = handler({"inputs": "emoji of a tiger face, white background"})
    print(output)