Spaces:

SunderAli17
/

SAKFaceTransform

Sleeping

App Files Files Community

SAKFaceTransform / app.py

SunderAli17

Update app.py

3c5ddc6 verified 15 days ago

raw

history blame contribute delete

7.93 kB

	import spaces
	import random
	import torch
	import cv2
	import insightface
	import gradio as gr
	import numpy as np
	import os
	from huggingface_hub import snapshot_download
	from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
	from SAK.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
	from SAK.models.modeling_chatglm import ChatGLMModel
	from SAK.models.tokenization_chatglm import ChatGLMTokenizer
	from diffusers import AutoencoderKL
	from SAK.models.unet_2d_condition import UNet2DConditionModel
	from diffusers import EulerDiscreteScheduler
	from PIL import Image
	from insightface.app import FaceAnalysis
	from insightface.data import get_image as ins_get_image

	# Introductory Markdown/HTML blurb for the demo page; it is rendered by
	# gr.Markdown(MARKDOWN) as the first element of the Blocks layout.
	MARKDOWN = """
	This demo utilizes <a href="https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/stable_diffusion_xl">Stable Diffusion XL Pipeline</a>

	Try out with different prompts using your image and do provide your feedback.

	Demo by [Sunder Ali Khowaja](https://sander-ali.github.io) - [X](https://x.com/SunderAKhowaja) -[Github](https://github.com/sander-ali) -[Hugging Face](https://huggingface.co/SunderAli17)
	"""

	# Target device for every model below; the app assumes a CUDA GPU is available.
	device = "cuda"
	# Download (or reuse cached) snapshots of the base model repo and the
	# FaceID adapter repo; the returned values are used below as local directory paths.
	ckpt_dir = snapshot_download(repo_id="SunderAli17/SAK")
	ckpt_dir_faceid = snapshot_download(repo_id="SunderAli17/SAK-IP-Adapter-FaceTransform-Plus")

	# ChatGLM text encoder, requested in float16 and moved to the GPU.
	# NOTE(review): .half() looks redundant given torch_dtype=torch.float16 — confirm.
	text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
	tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
	# VAE and UNet are both cast to half precision before being moved to the GPU.
	vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
	scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
	unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
	# CLIP vision encoder shipped with the FaceID adapter repo. Unlike the models
	# above it is moved to the GPU without an explicit .half() cast.
	clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_faceid}/clip-vit-large-patch14-336', ignore_mismatched_sizes=True)
	clip_image_encoder.to(device)
	# 336 px preprocessing, the same size infer() resizes the face crop to.
	clip_image_processor = CLIPImageProcessor(size = 336, crop_size = 336)

	# Assemble the project-specific SDXL pipeline from the components loaded above.
	pipe = StableDiffusionXLPipeline(
	vae = vae,
	text_encoder = text_encoder,
	tokenizer = tokenizer,
	unet = unet,
	scheduler = scheduler,
	face_clip_encoder = clip_image_encoder,
	face_clip_processor = clip_image_processor,
	force_zeros_for_empty_prompt = False,
	)

	class FaceInfoGenerator():
	    """Detect faces with insightface and return the largest one.

	    The wrapped ``FaceAnalysis`` app uses the ``antelopev2`` model pack and
	    prefers the CUDA execution provider, falling back to CPU.
	    """

	    def __init__(self, root_dir = "./.insightface/"):
	        """Create and prepare the insightface analysis app.

	        Args:
	            root_dir: Directory handed to ``FaceAnalysis`` as its ``root``.
	        """
	        self.app = FaceAnalysis(name = 'antelopev2', root = root_dir, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
	        self.app.prepare(ctx_id = 0, det_size = (640, 640))

	    @staticmethod
	    def _bbox_area(face):
	        """Return the area of ``face['bbox']``, given as (left, top, right, bottom)."""
	        left, top, right, bottom = face['bbox'][:4]
	        return (right - left) * (bottom - top)

	    @staticmethod
	    def _largest_face(faces):
	        """Return the face with the biggest bounding box, or None if `faces` is empty."""
	        if len(faces) == 0:
	            return None
	        # A single linear pass is enough; there is no need to sort every detection.
	        return max(faces, key=FaceInfoGenerator._bbox_area)

	    def get_faceinfo_one_img(self, face_image):
	        """Run face analysis on one RGB image and keep only the largest face.

	        Args:
	            face_image: Image convertible with ``np.array``; it is treated as
	                RGB and converted to BGR before detection.

	        Returns:
	            The detection with the largest bounding-box area, or ``None``
	            when no face is found.
	        """
	        bgr_image = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
	        return self._largest_face(self.app.get(bgr_image))

	def face_bbox_to_square(bbox):
	    """Expand a (left, top, right, bottom) box into a square around the same centre.

	    The square's side equals the longer of the box's width and height.

	    Args:
	        bbox: Four numbers in the order left, top, right, bottom.

	    Returns:
	        A list ``[left, top, right, bottom]`` describing the square box.
	    """
	    left, top, right, bottom = bbox
	    half_side = max(right - left, bottom - top) / 2
	    mid_x = (left + right) / 2
	    mid_y = (top + bottom) / 2
	    return [mid_x - half_side, mid_y - half_side, mid_x + half_side, mid_y + half_side]

	# Largest signed 32-bit integer; upper bound for random seeds and for the seed slider.
	MAX_SEED = np.iinfo(np.int32).max
	# NOTE(review): not referenced anywhere in the visible code — possibly a leftover.
	MAX_IMAGE_SIZE = 1024
	# Single shared face detector, constructed once at import time and reused by infer().
	face_info_generator = FaceInfoGenerator()

	@spaces.GPU
	def infer(prompt,
	          image = None,
	          negative_prompt = "nsfw，Face shadows，Low resolution，JPEG artifacts、Vague、bad，Neon lights",
	          seed = 66,
	          randomize_seed = False,
	          guidance_scale = 5.0,
	          num_inference_steps = 50
	          ):
	    """Generate a 1024x1024 image from `prompt`, conditioned on the face in `image`.

	    Args:
	        prompt: Text prompt describing the desired picture.
	        image: PIL image containing the reference face.
	        negative_prompt: Text describing what the result should avoid.
	        seed: Seed for the random generator (ignored when `randomize_seed`).
	        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
	        guidance_scale: Guidance scale forwarded to the pipeline.
	        num_inference_steps: Number of denoising steps forwarded to the pipeline.

	    Returns:
	        A tuple ``(generated_image, seed_used)``.

	    Raises:
	        gr.Error: If no image was supplied or no face could be detected in it.
	    """
	    global pipe

	    # Validate the request before any pipeline work so that bad input
	    # produces a readable UI message instead of an opaque traceback.
	    if image is None:
	        raise gr.Error("Please upload an image containing a face.")

	    face_info = face_info_generator.get_faceinfo_one_img(image)
	    if face_info is None:
	        raise gr.Error("No face was detected in the uploaded image. Please try a different image.")

	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    # UI widgets may hand over floats; manual_seed and the step count need ints.
	    seed = int(seed)
	    num_inference_steps = int(num_inference_steps)
	    generator = torch.Generator().manual_seed(seed)

	    pipe = pipe.to(device)
	    # NOTE(review): the adapter weights are re-loaded on every call; confirm
	    # whether this can be done once at start-up instead.
	    pipe.load_ip_adapter_faceid_plus(f'{ckpt_dir_faceid}/ipa-faceid-plus.bin', device = device)
	    scale = 0.8
	    pipe.set_face_fidelity_scale(scale)

	    # Square crop around the largest face, resized to the 336 px CLIP input size.
	    face_bbox_square = face_bbox_to_square(face_info["bbox"])
	    crop_image = image.crop(face_bbox_square).resize((336, 336))
	    crop_image = [crop_image]

	    # Identity embedding from insightface, batched and cast to float16 on the GPU.
	    face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
	    face_embeds = face_embeds.to(device, dtype = torch.float16)

	    result_image = pipe(
	        prompt = prompt,
	        negative_prompt = negative_prompt,
	        height = 1024,
	        width = 1024,
	        num_inference_steps = num_inference_steps,
	        guidance_scale = guidance_scale,
	        num_images_per_prompt = 1,
	        generator = generator,
	        face_crop_image = crop_image,
	        face_insightface_embeds = face_embeds
	    ).images[0]

	    return result_image, seed


	# Rows offered by gr.Examples; each row is [prompt text, path to a reference
	# face image], matching the [prompt, image] inputs wired up in the layout.
	examples = [
	["wearing a full suit sitting in a restaurant with candle lights", "image/SunderAli_Khowaja.png"],
	["Wild cowboy hat with western town and horses in the background", "image/test2.png"]
	]


	# CSS rules keyed to the element ids assigned in the Blocks layout
	# ("col-left", "col-right" and "button").
	css="""
	#col-left {
	margin: 0 auto;
	max-width: 600px;
	}
	#col-right {
	margin: 0 auto;
	max-width: 750px;
	}
	#button {
	color: blue;
	}
	"""

	def load_description(fp):
	    """Return the full contents of the UTF-8 text file located at `fp`."""
	    with open(fp, encoding='utf-8') as handle:
	        return handle.read()

	# Soft Gradio theme with a custom font fallback chain.
	theme = gr.themes.Soft(
	font=[gr.themes.GoogleFont('Source Code Pro'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
	)
	# JavaScript handed to gr.Blocks(js=...): if the page URL does not already
	# carry "__theme=dark", the parameter is added and the page is reloaded.
	js_func = """
	function refresh() {
	const url = new URL(window.location);
	if (url.searchParams.get('__theme') !== 'dark') {
	url.searchParams.set('__theme', 'dark');
	window.location.href = url.href;
	}
	}
	"""

	# Build the demo UI. `css` is passed to gr.Blocks so that the #col-left,
	# #col-right and #button rules defined in this file apply to the elements
	# carrying those elem_id values below.
	with gr.Blocks(js=js_func, theme=theme, css=css) as SAK:
	    gr.Markdown(MARKDOWN)
	    with gr.Row():
	        # Left column: prompt, reference image, advanced settings and the run button.
	        with gr.Column(elem_id="col-left"):
	            with gr.Row():
	                prompt = gr.Textbox(
	                    label="Prompt",
	                    placeholder="Enter your prompt",
	                    lines=2
	                )
	            with gr.Row():
	                image = gr.Image(label="Image", type="pil")
	            with gr.Accordion("Advanced Settings", open=False):
	                negative_prompt = gr.Textbox(
	                    label="Negative prompt",
	                    placeholder="Enter a negative prompt",
	                    visible=True,
	                )
	                seed = gr.Slider(
	                    label="Seed",
	                    minimum=0,
	                    maximum=MAX_SEED,
	                    step=1,
	                    value=0,
	                )
	                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
	                with gr.Row():
	                    guidance_scale = gr.Slider(
	                        label="Guidance scale",
	                        minimum=0.0,
	                        maximum=10.0,
	                        step=0.1,
	                        value=5.0,
	                    )
	                    num_inference_steps = gr.Slider(
	                        label="Number of inference steps",
	                        minimum=10,
	                        maximum=50,
	                        step=1,
	                        value=25,
	                    )
	            with gr.Row():
	                button = gr.Button("Run", elem_id="button")

	        # Right column: generated image and the seed that produced it.
	        with gr.Column(elem_id="col-right"):
	            result = gr.Image(label="Result", show_label=False)
	            seed_used = gr.Number(label="Seed Used")

	    # Examples supply only prompt and image; infer() falls back to its own
	    # defaults for every other parameter.
	    with gr.Row():
	        gr.Examples(
	            fn=infer,
	            examples=examples,
	            inputs=[prompt, image],
	            outputs=[result, seed_used],
	        )

	    button.click(
	        fn=infer,
	        inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps],
	        outputs=[result, seed_used]
	    )


	SAK.queue().launch(debug=True, share=True)