CLIPSeg2

Running on Zero

App Files Files Community

taesiri committed on Aug 2

Commit

9492db9

•

1 Parent(s): d01a481

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -11

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import gradio as gr
 from PIL import Image
 import torch
-import matplotlib.pyplot as plt
 import numpy as np
 import spaces
 from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
@@ -12,7 +11,7 @@ model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined"
 @spaces.GPU
 def process_image(image, prompt):
     inputs = processor(
-        text=prompt, images=image, padding="max_length", return_tensors="pt"
     )
     inputs = {k: v.cuda() for k, v in inputs.items()}
@@ -22,11 +21,10 @@ def process_image(image, prompt):
         preds = outputs.logits
     pred = torch.sigmoid(preds)
-    mat = pred.cpu().numpy()
     mask = Image.fromarray(np.uint8(mat * 255), "L")
-    mask = mask.convert("RGB")
     mask = mask.resize(image.size)
-    mask = np.array(mask)[:, :, 0]
     # normalize the mask
     mask_min = mask.min()
@@ -39,19 +37,19 @@ def get_masks(prompts, img, threshold):
     prompts = prompts.split(",")
     masks = []
     for prompt in prompts:
-        mask = process_image(img, prompt)
         mask = mask > threshold
         masks.append(mask)
     return masks
 @spaces.GPU
 def extract_image(pos_prompts, neg_prompts, img, threshold):
-    positive_masks = get_masks(pos_prompts, img, 0.5)
-    negative_masks = get_masks(neg_prompts, img, 0.5)
     # combine masks into one mask, logic OR
-    pos_mask = np.any(np.stack(positive_masks), axis=0)
-    neg_mask = np.any(np.stack(negative_masks), axis=0)
     final_mask = pos_mask & ~neg_mask
     # extract the final image
@@ -99,4 +97,4 @@ with gr.Blocks() as demo:
         outputs=[output_image, output_mask],
     )
-demo.launch()

 import gradio as gr
 from PIL import Image
 import torch
 import numpy as np
 import spaces
 from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
 @spaces.GPU
 def process_image(image, prompt):
     inputs = processor(
+        text=prompt, images=image, return_tensors="pt"
     )
     inputs = {k: v.cuda() for k, v in inputs.items()}
         preds = outputs.logits
     pred = torch.sigmoid(preds)
+    mat = pred.squeeze().cpu().numpy()  # Squeeze to remove extra dimensions
     mask = Image.fromarray(np.uint8(mat * 255), "L")
     mask = mask.resize(image.size)
+    mask = np.array(mask)
     # normalize the mask
     mask_min = mask.min()
     prompts = prompts.split(",")
     masks = []
     for prompt in prompts:
+        mask = process_image(img, prompt.strip())  # Strip whitespace from prompts
         mask = mask > threshold
         masks.append(mask)
     return masks
 @spaces.GPU
 def extract_image(pos_prompts, neg_prompts, img, threshold):
+    positive_masks = get_masks(pos_prompts, img, threshold)
+    negative_masks = get_masks(neg_prompts, img, threshold)
     # combine masks into one mask, logic OR
+    pos_mask = np.any(np.stack(positive_masks), axis=0) if positive_masks else np.zeros_like(img)[:,:,0].astype(bool)
+    neg_mask = np.any(np.stack(negative_masks), axis=0) if negative_masks else np.zeros_like(img)[:,:,0].astype(bool)
     final_mask = pos_mask & ~neg_mask
     # extract the final image
         outputs=[output_image, output_mask],
     )
+demo.launch(share=True)