taesiri committed
Commit
abac148
1 Parent(s): 1237317
0.001861_submarine _ submarine_0.9862991.jpg ADDED
0.003473_cliff _ cliff_0.51112.jpg ADDED
0.004658_spatula _ spatula_0.35416836.jpg ADDED
app.py CHANGED
@@ -4,45 +4,113 @@ from PIL import Image
 import torch
 import matplotlib.pyplot as plt
 import cv2
+import numpy as np
 
 processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
 model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
 
-def process_image(image, prompt):
-    inputs = processor(text=prompt, images=image, padding="max_length", return_tensors="pt")
-
-    # predict
-    with torch.no_grad():
-        outputs = model(**inputs)
-        preds = outputs.logits
-
-    filename = f"mask.png"
-    plt.imsave(filename, torch.sigmoid(preds))
-
-    # # img2 = cv2.imread(filename)
-    # # gray_image = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
-
-    # # (thresh, bw_image) = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY)
-
-    # # # fix color format
-    # # cv2.cvtColor(bw_image, cv2.COLOR_BGR2RGB)
-
-    # # return Image.fromarray(bw_image)
-
-    return Image.open("mask.png").convert("RGB")
-
+
+def process_image(image, prompt, threshold, alpha_value, draw_rectangles):
+    inputs = processor(
+        text=prompt, images=image, padding="max_length", return_tensors="pt"
+    )
+
+    # predict a relevance map for the prompt
+    with torch.no_grad():
+        outputs = model(**inputs)
+        preds = outputs.logits
+
+    pred = torch.sigmoid(preds)
+    mat = pred.cpu().numpy()
+    mask = Image.fromarray(np.uint8(mat * 255), "L")
+    mask = mask.convert("RGB")
+    mask = mask.resize(image.size)
+    mask = np.array(mask)[:, :, 0]
+
+    # normalize the mask to [0, 1]
+    mask_min = mask.min()
+    mask_max = mask.max()
+    mask = (mask - mask_min) / (mask_max - mask_min)
+
+    # binary mask used for contour detection
+    bmask = mask > threshold
+    # zero out values below the threshold
+    mask[mask < threshold] = 0
+
+    fig, ax = plt.subplots()
+    ax.imshow(image)
+    ax.imshow(mask, alpha=alpha_value, cmap="jet")
+
+    if draw_rectangles:
+        contours, hierarchy = cv2.findContours(
+            bmask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
+        )
+        for contour in contours:
+            x, y, w, h = cv2.boundingRect(contour)
+            rect = plt.Rectangle(
+                (x, y), w, h, fill=False, edgecolor="yellow", linewidth=2
+            )
+            ax.add_patch(rect)
+
+    ax.axis("off")
+    plt.tight_layout()
+
+    return fig, mask
+
+
 title = "Interactive demo: zero-shot image segmentation with CLIPSeg"
 description = "Demo for CLIPSeg, a CLIP-based model for zero- and one-shot image segmentation. To use it, simply upload an image and describe what you want to mask (identify in the image), or pick one of the examples below and click 'Submit'. Results will show up in a few seconds."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10003'>CLIPSeg: Image Segmentation Using Text and Image Prompts</a> | <a href='https://huggingface.co/docs/transformers/main/en/model_doc/clipseg'>HuggingFace docs</a></p>"
 
-examples = [["example_image.png", "wood"]]
-
-interface = gr.Interface(fn=process_image,
-                         inputs=[gr.Image(type="pil"), gr.Textbox(label="Please describe what you want to identify")],
-                         outputs=gr.Image(type="pil"),
-                         title=title,
-                         description=description,
-                         article=article,
-                         examples=examples)
-
-interface.launch(debug=True)
+
+with gr.Blocks() as demo:
+    gr.Markdown("# CLIPSeg: Image Segmentation Using Text and Image Prompts")
+    gr.Markdown(article)
+    gr.Markdown(description)
+    gr.Markdown(
+        "*Example images are taken from the [ImageNet-A](https://paperswithcode.com/dataset/imagenet-a) dataset*"
+    )
+
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil")
+            input_prompt = gr.Textbox(label="Please describe what you want to identify")
+            input_slider_T = gr.Slider(
+                minimum=0, maximum=1, value=0.4, label="Threshold"
+            )
+            input_slider_A = gr.Slider(minimum=0, maximum=1, value=0.5, label="Alpha")
+            draw_rectangles = gr.Checkbox(label="Draw rectangles")
+            btn_process = gr.Button("Process")
+
+        with gr.Column():
+            output_plot = gr.Plot(label="Segmentation Result")
+            output_image = gr.Image(label="Mask")
+
+    btn_process.click(
+        process_image,
+        inputs=[
+            input_image,
+            input_prompt,
+            input_slider_T,
+            input_slider_A,
+            draw_rectangles,
+        ],
+        outputs=[output_plot, output_image],
+    )
+
+    gr.Examples(
+        [
+            ["0.003473_cliff _ cliff_0.51112.jpg", "dog", 0.5, 0.5, True],
+            ["0.001861_submarine _ submarine_0.9862991.jpg", "beacon", 0.55, 0.4, True],
+            ["0.004658_spatula _ spatula_0.35416836.jpg", "banana", 0.4, 0.5, True],
+        ],
+        inputs=[
+            input_image,
+            input_prompt,
+            input_slider_T,
+            input_slider_A,
+            draw_rectangles,
+        ],
+    )
+
+demo.launch()
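
For reference, the new mask-and-boxes logic can be exercised outside Gradio. Below is a minimal standalone sketch (not part of the commit) that mirrors what process_image now does; it assumes transformers, torch, opencv-python, and Pillow are installed, and the test image path "test.jpg", the prompt "cat", and the 0.4 threshold are placeholders.

```python
import cv2
import numpy as np
import torch
from PIL import Image
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

image = Image.open("test.jpg").convert("RGB")  # placeholder test image
inputs = processor(text="cat", images=image, padding="max_length", return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits  # (352, 352) relevance map for a single prompt

# same post-processing as process_image: sigmoid -> resize -> min-max normalize
mask = torch.sigmoid(logits).cpu().numpy()
mask = np.array(Image.fromarray(np.uint8(mask * 255), "L").resize(image.size), dtype=np.float32)
mask = (mask - mask.min()) / (mask.max() - mask.min())

# binarize and extract bounding boxes, as the "Draw rectangles" option does
bmask = (mask > 0.4).astype(np.uint8)
contours, _ = cv2.findContours(bmask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    print(f"box at ({x}, {y}), size {w}x{h}")
```

Note that the two-value unpacking from cv2.findContours assumes OpenCV 4.x; OpenCV 3.x returned an extra leading value.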
example_image.png DELETED
Binary file (253 kB)
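
One behavioral note on the new Threshold slider: because the mask is min-max normalized per image before thresholding, the slider value is relative to that image's own score range rather than an absolute sigmoid confidence. A toy illustration (the numbers below are made up):

```python
import numpy as np

# toy scores standing in for a sigmoid CLIPSeg mask (made-up values)
scores = np.array([0.20, 0.35, 0.60, 0.90])

# min-max normalization, as in process_image
norm = (scores - scores.min()) / (scores.max() - scores.min())
# -> [0.0, 0.214, 0.571, 1.0]

# a threshold of 0.4 on the normalized mask corresponds to ~0.48 in the raw scores
print(norm > 0.4)  # [False False  True  True]
```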