Spaces:
Build error
Build error
fix: discard remaining pixels
Browse files- most_relevant_part.py +27 -25
most_relevant_part.py
CHANGED
@@ -1,22 +1,23 @@
|
|
1 |
import os
|
2 |
-
import requests
|
3 |
-
import streamlit as st
|
4 |
-
from PIL import Image
|
5 |
|
6 |
import jax
|
7 |
import jax.numpy as jnp
|
8 |
import numpy as np
|
|
|
|
|
|
|
9 |
|
10 |
from utils import load_model
|
11 |
|
12 |
-
|
|
|
13 |
im = np.array(im)
|
14 |
-
|
15 |
-
|
16 |
tiles = [
|
17 |
-
im[x:x + M, y:y + N]
|
18 |
-
for x in range(0,
|
19 |
-
for y in range(0,
|
20 |
]
|
21 |
return tiles
|
22 |
|
@@ -36,19 +37,21 @@ def app(model_name):
|
|
36 |
model, processor = load_model(f"koclip/{model_name}")
|
37 |
|
38 |
st.title("Most Relevant Part of Image")
|
39 |
-
st.markdown(
|
|
|
40 |
Given a piece of text, the CLIP model finds the part of an image that best explains the text.
|
41 |
To try it out, you can
|
42 |
1) Upload an image
|
43 |
2) Explain a part of the image in text
|
44 |
Which will yield the most relevant image tile from a 3x3 grid of the image
|
45 |
-
"""
|
|
|
46 |
|
47 |
query1 = st.text_input(
|
48 |
"Enter a URL to an image...",
|
49 |
-
value="https://img.sbs.co.kr/newimg/news/20200823/201463830_1280.jpg"
|
50 |
-
|
51 |
-
|
52 |
captions = st.text_input(
|
53 |
"Enter query to find most relevant part of image ",
|
54 |
value="이건 서울의 경복궁 사진이다.",
|
@@ -58,23 +61,22 @@ def app(model_name):
|
|
58 |
if not any([query1, query2]):
|
59 |
st.error("Please upload an image or paste an image URL.")
|
60 |
else:
|
61 |
-
image_data = (
|
62 |
-
query1, stream=True).raw
|
|
|
63 |
image = Image.open(image_data)
|
64 |
st.image(image)
|
65 |
|
66 |
images = split_image(image)
|
67 |
|
68 |
-
inputs = processor(
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
outputs = model(**inputs)
|
75 |
probs = jax.nn.softmax(outputs.logits_per_image, axis=0)
|
76 |
-
for idx, prob in sorted(enumerate(probs),
|
77 |
-
key=lambda x: x[1],
|
78 |
-
reverse=True):
|
79 |
st.text(f"Score: {prob[0]:.3f}")
|
80 |
st.image(images[idx])
|
|
|
1 |
import os
|
|
|
|
|
|
|
2 |
|
3 |
import jax
|
4 |
import jax.numpy as jnp
|
5 |
import numpy as np
|
6 |
+
import requests
|
7 |
+
import streamlit as st
|
8 |
+
from PIL import Image
|
9 |
|
10 |
from utils import load_model
|
11 |
|
12 |
+
|
13 |
+
def split_image(im, num_rows=3, num_cols=3):
    """Split an image into a ``num_rows`` x ``num_cols`` grid of equal tiles.

    The tile height and width are the floor division of the image height and
    width by ``num_rows`` and ``num_cols``. Pixels that do not fit into a
    whole tile (the remainder at the bottom and right edges) are discarded,
    so exactly ``num_rows * num_cols`` tiles of identical shape are returned.

    Args:
        im: Anything ``np.array`` can convert to an array with at least two
            dimensions (rows first, columns second), e.g. a PIL image.
        num_rows: Number of tiles along the first (vertical) axis.
        num_cols: Number of tiles along the second (horizontal) axis.

    Returns:
        A list of array tiles in row-major order (left to right, then top to
        bottom).

    Raises:
        ValueError: If the image has fewer rows than ``num_rows`` or fewer
            columns than ``num_cols``, so a tile would be empty.
    """
    im = np.array(im)
    row_size = im.shape[0] // num_rows
    col_size = im.shape[1] // num_cols
    if row_size == 0 or col_size == 0:
        # A zero tile size would otherwise surface as an opaque
        # "range() arg 3 must not be zero" error below.
        raise ValueError(
            f"image of shape {im.shape[:2]} is too small for a "
            f"{num_rows}x{num_cols} grid"
        )
    # Stop the ranges at num_* * *_size so the leftover edge pixels never
    # start an extra, undersized tile.
    tiles = [
        im[x : x + row_size, y : y + col_size]
        for x in range(0, num_rows * row_size, row_size)
        for y in range(0, num_cols * col_size, col_size)
    ]
    return tiles
|
23 |
|
|
|
37 |
model, processor = load_model(f"koclip/{model_name}")
|
38 |
|
39 |
st.title("Most Relevant Part of Image")
|
40 |
+
st.markdown(
|
41 |
+
"""
|
42 |
Given a piece of text, the CLIP model finds the part of an image that best explains the text.
|
43 |
To try it out, you can
|
44 |
1) Upload an image
|
45 |
2) Explain a part of the image in text
|
46 |
Which will yield the most relevant image tile from a 3x3 grid of the image
|
47 |
+
"""
|
48 |
+
)
|
49 |
|
50 |
query1 = st.text_input(
|
51 |
"Enter a URL to an image...",
|
52 |
+
value="https://img.sbs.co.kr/newimg/news/20200823/201463830_1280.jpg",
|
53 |
+
)
|
54 |
+
query2 = st.file_uploader("or upload an image...", type=["jpg", "jpeg", "png"])
|
55 |
captions = st.text_input(
|
56 |
"Enter query to find most relevant part of image ",
|
57 |
value="이건 서울의 경복궁 사진이다.",
|
|
|
61 |
if not any([query1, query2]):
|
62 |
st.error("Please upload an image or paste an image URL.")
|
63 |
else:
|
64 |
+
image_data = (
|
65 |
+
query2 if query2 is not None else requests.get(query1, stream=True).raw
|
66 |
+
)
|
67 |
image = Image.open(image_data)
|
68 |
st.image(image)
|
69 |
|
70 |
images = split_image(image)
|
71 |
|
72 |
+
inputs = processor(
|
73 |
+
text=captions, images=images, return_tensors="jax", padding=True
|
74 |
+
)
|
75 |
+
inputs["pixel_values"] = jnp.transpose(
|
76 |
+
inputs["pixel_values"], axes=[0, 2, 3, 1]
|
77 |
+
)
|
78 |
outputs = model(**inputs)
|
79 |
probs = jax.nn.softmax(outputs.logits_per_image, axis=0)
|
80 |
+
for idx, prob in sorted(enumerate(probs), key=lambda x: x[1], reverse=True):
|
|
|
|
|
81 |
st.text(f"Score: {prob[0]:.3f}")
|
82 |
st.image(images[idx])
|