Vivien committed
Commit 1945055
Parent: 74e4bcd

Adjust sidebar text

Files changed (1)
  1. app.py +10 -4
app.py CHANGED
@@ -85,7 +85,9 @@ source = {0: "\nSource: Unsplash", 1: "\nSource: The Movie Database (TMDB)"}
 
 
 def get_html(url_list, url_list_slip, height=150):
-    html = "<div style='display: flex; flex-wrap: wrap; justify-content: space-evenly;'>"
+    html = (
+        "<div style='display: flex; flex-wrap: wrap; justify-content: space-evenly;'>"
+    )
     html += "<span style='margin-top: 20px; max-width: 1200px; display: flex; align-content: flex-start; flex-wrap: wrap; justify-content: space-evenly; width: 50%'>"
     html += "<div style='width: 100%; text-align: center;'><b>CLIP</b> (<a href='https://arxiv.org/abs/2103.00020'>Arxiv</a>, <a href='https://github.com/openai/CLIP'>GitHub</a>) from OpenAI</div>"
     for url, title, link in url_list:
@@ -104,10 +106,12 @@ def get_html(url_list, url_list_slip, height=150):
     html += "</span></div>"
     return html
 
+
 def compute_text_embeddings(list_of_strings):
     inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
     return model.get_text_features(**inputs)
 
+
 def compute_text_embeddings_slip(list_of_strings):
     texts = tokenizer(list_of_strings)
     if cuda_available:
@@ -115,6 +119,7 @@ def compute_text_embeddings_slip(list_of_strings):
     texts = texts.view(-1, 77).contiguous()
     return slip_model.encode_text(texts)
 
+
 def image_search(query, corpus, n_results=24):
     text_embeddings = compute_text_embeddings([query]).detach().numpy()
     text_embeddings_slip = compute_text_embeddings_slip([query]).detach().numpy()
@@ -150,13 +155,14 @@ description = """
 
 **Enter your query and hit enter**
 
-CLIP and SLIP are ML models that encode images and texts as vectors so that the vectors of an image and its caption are similar. They can notably be used for zero-shot image classification, text-based image retrieval or image generation.
+CLIP and SLIP are ML models that encode images and texts as vectors so that the vectors of an image and its caption are similar. They can notably be used for zero-shot image classification, text-based image retrieval or image generation.
+
+Cf. this Twitter [thread](https://twitter.com/vivien000000/status/1475829936443334660) with some suprising differences between CLIP and SLIP.
 
 *Built with OpenAI's [CLIP](https://openai.com/blog/clip/) model, Meta AI's [SLIP](https://github.com/facebookresearch/SLIP) model, 🤗 Hugging Face's [transformers library](https://huggingface.co/transformers/), [Streamlit](https://streamlit.io/), 25k images from [Unsplash](https://unsplash.com/) and 8k images from [The Movie Database (TMDB)](https://www.themoviedb.org/)*
 """
 
 
-
 st.markdown(
     """
     <style>
@@ -196,4 +202,4 @@ query = c.text_input("", value="clouds at sunset")
 corpus = st.radio("", ["Unsplash", "Movies"])
 if len(query) > 0:
     results, results_slip = image_search(query, corpus)
-    st.markdown(get_html(results, results_slip), unsafe_allow_html=True)
+    st.markdown(get_html(results, results_slip), unsafe_allow_html=True)
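
For context on the functions this commit touches: `compute_text_embeddings` feeds the query through Hugging Face's CLIP bindings (the `processor` and `model` objects are defined earlier in app.py, outside this diff), and `image_search` compares the resulting text vector with precomputed image vectors, which is the text-based retrieval described in the sidebar text. The sketch below is not part of the commit; the checkpoint name, the `image_embeddings` matrix and the `top_k_images` helper are assumptions made only to illustrate the idea.

```python
# Illustrative sketch only, not code from app.py: text-to-image retrieval with CLIP.
# The checkpoint name and the precomputed image_embeddings matrix are assumed here;
# the Space loads its own model objects and embedding files elsewhere in app.py.
import numpy as np
import torch
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def compute_text_embeddings(list_of_strings):
    # Same pattern as in the diff: tokenize the queries, then project them
    # into CLIP's joint text/image embedding space.
    inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
    return model.get_text_features(**inputs)


def top_k_images(query, image_embeddings, k=24):
    # image_embeddings: (n_images, dim) array of L2-normalized CLIP image
    # features, precomputed offline (a hypothetical stand-in for the Space's data).
    with torch.no_grad():
        text_emb = compute_text_embeddings([query]).numpy()
    text_emb = text_emb / np.linalg.norm(text_emb, axis=-1, keepdims=True)
    scores = image_embeddings @ text_emb.T  # cosine similarity, both sides normalized
    return np.argsort(-scores[:, 0])[:k]    # indices of the k closest images
```

Because the ranking is a cosine similarity between normalized vectors, only the query-side embedding and one matrix product are needed at request time; the image embeddings can be computed once and cached, which is what makes this practical inside a Streamlit app.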