huzey committed
Commit c320745
1 Parent(s): 1075d8a

update gpu

Files changed (1)
  1. app.py +38 -19
app.py CHANGED
@@ -378,13 +378,14 @@ class CLIP(torch.nn.Module):
                 causal_attention_mask=causal_attention_mask,
                 output_attentions=output_attentions,
             )
-            self.attn_output = hidden_states.clone()
+            hw = np.sqrt(hidden_states.shape[1]-1).astype(int)
+            self.attn_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
             hidden_states = residual + hidden_states
 
             residual = hidden_states
             hidden_states = self.layer_norm2(hidden_states)
             hidden_states = self.mlp(hidden_states)
-            self.mlp_output = hidden_states.clone()
+            self.mlp_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
 
             hidden_states = residual + hidden_states
 
@@ -393,7 +394,7 @@ class CLIP(torch.nn.Module):
             if output_attentions:
                 outputs += (attn_weights,)
 
-            self.block_output = hidden_states.clone()
+            self.block_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
             return outputs
 
         setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
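
Note: the new hook code above assumes the CLIP vision encoder emits one [CLS] token followed by a square grid of patch tokens; it drops the [CLS] token and reshapes the rest into a (batch, height, width, channels) map before storing it. A minimal, self-contained sketch of that reshape (the shapes are illustrative, not the app's actual resolution):

import numpy as np
import torch
from einops import rearrange

hidden_states = torch.randn(2, 1 + 14 * 14, 768)       # (batch, [CLS] + h*w patch tokens, channels)
hw = np.sqrt(hidden_states.shape[1] - 1).astype(int)   # side length of the patch grid, here 14
grid = rearrange(hidden_states[:, 1:], "b (h w) c -> b h w c", h=hw)
print(grid.shape)                                       # torch.Size([2, 14, 14, 768])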
@@ -538,10 +539,13 @@ def compute_ncut(
     affinity_focal_gamma=0.3,
     knn_ncut=10,
     knn_tsne=10,
+    embedding_method="UMAP",
     num_sample_tsne=1000,
     perplexity=500,
+    n_neighbors=500,
+    min_dist=0.1,
 ):
-    from ncut_pytorch import NCUT, rgb_from_tsne_3d
+    from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
 
     start = time.time()
     eigvecs, eigvals = NCUT(
@@ -554,16 +558,23 @@ def compute_ncut(
     print(f"NCUT time (cpu): {time.time() - start:.2f}s")
 
     start = time.time()
-    X_3d, rgb = rgb_from_tsne_3d(
-        eigvecs,
-        num_sample=num_sample_tsne,
-        perplexity=perplexity,
-        knn=knn_tsne,
-    )
-    print(f"t-SNE time (cpu): {time.time() - start:.2f}s")
-
-    # print("input shape:", features.shape)
-    # print("output shape:", rgb.shape)
+    if embedding_method == "UMAP":
+        rgb = rgb_from_umap_3d(
+            eigvecs,
+            n_neighbors=n_neighbors,
+            min_dist=min_dist,
+        )
+        print(f"UMAP time (cpu): {time.time() - start:.2f}s")
+    elif embedding_method == "t-SNE":
+        X_3d, rgb = rgb_from_tsne_3d(
+            eigvecs,
+            num_sample=num_sample_tsne,
+            perplexity=perplexity,
+            knn=knn_tsne,
+        )
+        print(f"t-SNE time (cpu): {time.time() - start:.2f}s")
+    else:
+        raise ValueError(f"Embedding method {embedding_method} not supported.")
 
     rgb = rgb.reshape(features.shape[:3] + (3,))
     return rgb
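
Note: after the UMAP or t-SNE branch, compute_ncut folds the flat per-pixel colors back into image space with rgb.reshape(features.shape[:3] + (3,)). A shape-flow sketch of that last step, assuming features is a (batch, height, width, channels) feature map and rgb holds one 3D color per pixel (dummy sizes, illustrative only):

import torch

B, H, W, C = 2, 14, 14, 768
features = torch.randn(B, H, W, C)            # per-patch features fed into NCUT
rgb = torch.rand(B * H * W, 3)                # flat per-pixel colors from t-SNE/UMAP
rgb = rgb.reshape(features.shape[:3] + (3,))  # back to (B, H, W, 3) for display
print(rgb.shape)                              # torch.Size([2, 14, 14, 3])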
@@ -585,7 +596,7 @@ def to_pil_images(images):
         for image in images
     ]
 
-@spaces.GPU(duration=60)
+@spaces.GPU(duration=30)
 def main_fn(
     images,
     model_name="SAM(sam_vit_b)",
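
Note: the decorator change above is on the Hugging Face ZeroGPU side: spaces.GPU reserves a GPU for each call of the decorated function, and duration is the number of seconds requested per call, lowered here from 60 to 30. A minimal, hypothetical usage sketch (run_on_gpu is a stand-in, not part of app.py):

import spaces
import torch

@spaces.GPU(duration=30)   # request a ZeroGPU slot for up to ~30 s per call
def run_on_gpu(x: torch.Tensor) -> torch.Tensor:
    return (x.to("cuda") ** 2).cpu()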
@@ -595,9 +606,12 @@ def main_fn(
     affinity_focal_gamma=0.3,
     num_sample_ncut=10000,
     knn_ncut=10,
+    embedding_method="UMAP",
     num_sample_tsne=1000,
     knn_tsne=10,
     perplexity=500,
+    n_neighbors=500,
+    min_dist=0.1,
 ):
     if perplexity >= num_sample_tsne:
         # raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
@@ -620,7 +634,10 @@ def main_fn(
         knn_ncut=knn_ncut,
         knn_tsne=knn_tsne,
         num_sample_tsne=num_sample_tsne,
+        embedding_method=embedding_method,
         perplexity=perplexity,
+        n_neighbors=n_neighbors,
+        min_dist=min_dist,
     )
     rgb = dont_use_too_much_green(rgb)
     return to_pil_images(rgb)
@@ -642,10 +659,12 @@ demo = gr.Interface(
         gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
         gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
         gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
-        gr.Slider(100, 1000, step=100, label="num_sample (t-SNE)", value=500, elem_id="num_sample_tsne", info="for Nyström approximation. Adding will slow down t-SNE quite a lot"),
-        gr.Slider(1, 100, step=1, label="KNN (t-SNE)", value=10, elem_id="knn_tsne", info="for Nyström approximation"),
-        gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=250, elem_id="perplexity", info="for t-SNE"),
-
+        gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="UMAP", elem_id="embedding_method"),
+        gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="for Nyström approximation. Adding will slow down quite a lot"),
+        gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="for Nyström approximation"),
+        gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity", info="for t-SNE"),
+        gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors", info="for UMAP"),
+        gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist", info="for UMAP"),
     ]
 )
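
Note: the new controls slot into gr.Interface like any other inputs. A minimal, hypothetical sketch wiring just the added dropdown and UMAP sliders to a stand-in function (preview is not part of app.py; labels and values mirror the hunk above):

import gradio as gr

def preview(embedding_method, n_neighbors, min_dist):
    return f"{embedding_method}: n_neighbors={n_neighbors}, min_dist={min_dist}"

demo = gr.Interface(
    fn=preview,
    inputs=[
        gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="UMAP"),
        gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150),
        gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1),
    ],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()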