scr930 committed on
Commit
1848536
1 Parent(s): 6b1798b

Update app.py

Files changed (1)
  1. app.py +17 -4
app.py CHANGED
@@ -1,26 +1,39 @@
 import gradio as gr
 from transformers import CLIPProcessor, CLIPModel
 from PIL import Image
+import torch
 
 # Load the model and processor
 model = CLIPModel.from_pretrained("geolocal/StreetCLIP")
 processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")
 
 def classify_image(image):
-    # Preprocess the image
-    inputs = processor(images=image, return_tensors="pt")
+    # Example labels for classification
+    labels = ["a photo of a cat", "a photo of a dog", "a photo of a car", "a photo of a tree"]
+
+    # Preprocess the image and text
+    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
+
     # Perform the inference
     outputs = model(**inputs)
+
     # Postprocess the outputs
     logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
     probs = logits_per_image.softmax(dim=1)  # we can use softmax to get probabilities
-    return probs
+
+    # Convert the probabilities to a list
+    probs_list = probs.tolist()[0]
+
+    # Create a dictionary of labels and probabilities
+    result = {label: prob for label, prob in zip(labels, probs_list)}
+
+    return result
 
 # Define Gradio interface
 iface = gr.Interface(
     fn=classify_image,
     inputs=gr.Image(type="pil"),
-    outputs="text",
+    outputs="label",
     title="Geolocal StreetCLIP Classification",
     description="Upload an image to classify using Geolocal StreetCLIP"
 )
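
For reference, the updated classify_image logic can be sanity-checked outside Gradio with a short standalone script. This is a minimal sketch, not part of the commit: it assumes the StreetCLIP weights download successfully, and "street.jpg" is a hypothetical placeholder path for a local test image. The dict it prints is the same label-to-probability mapping that the new outputs="label" component renders as a ranked label.

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("geolocal/StreetCLIP")
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")

# Same example labels as in the updated app.py
labels = ["a photo of a cat", "a photo of a dog", "a photo of a car", "a photo of a tree"]

image = Image.open("street.jpg")  # hypothetical test image path

# Preprocess the image and candidate labels together, then run a forward pass
inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)

# Softmax over the image-text similarity scores gives per-label probabilities
probs = outputs.logits_per_image.softmax(dim=1).tolist()[0]
result = dict(zip(labels, probs))

# The label -> probability dict matches what Gradio's "label" output expects
print(result)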