import gradio as gr
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

# Load StreetCLIP, a CLIP model fine-tuned for street-level image
# geolocalization, along with its matching processor
model = CLIPModel.from_pretrained("geolocal/StreetCLIP")
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")

def classify_image(image):
    # Example candidate labels for zero-shot classification; CLIP-style models
    # accept any set of text prompts here
    labels = ["a photo of a cat", "a photo of a dog", "a photo of a car", "a photo of a tree"]
    
    # Preprocess the image and text
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    
    # Perform the inference; no_grad skips gradient tracking we don't need
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Postprocess the outputs
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1)      # softmax over labels gives probabilities
    
    # Convert the probabilities for the single input image to a Python list
    probs_list = probs[0].tolist()
    
    # Create a dictionary of labels and probabilities
    result = {label: prob for label, prob in zip(labels, probs_list)}
    
    return result

# Define Gradio interface
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="label",
    title="Geolocal StreetCLIP Classification",
    description="Upload an image to classify it with the geolocal/StreetCLIP model"
)

# Launch the interface
iface.launch()
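
# Usage note: Gradio serves the UI locally, by default at http://127.0.0.1:7860.
# Since StreetCLIP is trained for geolocalization, place names would be a more
# natural zero-shot label set than the placeholder labels above, e.g.:
#
#     labels = ["a photo taken in France", "a photo taken in Japan",
#               "a photo taken in Brazil", "a photo taken in the United States"]
#
# Any candidate captions work with the zero-shot pipeline in classify_image.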