import gradio as gr
from transformers import AutoProcessor, CLIPModel


# Load the FashionCLIP model and its matching processor once at import time.
# Both must come from the same checkpoint, so the identifier is defined once.
MODEL_ID = "patrickjohncyh/fashion-clip"

model = CLIPModel.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Classify an image against a comma-separated list of candidate text labels.
def classify_image_with_text(text, image):
    """Return the candidate label that best matches ``image``.

    Args:
        text: Comma-separated candidate labels, e.g. ``"dress, sneakers"``.
            Whitespace around each label is stripped and empty entries
            (blank text, doubled or trailing commas) are ignored.
        image: The image to classify; forwarded unchanged to the processor.

    Returns:
        The label with the highest image-text similarity score, or an empty
        string when ``text`` contains no usable label.

    Raises:
        ValueError: If labels were supplied but ``image`` is ``None``.
    """
    # Normalise the labels so "a, b" is scored (and returned) as "a" and "b",
    # not "a" and " b", and so empty fragments never reach the model.
    stripped = (part.strip() for part in (text or "").split(","))
    keywords = [label for label in stripped if label]
    if not keywords:
        return ""
    if image is None:
        raise ValueError("An image is required for classification.")

    # Imported here because the module itself does not import torch; it is
    # only needed to disable gradient tracking during inference.
    import torch

    inputs = processor(
        text=keywords, images=image, return_tensors="pt", padding=True
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image holds the image-text similarity scores. Softmax is
    # monotonic, so the argmax of the raw logits selects the same label as the
    # argmax of the probabilities. `.item()` requires a single image.
    best_index = outputs.logits_per_image.argmax(dim=1).item()
    return keywords[best_index]
    
# Build the Gradio interface and start the web server.
# gr.Interface is constructed directly and launched afterwards, rather than
# being re-entered as a Blocks context manager and launched while that
# context is still open.
iface = gr.Interface(
    fn=classify_image_with_text,
    inputs=[gr.Textbox(lines=1, label="Prompt"), gr.Image(label="Image")],
    outputs=gr.Textbox(label="Sortie de l'API"),
    title="SD Models",
)
iface.launch()