Image_to_Text / app.py
Razzaqi3143's picture
Create app.py
0b3d99b verified
raw
history blame
No virus
1.15 kB
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image
import torch
import gradio as gr
# Load your model and tokenizer
# One checkpoint id is shared by the model, the image processor and the
# tokenizer so the three components always come from the same repository.
_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
processor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
# Function to generate captions from images
def generate_caption(image):
    """Generate a caption for a single image.

    Args:
        image: The PIL image handed over by the Gradio image input, or
            ``None`` when the form is submitted without an image.

    Returns:
        The decoded caption with surrounding whitespace removed, or an
        empty string when no image was supplied.
    """
    # Gradio passes None when the user submits without uploading anything;
    # return an empty caption instead of crashing inside the processor.
    if image is None:
        return ""

    # Uploads may be RGBA, grayscale or palette images; normalise to RGB
    # before preprocessing so the processor always gets three channels.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Preprocess the image into the tensor the model consumes.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Beam-search generation, capped at 16 tokens.
    output_ids = model.generate(
        pixel_values,
        max_length=16,
        num_beams=4,
        return_dict_in_generate=True,
    ).sequences

    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Decoded text can carry leading/trailing whitespace; trim it for display.
    return caption.strip()
# Create a Gradio Interface
# Single-function UI: a PIL image input wired to generate_caption, with the
# resulting caption shown in a text box.
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title="Image Caption Generator",
    # gr.Interface labels its run button "Submit" by default, so the
    # instructions refer to that button rather than a non-existent "Generate".
    description="Upload an image and click 'Submit' to get a caption.",
)
# Launch the app in Hugging Face Spaces
interface.launch()