Image_to_Text / app.py
Razzaqi3143's picture
Update app.py
18424fd verified
raw
history blame contribute delete
No virus
1.35 kB
import streamlit as st
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image
import torch
@st.cache_resource
def _load_captioning_components():
    """Load the captioning model, image processor and tokenizer once.

    Streamlit re-runs this script from top to bottom on every widget
    interaction, so loading the checkpoint at module level would repeat the
    work on each upload or button click. Caching the loaded objects as a
    resource lets all reruns reuse the same instances.

    Returns:
        A ``(model, processor, tokenizer)`` tuple, all loaded from the same
        checkpoint.
    """
    checkpoint = "nlpconnect/vit-gpt2-image-captioning"
    return (
        VisionEncoderDecoderModel.from_pretrained(checkpoint),
        ViTImageProcessor.from_pretrained(checkpoint),
        AutoTokenizer.from_pretrained(checkpoint),
    )


# Load your model and tokenizer
model, processor, tokenizer = _load_captioning_components()
# Streamlit UI
st.title("Image Caption Generator")
st.write("Upload an image and click 'Generate' to get a caption.")

# File uploader for image
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    # Normalise to 3-channel RGB before anything else: accepted PNG uploads
    # may be RGBA, palette or grayscale, and the captioning checkpoint's
    # reference usage converts non-RGB images before preprocessing.
    image = Image.open(uploaded_image).convert("RGB")

    # Display the uploaded image
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Generate caption when button is clicked
    if st.button('Generate'):
        # Preprocess the image into the tensor the encoder consumes
        pixel_values = processor(images=image, return_tensors="pt").pixel_values

        # Generate captions with beam search, capped at 16 tokens
        generation = model.generate(
            pixel_values,
            max_length=16,
            num_beams=4,
            return_dict_in_generate=True,
        )
        output_ids = generation.sequences

        # Only one image was submitted, so the first sequence is the caption;
        # strip stray whitespace left over from decoding.
        caption = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()

        # Display the generated caption
        st.write(f"**Generated Caption:** {caption}")