Spaces:

Razzaqi3143
/

Image_to_Text

Sleeping

Image_to_Text / app.py

Update app.py

18424fd verified 24 days ago

No virus

1.35 kB

	import streamlit as st
	from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
	from PIL import Image
	import torch

	# Streamlit front end for image captioning with a pretrained ViT + GPT-2
	# vision-encoder-decoder checkpoint: the user uploads an image, presses
	# "Generate", and the decoded caption is written to the page.

	MODEL_ID = "nlpconnect/vit-gpt2-image-captioning"


	@st.cache_resource
	def _load_captioner():
	    """Load the captioning model, image processor and tokenizer once.

	    Streamlit re-executes this script on every widget interaction; caching
	    the loaded objects as a shared resource avoids re-instantiating the
	    checkpoint each time the user uploads a file or presses the button.

	    Returns:
	        A ``(model, processor, tokenizer)`` tuple loaded from ``MODEL_ID``.
	    """
	    return (
	        VisionEncoderDecoderModel.from_pretrained(MODEL_ID),
	        ViTImageProcessor.from_pretrained(MODEL_ID),
	        AutoTokenizer.from_pretrained(MODEL_ID),
	    )


	def _caption_image(image):
	    """Return a caption string for a PIL image.

	    Args:
	        image: The PIL image to caption; any mode is accepted.

	    Returns:
	        The decoded caption with special tokens stripped.
	    """
	    # The uploader accepts PNG, which may be RGBA, palette or grayscale;
	    # the image processor expects 3-channel input, so normalise to RGB.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    pixel_values = processor(images=image, return_tensors="pt").pixel_values

	    # Beam search (4 beams), caption capped at 16 tokens.
	    output_ids = model.generate(
	        pixel_values,
	        max_length=16,
	        num_beams=4,
	        return_dict_in_generate=True,
	    ).sequences
	    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


	# Load your model and tokenizer (cached across script reruns)
	model, processor, tokenizer = _load_captioner()

	# Streamlit UI
	st.title("Image Caption Generator")
	st.write("Upload an image and click 'Generate' to get a caption.")

	# File uploader for image
	uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

	if uploaded_image is not None:
	    # Display the uploaded image
	    image = Image.open(uploaded_image)
	    st.image(image, caption='Uploaded Image', use_column_width=True)

	    # Generate caption when button is clicked
	    if st.button('Generate'):
	        caption = _caption_image(image)

	        # Display the generated caption
	        st.write(f"Generated Caption: {caption}")