Razzaqi3143 committed
Commit 18424fd
1 Parent(s): 3306886

Update app.py

Files changed (1):
  app.py  +20 -19
app.py CHANGED
@@ -1,32 +1,33 @@
+import streamlit as st
 from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
 from PIL import Image
 import torch
-import gradio as gr
 
 # Load your model and tokenizer
 model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 
-# Function to generate captions from images
-def generate_caption(image):
-    # Preprocess the image
-    pixel_values = processor(images=image, return_tensors="pt").pixel_values
+# Streamlit UI
+st.title("Image Caption Generator")
+st.write("Upload an image and click 'Generate' to get a caption.")
 
-    # Generate captions
-    output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences
-    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+# File uploader for image
+uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
 
-    return caption
+if uploaded_image is not None:
+    # Display the uploaded image
+    image = Image.open(uploaded_image)
+    st.image(image, caption='Uploaded Image', use_column_width=True)
 
-# Create a Gradio Interface
-interface = gr.Interface(
-    fn=generate_caption,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.Textbox(),
-    title="Image Caption Generator",
-    description="Upload an image and click 'Generate' to get a caption."
-)
+    # Generate caption when button is clicked
+    if st.button('Generate'):
+        # Preprocess the image
+        pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
-# Launch the app in Hugging Face Spaces
-interface.launch()
+        # Generate captions
+        output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences
+        caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+        # Display the generated caption
+        st.write(f"**Generated Caption:** {caption}")
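Note on the new version (a sketch, not part of the commit): Streamlit reruns the whole script on every widget interaction, so the module-level from_pretrained calls above execute again on each rerun. One way to avoid the repeated reload, assuming Streamlit >= 1.18 for st.cache_resource (the helper name load_captioning_model is illustrative, not from the commit):

import streamlit as st
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

# Illustrative refactor (assumption): cache the heavy objects once per process
# so Streamlit's rerun-on-interaction loop reuses them instead of reloading.
@st.cache_resource
def load_captioning_model():
    model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    return model, processor, tokenizer

model, processor, tokenizer = load_captioning_model()

To try the updated app locally, run: streamlit run app.py. Also note that recent Streamlit releases deprecate the use_column_width argument to st.image in favor of use_container_width.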