import os

import streamlit as st
from PIL import Image
from huggingface_hub import HfFolder
from transformers import VisionEncoderDecoderModel, AutoTokenizer, pipeline

# Read the Hugging Face access token from the environment and persist it via
# HfFolder so that downstream transformers calls can authenticate.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    HfFolder.save_token(HF_TOKEN)

# Optional: configure Hugging Face cache locations and the access token via
# environment variables (set these before transformers is imported to take effect)
"""
os.environ["HF_HOME"] = "path_to_your_huggingface_cache_directory"
os.environ["TRANSFORMERS_CACHE"] = "path_to_your_transformers_cache_directory"
os.environ["HF_DATASETS_CACHE"] = "path_to_your_datasets_cache_directory"
os.environ["HF_METRICS_CACHE"] = "path_to_your_metrics_cache_directory"
os.environ["HF_MODULES_CACHE"] = "path_to_your_modules_cache_directory"
os.environ["HF_TOKEN"] = "your_hugging_face_access_token"
"""

# Set up the Streamlit interface for input
st.title("Image to Text Model")

# Using the high-level pipeline API
st.header("Using Pipeline for Image Captioning")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Cache the pipeline so the model is downloaded and loaded only once per session.
@st.cache_resource
def load_caption_pipeline():
    return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

if uploaded_file is not None:
    pipe = load_caption_pipeline()
    try:
        # The image-to-text pipeline expects a PIL image, file path, or URL,
        # so decode the uploaded bytes with PIL first.
        image = Image.open(uploaded_file).convert("RGB")
        result = pipe(image)
        st.write("Generated Caption:", result[0]["generated_text"])
    except Exception as e:
        st.error(f"Failed to generate caption: {e}")

# Load the model directly for further analysis or custom processing steps
st.header("Load Model Directly")

# Cache the model and tokenizer so they are loaded only once per session.
@st.cache_resource
def load_model_and_tokenizer():
    model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    return model, tokenizer

# Example of how you might use the model and tokenizer directly.
# This section can be customized based on what you need to do with the model.
if st.button("Load Model Information"):
    try:
        model, tokenizer = load_model_and_tokenizer()
        st.text("Model and Tokenizer loaded successfully")
        # Display some model details, for example:
        st.text(f"Model Architecture: {model.__class__.__name__}")
        st.text(f"Tokenizer Type: {tokenizer.__class__.__name__}")
    except Exception as e:
        st.error(f"Error loading model: {e}")