"""Streamlit app: upload an image and display the text TrOCR extracts from it."""

import streamlit as st
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import torch

# Checkpoint used for OCR (the large TrOCR variant).
# NOTE(review): "stage1" looks like the pre-trained checkpoint; the fine-tuned
# "-printed" / "-handwritten" variants may give better text — confirm before
# switching.
model_name = "microsoft/trocr-large-stage1"


@st.cache_resource
def load_ocr_model(name):
    """Load the TrOCR processor and model for checkpoint *name*.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids re-reading the checkpoint on each rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = TrOCRProcessor.from_pretrained(name)
    loaded_model = VisionEncoderDecoderModel.from_pretrained(name)
    return loaded_processor, loaded_model


def run_ocr(image):
    """Return the text generated by the model for a single RGB PIL image."""
    # The processor takes a batch, so the single image is wrapped in a list.
    # It also performs the resizing/normalisation itself, so the image is
    # passed at its original resolution instead of being squashed beforehand.
    inputs = processor(images=[image], return_tensors="pt")

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model.generate(**inputs)

    return processor.decode(outputs[0], skip_special_tokens=True)


processor, model = load_ocr_model(model_name)

# Streamlit app title
st.title("OCR with TrOCR (Improved Accuracy)")

# Upload image section
uploaded_image = st.file_uploader(
    "Upload an image for OCR", type=["jpg", "jpeg", "png"]
)

if uploaded_image is not None:
    try:
        # Force RGB so grayscale / RGBA / palette uploads are handled uniformly.
        image = Image.open(uploaded_image).convert("RGB")
    except OSError as e:
        # Pillow signals unreadable or corrupt image data with OSError
        # (UnidentifiedImageError is a subclass of it).
        st.error(f"An error occurred: {str(e)}")
    else:
        st.image(image, caption="Uploaded Image", use_column_width=True)

        try:
            text = run_ocr(image)
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"An error occurred: {str(e)}")
        else:
            # Display the OCR result
            st.write("Extracted Text:")
            st.text(text)