Spaces:

baudm
/

PARSeq-OCR

Running

App Files Files Community

baudm committed on Jul 19, 2022

Commit

48c25e4

•

1 Parent(s): 1ca7683

Add app, config, and data

Browse files

Files changed (9) hide show

README.md +1 -0
app.py +92 -0
demo_images/art-01107.jpg +0 -0
demo_images/coco-1166773.jpg +0 -0
demo_images/cute-184.jpg +0 -0
demo_images/ic13_word_256.png +0 -0
demo_images/ic15_word_26.png +0 -0
demo_images/uber-27491.jpg +0 -0
requirements.txt +8 -0

README.md CHANGED Viewed

@@ -5,6 +5,7 @@ colorFrom: red
 colorTo: purple
 sdk: gradio
 sdk_version: 3.1.0
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorTo: purple
 sdk: gradio
 sdk_version: 3.1.0
+python_version: 3.9.13
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# Scene Text Recognition Model Hub
+# Copyright 2022 Darwin Bautista
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pathlib import Path
+import torch
+from torchvision import transforms as T
+import gradio as gr
+class App:
+    title = 'Scene Text Recognition with Permuted Autoregressive Sequence Models'
+    models = ['parseq', 'parseq_tiny', 'abinet', 'crnn', 'trba', 'vitstr']
+    def __init__(self):
+        self._model_cache = {}
+        self._preprocess = T.Compose([
+            T.Resize((32, 128), T.InterpolationMode.BICUBIC),
+            T.ToTensor(),
+            T.Normalize(0.5, 0.5)
+        ])
+    def _get_model(self, name):
+        if name in self._model_cache:
+            return self._model_cache[name]
+        model = torch.hub.load('baudm/parseq', name, pretrained=True).eval()
+        model.freeze()
+        self._model_cache[name] = model
+        return model
+    def __call__(self, model_name, image):
+        model = self._get_model(model_name)
+        image = self._preprocess(image.convert('RGB')).unsqueeze(0)
+        # Greedy decoding
+        pred = model(image).softmax(-1)
+        label, confidence = model.tokenizer.decode(pred)
+        return label[0]
+def main():
+    app = App()
+    with gr.Blocks(analytics_enabled=False, title=app.title) as demo:
+        gr.Markdown("""
+            <div align="center">
+            # Scene Text Recognition with<br/>Permuted Autoregressive Sequence Models
+            [![GitHub](https://img.shields.io/badge/baudm-parseq-blue?logo=github)](https://github.com/baudm/parseq)
+            </div>
+            To use this interactive demo for PARSeq and reproduced models:
+            1. Select which model you want to use.
+            2. Upload your own image, choose from the examples below, or draw on the canvas.
+            3. Read the given image or drawing.
+        """)
+        model_name = gr.Radio(app.models, value=app.models[0], label='Select STR model to use')
+        with gr.Row():
+            image_upload = gr.Image(type='pil', source='upload', label='Image')
+            image_canvas = gr.Image(type='pil', source='canvas', label='Drawing')
+        with gr.Row():
+            read_upload = gr.Button('Read Image')
+            read_canvas = gr.Button('Read Drawing')
+        output = gr.Textbox(max_lines=1, label='Model output')
+        demo_images = Path(__file__).parent.joinpath('demo_images').glob('*.*')
+        gr.Examples([str(p) for p in demo_images], inputs=image_upload)
+        read_upload.click(app, inputs=[model_name, image_upload], outputs=output)
+        read_canvas.click(app, inputs=[model_name, image_canvas], outputs=output)
+    demo.launch()
+# Start the demo only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()

demo_images/art-01107.jpg ADDED Viewed

demo_images/coco-1166773.jpg ADDED Viewed

demo_images/cute-184.jpg ADDED Viewed

demo_images/ic13_word_256.png ADDED Viewed

demo_images/ic15_word_26.png ADDED Viewed

demo_images/uber-27491.jpg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+Gradio
+torch
+torchtext
+torchvision
+torchmetrics==0.6.2
+timm==0.4.12
+nltk
+git+https://github.com/baudm/parseq.git