praysimanjuntak committed on
Commit
9d496b5
1 Parent(s): 4c44204

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import supervision as sv
3
+ from ultralytics import YOLO
4
+ from tqdm import tqdm
5
+ import re
6
+ from collections import defaultdict
7
+ from paddleocr import PaddleOCR
8
+ from pdf2image import convert_from_path
9
+ import json
10
+ import cv2
11
+ import gradio as gr
12
+
13
# Fine-tuned YOLO detector (local best.pt weights from a training run).
model_yolo = YOLO(model="runs/detect/train/weights/best.pt")

# English PaddleOCR reader: angle classification on, CPU-only, quiet logs.
ocr = PaddleOCR(lang='en', use_angle_cls=True, use_gpu=False, show_log=False)
16
+
17
def process_pdf(file):
    """Detect "<LETTER><x>-<y>" labels on every page of a PDF and group them.

    Each page is rasterized, rotated 90 degrees clockwise, sliced for YOLO
    detection, and every detected region is OCR'd with PaddleOCR. OCR texts
    matching ``<uppercase letter><digits>-<digits>`` are grouped into a JSON
    object keyed by ``CB-<letter><x>`` with the sorted list of
    ``<letter><x>-<y>`` labels as the value.

    Parameters
    ----------
    file : object
        File handle with a ``.name`` path attribute (as supplied by
        ``gr.File``) pointing at the PDF to process.

    Returns
    -------
    str
        An indented JSON document mapping ``CB-<letter><x>`` keys to sorted
        label lists.
    """
    # Compile once instead of re-matching the raw pattern per detection.
    label_pattern = re.compile(r"([A-Z])(\d+)-(\d+)")

    # NOTE: renamed the callback parameter from `slice`, which shadowed the
    # builtin of the same name.
    def slicer_callback(image_slice: np.ndarray) -> sv.Detections:
        # High confidence threshold keeps only strong detections per slice.
        result = model_yolo.predict(image_slice, conf=0.85)[0]
        return sv.Detections.from_ultralytics(result)

    slicer = sv.InferenceSlicer(
        callback=slicer_callback,
        slice_wh=(2000, 800),
        overlap_ratio_wh=(0.6, 0.6),
        overlap_filter_strategy=sv.OverlapFilter.NON_MAX_MERGE,
        iou_threshold=0.05,
    )

    def _read_labels(image, detections):
        # OCR every detected box and yield only texts matching the label
        # pattern (the original appended empty strings that were filtered
        # later; they are skipped here with identical end results).
        for detection in tqdm(detections):
            x_min, y_min, x_max, y_max = (int(v) for v in detection[0])
            cropped = image[y_min:y_max, x_min:x_max]
            ocr_result = ocr.ocr(cropped, cls=True)[0]
            if ocr_result is None:
                continue
            # Prefer the first OCR line; fall back to all lines concatenated
            # (computed once, not twice as before).
            first_line = ocr_result[0][1][0]
            if label_pattern.match(first_line):
                yield first_line
                continue
            joined = ''.join(line[1][0] for line in ocr_result)
            if label_pattern.match(joined):
                yield joined

    # Group detected y-values by their (letter, x) prefix across all pages.
    detected_numbers = defaultdict(list)
    for pil_image in convert_from_path(file.name):
        # PIL gives RGB; OpenCV expects BGR. Pages are rotated 90° clockwise
        # before detection (presumably the drawings are landscape — TODO
        # confirm against sample PDFs).
        image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
        image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
        detections = slicer(image)
        for text in _read_labels(image, detections):
            match = label_pattern.match(text)
            letter = match.group(1)
            x = int(match.group(2))
            y = int(match.group(3))
            detected_numbers[(letter, x)].append(y)

    # Emit keys in sorted (letter, x) order; dict insertion order is
    # preserved in the JSON output.
    output = {
        f"CB-{letter}{x}": [f"{letter}{x}-{y}" for y in sorted(ys)]
        for (letter, x), ys in sorted(detected_numbers.items())
    }
    return json.dumps(output, indent=4)
85
+
86
# Gradio front end: one PDF upload in, the grouped-label JSON out.
iface = gr.Interface(
    fn=process_pdf,
    title="Extract Data from PDF",
    description="Upload a PDF file and get the JSON output of detected numbers.",
    inputs=gr.File(label="Upload PDF"),
    outputs="json",
)

# Start the web app (blocks until the server is stopped).
iface.launch()