File size: 2,803 Bytes
c1b4f26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import logging

from paddleocr import PaddleOCR
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor
from utils.config import Config
import requests
import numpy as np
from PIL import Image, ImageTransform

class OCRDetector:
  """Two-stage OCR pipeline: PaddleOCR finds text boxes, VietOCR reads them.

  `text_detector` is the entry point; the other methods are the individual
  stages (box detection, cropping, recognition).
  """

  # Seconds to wait on a remote image before giving up instead of hanging.
  REQUEST_TIMEOUT = 30

  _logger = logging.getLogger(__name__)

  def __init__(self) -> None:
    """Build the PaddleOCR detector and the VietOCR recognizer from `Config`."""
    # Ask Paddle for the GPU only when the configured device is not the CPU.
    # `str()` keeps the check valid if the device is not a plain string.
    self.paddle_ocr = PaddleOCR(lang='en',
                                use_angle_cls=False,
                                use_gpu=not str(Config.device).startswith("cpu"),
                                show_log=False)

    vietocr_config = Cfg.load_config_from_name('vgg_transformer')
    vietocr_config['weights'] = Config.ocr_path
    vietocr_config['cnn']['pretrained'] = False
    vietocr_config['device'] = Config.device
    vietocr_config['predictor']['beamsearch'] = False
    self.viet_ocr = Predictor(vietocr_config)

  def find_box(self, image):
    """Locate text boxes in `image` with the PaddleOCR model (detection only).

    Args:
      image: Whatever `PaddleOCR.ocr` accepts; `text_detector` passes the
        image path or URL.

    Returns:
      Integer numpy array of boxes, one entry per detected region. When
      nothing is detected an empty array of shape (0, 4, 2) is returned.
    """
    result = self.paddle_ocr.ocr(image, cls=False, rec=False)
    # With rec=False the first element holds the boxes for the image; it can
    # be None or empty when no text was found.
    detections = result[0] if result else None
    if detections is None or len(detections) == 0:
      return np.empty((0, 4, 2), dtype=int)
    return np.array(detections).astype(int)

  def cut_image_polygon(self, image, box):
    """Crop the quadrilateral `box` out of `image`, with a small margin.

    Args:
      image: Image object exposing `transform(size, method)` (a PIL image in
        `text_detector`).
      box: Four (x, y) corners. NOTE(review): assumed to be ordered top-left,
        top-right, bottom-right, bottom-left - confirm against the detector.

    Returns:
      The transformed crop, sized to the box's original width and height.
    """
    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = box
    w = x2 - x1
    h = y4 - y1
    # Grow the quad by 1/7 of the box height on every side; only the first
    # corner is clamped at 0.
    scl = h // 7
    new_box = [max(x1 - scl, 0), max(y1 - scl, 0)], [x2 + scl, y2 - scl], [x3 + scl, y3 + scl], [x4 - scl, y4 + scl]
    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = new_box
    # QuadTransform takes an 8-tuple with the x,y coordinates of the top-left,
    # bottom-left, bottom-right and top-right corners, in that order.
    quad = [x1, y1, x4, y4, x3, y3, x2, y2]
    return image.transform((int(w), int(h)), ImageTransform.QuadTransform(quad))

  def vietnamese_text(self, boxes, image):
    """Recognize the text inside each box with the VietOCR model.

    Args:
      boxes: Iterable of boxes as accepted by `cut_image_polygon`.
      image: The image the boxes refer to.

    Returns:
      List of dicts with keys "text", "score" and "box", one per box whose
      score is above `Config.vietocr_threshold`. Boxes that fail to crop or
      to be recognized are skipped.
    """
    results = []
    for box in boxes:
      try:
        cut_image = self.cut_image_polygon(image, box)
        text, score = self.viet_ocr.predict(cut_image, return_prob=True)
      except Exception:
        # Best effort: one bad box must not abort the whole image, but leave
        # a trace instead of hiding the failure completely.
        self._logger.debug("Skipping box %s: crop/recognition failed", box,
                           exc_info=True)
        continue
      if score > Config.vietocr_threshold:
        results.append({"text": text,
                        "score": score,
                        "box": box})
    return results

  def text_detector(self, image_path):
    """Run detection and recognition on the image at `image_path`.

    Args:
      image_path: Local file path, or an http(s) URL which is downloaded
        with `requests`.

    Returns:
      The list produced by `vietnamese_text`, or None when no box was
      detected or no text passed the score threshold.
    """
    if image_path.startswith(("http://", "https://")):
      # `convert` creates a loaded copy, so the connection can be released
      # as soon as the `with` block ends.
      with requests.get(image_path, stream=True,
                        timeout=self.REQUEST_TIMEOUT) as response:
        image = Image.open(response.raw).convert("RGB")
    else:
      with Image.open(image_path) as source:
        image = source.convert("RGB")

    # The detector is handed the original path/URL, not the decoded image.
    boxes = self.find_box(image_path)
    if boxes.size == 0:
      return None

    results = self.vietnamese_text(boxes, image)
    return results or None