stepfun-ai
/

GOT-OCR2_0

@@ -12,7 +12,7 @@ from .got_vision_b import build_GOT_vit_b
 from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
 import dataclasses
 DEFAULT_IMAGE_TOKEN = "<image>"
 DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
@@ -715,7 +715,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         return processed_images
-    def chat_crop(self, tokenizer, image_file, ocr_type, ocr_box='', ocr_color='', render=False, multi_page=False):
         # Model
         self.disable_torch_init()
@@ -805,36 +805,36 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                 stopping_criteria=[stopping_criteria]
                 )
-        # if render:
-        #     print('==============rendering===============')
-        #     outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
-        #     if outputs.endswith(stop_str):
-        #         outputs = outputs[:-len(stop_str)]
-        #     outputs = outputs.strip()
-        #     html_path = "./render_tools/" + "/content-mmd-to-html.html"
-        #     html_path_2 = "./results/demo.html"
-        #     right_num = outputs.count('\\right')
-        #     left_num = outputs.count('\left')
-        #     if right_num != left_num:
-        #         outputs = outputs.replace('\left(', '(').replace('\\right)', ')').replace('\left[', '[').replace('\\right]', ']').replace('\left{', '{').replace('\\right}', '}').replace('\left|', '|').replace('\\right|', '|').replace('\left.', '.').replace('\\right.', '.')
-        #     outputs = outputs.replace('"', '``').replace('$', '')
-        #     outputs_list = outputs.split('\n')
-        #     gt= ''
-        #     for out in outputs_list:
-        #         gt +=  '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
-        #     gt = gt[:-2]
-        #     with open(html_path, 'r') as web_f:
-        #         lines = web_f.read()
-        #         lines = lines.split("const text =")
-        #         new_web = lines[0] + 'const text ='  + gt  + lines[1]
-        #     with open(html_path_2, 'w') as web_f_new:
-        #         web_f_new.write(new_web)

 from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
 import dataclasses
+from megfile import smart_open
 DEFAULT_IMAGE_TOKEN = "<image>"
 DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
         return processed_images
+    def chat_plus(self, tokenizer, image_file, render=False, save_render_file=None, multi_page=False):
         # Model
         self.disable_torch_init()
                 stopping_criteria=[stopping_criteria]
                 )
+        if render:
+            print('==============rendering===============')
+            outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
+            if outputs.endswith(stop_str):
+                outputs = outputs[:-len(stop_str)]
+            outputs = outputs.strip()
+            html_path = "./render_tools/" + "content-mmd-to-html.html"
+            html_path_2 = save_render_file
+            right_num = outputs.count('\\right')
+            left_num = outputs.count('\left')
+            if right_num != left_num:
+                outputs = outputs.replace('\left(', '(').replace('\\right)', ')').replace('\left[', '[').replace('\\right]', ']').replace('\left{', '{').replace('\\right}', '}').replace('\left|', '|').replace('\\right|', '|').replace('\left.', '.').replace('\\right.', '.')
+            outputs = outputs.replace('"', '``').replace('$', '')
+            outputs_list = outputs.split('\n')
+            gt= ''
+            for out in outputs_list:
+                gt +=  '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
+            gt = gt[:-2]
+            with smart_open(html_path, 'r') as web_f:
+                lines = web_f.read()
+                lines = lines.split("const text =")
+                new_web = lines[0] + 'const text ='  + gt  + lines[1]
+            with smart_open(html_path_2, 'w') as web_f_new:
+                web_f_new.write(new_web)