chrisc36 committed on
Commit
9364aa5
1 Parent(s): 3180b04

Upload preprocessing_molmo.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. preprocessing_molmo.py +20 -6
preprocessing_molmo.py CHANGED
@@ -2,9 +2,11 @@
2
  Processor class for Molmo.
3
  """
4
 
5
- from typing import List, Union, Optional
6
 
7
- from transformers.utils.constants import OPENAI_CLIP_STD, OPENAI_CLIP_MEAN
 
 
8
 
9
  try:
10
  from typing import Unpack
@@ -25,7 +27,7 @@ from transformers.tokenization_utils_base import TextInput
25
  from transformers.utils import logging
26
 
27
  from transformers import AutoTokenizer
28
- from .image_preprocessing_molmo import MolmoImagesKwargs, make_batched_images, MolmoImageProcessor
29
 
30
 
31
  logger = logging.get_logger(__name__)
@@ -81,7 +83,7 @@ class MolmoProcessorKwargs(ProcessingKwargs, total=False):
81
  class MolmoProcessor(ProcessorMixin):
82
  attributes = ["image_processor", "tokenizer"]
83
  image_processor_class = "AutoImageProcessor"
84
- tokenizer_class = ("GPT2Tokenizer", "GPT2TokenizerFast")
85
 
86
  def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
87
  # self.image_processor = image_processor
@@ -131,8 +133,20 @@ class MolmoProcessor(ProcessorMixin):
131
  image_token_id = self.special_token_ids[IMAGE_PROMPT]
132
 
133
  if images is not None:
134
- images = make_batched_images(images)
135
- images = [np.array(image).astype(np.uint8) for image in images]
 
 
 
 
 
 
 
 
 
 
 
 
136
  # For now only support inserting images at the start
137
  image_idx = [-1]*len(images)
138
  else:
 
2
  Processor class for Molmo.
3
  """
4
 
5
+ from typing import Optional
6
 
7
+ import PIL
8
+ from PIL import ImageOps
9
+ from PIL.Image import Image
10
 
11
  try:
12
  from typing import Unpack
 
27
  from transformers.utils import logging
28
 
29
  from transformers import AutoTokenizer
30
+ from .image_preprocessing_molmo import MolmoImagesKwargs, MolmoImageProcessor
31
 
32
 
33
  logger = logging.get_logger(__name__)
 
83
  class MolmoProcessor(ProcessorMixin):
84
  attributes = ["image_processor", "tokenizer"]
85
  image_processor_class = "AutoImageProcessor"
86
+ tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
87
 
88
  def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
89
  # self.image_processor = image_processor
 
133
  image_token_id = self.special_token_ids[IMAGE_PROMPT]
134
 
135
  if images is not None:
136
+ if not isinstance(images, (list, tuple)):
137
+ images = [images]
138
+ image_arrays = []
139
+ for image in images:
140
+ if isinstance(image, Image):
141
+ image = image.convert("RGB")
142
+ # Handle images with EXIF orientation tags, which PIL will ignore by default
143
+ # https://github.com/python-pillow/Pillow/issues/4703
144
+ img = ImageOps.exif_transpose(image)
145
+ image_arrays.append(np.array(image))
146
+ else:
147
+ assert len(image.shape) == 3 and image.shape[-1] == 3
148
+ image_arrays.append(image.astype(np.uint8))
149
+ images = image_arrays
150
  # For now only support inserting images at the start
151
  image_idx = [-1]*len(images)
152
  else: