chrisc36 committed on
Commit
001b110
1 Parent(s): 127b72a

Upload processor

Browse files
image_preprocessing_molmo.py CHANGED
@@ -563,4 +563,7 @@ class MolmoImageProcessor(BaseImageProcessor):
563
  }
564
  if image_masks is not None:
565
  out["image_masks"] = image_masks
566
- return out
 
 
 
 
563
  }
564
  if image_masks is not None:
565
  out["image_masks"] = image_masks
566
+ return out
567
+
568
+
569
+ MolmoImageProcessor.register_for_auto_class()
preprocessing_molmo.py CHANGED
@@ -4,6 +4,7 @@ Processor class for Molmo.
4
 
5
  from typing import List, Union, Optional
6
 
 
7
 
8
  try:
9
  from typing import Unpack
@@ -24,7 +25,7 @@ from transformers.tokenization_utils_base import TextInput
24
  from transformers.utils import logging
25
 
26
  from transformers import AutoTokenizer
27
- from hf_molmo.image_preprocessing_molmo import MolmoImagesKwargs, make_batched_images, MolmoImageProcessor
28
 
29
 
30
  logger = logging.get_logger(__name__)
@@ -79,12 +80,13 @@ class MolmoProcessorKwargs(ProcessingKwargs, total=False):
79
 
80
  class MolmoProcessor(ProcessorMixin):
81
  attributes = ["image_processor", "tokenizer"]
82
- image_processor_class = "MolmoImageProcessor"
83
  tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
84
 
85
  def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
86
- self.image_processor = image_processor
87
- self.tokenizer = tokenizer
 
88
  self._special_tokens = None
89
 
90
  @property
@@ -169,4 +171,5 @@ class MolmoProcessor(ProcessorMixin):
169
 
170
  return out
171
 
172
- MolmoProcessor.register_for_auto_class()
 
 
4
 
5
  from typing import List, Union, Optional
6
 
7
+ from transformers.utils.constants import OPENAI_CLIP_STD, OPENAI_CLIP_MEAN
8
 
9
  try:
10
  from typing import Unpack
 
25
  from transformers.utils import logging
26
 
27
  from transformers import AutoTokenizer
28
+ from .image_preprocessing_molmo import MolmoImagesKwargs, make_batched_images, MolmoImageProcessor
29
 
30
 
31
  logger = logging.get_logger(__name__)
 
80
 
81
  class MolmoProcessor(ProcessorMixin):
82
  attributes = ["image_processor", "tokenizer"]
83
+ image_processor_class = "AutoImageProcessor"
84
  tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
85
 
86
  def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
87
+ # self.image_processor = image_processor
88
+ # self.tokenizer = tokenizer
89
+ super().__init__(image_processor, tokenizer)
90
  self._special_tokens = None
91
 
92
  @property
 
171
 
172
  return out
173
 
174
+
175
+ MolmoProcessor.register_for_auto_class()
preprocessor_config.json CHANGED
@@ -1,4 +1,8 @@
1
  {
 
 
 
 
2
  "base_image_input_size": [
3
  336,
4
  336
 
1
  {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "image_preprocessing_molmo.MolmoImageProcessor",
4
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
5
+ },
6
  "base_image_input_size": [
7
  336,
8
  336
processor_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
4
+ },
5
+ "processor_class": "MolmoProcessor"
6
+ }
tokenizer_config.json CHANGED
@@ -73,6 +73,9 @@
73
  "<im_col>",
74
  "<|image|>"
75
  ],
 
 
 
76
  "bos_token": null,
77
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
78
  "clean_up_tokenization_spaces": false,
 
73
  "<im_col>",
74
  "<|image|>"
75
  ],
76
+ "auto_map": {
77
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
78
+ },
79
  "bos_token": null,
80
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
81
  "clean_up_tokenization_spaces": false,