Commit 5f35294 (1 parent: c757833)
Author: andito (HF staff)

Upload Florence2ForConditionalGeneration

Files changed (3):
  1. config.json (+2 -2)
  2. model.safetensors (+1 -1)
  3. modeling_florence2.py (+3 -4)
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "model_checkpoints/gigantic_fukuiraptor/epoch_9/",
+  "_name_or_path": "model_checkpoints/vqainstruct_no_lora/epoch_5",
   "architectures": [
     "Florence2ForConditionalGeneration"
   ],
@@ -160,7 +160,7 @@
   "length_penalty": 1.0,
   "max_length": 20,
   "min_length": 0,
-  "model_type": "davit",
+  "model_type": "",
   "no_repeat_ngram_size": 0,
   "num_beam_groups": 1,
   "num_beams": 1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b6f79f7fe43daf6285f057156a5110ff0724e40ab4f2c395823ac44856a15a2
+oid sha256:1d9a3bc6abcace5e9820630945fe26cfa961fe2577f8adeb48256acba876123e
 size 3291921348
modeling_florence2.py CHANGED
@@ -2288,8 +2288,7 @@ class Florence2Seq2SeqLMOutput(ModelOutput):
 
         image_hidden_states of the model produced by the vision encoder
     """
-
-    loss: torch.FloatTensor = None
+    loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
@@ -2530,7 +2529,6 @@ class Florence2ForConditionalGeneration(Florence2PreTrainedModel):
     def __init__(self, config: Florence2Config):
         super().__init__(config)
         assert config.vision_config.model_type == 'davit', 'only DaViT is supported for now'
-        # del config.vision_config.model_type
         self.vision_tower = DaViT.from_config(config=config.vision_config)
         # remove unused layers
         del self.vision_tower.head
@@ -2734,7 +2732,8 @@ class Florence2ForConditionalGeneration(Florence2PreTrainedModel):
                 image_features = self._encode_image(pixel_values)
                 inputs_embeds, attention_mask = self._merge_input_ids_with_image_features(image_features, inputs_embeds)
 
-        attention_mask = attention_mask.to(inputs_embeds.dtype)
+        if inputs_embeds is not None:
+            attention_mask = attention_mask.to(inputs_embeds.dtype)
         outputs = self.language_model(
             attention_mask=attention_mask,
             labels=labels,
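
For context on the guarded cast at the end of the diff: during generation steps that reuse cached encoder outputs (no input_ids or pixel_values passed), inputs_embeds can presumably still be None at that point, so an unconditional attention_mask.to(inputs_embeds.dtype) would raise an AttributeError. The sketch below reproduces just that guard in isolation; prepare_attention_mask is a hypothetical helper, not a function in modeling_florence2.py.

import torch

def prepare_attention_mask(attention_mask, inputs_embeds):
    # Mirrors the committed change: cast only when embeddings exist; calling
    # .dtype on a None inputs_embeds would otherwise raise an AttributeError.
    if inputs_embeds is not None:
        attention_mask = attention_mask.to(inputs_embeds.dtype)
    return attention_mask

mask = torch.ones(1, 8, dtype=torch.long)
embeds = torch.randn(1, 8, 16, dtype=torch.float16)

print(prepare_attention_mask(mask, embeds).dtype)  # torch.float16
print(prepare_attention_mask(mask, None).dtype)    # torch.int64 (left unchanged)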