from typing import List, Optional

from transformers import PretrainedConfig

# Default values taken from the mtgv/MobileVLM-1.7B config.json.
config = {
    "_name_or_path": "mtgv/MobileVLM-1.7B",
    "architectures": ["MobileLlamaForCausalLM"],
    "bos_token_id": 1,
    "eos_token_id": 2,
    "freeze_mm_mlp_adapter": False,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "image_aspect_ratio": "pad",
    "image_grid_pinpoints": None,
    "initializer_range": 0.02,
    "intermediate_size": 5632,
    "max_position_embeddings": 2048,
    "max_sequence_length": 2048,
    "mm_hidden_size": 1024,
    "mm_projector_type": "ldpnet",
    "mm_use_im_patch_token": False,
    "mm_use_im_start_end": False,
    "mm_vision_select_feature": "patch",
    "mm_vision_select_layer": -2,
    "mm_vision_tower": "openai/clip-vit-large-patch14-336",
    "model_type": "mobilevlm",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "num_key_value_heads": 16,
    "pad_token_id": 0,
    "pretraining_tp": 1,
    "rms_norm_eps": 1e-06,
    "rope_scaling": None,
    "rope_theta": 10000.0,
    "tie_word_embeddings": False,
    "torch_dtype": "bfloat16",
    "transformers_version": "4.33.1",
    "tune_mm_mlp_adapter": False,
    "use_cache": True,
    "use_mm_proj": True,
    "vision_tower_type": "clip",
    "vocab_size": 32000,
}


class MobileVLMConfig(PretrainedConfig):
    """Configuration class for MobileVLM, with defaults mirroring mtgv/MobileVLM-1.7B."""

    model_type = "mobilevlm"

    def __init__(
        self,
        # _name_or_path: str = config["_name_or_path"],
        architectures: Optional[List[str]] = config["architectures"],
        bos_token_id: Optional[int] = config["bos_token_id"],
        eos_token_id: Optional[int] = config["eos_token_id"],
        freeze_mm_mlp_adapter: Optional[bool] = config["freeze_mm_mlp_adapter"],
        hidden_act: Optional[str] = config["hidden_act"],
        hidden_size: Optional[int] = config["hidden_size"],
        image_aspect_ratio: Optional[str] = config["image_aspect_ratio"],
        image_grid_pinpoints: Optional[list] = config["image_grid_pinpoints"],
        initializer_range: Optional[float] = config["initializer_range"],
        intermediate_size: Optional[int] = config["intermediate_size"],
        max_position_embeddings: Optional[int] = config["max_position_embeddings"],
        max_sequence_length: Optional[int] = config["max_sequence_length"],
        mm_hidden_size: Optional[int] = config["mm_hidden_size"],
        mm_projector_type: Optional[str] = config["mm_projector_type"],
        mm_use_im_patch_token: Optional[bool] = config["mm_use_im_patch_token"],
        mm_use_im_start_end: Optional[bool] = config["mm_use_im_start_end"],
        mm_vision_select_feature: Optional[str] = config["mm_vision_select_feature"],
        mm_vision_select_layer: Optional[int] = config["mm_vision_select_layer"],
        mm_vision_tower: Optional[str] = config["mm_vision_tower"],
        # model_type: Optional[str] = config["model_type"],
        num_attention_heads: Optional[int] = config["num_attention_heads"],
        num_hidden_layers: Optional[int] = config["num_hidden_layers"],
        num_key_value_heads: Optional[int] = config["num_key_value_heads"],
        pad_token_id: Optional[int] = config["pad_token_id"],
        pretraining_tp: Optional[int] = config["pretraining_tp"],
        rms_norm_eps: Optional[float] = config["rms_norm_eps"],
        rope_scaling: Optional[dict] = config["rope_scaling"],
        rope_theta: Optional[float] = config["rope_theta"],
        tie_word_embeddings: Optional[bool] = config["tie_word_embeddings"],
        torch_dtype: Optional[str] = config["torch_dtype"],
        transformers_version: Optional[str] = config["transformers_version"],
        tune_mm_mlp_adapter: Optional[bool] = config["tune_mm_mlp_adapter"],
        use_cache: Optional[bool] = config["use_cache"],
        use_mm_proj: Optional[bool] = config["use_mm_proj"],
        vision_tower_type: Optional[str] = config["vision_tower_type"],
        vocab_size: Optional[int] = config["vocab_size"],
        **kwargs,
    ):
        # self._name_or_path = _name_or_path
        self.architectures = architectures
        self.freeze_mm_mlp_adapter = freeze_mm_mlp_adapter
        self.hidden_act = hidden_act
        self.hidden_size = hidden_size
        self.image_aspect_ratio = image_aspect_ratio
        self.image_grid_pinpoints = image_grid_pinpoints
        self.initializer_range = initializer_range
        self.intermediate_size = intermediate_size
        self.max_position_embeddings = max_position_embeddings
        self.max_sequence_length = max_sequence_length
        self.mm_hidden_size = mm_hidden_size
        self.mm_projector_type = mm_projector_type
        self.mm_use_im_patch_token = mm_use_im_patch_token
        self.mm_use_im_start_end = mm_use_im_start_end
        self.mm_vision_select_feature = mm_vision_select_feature
        self.mm_vision_select_layer = mm_vision_select_layer
        self.mm_vision_tower = mm_vision_tower
        # self.model_type = model_type
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.num_key_value_heads = num_key_value_heads
        self.pretraining_tp = pretraining_tp
        self.rms_norm_eps = rms_norm_eps
        self.rope_scaling = rope_scaling
        self.rope_theta = rope_theta
        self.tune_mm_mlp_adapter = tune_mm_mlp_adapter
        self.use_cache = use_cache
        self.use_mm_proj = use_mm_proj
        self.vision_tower_type = vision_tower_type
        self.vocab_size = vocab_size
        # Token ids, tied embeddings, dtype and version metadata are owned by
        # PretrainedConfig.__init__; forwarding them keeps the base class from
        # resetting them to its own defaults (None/True) after assignment here.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            torch_dtype=torch_dtype,
            transformers_version=transformers_version,
            **kwargs,
        )
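

# Minimal usage sketch, not part of the original module: it assumes only this
# file plus `transformers` are available, and the local directory name
# "mobilevlm_config" is a hypothetical example path. PretrainedConfig already
# provides JSON round-tripping via save_pretrained/from_pretrained.
if __name__ == "__main__":
    # Instantiate with the MobileVLM-1.7B defaults baked into `config` above.
    cfg = MobileVLMConfig()
    print(cfg.model_type, cfg.hidden_size, cfg.mm_vision_tower)

    # Serialize to <dir>/config.json and reload it through the same class.
    cfg.save_pretrained("mobilevlm_config")
    reloaded = MobileVLMConfig.from_pretrained("mobilevlm_config")
    assert reloaded.hidden_size == cfg.hidden_size
    assert reloaded.mm_projector_type == cfg.mm_projector_type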