{
  "_name_or_path": "proteinglm-1b-mlm",
  "add_bias_linear": true,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": true,
  "architectures": [
    "ProteinGLMModel"
  ],
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "auto_map": {
    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
  },
  "bias_dropout_fusion": true,
  "deepnorm": true,
  "experts_per_token": 0,
  "ffn_hidden_size": 5461,
  "fp32_residual_connection": false,
  "glu_activation": "geglu",
  "head_num": 1,
  "hidden_dropout": 0.0,
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "is_causal": false,
  "kv_channels": 64,
  "layernorm_epsilon": 1e-05,
  "model_type": "ProteinGLM",
  "moe": false,
  "multi_query_attention": false,
  "multi_query_group_num": 1,
  "num_attention_heads": 32,
  "num_experts": 0,
  "num_layers": 24,
  "padded_vocab_size": 128,
  "post_layer_norm": true,
  "quantization_bit": 0,
  "rmsnorm": false,
  "rotary_embedding_2d": false,
  "seq_length": 1024,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "untie_head": false,
  "use_cache": true,
  "use_pytorch_sdpa": true,
  "vocab_size": 128
}