# charmen-electra / config.py
from typing import Literal, Optional

from transformers import PretrainedConfig

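# Allowed sampling strategies, judging by the value names: Gumbel-softmax in
# fp32 or fp16, or plain multinomial sampling. Note that Literal documents the
# choices but does not enforce them at runtime.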
_SAMPLING_TYPE = Literal['fp32_gumbel', 'fp16_gumbel', 'multinomial']


class CharmenElectraConfig(PretrainedConfig):
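    """Configuration for CharmenElectraModel.

    Extends the standard ELECTRA hyper-parameters with character-level
    downsampling options in the style of Charformer's GBST (block scoring,
    downsampling, optional upsampling of the output back to character
    resolution). This summary is inferred from the parameter names; the
    model code is the authoritative reference.
    """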
    model_type = "SzegedAI/charmen-electra"
    _name_or_path = "SzegedAI/charmen-electra"
    architectures = ["CharmenElectraModel"]

    def __init__(
        self,
        downsampling_factor: int = 4,
        max_block_size: int = 4,
        score_consensus_attn: bool = True,
        upsample_output: bool = True,
        sampling: _SAMPLING_TYPE = 'fp32_gumbel',
        attention_probs_dropout_prob: float = 0.1,
        embedding_size: int = 768,
        hidden_act: str = "gelu",
        hidden_dropout_prob: float = 0.1,
        hidden_size: int = 512,
        initializer_range: float = 0.02,
        intermediate_size: int = 2048,
        layer_norm_eps: float = 1e-12,
        max_position_embeddings: int = 1024,
        model_type: str = "electra",
        num_attention_heads: int = 8,
        num_hidden_layers: int = 12,
        pad_token_id: int = 0,
        position_embedding_type: str = "absolute",
        summary_activation: str = "gelu",
        summary_last_dropout: float = 0.1,
        summary_type: str = "first",
        summary_use_proj: bool = True,
        type_vocab_size: int = 2,
        vocab_size: int = 261,
        classifier_dropout: Optional[float] = None,
        **kwargs,
    ):
        # Charmen/GBST-specific options: character blocks are scored and the
        # sequence is downsampled before the transformer stack, optionally
        # upsampled back to character resolution at the output.
        self.downsampling_factor = downsampling_factor
        self.max_block_size = max_block_size
        self.score_consensus_attn = score_consensus_attn
        self.upsample_output = upsample_output
        self.sampling = sampling
        # Standard ELECTRA/transformer hyper-parameters.
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.embedding_size = embedding_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.hidden_size = hidden_size
        self.initializer_range = initializer_range
        self.intermediate_size = intermediate_size
        self.layer_norm_eps = layer_norm_eps
        self.max_position_embeddings = max_position_embeddings
        self.model_type = model_type
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.pad_token_id = pad_token_id
        self.position_embedding_type = position_embedding_type
        self.summary_activation = summary_activation
        self.summary_last_dropout = summary_last_dropout
        self.summary_type = summary_type
        self.summary_use_proj = summary_use_proj
        self.type_vocab_size = type_vocab_size
        self.vocab_size = vocab_size
        self.classifier_dropout = classifier_dropout
        # Let PretrainedConfig consume the remaining kwargs
        # (id2label, output_hidden_states, etc.).
        super().__init__(**kwargs)
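

if __name__ == "__main__":
    # Illustrative smoke test, not part of the model's public API: build a
    # config with a couple of non-default values, round-trip it through
    # save_pretrained/from_pretrained, and check that the custom fields
    # survive JSON serialization. The directory is a throwaway temp dir.
    import tempfile

    config = CharmenElectraConfig(downsampling_factor=2, sampling="multinomial")
    with tempfile.TemporaryDirectory() as tmp_dir:
        config.save_pretrained(tmp_dir)
        reloaded = CharmenElectraConfig.from_pretrained(tmp_dir)
    assert reloaded.downsampling_factor == config.downsampling_factor
    assert reloaded.sampling == config.sampling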