davda54 committed
Commit fb5127b
1 Parent(s): aeafbba
Files changed (1)
modeling_deberta.py +0 -34
modeling_deberta.py CHANGED
@@ -1376,11 +1376,6 @@ class DebertaV2LMPredictionHead(nn.Module):
         # an output-only bias for each token.
         self.decoder = nn.Linear(self.embedding_size, config.vocab_size, bias=True)

-        #self.bias = nn.Parameter(torch.zeros(config.vocab_size))
-
-        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
-        #self.decoder.bias = self.bias
-
     def forward(self, hidden_states):
         hidden_states = self.transform(hidden_states)
         hidden_states = self.decoder(hidden_states)
@@ -1398,13 +1393,6 @@ class DebertaV2OnlyMLMHead(nn.Module):
         return prediction_scores


-@add_start_docstrings(
-    """
-    DeBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the
-    pooled output) e.g. for GLUE tasks.
-    """,
-    DEBERTA_START_DOCSTRING,
-)
 class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1517,14 +1505,6 @@ class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
         )


-@add_start_docstrings(
-    """
-    DeBERTa Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
-    Named-Entity-Recognition (NER) tasks.
-    """,
-    DEBERTA_START_DOCSTRING,
-)
-# Copied from transformers.models.deberta.modeling_deberta.DebertaForTokenClassification with Deberta->DebertaV2
 class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1591,13 +1571,6 @@ class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
         )


-@add_start_docstrings(
-    """
-    DeBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
-    layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
-    """,
-    DEBERTA_START_DOCSTRING,
-)
 class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1691,13 +1664,6 @@ class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
        )


-@add_start_docstrings(
-    """
-    DeBERTa Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
-    softmax) e.g. for RocStories/SWAG tasks.
-    """,
-    DEBERTA_START_DOCSTRING,
-)
 class DebertaV2ForMultipleChoice(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
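Two notes on the diff, with hedged sketches. First, the comments removed in the first hunk refer to the usual Hugging Face pattern of registering the LM-head output bias as a standalone nn.Parameter and aliasing it onto the decoder, so that utilities such as resize_token_embeddings resize both together. Since this file's decoder is built with bias=True, the commented-out lines were dead code, which is presumably why the commit drops them. A minimal sketch of the pattern those comments described (the class and argument names below are illustrative, not from this file):

import torch
import torch.nn as nn

class TiedBiasHead(nn.Module):
    """Illustrative sketch of the bias-tying pattern the removed comments described."""

    def __init__(self, embedding_size, vocab_size):
        super().__init__()
        # Build the decoder without its own bias...
        self.decoder = nn.Linear(embedding_size, vocab_size, bias=False)
        # ...and register a standalone bias parameter instead.
        self.bias = nn.Parameter(torch.zeros(vocab_size))
        # Aliasing the parameter onto the decoder links the two tensors, so code
        # that rebuilds or resizes the decoder (e.g. `resize_token_embeddings`)
        # keeps the bias in sync with the vocabulary size.
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        return self.decoder(hidden_states)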
 
1376
  # an output-only bias for each token.
1377
  self.decoder = nn.Linear(self.embedding_size, config.vocab_size, bias=True)
1378
 
 
 
 
 
 
1379
  def forward(self, hidden_states):
1380
  hidden_states = self.transform(hidden_states)
1381
  hidden_states = self.decoder(hidden_states)
 
1393
  return prediction_scores
1394
 
1395
 
 
 
 
 
 
 
 
1396
  class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
1397
  def __init__(self, config):
1398
  super().__init__(config)
 
1505
  )
1506
 
1507
 
 
 
 
 
 
 
 
 
1508
  class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
1509
  def __init__(self, config):
1510
  super().__init__(config)
 
1571
  )
1572
 
1573
 
 
 
 
 
 
 
 
1574
  class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
1575
  def __init__(self, config):
1576
  super().__init__(config)
 
1664
  )
1665
 
1666
 
 
 
 
 
 
 
 
1667
  class DebertaV2ForMultipleChoice(DebertaV2PreTrainedModel):
1668
  def __init__(self, config):
1669
  super().__init__(config)
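Second, the remaining hunks only drop @add_start_docstrings(...) decorators (plus one "Copied from" marker comment). In transformers, this decorator prepends shared text to the wrapped class's __doc__ and returns the class otherwise unchanged, so the removal affects generated documentation rather than runtime behavior. A simplified sketch approximating the transformers utility, not a verbatim copy of it:

def add_start_docstrings(*docstr):
    # Prepend the given strings to the decorated object's docstring.
    def docstring_decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
        return fn
    return docstring_decorator

@add_start_docstrings("Shared model preamble. ")
class Example:
    """Class-specific details."""

print(Example.__doc__)  # "Shared model preamble. Class-specific details."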