File size: 3,783 Bytes
d711508
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from typing import List, Literal, Optional, Union

from peft.config import PeftConfig
from peft.utils import PeftType


@dataclass
class PolyConfig(PeftConfig):
    """
    This is the configuration class to store the configuration of a [`PolyModel`].
        - [Polytropon (Poly)](https://arxiv.org/abs/2202.13914)
        - [Multi-Head Routing (MHR)](https://arxiv.org/abs/2211.03831)

    Args:
        r (`int`): Attention dimension of each Lora in Poly.
        target_modules (`Union[List[str],str]`): The names of the modules to apply Poly to.
        modules_to_save (`List[str]`): List of modules apart from Poly layers to be set as trainable
            and saved in the final checkpoint.
        init_weights (bool): Whether to perform initialization of Poly weights.
        poly_type (`Literal["poly"]`): The variant of the Poly module to use. Currently, only "poly"
            is supported.
        n_tasks (`int`): The number of tasks in a multitasking scenario.
        n_skills (`int`): The number of skills (LoRA) in each Poly layer.
        n_splits (`int`): The number of splits within each LoRA of a Poly layer. A value greater
            than 1 indicates the use of Multi-Head Routing (MHR).
    """

    r: int = field(default=8, metadata={"help": "Lora attention dimension"})
    target_modules: Optional[Union[List[str], str]] = field(
        default=None,
        metadata={
            "help": "List of module names or regex expression of the module names to replace with Poly."
            "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' "
        },
    )
    modules_to_save: Optional[List[str]] = field(
        default=None,
        metadata={
            "help": "List of modules apart from Poly layers to be set as trainable and saved in the final checkpoint. "
            "For example, in Sequence Classification or Token Classification tasks, "
            "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved."
        },
    )
    init_weights: bool = field(
        default=True,
        metadata={
            "help": (
                "Whether to initialize the weights of the Poly layers with their default initialization. Don't change "
                "this setting, except if you know exactly what you're doing."
            ),
        },
    )
    poly_type: Literal["poly"] = field(
        default="poly",
        metadata={"help": 'Type of Poly modules to be used. Currently only "poly" is supported.'},
    )
    n_tasks: int = field(
        default=1,
        metadata={"help": "Number of tasks in multitasking scenario."},
    )
    n_skills: int = field(
        default=4,
        metadata={"help": "Number of skills (LoRA) in each Poly layer."},
    )
    n_splits: int = field(
        default=1,
        metadata={"help": "Number of splits within each LoRA of a Poly layer."},
    )

    def __post_init__(self):
        self.peft_type = PeftType.POLY
        self.target_modules = (
            set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
        )