
    oi                        S SK Jr  S SKrS SKJrJr  S SKJrJrJ	r	  S SK
Jr  S SKJr  SSKJr  \ " S	 S
\5      5       rg)    )annotationsN)	dataclassfield)LiteralOptionalUnion)
PeftConfig)PeftType   )random_maskc                     ^  \ rS rSr% Sr\" SSS0S9rS\S'   \" S	SS
0S9rS\S'   \" SSS0S9r	S\S'   \" SSS0S9r
S\S'   \" SSS0S9rS\S'   \" SSS0S9rS\S'   \" SSS0S9rS\S'   U 4S jrSrU =r$ ) ShiraConfig   a~  
This is the configuration class to store the configuration of a [`ShiraModel`].

Args:
    r (`int`, *optional*, defaults to `32`):
        For a given target module, the number of SHiRA parameters is computed as r(m+n), where the original tensor
        dimensions are m x n. This means the number of SHiRA parameters is the same as that for a LoRA adapter.
        SHiRA is a high rank adapter. Setting this r parameter does not restrict the rank to this value.
    mask_type (`str`, defaults to `random`):
        Type of mask function. Defaults to a random sparse mask. An optional user-defined mask_fn to compute the
        mask value can also be supplied by instantiating `config = ShiraConfig(...)` and then setting
        `config.mask_fn = <your custom mask function>`. For a pretrained weight with shape m x n, the custom mask
        function must return only one mask (shape: m x n) which must be binary 0 or 1 with num_shira_parameters =
        r(m + n) for linear layers. Device and dtype of mask must be same as base layer's weight's device and
        dtype. Please see mask_functions.py for more details and to see the default random sparse mask
        implementation.
    random_seed (`int`, *optional*, defaults to `None`):
        random seed for the torch generator for random_mask.
    target_modules (`Union[List[str], str]`):
        List of module names or regex expression of the module names to replace with SHiRA. For example, ['q', 'v']
        or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported.
    fan_in_fan_out (`bool`):
        Set this to True if the layer to replace stores weight like (fan_in, fan_out). For example, gpt-2 uses
        `Conv1D` which stores weights like (fan_in, fan_out) and hence this should be set to `True`.
    init_weights (`bool`, defaults to `True`):
        Initialize SHiRA weight to have zero values. If set to False, SHiRA weights are initialized to randn values
        instead of zeros and this is used only for testing.
    modules_to_save (`List[str]`):
        List of modules apart from SHiRA layers to be set as trainable and saved in the final checkpoint.
    helpa3  For a given target module, the number of SHiRA parameters is computed as r(m+n), where the original tensor dimensions are m x n. This means the number of SHiRA parameters is the same as that for a LoRA adapter. SHiRA is a high rank adapter. Setting this r parameter does not restrict the rank to this value.)defaultmetadataintrrandomao  Type of mask function. Defaults to a random sparse mask. An optional user-defined mask_fn to compute the mask value can also be supplied by instantiating `config = ShiraConfig(...)` and then setting `config.mask_fn = <your custom mask function>`. For a pretrained weight with shape m x n, the custom mask function must return only one mask (shape: m x n) which must be binary 0 or 1 with num_shira_parameters = r(m + n) for linear layers. Device and dtype of mask must be same as base layer's weight's device and dtype. Please see mask_functions.py for more details and to see the default random sparse mask implementation.zLiteral['random']	mask_typeNz3random seed for the torch generator for random_maskzOptional[int]random_seedzList of module names or regex expression of the module names to replace with SHiRA.For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Only linear layers are supported.zOptional[Union[list[str], str]]target_modulesFzMSet this to True if the layer to replace stores weight like (fan_in, fan_out)boolfan_in_fan_outTzInitialize SHiRA weight to have zero values. If set to False, SHiRA weights are initialized to randn values instead of zeros and this is used only for testing.init_weightsa  List of modules apart from SHiRA layers to be set as trainable and saved in the final checkpoint. For example, in Sequence Classification or Token Classification tasks, the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved.zOptional[list[str]]modules_to_savec                  > [         TU ]  5         [        R                  U l        [        U R                  [        5      (       a  [        U R                  5      OU R                  U l        U R                  S:X  a  [        U l        g U R                  (       d%  [        R                  " SU R                  < S35        S U l        g )Nr   zArgument self.mask_type=ze is not recognized, please supply your own masking function by calling `config.mask_fn = my_mask_fn`.)super__post_init__r
   SHIRA	peft_type
isinstancer   listsetr   r   mask_fninference_modewarningswarn)self	__class__s    R/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/shira/config.pyr    ShiraConfig.__post_init__t   s    !(243F3F(M(MC##$SWSfSf 	 >>X%&DL&&/0  1V  W  DL    )r&   r"   r   )__name__
__module____qualname____firstlineno____doc__r   r   __annotations__r   r   r   r   r   r   r    __static_attributes____classcell__)r+   s   @r,   r   r      s   > s
	As 	 $)z
$I   "'(]^"K  7<4
	7N3 	 !ijND    v
L$  ,1k
	,O( 	   r.   r   )
__future__r   r(   dataclassesr   r   typingr   r   r   peft.configr	   
peft.utilsr
   mask_functionsr   r    r.   r,   <module>r>      s=    #  ( + + "  ' e * e  e r.   