
    oi5                    t    S SK Jr  S SKJrJr  S SKJrJr  S SKJ	r	  S SK
Jr  SSKJr  \ " S S	\	5      5       rg
)    )annotations)	dataclassfield)OptionalUnion)
PeftConfig)PeftType   )WAVELET_REDUCTIONSc                    ^  \ rS rSr% Sr\" SSS0S9rS\S'   \" S	SS
0S9rS\S'   \" SSS0S9r	S\S'   \" SSS0S9r
S\S'   \" SSS0S9rS\S'   \" SSS0S9rS\S'   \" SSS0S9rS\S'   \" SSS0S9rS\S '   \" S!SS"0S9rS\S#'   \" SSS$0S9rS%\S&'   \" SSS'0S9rS(\S)'   \" SSS*0S9rS\S+'   \" \SS,0S-9rS.\S/'   \" SSS00S9rS\S1'   \" SSS20S9rS\S3'   U 4S4 jrS5rU =r$ )6WaveFTConfig   ae  
This is the configuration class to store the configuration of a [`WaveFTModel`]. It is used to define the
parameters for Wavelet-based Fine-Tuning (WaveFT), an approach that leverages the sparsity of wavelet transforms
for parameter-efficient fine-tuning of pretrained models.

Args:
    n_frequency (`int`):
        Number of learnable wavelet coefficients for the Discrete Wavelet Transform (DWT). 'n_frequency' is an
        integer that is greater than 0 and less than or equal to the total number of elements in the original
        weight matrix (d_out * d_in). This parameter directly controls the number of trainable parameters for each
        adapted layer. A higher 'n_frequency' generally leads to better performance but also increases GPU memory
        usage, with a minor impact on training speed.
    scaling (`float`):
        The scaling factor applied to the reconstructed delta W matrix. This is a crucial hyperparameter, analogous
        to `lora_alpha` in LoRA. It can be tuned during hyperparameter search. Our default value for SDXL
        personalization is 25.
    wavelet_family (`str`):
        The wavelet family (e.g., 'db1', 'sym2', 'coif1') to use for the DWT and Inverse DWT (IDWT). Defaults to
        'db1' (Haar wavelet). Different wavelet families have varying filter lengths which affect the training time
        substantially
    use_idwt (`bool`):
        Set to False for efficient adaptation. Whether to use the Inverse Discrete Wavelet Transform (IDWT) to
        reconstruct the delta weights from the learned wavelet coefficients. If `True` (default), the IDWT is
        applied. If `False`, the learned coefficients are directly used to form a sparse delta weight matrix, which
        is faster but performs worse for the SDXL personalization task.
    random_loc_seed (`int`):
        Seed for determining the random locations of the `n_frequency` learnable wavelet coefficients within the
        full wavelet coefficient matrix.
    target_modules (`Union[list[str],str]`):
        List of module names or a regex expression identifying the modules to be adapted with WaveFT. For example,
        `['q_proj', 'v_proj']` or `'.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'`. Currently, only linear
        layers (`torch.nn.Linear`) are supported.
    exclude_modules (`Optional[Union[List[str], str]]`):
        List of module names or a regex expression for modules to exclude from WaveFT adaptation.
    fan_in_fan_out (`bool`):
        Set to `True` if the weights of the layer to be replaced are stored in `(fan_in, fan_out)` format. Default
        is `False`.
    bias (`str`):
        Bias type for WaveFT. Can be 'none', 'all', or 'waveft_only'. ('fourier_only' was likely a typo and has
        been corrected to 'waveft_only' if it implies bias only on adapted parameters) If 'waveft_only', biases are
        added only to the WaveFT components. If 'all', biases are added to both base and WaveFT components. If
        'none', no new biases are added.
    modules_to_save (`list[str]`):
        List of modules, in addition to WaveFT layers, that should be marked as trainable and saved in the final
        checkpoint. Useful for layers like classifiers in sequence or token classification tasks that are randomly
        initialized and need training.
    layers_to_transform (`Union[list[int],int]`):
        Specific layer indices to transform. If provided, PEFT will only adapt layers at these indices. If a single
        integer is given, only that layer is transformed.
    layers_pattern (`Optional[Union[List[str], str]]`):
        Pattern for layer names, used if `layers_to_transform` is specified and the layer pattern is not standard
        (e.g., not 'layers' or 'h'). This should target the `nn.ModuleList` attribute in the model.
    n_frequency_pattern (`dict`):
        A dictionary mapping layer names (or regex) to specific `n_frequency` values, overriding the global
        `n_frequency`. Example: `{"model.decoder.layers.0.encoder_attn.k_proj": 1000}`.
    init_weights (`bool`):
        Initialization strategy for the learnable wavelet coefficients (spectrum). If `True` (default),
        coefficients are initialized to zeros. If `False`, coefficients are initialized from a standard normal
        distribution scaled by a small factor.
    proportional_parameters (`bool`):
        If `True`, `n_frequency` is allocated proportionally to each layer's `input_dim * output_dim`. Default is
        `False`. Note: This option is included for experimental thoroughness to allow researchers to reproduce
        paper results, rather than for practical utility, as no beneficial scenarios have been identified.
i 
  helpa  Number of learnable wavelet coefficients for the Discrete Wavelet Transform (DWT). 'n_frequency' is an integer that is greater than 0 and less than or equal to the total number of elements in the original weight matrix (d_out * d_in). This parameter directly controls the number of trainable parameters for each adapted layer. A higher 'n_frequency' generally leads to better performance but also increases GPU memory usage, with a minor impact on training speed.)defaultmetadataintn_frequencyg      9@zThe scaling factor applied to the reconstructed delta W matrix. This is a crucial hyperparameter, analogous to 'lora_alpha' in LoRA. It can be tuned during hyperparameter search. Default value for SDXL personalization is 25. floatscalingdb1a#  The wavelet family (e.g., 'db1', 'sym2', 'coif1') to use for the DWT and Inverse DWT (IDWT). Defaults to 'db1' (Haar wavelet). Different wavelet families have varying filter lengths which affect the training time substantially. Size differences are handled automatically if use_idwt is True.strwavelet_familyTat  Set to False for efficient adaptation. Whether to use the Inverse Discrete Wavelet Transform (IDWT) to reconstruct the delta weights from the learned wavelet coefficients. If True (default), the IDWT is applied. If False, the learned coefficients are directly used to form a sparse delta weight matrix, which is faster but performs worse for the SDXL personalization task.booluse_idwti	  zSeed for determining the random locations of the 'n_frequency' learnable wavelet coefficients within the full wavelet coefficient matrix.random_loc_seedFzpSet to True if the weights of the layer to be replaced are stored in (fan_in, fan_out) format. Default is False.fan_in_fan_outNzList of module names or a regex expression identifying the modules to be adapted with WaveFT. For example, ['q_proj', 'v_proj'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. Currently, only linear layers (torch.nn.Linear) are supported.zOptional[Union[list[str], str]]target_moduleszLList of module names or regex for modules to exclude from WaveFT adaptation.exclude_modulesnonezBias type for WaveFT. Can be 'none', 'all', or 'waveft_only'. If 'waveft_only', biases are added only to the WaveFT components. If 'all', biases are added to both base and WaveFT components. If 'none', no new biases are added.biaszList of modules, in addition to WaveFT layers, that should be marked as trainable and saved in the final checkpoint. Useful for layers like classifiers in sequence or token classification tasks that are randomly initialized and need training.zOptional[list[str]]modules_to_savezSpecific layer indices to transform. If provided, PEFT will only adapt layers at these indices. If a single integer is given, only that layer is transformed.zOptional[Union[list[int], int]]layers_to_transformzPattern for layer names, used if `layers_to_transform` is specified and the layer pattern is not standard (e.g., not 'layers' or 'h'). This should target the `nn.ModuleList` attribute in the model.layers_patternzA dictionary mapping layer names (or regex) to specific `n_frequency` values, overriding the global `n_frequency`. Example: {"model.decoder.layers.0.encoder_attn.k_proj": 1000}.)default_factoryr   zOptional[dict]n_frequency_patterna-  If True, 'n_frequency' is allocated proportionally to each layer's input_dim * output_dim. Default is False. Note: This option is included for experimental thoroughness to allow researchers to reproduce paper results, rather than for practical utility, as no beneficial scenarios have been identified.proportional_parameterszInitialization strategy for the learnable wavelet coefficients (spectrum). If True (default), coefficients are initialized to zeros. If False, coefficients are initialized from a standard normal distribution scaled by a small factor.init_weightsc           	     ,  > [         TU ]  5         [        R                  U l        [        U R                  [        5      (       a  [        U R                  5      OU R                  U l        [        U R                  [        5      (       a  [        U R                  5      OU R                  U l	        [        U R                  [        5      (       a  U R                  b  [        S5      e[        U R                  [        5      (       a  U R                  b  [        S5      eU R                  (       a  U R                  (       d  [        S5      eU R                  [        ;  a7  [        SU R                   S[        [        R                   " 5       5       35      eg )NzD`layers_to_transform` cannot be used when `target_modules` is a str.z?`layers_pattern` cannot be used when `target_modules` is a str.zRWhen `layers_pattern` is specified, `layers_to_transform` must also be specified. zWavelet family z0 not supported. Supported wavelet families are: )super__post_init__r	   WAVEFT	peft_type
isinstancer   listsetr   r   r"   
ValueErrorr#   r   r   keys)self	__class__s    S/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/waveft/config.pyr*   WaveFTConfig.__post_init__   sS   !(243F3F(M(MC##$SWSfSf 	 *4D4H4H$)O)OC$$%UYUiUi 	 d))3//D4L4L4Xcdd d))3//D4G4G4S^__t'?'?qrr&88!$"5"5!66fgkl~  mD  mD  mF  hG  gH  I  9    )r   r,   r   )__name__
__module____qualname____firstlineno____doc__r   r   __annotations__r   r   r   r   r   r   r   r    r!   r"   r#   dictr%   r&   r'   r*   __static_attributes____classcell__)r3   s   @r4   r   r      s/   ?B K	
K  I
	GU 	  '

NC 
 X
Hd  !K
OS  !,
ND  7<Q
	7N3 	 8=hi8O4  6

D# 
 ,1a
	,O( 	 <AY
<8  7<:
	7N3 	 +0v
+  %*f

%T 
 w
	L$ 	 r6   r   N)
__future__r   dataclassesr   r   typingr   r   peft.configr   
peft.utilsr	   	constantsr   r    r6   r4   <module>rG      s7    # ( " "  ) n: n nr6   