
    oi                    v    S SK Jr  S SKJrJr  S SKJrJrJr  S SK	J
r
  S SKJr  \S   r\ " S S\
5      5       rg	)
    )annotations)	dataclassfield)LiteralOptionalUnion)
PeftConfig)PeftTyperoad_1road_2road_4c                     ^  \ rS rSr% Sr\" SSS0S9rS\S'   \" S	SS
0S9rS\S'   \" SSS0S9r	S\S'   \" SSS0S9r
S\S'   \" SSS0S9rS\S'   U 4S jrSrU =r$ )
RoadConfig   a 
  
This is the configuration class to store the configuration of a [`RoadModel`]. RoAd adapter is proposed in
https://huggingface.co/papers/2409.00119.

Args:
    variant (Union[`RoadVariant`, `str`]):
        The variant of the Road model to use. It can be one of road_1, road_2, or road_4. Refer to the paper for
        more details.
        - road_1: Uses the same scale and angle for all pairs of elements.
        This variant has lowest number of parameters, it stores a number equal to the output hidden size of
        parameters for each layer that RoAd is applied to.
        - road_2: Uses the same scale and angle for each element.
        This variant has 2x the number of parameters compared to road_1.
        - road_4: Uses two different scales and angles for each ellement.
        This variant has 4x the number of parameters compared to road_1.
    group_size (`int`):
        Group size defines how elements are grouped together into 2D vectors for rotation. Within each group
        element 0 is paired with element group_size/2, then element 1 is paired with element group_size/2+1 and so
        on. This has no effect on the model performance, since elements are unordered, however it has some effect
        on inference speed when used in e.g. VLLM. For best speed group size of at least 32 or 64 (the default) is
        recommended. Note that model hidden size (or hidden size per partition when used with tensor parallelism)
        must be divisible by group_size, so for very small models you might need to reduce this parameter.
    init_weights (`bool`):
        Whether to perform initialization of RoAd weights.
    target_modules (`Optional[Union[List[str], str]]`):
        The names of the modules to apply the adapter to. If this is specified, only the modules with the specified
        names will be replaced. When passing a string, a regex match will be performed. When passing a list of
        strings, either an exact match will be performed or it is checked if the name of the module ends with any
        of the passed strings. If this is specified as 'all-linear', then all linear/Conv1D modules are chosen (if
        the model is a PreTrainedModel, the output layer excluded). If this is not specified, modules will be
        chosen according to the model architecture. If the architecture is not known, an error will be raised -- in
        this case, you should specify the target modules manually.
    modules_to_save (`List[str]`):
        List of modules apart from Road layers to be set as trainable and saved in the final checkpoint.
r   helpz!Variant of the Road model to use.)defaultmetadatazUnion[str, RoadVariant]variant@   a]  Group size defines how elements are grouped together into 2D vectors for rotation. Within each group element 0 is paired with element group_size/2, then element 1 is paired with element group_size/2+1 and so on. This has no effect on the model performance, since elements are unordered, however it has some effect on inference speed when used in e.g. VLLM. For best speed group size of at least 64 is recommended. Note that model hidden size (or hidden size per partition when used with tensor parallelism) must be divisible by group_size, so for very small models you might need to reduce this parameter.int
group_sizeTzWhether to initialize the weights of the RoAd layers with their default initialization. Don't change this setting, except if you know exactly what you're doing.boolinit_weightsNa  List of module names or regex expression of the module names to replace with Road.For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'.This can also be a wildcard 'all-linear' which matches all linear/Conv1D (if the model is a PreTrainedModel, the output layer excluded).If not specified, modules will be chosen according to the model architecture, If the architecture is not known, an error will be raised -- in this case, you should specify the target modules manually.zOptional[Union[list[str], str]]target_modulesa  List of modules apart from RoAd layers to be set as trainable and saved in the final checkpoint. For example, in Sequence Classification or Token Classification tasks, the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved.zOptional[list[str]]modules_to_savec                  > [         TU ]  5         [        R                  U l        [        U R                  [        5      (       a  [        U R                  5      OU R                  U l        U R                  S;  a  [        SU R                   S35      eU R                  S::  d  U R                  S-  S:w  a  [        SU R                   S35      eg )Nr   zInvalid variant z7 specified. Please choose from road_1, road_2 or road_4r      zDThe group_size must be divisible by 2 when using RoadLayer, but got .)super__post_init__r
   ROAD	peft_type
isinstancer   listsetr   
ValueErrorr   )self	__class__s    Q/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/road/config.pyr!   RoadConfig.__post_init__u   s    !(243F3F(M(MC##$SWSfSf 	 <<==/~=tuvv??a4??Q#6!#;cdhdsdscttuvww $<    )r#   r   )__name__
__module____qualname____firstlineno____doc__r   r   __annotations__r   r   r   r   r!   __static_attributes____classcell__)r)   s   @r*   r   r      s    "H (->@(G$  u
J  N
L$  7<v	
7N3  ,1k
	,O( 		x 	xr,   r   N)
__future__r   dataclassesr   r   typingr   r   r   peft.configr	   
peft.utilsr
   RoadVariantr    r,   r*   <module>r<      sG    # ( + + "  23 bx bx bxr,   