
    oiD                        S SK r S SKJrJrJr  S SKrS SKJr  S SKJ	r	J
r
  SSKJrJr   " S S\	5      r " S S	\R                  \5      rS
\S\S\R$                  S\R$                  4S jrS
\S\S\R$                  S\R$                  S\\R$                  \R$                  4   4
S jrS
\S\S\R$                  S\R$                  S\R$                  4
S jrS\R                  R                  S\S\S\\R                  R                     4S jrg)    N)AnyOptionalUnion)BaseTunerLayercheck_adapters_to_merge   )
RoadConfigRoadVariantc                       \ rS rSr% SrSr\\S4   \S'   Sr	\\S4   \S'   SS\
R                  S	\S
S4S jjr\S
\\   4S j5       r SS\4S jjrS rSrg)	RoadLayer   u^  
Road layer.

Generally the idea of RoAD is to split the input vector into many 2D vectors and rotate each 2D vector with its own
2D rotation matrix. For additional flexibility, each rotation matrix is multiplied by a trainable scale.

when applied to vector R @ x each pair of elements of x is transformed like this: `y₀ = x₀ * α * cosθ - xₙ * α *
sinθ` and `yₙ = x₀ * α * sinθ + xₙ * α * cosθ`

The scales α and angles θ are learned for each pair of elements and, moreover, each of the 4 instances in the
rotation matrix may actually be different (when using variant 2 or 4).

Note that instead of using two consecutive elements x₀ x₁ we first split the whole vector into groups and pair
elements from the first with the second half of the same group, which allows for more efficient inference
implementation.

The adapter needs to only store the angles θ and scales α, rather than the full matrix R and the inference
implementation only needs to do elementwise vector multiplications.

For merging the weights, we make use of the following formula: R @ (W @ x + b) = (R @ W) @ x + R @ b. The lhs part
is how it is used in unmerged state (using efficient elementwise implementation instead of matrix multiplication)
and the rhs part is how it is used in merged state where (R @ W) becomes the new weight matrix and R @ b becomes
the new bias.

)
road_theta
road_alpha.adapter_layer_names)variant
group_sizeother_param_names
base_layerephemeral_gpu_offloadreturnNc                    Xl         0 U l        0 U l        [        R                  " 0 5      U l        [        R                  " 0 5      U l        SU l        / U l        U R                  5       n[        U[        R                  5      (       a  UR                  UR                  pTO[        S[        U5       S35      eX@l        XPl        g )NFzUnsupported layer type 'z)' encountered, cannot apply RoAd adapter.)r   r   r   nnParameterDictr   r   _disable_adaptersmerged_adaptersget_base_layer
isinstanceLinearin_featuresout_features
ValueErrortype)selfr   r   kwargsr   r    s         P/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/road/layer.py__init__RoadLayer.__init__8   s    $**2.**2.!&!((*
j")),,(2(>(>
@W@W7Z8H7IIrstt&(    c                     1 U R                   k$ N)r   )r#   s    r%   _available_adaptersRoadLayer._available_adaptersJ   s    !!!r(   inference_modec                 p   X R                   U'   X0R                  U'   U R                  U-  S:w  a  [        SU S35      eUS:X  a  U R                  S-  nO8US:X  a  U R                  nO%US:X  a  U R                  S-  nO[        SU S	35      e[        R
                  " [        R                  " U5      5      U R                  U'   [        R
                  " [        R                  " U5      5      U R                  U'   U R                  X5        U R                  U5        U R                  U R                  US
9  g )Nr   zDThe out_features of the base layer must be divisible by group_size (z) when using RoadLayer.road_1   road_2road_4Unsupported variant B for RoadLayer. Supported variants are road_1, road_2, and road_4.)r-   )r   r   r    r!   r   	Parametertorchemptyr   r   reset_parameters%_move_adapter_to_device_of_base_layerset_adapteractive_adapters)r#   adapter_namer   r   init_weightsr-   sizes          r%   update_layerRoadLayer.update_layerN   s!    &-\"(2%z)Q.VWaVbbyz 
 h$$)D $$D $$q(D&wi/qr  )+U[[5F(G%(*U[[5F(G%l922<@--nMr(   c                    USL am  [         R                  R                  U R                  U   R                  SSS9  [         R                  R                  U R
                  U   R                  SSS9  g [         R                  R                  U R                  U   R                  5        [         R                  R                  U R
                  U   R                  5        g )NFg        g      ?)meanstdg      ?)r   initnormal_r   datar   zeros_ones_)r#   r<   r=   s      r%   r8   RoadLayer.reset_parametersq   s    5 GGOODOOL9>>ScORGGOODOOL9>>ScOR
t|499:
dool3889r(   )	r   r   r   r   r   r    r   r   r   )F)__name__
__module____qualname____firstlineno____doc__r   tuplestr__annotations__r   r   Moduleboolr&   propertysetr+   r?   r8   __static_attributes__ r(   r%   r   r      s    4 ,HsCxG)BuS#XB)299 )T )`d )$ "SX " "  %!N !NF:r(   r   c                   ,  ^  \ rS rSr   SS\S\S\S\\\4   SS4
U 4S jjjr	S	 r
S
\R                  S\S\S\R                  4S jrS
\R                  S\S\\   S\S\R                  4
S jrSS\S\\\      SS4S jjrSS jrS\4U 4S jjrSrU =r$ )r   z   r<   r   r   r=   r   Nc                    > [         TU ]  5         [        R                  " X40 UD6  X l        U R	                  UUUUS9  g )N)r=   )superr&   r   _active_adapterr?   )r#   r   r<   r   r   r=   r$   	__class__s          r%   r&   Linear.__init__|   sH     	46v6+%	 	 	
r(   c                     UR                  SS5      nUc  g[        U5      [        U5      :w  a&  S[        U5       S[        U5       S3n[        U5      eU R                  (       a  Sn[        U5      eg)zMCheck if the arguments are compatible with the configs and state of the modeladapter_namesNzNLength of `adapter_names` should be the same as the number of inputs, but got z and z respectively.z`Cannot pass `adapter_names` when there are merged adapters, please call `unmerge_adapter` first.)getlenr!   merged)r#   xargsr$   r`   msgs         r%   _check_forward_argsLinear._check_forward_args   s{    

?D9 q6S''`}%&eCF8>C  S/!;; uCS/!	 r(   rd   re   r$   c                    U R                   " U/UQ70 UD6  UR                  SS 5      nU R                  (       a9  U R                  (       a  U R	                  5         U R
                  " U/UQ70 UD6nU$ U R                  (       a  U R
                  " U/UQ70 UD6nU$ Ub  U R                  " U/UQ7SU0UD6nU$ U R
                  " U/UQ70 UD6nUR                  nU R                   H  nXpR                  ;  a  M  U R                  XPR                  U   R                  5      n[        U R                  U   U R                  U   U R                  U   U R                  U   U5      nM     UR!                  U5      nU$ )Nr`   )rg   popdisable_adaptersrc   unmerger   _mixed_batch_forwarddtyper;   r+   _cast_input_dtyper   _apply_roadr   r   r   to)r#   rd   re   r$   r`   resulttorch_result_dtypeactive_adapters           r%   forwardLinear.forward   sn     4T4V4

?D9  {{__Q888F0 / [[__Q888F, + &..q_4_}_X^_F( % __Q888F!'"&"6"6!)A)AA//8W8]8]^$LL0OON3OON3OON3 #7 YY12Fr(   r`   c          	      2   U R                   " U/UQ70 UD6n[        U5      n/ nU H9  nUR                  [        U5       V	V
s/ s H  u  pX:X  d  M  U	PM     sn
n	5        M;     [        U5       H  u  pUS:X  a  M  XR                  ;  a  M  U R
                  U   R                  R                  nXWU      R                  U5      n[        U R                  U   U R                  U   U R
                  U   U R                  U   U5      XWU   '   M     U$ s  sn
n	f )N__base__)r   rU   append	enumerater+   r   rF   rn   rq   rp   r   r   r   )r#   rd   r`   re   r$   rr   unique_adapterssub_batch_indices_listadapterindexitemirt   rn   	sub_batchs                  r%   rm   Linear._mixed_batch_forward   s   
 4T4V4m,!#&G"))ImD\*pD\[U`d`o5D\*pq ' "+?!;A+%=%==OON388>>E a89<<UCI0;^,///1F!,- "<& + +qs   DD
safe_mergec                 *   [        X5      nU(       d  gU GH  nX0R                  ;   d  M  U R                  5       nUR                  R                  n[        U R                  U   U R                  U   U R                  U   R                  U R                  U   R                  5      nU(       Gad  UR                  R                  R                  5       n[        R                  " UR                  U5      U5      n[        R                  " U5      R!                  5       (       d  [#        SU S35      eUR%                  5       R                  U5      UR                  l	        UR&                  b  UR&                  R                  5       n[        R                  " UR                  U5      U5      n[        R                  " U5      R!                  5       (       d  [#        SU S35      eUR%                  5       R                  U5      UR&                  l	        OUR                  R                  n[        R                  " UR                  U5      U5      nUR%                  5       R                  U5      UR                  l	        UR&                  bj  UR&                  R                  n[        R                  " UR                  U5      U5      nUR%                  5       R                  U5      UR&                  l	        U R(                  R+                  U5        GM     g)a  
Merge the active adapter weights into the base weights

Args:
    safe_merge (`bool`, *optional*):
        If `True`, the merge operation will be performed in a copy of the original weights and check for NaNs
        before merging the weights. This is useful if you want to check if the merge operation will produce
        NaNs. Defaults to `False`.
    adapter_names (`List[str]`, *optional*):
        The list of adapter names that should be merged. If `None`, all active adapters will be merged.
        Defaults to `None`.
Nz1NaNs detected in the merged weights. The adapter z seems to be brokenz.NaNs detected in the merged bias. The adapter )r   r+   r   weightrn   _get_delta_weightr   r   r   rF   r   cloner6   matmulrq   isfiniteallr!   
contiguousbiasr   ry   )	r#   r   r`   rt   r   
orig_dtyperoad_Rorig_weight	orig_biass	            r%   mergeLinear.merge   s`    0D+N!9!99!002
'..44
*LL0OON3OON388OON388	  #-"3"3"8"8">">"@K"',,vyy/Dk"RK >>+6::<<(OP^O__rs  .9-C-C-E-H-H-TJ%%*!2$.OO$9$9$;	$)LL:1F	$R	$~~i8<<>>","PQ_P``s t#  09/C/C/E/H/H/T
,","3"3"8"8K"',,vyy/Dk"RK-8-C-C-E-H-H-TJ%%*!2$.OO$8$8	$)LL:1F	$R	/8/C/C/E/H/H/T
,$$++N;W ,r(   c                    U R                   (       d  [        R                  " S5        g[        U R                  5      S:  Ga  U R                  R                  5       nXR                  ;   Ga|  U R                  5       R                  nUR                  n[        U R                  U   U R                  U   U R                  U   R                  U R                  U   R                  5      n[         R"                  R%                  UR'                  [         R(                  5      5      R'                  U5      n[         R*                  " XRR                  5      nUR-                  5       Ul        U R                  5       R.                  be  [         R*                  " XPR                  5       R.                  R                  5      nUR-                  5       U R                  5       R.                  l        [        U R                  5      S:  a  GM  gg)zG
This method unmerges all merged adapter layers from the base weights.
z Already unmerged. Nothing to do.Nr   )rc   warningswarnrb   r   rj   r+   r   r   rn   r   r   r   r   rF   r   r6   linalginvrq   float32r   r   r   )r#   rt   r   r   r   
inv_road_Rr   r   s           r%   rl   Linear.unmerge%  sq    {{MM<=$&&'!+!11557N!9!99,,.55#\\
*LL0OON3OON388OON388	 #\\--fii.FGJJ:V
#ll:{{C)446&&(--9 %Z9L9L9N9S9S9X9X YI6?6J6J6LD'')..3) $&&'!++r(   c                 *   > [         TU ]  5       nSU-   $ )Nzroad.)r[   __repr__)r#   repr]   s     r%   r   Linear.__repr__B  s    g }r(   )r\   )r/   @   T)FN)r   N)rJ   rK   rL   rM   rP   r
   intr   rS   r&   rg   r6   Tensorr   ru   listrm   r   r   rl   r   rV   __classcell__)r]   s   @r%   r   r   z   s      ()-
 
 	

 
 D#I&
 

 
*"& c S U\\ B&):>s)OR	B=< =<Xd3i=P =<\` =<~M:#  r(   r   r   r   r   r   c                    [        XX#5      u  pE[        R                  " U5      nUR                  S   nUR	                  SSUS-  5      S S 2SS/S S 24   R                  5       n[        R                  " U5      R	                  SSUS-  U5      S S 2SS/S S 2S S 24   n	U	S S 2SS S 2S S 24==   S-  ss'   U	R	                  Xw5      n	Xi-  nU$ )Nr   r0   r   )_prepare_colsr6   diagshapereshapeflatten)
r   r   r   r   	first_col
second_coloutput_tensorr>   swapped_second_colrotated_diag_second_cols
             r%   r   r   G  s    )'zVI JJy)MAD#++B:?CA1vqLQYY[#jj);<DDRJZ[O]abcdghjkflnoqrcrsAq!QJ'2-'5==dI,Mr(   r   c                    U S:X  a  UR                  SUS-  5      R                  SSS9R                  5       nUR                  SUS-  5      R                  SSS9R                  5       nUR                  5       nUR	                  5       nX4-  nX5-  nXg4$ U S:X  a+  UR                  5       nUR	                  5       nX4-  nX5-  nXg4$ U S:X  a  UR                  SSU5      nUS S 2SS S 24   R                  5       R                  5       nUS S 2SS S 24   R	                  5       R                  5       nUR                  SSU5      nUS S 2SS S 24   R                  5       nUS S 2SS S 24   R                  5       n	X-  nX-  nXg4$ [        S	U  S
35      e)Nr/   r   r0   r   dimr1   r2   r   r3   r4   )r   repeat_interleaver   cossinr!   )
r   r   r   r   	theta_cos	theta_sinr   r   alpha_1alpha_2s
             r%   r   r   [  s    (''J!O<NNqVWNX``b
''J!O<NNqVWNX``b
NN$	NN$	*	+
0   / 
H	NN$	NN$	*	+
"   ! 
H	''Az:
q!Qw'++-557	q!Qw'++-557	''Az:
Q1W%--/Q1W%--/'	(
   	 "7)+mn
 	
r(   rd   c                     [        XX#5      u  pVUR                  SSUS-  5      nUS S 2SS S 24   nUS S 2SS S 24   n	[        R                  " U	* U4SS9R                  UR                  5      n
XE-  X-  -   nU$ )Nr   r0   r   r   r   )r   r   r6   stackr   )r   r   r   r   rd   r   r   	x_groupedx1x2rotate_half_xrr   s               r%   rp   rp     s     *'zVI 		"aq1I	1a7	B	1a7	BKK"b	q199!''BM]]77FMr(   targetr<   road_configc                     S n[        U [        5      (       a  U R                  5       nOU n[        U[        R                  R
                  5      (       a  [        X40 UD6nU$ r*   )r   r   r   r6   r   r   )r   r<   r   r$   
new_moduletarget_base_layers         r%   dispatch_defaultr     sV     J&.))"113"#UXX__55F;F;
r(   )r   typingr   r   r   r6   torch.nnr   peft.tuners.tuners_utilsr   r   configr	   r
   r   rR   r   r   r   r   rO   r   rp   rP   r   rW   r(   r%   <module>r      s1    ' '   L +]: ]:@JRYY	 JZ{   chcoco (%!%!&)%!7<||%!QVQ]Q]%!
5<<%&%!P&)7<||QVQ]Q]bgbnbnHHOO 
 ehhoor(   