
    oi                    J   S SK Jr  S SKrS SKJrJrJr  S SKrS SKJ	r	  S SK
J	s  Jr  S SKJrJr  SSKJr   " S S\	R$                  5      r " S	 S
\	R$                  5      r " S S\5      r " S S\	R$                  \5      r " S S\	R$                  \5      r        SS jrg)    )annotationsN)AnyOptionalUnion)BaseTunerLayercheck_adapters_to_merge   )	OFTConfigc                  6   ^  \ rS rSrSrSU 4S jjrS rSrU =r$ )MultiplicativeDropoutLayer   z6
Implements the multiplicative dropout layer for OFT.
c                .   > [         TU ]  5         Xl        g)z
Initializes the multiplicative dropout layer.

Parameters:
p (float): The probability of dropping out a block. Defaults to 0.0.
N)super__init__p)selfr   	__class__s     O/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/oft/layer.pyr   #MultiplicativeDropoutLayer.__init__!   s     	    c                t   U R                   (       Ga%  U R                  S:  Ga  UR                  S   UR                  S   :w  a  [        S5      eUR                  u  p#nUS:X  a  U$ [	        U R                  U-  5      nX%-
  n[
        R                  " [
        R                  " XQR                  S9[
        R                  " XaR                  S9/5      nU[
        R                  " U5         R                  USS5      n[
        R                  " X1R                  S9R                  USS5      nSU-
  U-  Xx-  -   nU$ )a  
Applies multiplicative dropout to the input tensor.

Parameters:
x (Tensor): The input tensor of shape (D, H, H), where `D` represents
            the number of OFT blocks, and `H` is the size of the square blocks along the last two dimensions,
            the block size in OFT.
r   z4The last two dimensions of input should be the same!r	   device)trainingr   shape
ValueErrorinttorchcatonesr   zerosrandpermvieweyerepeat)	r   xDH_num_to_replace	num_zerosmask
eye_matrixs	            r   forward"MultiplicativeDropoutLayer.forward+   s     ===TVVaZwwr{aggbk) !WXXggGA! Av !_N*I99ejjI5;;W`iqiqKrstDq)*//1a8D1XX6==aAFJTQ!22Ar   r   )        )	__name__
__module____qualname____firstlineno____doc__r   r0   __static_attributes____classcell__r   s   @r   r   r      s     r   r   c                     ^  \ rS rSr      SU 4S jjrS rS r S         SS jjrSS jrSS jr	S r
S	 rS
 rS rSrU =r$ )OFTRotationModuleH   c                n  > [         TU ]  5         Xl        X l        X0l        X@l        [        R                  " [        R                  " X5      5      U l
        XPl        X`l        Xpl        Xl        Xl        Xl        [        R"                  " X3S5      u  pU R%                  SUSS9  U R%                  SUSS9  g )Nr	   rowsF)
persistentcols)r   r   r
n_elements
block_sizein_featuresnn	Parameterr    emptyweightcoftepsblock_sharekernel_sizeuse_cayley_neumannnum_cayley_neumann_termstriu_indicesregister_buffer)r   rC   rD   rE   rF   rK   rL   rM   rN   rO   rP   r@   rB   r   s                r   r   OFTRotationModule.__init__I   s     	$$&ll5;;q#=>	&&"4(@%''
B
VTe<VTe<r   c                    UR                   S   n[        R                  " X2X!R                  UR                  S9nXS S 2U R
                  U R                  4'   XDR                  SS5      -
  nU$ )Nr   r   dtyper   r   )r   r    r#   r   rV   r@   rB   	transpose)r   vecrE   
batch_sizematrixs        r   _pytorch_skew_symmetric)OFTRotationModule._pytorch_skew_symmetrich   s]    YYq\
ZZ

Z]ZcZcd*-q$))TYY&'**2r22r   c                `    UR                   S   nUS S 2U R                  U R                  4   nU$ )Nr   )r   r@   rB   )r   rZ   rE   rY   rX   s        r   _pytorch_skew_symmetric_inv-OFTRotationModule._pytorch_skew_symmetric_invp   s/    \\!_
 Q		499,-
r   c                \   UR                   u  pVUR                  nU R                  X5      nU(       a  [        R                  " X!R
                  UR                  S9R                  USS5      n	US:  a  U	R                  USS9  US:  a  [        R                  " X5      n
U	R                  U
SS9  U
n[        SUS-
  5       H)  n[        R                  " X5      nU	R                  USS9  M+     [        R                  " X5      nU	R                  U5        O[        R                  " UR                   S   UR
                  S9R                  S	5      R                  XXR                   S   UR                   S   5      n[        R                  R                  X-   X-
  S
S9n	U	R                  U5      $ )z
Perform the Cayley parametrization on a batch of skew-symmetric matrices.

Args:
    data: A batch of skew-symmetric matrices of shape (b, r, c).
rU   r	   g       @)alpha      r   r   r   F)left)r   rV   r[   r    r&   r   r'   add_bmmrange	unsqueezeexpandlinalgsolveto)r   QrE   rO   num_neumann_termsbr+   previous_dtypeQ_skewR	Q_squaredQ_powerid_mats                r   _cayley_batchOFTRotationModule._cayley_batchw   sc    ww --a<		*XXQWWELLQPQSTUA 1$vS)$q( %		& 9IFF9CF0'G"1&7!&;<"'))G"<wc2 = $ii8GFF7O 		&,,r*6==A1<<+V\\"-=> 
 ""6?FO%"PAttN##r   c                P   U R                  XR                  5      nUS-  [        R                  " [        R                  " UR
                  S   5      5      -  n[        R                  " UR                  S5      UR                  S5      4UR                  UR                  S9R                  S5      R                  U5      nX4-
  n[        R                  " X4-
  SSS9nXb:*  R                  5       n[        R                  " XsXBXV-  -  -   5      nU R                  XR                  5      $ )Nr	   r   rU   )r	   rb   T)dimkeepdim)r[   rE   r    sqrttensorr   r#   sizer   rV   rh   	expand_asnormboolwherer^   )	r   rm   rL   oft_RIdiff	norm_diffr.   outs	            r   _project_batch OFTRotationModule._project_batch   s    ,,Q@Ag

5<<A#?@@KKA

16u||SXS^S^_Yq\Yu 	

 yJJuyfdC	 &&(kk$q$2B+C'CD//__EEr   c                    UR                   S   S:X  a  [        U5       Vs/ s H  o1S   PM	     nnO[        U5       Vs/ s H	  o1US4   PM     nn[        R                  " U6 nU$ s  snf s  snf )Nr   r	   )r   ..)r   rg   r    
block_diag)r   r   rankiblocksAs         r   _block_diagonal!OFTRotationModule._block_diagonal   sk    ;;q>Q-24[9[Fm[F9F-24[9[AsFm[F9 f% :9s
   A&A+c                   UR                   u  p#pE[        U R                  [        5      (       a  U R                  U R                  pvOU R                  u  pgS=pS=pUSU
-  -   U-
  U-  S-   nUSU-  -   U-
  U	-  S-   nUR	                  SXh5      R	                  SXy5      nUR                  SSSSSS5      R                  5       nUR                  X,-  U-  S5      nU$ )z|
Unfold with stride=1, padding=0 to preserve spatial dimensions. Only use kernel_size from base layer to define
patch size.
r	   r   rb   rc         r   )r   
isinstancerN   r   unfoldpermute
contiguousr%   )r   r(   rY   in_channels	in_heightin_widthkernel_heightkernel_widthstride_hstride_wpad_hpad_w
out_height	out_width
x_unfoldeds                  r   _unfoldOFTRotationModule._unfold   s    
 89ww4
d&&,,*.*:*:D<L<L<*.*:*:'M  !e)+m;H1L
E	)L8XEI	 XXa9@@L[
''1aAq9DDF
__Z%<y%H"M
r   c                   Uu  p4pV[        U R                  [        5      (       a  U R                  U R                  pOU R                  u  pxXW-
  S-   n	Xh-
  S-   n
UR                  X9XXx5      nUR	                  SSSSSS5      R                  5       n[        R                  " UR                  X4U-  U-  X-  5      XV4Xx4SS9nU$ )	z+
Fold back to preserve spatial dimensions.
r	   r   rc   rb   r   r   )r	   r	   )output_sizerN   stride)r   rN   r   r%   r   r   Ffold)r   r   
orig_shaperY   r   r   r   r   r   r   r   
x_reshapedx_foldeds                r   _foldOFTRotationModule._fold   s     8B4
d&&,,*.*:*:D<L<L<*.*:*:'M .2
+a/	  __ZYUbq
  ''1aAq9DDF
 66OOJm(Cl(RT^Tjk"-&5	
 r   c                j   UR                   nX R                  R                   :w  a%  UR                  U R                  R                   5      nUR                  nU R                  (       a[  [
        R                  " 5          U R                  R                  U R                  U R                  U R                  S95        S S S 5        U R                  U R                  U R                  U R                  U R                  5      n[        U5      S:X  a  U R                  U5      nUR                  nU R                   (       a  U R"                  U R                  -  OU R$                  nUR                  S S nUR&                  " / UQUPU R                  P76 nU R                   (       a+  UR)                  USS5      n[
        R*                  " SX5      n	O[
        R*                  " SX5      n	U	R&                  " U6 n
[        U5      S:X  a  U R-                  X5      n
U
R                  U5      $ ! , (       d  f       GNo= f)NrL   r   r   r	   z...rk,rkc->...rc)rV   rJ   rl   r   rK   r    no_gradcopy_r   rL   rv   rE   rO   rP   lenr   rM   rF   rC   reshaper'   einsumr   )r   r(   required_dtyper   orth_rotatefolded_shaper   
batch_dimsr   x_rotated_reshaped	x_rotateds              r   r0   OFTRotationModule.forward   s   
 [[...T[[&&'AWW
99!!$"5"5dkktxx"5"PQ ! ((KK$*A*A4C`C`

 z?aQAww6:6F6Ft4??2DFFWWSb\
YYB
BDB$//B
%,,T1a8K!&.@*!Z!&.@*!Z&..=	z?a

99I||N++7 !s   =>H##
H2c                   U R                   nU R                  (       aR  [        R                  " 5          U R	                  XR
                  S9nU R                   R                  U5        SSS5        U R                  XR                  U R                  U R                  5      nU R                  (       d  U R                  OU R                  U R                  -  nU R                  X#5      $ ! , (       d  f       N= f)
Compute the delta weight for the given adapter.

Args:
    adapter (str):
        The name of the adapter for which the delta weight should be computed.
r   N)rJ   rK   r    r   r   rL   r   rv   rE   rO   rP   rM   rC   rF   r   )r   rJ   r   r   s       r   
get_weightOFTRotationModule.get_weight  s     99,,V,B!!&) ! ((OOT%<%<d>[>[
 "--tvv43C3Ct3V##K66 !s   5C((
C6)rM   rE   rK   rL   rF   rN   rD   rP   rC   rO   rJ   )FiUMu?F)r   r   Tr   )Tr   )
rm   torch.TensorrE   r   rO   r   rn   r   returnr   )gh㈵>)r   r   r   r   r   r   )r4   r5   r6   r7   r   r[   r^   rv   r   r   r   r   r0   r   r9   r:   r;   s   @r   r=   r=   H   s     !"=> kl&$&$+.&$DH&$dg&$	&$RF"
4>',R7 7r   r=   c                      \ rS rSr% SrSrS\S'   SrS\S'   SS jr\	SS	 j5       r
S
 rSS jrSSS jjr S SS jjrS rS rSrg)OFTLayeri6  z
Implements the OFT layer.
r   ztuple[str, ...]adapter_layer_names)rC   oft_block_sizeoft_dropoutother_param_namesc                   Xl         [        R                  " 0 5      U l        0 U l        0 U l        0 U l        [        R                  " 0 5      U l        SU l        / U l        SU l	        X l
        U R                  5       n[        U[        R                  5      (       a  UR                  UR                  pCGO[        U[        R                   5      (       a  UR"                  UR$                  pCGO['        US5      (       a*  ['        US5      (       a  UR(                  UR*                  pCGO['        US5      (       a*  ['        US5      (       a  UR,                  UR.                  pCGOb['        US5      (       a3  UR0                  R2                  S:X  a  UR                  UR                  pCGO['        US	5      (       a2  UR0                  R2                  S
:X  a  UR                  UR                  pCOUR0                  R2                  S:X  a  UR                  UR                  pCO['        US5      (       a2  UR0                  R2                  S:X  a  UR                  UR                  pCOf['        US5      (       a)  ['        US5      (       a  UR                  UR                  pCOSu  p4[4        R6                  " S[9        U5       S3[:        5        X0l        X@l        g)z
Initializes the OFT layer.

Note, currently only support linear layer and convolutional layer, with further support for other layers to be
added soon.

Parameters:
base_layer: the pretrained model layer
FT
infeaturesoutfeatures
input_sizer   	codebooksQuantizedLinearw_bitWQLinear_GEMM
EetqLinearW_q	HQQLinearrF   out_features)NNzUnsupported layer type 'z(' encountered, proceed at your own risk.N)
base_layerrG   
ModuleDictr   r   rC   r   _disable_adaptersmerged_adapterscast_input_dtype_enabledkwargsget_base_layerr   LinearrF   r   Conv2dr   out_channelshasattrr   r   r   r   r   r4   warningswarntypeUserWarning)r   r   r   rF   r   s        r   r   OFTLayer.__init__@  s3    %]]2&
  ==,!&!(,%((*
j")),,(2(>(>
@W@W
BII..(2(>(>
@W@WZ..7:}3U3U(2(=(=z?U?UZ..7:}3U3U(2(=(=z?U?UZ--*2F2F2O2OSd2d(2(>(>
@W@WZ))j.B.B.K.K.^(2(>(>
@W@W!!**l:(2(>(>
@W@WZ''J,@,@,I,I[,X(2(>(>
@W@W z=11gj.6Y6Y,6,B,BJD[D[\,6)MM*4
+;*<<degr '(r   c                    1 U R                   k$ Nr   )r   s    r   _available_adaptersOFTLayer._available_adapters{  s    }r   c                P    XR                   ;  a  g [        R                  " S5        g )NFScaling operation for OFT not supported! Automatically set scale to 1.)scalingr   r   )r   adapterscales      r   	set_scaleOFTLayer.set_scale  s    ,,&^_r   c                    US:X  a  g U R                    H8  nX R                  R                  5       ;  a  M"  [        R                  " S5        M:     g )Nr	   r   active_adaptersr   keysr   r   r   r   active_adapters      r   scale_layerOFTLayer.scale_layer  s=    A:"22NZZ__%66MMbc	 3r   Nc                    U R                    H8  nX R                  R                  5       ;  a  M"  [        R                  " S5        M:     g )Nz>Unscaling operation for OFT not supported! Keeping scale to 1.r   r   s      r   unscale_layerOFTLayer.unscale_layer  s3    "22NZZ__%66MMZ[	 3r   c                    US:  a
  [        US9nO[        R                  " 5       nU R                  R	                  [        R
                  " X05      5        US:X  a|  US:w  av  U R                  U-  S:w  d  X0R                  :  a;  UnU R                  U R                  U5      n[        R                  " SU SU S35        [        U R                  U-  5      nOUS:w  a|  US:X  av  U R                  U-  S:w  d  X R                  :  a;  UnU R                  U R                  U5      n[        R                  " SU SU S35        [        U R                  U-  5      nO[        S	5      eX3S
-
  -  S-  n[        U(       d  UOS
UUU R                  UUUU	U
S9	U R                  U'   U R                  X5        X R                  U'   X0R                   U'   U R#                  U5        U R%                  U R&                  US9  g)zU
Update the linear layer with trainable OFT weights. Override for other layer types.
r3   r2   r   Invalid `oft_block_size` (!)! Adjusted `oft_block_size` to ().Invalid `r` ()! Adjusted `r` to (ZSomething went wrong, please report this error: https://github.com/huggingface/peft/issuesr	   rb   )rK   rL   rM   rO   rP   inference_modeN)r   rG   Identityr   updater   rF   adjust_oft_parametersr   r   r   r   r=   r   reset_oft_parametersrC   r   %_move_adapter_to_device_of_base_layerset_adapterr   )r   adapter_namerC   r   module_dropoutrK   rL   rM   init_weightsrO   rP   r  r   oft_dropout_layerold_oft_block_sizeold_rrD   s                    r   update_layerOFTLayer.update_layer  s   $	 C :^ L "|.O PQ6n).0A5JZJZ9Z%3"!%!;!;D<L<Ln!]01C0DDefteuuwx D$$67A!V!+!#q(A0@0@,@..t/?/?CeW4H2NO !1!1Q!67Nl 
 $'9:a?
#4 Aa#1%=
$


<  	!!,=  !|,:L) 	22<@--nMr   c                L   USL a7  [         R                  R                  U R                  U   R                  SSS9  gXR                  R                  5       ;   aK  USL a7  [         R                  R                  U R                  U   R                  5        g[        SU< 35      eg)z
Reset the OFT parameters.
Fr3   g?)meanstdNTz$Unknown initialization init_weights=)rG   initnormal_r   rJ   r   zeros_r   )r   r  r  s      r   r
  OFTLayer.reset_oft_parameters  s     5 GGOODJJ|4;;#3OO::??,,t#tzz,7>>? #H</!JKK -r   c                    X!:  a$  UnX1::  a  X-  S:w  a  US-  nX1::  a
  X-  S:w  a  M  OU$ UnUS:  a  X-  S:w  a  US-  nUS:  a
  X-  S:w  a  M  X$-
  X2-
  ::  a  U$ U$ )zI
Adjust the OFT parameters to be divisible by the in_features dimension.
r   r	    )r   rF   paramshigher_paramslower_paramss        r   r	  OFTLayer.adjust_oft_parameters  s     "M.;3NRS3S"  .;3NRS3S Q;#=#BAL Q;#=#B !}'=>  r   )r   r   r   rF   r   r   r   r   r   r   rC   )r   	nn.Moduler   None)r   zset[str])r   floatr   r#  r   r   r#  Fr  r   )r4   r5   r6   r7   r8   r   __annotations__r   r   propertyr   r   r   r   r  r
  r	  r9   r  r   r   r   r   6  so    
 ,65)OO9)v  `d\&  %QN QNfL!r   r   c                     ^  \ rS rSrSr           S
                         SU 4S jjjrSSS jjrSS jrSS jrSS jr	SU 4S jjr
S	rU =r$ )r   i  zOFT implemented in Linear layerc                   > [         TU ]  5         [        R                  " X40 UD6  Xl        X l        U R                  UUUUUUUUU	U
S9
  Xl        g N)r   r  rK   rL   rM   r  rO   rP   )r   r   r   fan_in_fan_out_active_adapterr  is_target_conv_1d_layer)r   r   r  rC   r   r  rK   rL   rM   rO   rP   r-  r  r/  r   r   s                  r   r   Linear.__init__  sh    " 	$5f5,+))#%1%= 	 	
 (?$r   c                4   [        X5      nU(       d  gU GH  nX0R                  ;   d  M  U R                  5       nUR                  R                  nU(       a  UR                  R
                  nU R                  U5      n[        R                  " USS5      n[        R                  " XvR                  UR                  5      5      n[        R                  " USS5      n[        R                  " U5      R                  5       (       d  [        SU S35      eUR                  5       R                  U5      UR                  l        OUR                  R
                  nU R                  U5      n[        R                  " USS5      n[        R                  " XvR                  UR                  5      5      n[        R                  " USS5      nUR                  5       R                  U5      UR                  l        U R                  R!                  U5        GM     g)a  
Merge the active adapter weights into the base weights

Args:
    safe_merge (`bool`, *optional*):
        If `True`, the merge operation will be performed in a copy of the original weights and check for NaNs
        before merging the weights. This is useful if you want to check if the merge operation will produce
        NaNs. Defaults to `False`.
    adapter_names (`List[str]`, *optional*):
        The list of adapter names that should be merged. If `None`, all active adapters will be merged.
        Defaults to `None`.
Nr   r	   z1NaNs detected in the merged weights. The adapter z seems to be broken)r   r   r   rJ   rV   dataget_delta_weightr    rW   mmrl   isfiniteallr   r   r   appendr   
safe_mergeadapter_namesr   r   
orig_dtypeorig_weightsoft_mats           r   mergeLinear.merge6  s    0D+N!9!99!002
'..44
#-#4#4#9#9L"33NCG#(??<A#FL#(88G__W]]5S#TL#(??<A#FL >>,7;;==(OP^O__rs  .:-D-D-F-I-I*-UJ%%*#-#4#4#9#9L"33NCG#(??<A#FL#(88G__W]]5S#TL#(??<A#FL-9-D-D-F-I-I*-UJ%%*$$++N;7 ,r   c                   U R                   (       d  [        R                  " S5        gU R                  5       nUR                  R
                  n[        U R                  5      S:  Gak  U R                  R                  5       nX0R                  R                  5       ;   Ga  U R                  U5      nUR
                  nU[        R                  :w  a  UR                  [        R                  5      nU R                  5       R                  R                  n[        R                   " USS5      n[        R"                  " [        R$                  R'                  U5      R                  U5      UR                  U5      5      n[        R                   " USS5      nUR                  U5      UR                  l        [        U R                  5      S:  a  GMj  ggzG
This method unmerges all merged adapter layers from the base weights.
z Already unmerged. Nothing to do.Nr   r	   )mergedr   r   r   rJ   rV   r   r   popr   r   r3  r    float32rl   r2  rW   r4  rj   invr   r   r;  r   r=  rp   r<  s          r   unmergeLinear.unmergee  sM    {{MM<=((*
&&,,
$&&'!+!11557N!22//?!(!U]]2%jj7G#224;;@@$|QB$xx(8(8(A(D(D^(TVbVeVeftVuv$|QB)5)D
!!& $&&'!++r   c                <    U R                   U   R                  5       $ r   r   r   r   r  s     r   r3  Linear.get_delta_weight       zz,'2244r   c                H   UR                   nU R                  (       a8  U R                  (       a  U R                  5         U R                  " U/UQ70 UD6nOU R                  (       a  U R                  " U/UQ70 UD6nOU R
                   H^  nX`R                  R                  5       ;  a  M"  U R                  U   nU R                  XR                  R                   5      nU" U5      nM`     U R                  " UR                  U5      /UQ70 UD6nUR                  U5      nU$ r   rV   disable_adaptersrB  rG  r   r   r   r   _cast_input_dtyperJ   rl   r   r(   argsr   rp   resultr   r   s           r   r0   Linear.forward  s      {{__Q888F[[__Q888F"&"6"6!)::

>2**1ll.@.@A!H #7 __QTT.%9KDKFKF>*r   c                *   > [         TU ]  5       nSU-   $ Nzoft.r   __repr__r   repr   s     r   rZ  Linear.__repr__      g |r   )r.  r-  r/  )   r   r3   Fr   FFr   FTF)r  strrC   r   r   r   r  r$  rK   r   rL   r$  rM   r   rO   r   rP   r   r-  r   r  Union[bool, str]r/  r   r   r#  FNr9  r   r:  zOptional[list[str]]r   r#  r%  r   z!tuple[torch.Tensor, torch.Tensor])r(   r   r   r   r   r`  )r4   r5   r6   r7   r8   r   r>  rG  r3  r0   rZ  r9   r:   r;   s   @r   r   r     s    )  #!#(()$)-(-#? #? 	#?
 #? #? #? #? #? !#? #&#? #? '#? "&#?  
!#? #?J-<^E4	5. r   r   c                     ^  \ rS rSrSr          S                         SU 4S jjjr S SS jjrSSS jjrSS jrSS jr	SS jr
SU 4S	 jjrS
rU =r$ )r   i  zOFT implemented in Conv2d layerc                   > [         TU ]  5         [        R                  X5        XPl        X l        U R                  UUUUUUU	U
UUS9
  g r,  )r   r   r   r-  r.  r  )r   r   r  rC   r   r-  r  rK   rL   rM   r  rO   rP   r   r   s                 r   r   Conv2d.__init__  s]      	$+,+ 	))#%1%= 	 	
r   c                   US:  a
  [        US9nO[        R                  " 5       nU R                  R	                  [        R
                  " X05      5        U R                  5       nUR                  S   S:  a  [        S5      eU R                  UR                  S   -  UR                  S   -  nUS:X  aQ  US:w  aK  X-  S:w  d  X?:  a0  UnU R                  X5      n[        R                  " SU SU S35        [        X-  5      nObUS:w  aQ  US:X  aK  X-  S:w  d  X/:  a0  UnU R                  X5      n[        R                  " S	U S
U S35        [        X-  5      nO[        S5      eX3S-
  -  S-  n[        U(       d  UOSUUUUUUUR                  U	U
S9
U R                   U'   U R#                  X5        X R$                  U'   X0R&                  U'   U R)                  U5        U R+                  U R,                  US9  g)z5
Update the conv2d layer with trainable OFT weights.
r3   r2   r   r	   z1Conv2d with dilation > 1 is not supported by OFT.r   r   r  r  r  r  rb   )rK   rL   rM   rN   rO   rP   r  N)r   rG   r  r   r  r   r   dilationr   rF   rN   r	  r   r   r   r=   r   r
  rC   r   r  r  r   )r   r  rC   r   r  rK   rL   rM   r  rO   rP   r  r   r  r   conv_filter_dimr  r  rD   s                      r   r  Conv2d.update_layer  s   & C :^ L "|.O PQ ((*
q!A%PQQ**Z-C-CA-FFI_I_`aIbb6n)/148X%3"!%!;!;O!\01C0DDefteuuwx O56A!V!+"a'1+>..BeW4H2NO !56Nl 
 $'9:a?
#4 Aa#"..1%=$


<  	!!,=  !|,:L) 	22<@--nMr   c                <   [        X5      nU(       d  gU GH  nX0R                  R                  5       ;   d  M#  U R                  5       nUR                  R
                  nU(       GaL  UR                  R                  R                  5       nU R                  U5      nUR                  U R                  U R                  UR                  S   -  UR                  S   -  5      n[        R                  " USS5      n[        R                  " XvR!                  UR
                  5      5      n[        R                  " USS5      nUR                  U R                  U R                  UR                  S   UR                  S   5      nUR#                  5       R!                  U5      UR                  l        GOJU R                  U5      nUR                  R                  R                  5       nUR                  U R                  U R                  UR                  S   -  UR                  S   -  5      n[        R                  " USS5      n[        R                  " XvR!                  UR
                  5      5      n[        R                  " USS5      nUR                  U R                  U R                  UR                  S   UR                  S   5      nUR#                  5       R!                  U5      UR                  l        U R$                  R'                  U5        GM     g)a  
Merge the active adapter weights into the base weights

Args:
    safe_merge (`bool`, *optional*):
        If True, the merge operation will be performed in a copy of the original weights and check for NaNs
        before merging the weights. This is useful if you want to check if the merge operation will produce
        NaNs. Defaults to `False`.
    adapter_names (`List[str]`, *optional*):
        The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
        to `None`.
Nr   r	   )r   r   r   r   rJ   rV   r2  cloner3  r%   r   rF   rN   r    rW   r4  rl   r   r   r7  r8  s           r   r>  Conv2d.merge  s    0D+N!22!002
'..44
 $.#4#4#9#9#?#?#AL"33NCG#/#4#4))4+;+;j>T>TUV>W+WZdZpZpqrZs+s$L $)??<A#FL#(88G__W]]5S#TL#(??<A#FL#/#4#4))4+;+;Z=S=STU=VXbXnXnopXq$L .:-D-D-F-I-I*-UJ%%*"33NCG#-#4#4#9#9#?#?#AL#/#4#4))4+;+;j>T>TUV>W+WZdZpZpqrZs+s$L $)??<A#FL#(88G__W]]5S#TL#(??<A#FL#/#4#4))4+;+;Z=S=STU=VXbXnXnopXq$L .:-D-D-F-I-I*-UJ%%*$$++N;K ,r   c                F   U R                   (       d  [        R                  " S5        gU R                  5       nUR                  R
                  n[        U R                  5      S:  Ga9  U R                  R                  5       nX0R                  R                  5       ;   Ga  U R                  U5      nUR
                  nU[        R                  :w  a  UR                  [        R                  5      nU R                  5       R                  R                  R!                  5       nUR#                  U R$                  U R&                  U R                  5       R(                  S   -  U R                  5       R(                  S   -  5      n[        R*                  " USS5      n[        R,                  " [        R.                  R1                  U5      R                  U5      UR                  U5      5      n[        R*                  " USS5      nUR#                  U R$                  U R&                  U R                  5       R(                  S   U R                  5       R(                  S   5      nUR                  U5      UR                  l        [        U R                  5      S:  a  GM8  ggrA  )rB  r   r   r   rJ   rV   r   r   rC  r   r   r3  r    rD  rl   r2  rn  r%   r   rF   rN   rW   r4  rj   rE  rF  s          r   rG  Conv2d.unmergeS  s    {{MM<=((*
&&,,
$&&'!+!11557N!22//?!(!U]]2%jj7G#224;;@@FFH+00%%$$t':':'<'H'H'KKdNaNaNcNoNopqNrr   %|QB$xx(8(8(A(D(D^(TVbVeVeftVuv$|QB+00%%$$'')55a8'')55a8	  *6)D
!!&1 $&&'!++r   c                <    U R                   U   R                  5       $ rJ  rK  rL  s     r   r3  Conv2d.get_delta_weightw  rN  r   c                H   UR                   nU R                  (       a8  U R                  (       a  U R                  5         U R                  " U/UQ70 UD6nOU R                  (       a  U R                  " U/UQ70 UD6nOU R
                   H^  nX`R                  R                  5       ;  a  M"  U R                  U   nU R                  XR                  R                   5      nU" U5      nM`     U R                  " UR                  U5      /UQ70 UD6nUR                  U5      nU$ r   rP  rS  s           r   r0   Conv2d.forward  s      {{__Q888F[[__Q888F"&"6"6!)::

>2**1ll.@.@A!H #7 __QTT.%9KDKFKF>*r   c                *   > [         TU ]  5       nSU-   $ rX  rY  r[  s     r   rZ  Conv2d.__repr__  r^  r   )r.  r-  )
r_  r   Fr3   Fr   FTFr   )r   r"  r  r`  rC   r   r   r   r-  r   r  r$  rK   r   rL   r$  rM   r   r  ra  rO   r   rP   r   r   r#  r&  r'  rb  rc  r%  rd  )r(   r   rT  r   r   r   r   r   re  )r4   r5   r6   r7   r8   r   r  r>  rG  r3  r0   rZ  r9   r:   r;   s   @r   r   r     s    ) $ #!)-#(()"
"
 "
 	"

 "
 "
 "
 "
 "
 "
 '"
 !"
 #&"
 
"
 "
`  %KN KNZ7<r"EH	5. r   r   c                   S n[        U [        5      (       a  U R                  5       nOU n[        U[        R                  R
                  5      (       a  [        X40 UD6nU$ [        U[        R                  R                  5      (       a8  US   (       a"  [        R                  " S5        S=US'   Ul	        [        X40 UD6nU$ )Nr-  zjfan_in_fan_out is set to True but the target module is `torch.nn.Linear`. Setting fan_in_fan_out to False.F)
r   r   r   r    rG   r   r   r   r   r-  )targetr  
oft_configr   
new_moduletarget_base_layers         r   dispatch_defaultr}    s     J&.))"113"#UXX__55F;F;
  
%uxx	7	7"#MM3 DIHF#$z'@F;F;
r   )ry  ztorch.nn.Moduler  r`  rz  r
   r   zOptional[torch.nn.Module])
__future__r   r   typingr   r   r   r    torch.nnrG   torch.nn.functional
functionalr   peft.tuners.tuners_utilsr   r   configr
   Moduler   r=   r   r   r   r}  r  r   r   <module>r     s    #  ' '     L ) )Xk7		 k7\U!~ U!pURYY UpuRYY up 
 r   