
    oi$                         S SK r S SKrS SKJr  S SKrS SKJr  S SKJs  Jr	  S SK
JrJr   " S S\5      r " S S\R                  \5      rg)    N)Optional)BaseTunerLayercheck_adapters_to_mergec                   d    \ rS rSrSrSrS\R                  4S jr  SS\	S\	4S jjr
S	 rS
 rSrg)
ShiraLayer   )shira_weight)rscalingshira_indices
base_layerc                    Xl         0 U l        0 U l        [        R                  " 0 5      U l        0 U l        UR                  R                  U l	        SU l
        / U l        U R                  5       n[        U[        R                  5      (       a  UR                  UR                   pCO[#        S5      eX0l        X@l        X l        g )NFz)Only nn.Linear layers supported currently)r   r
   r   nnParameterDictr	   r   weightshapeweight_shape_disable_adaptersmerged_adaptersget_base_layer
isinstanceLinearin_featuresout_featuresNotImplementedErrorkwargs)selfr   r   r   r   s        Q/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/shira/layer.py__init__ShiraLayer.__init__    s    $,,R0&--33 "'!((*
j")),,(2(>(>
@W@W%&QRR&(    init_weightsinference_modec                    US::  a  [        SU 35      eX0R                  U'   SU R                  U'   X0R                  U R                  -   -  nXpR                  U R                  -  :  a)  [        SU SU R                  U R                  -   S35      eU(       a  [
        R                  " U5      O[
        R                  " U5      n[        R                  " UR                  U R                  R                  R                  5      R                  U R                  R                  R                  5      SS9U R                  U'   UGb  [
        R                   " US:H  5      n	[
        R"                  " U	S   R%                  S5      U	S	   R%                  S5      /S5      R                  [
        R&                  5      U R(                  U'   U R(                  U   R                  U R                  R                  R                  5      U R(                  U'   U R(                  U   R*                  S	   U R                  U   R*                  S   :w  a  [        S
U SU R                   35      eU R-                  U5        U R/                  U R0                  US9  g )Nr   z?`r` should be a positive integer value but the value passed is g      ?zThe set rank zP results in more shira params than the total number of params in the base layer z and this is not allowed.T)requires_grad   zFThe SHiRA indices and weights are not the same dimensions for adapter z
 in layer )r#   )
ValueErrorr
   r   r   r   torchzerosrandnr   	Parametertor   r   dtypedevicer	   wherecat	unsqueezeintr   r   %_move_adapter_to_device_of_base_layerset_adapteractive_adapters)
r   adapter_namemaskr
   r"   r#   r   num_shira_weightshira_init_weightmask_indicess
             r   update_layerShiraLayer.update_layer6   sy    6^_`^abcc | 	\"  0 043D3D DE..1B1BBBs"rsw  tD  tD  GK  GX  GX  tX  sY  Yr  s  >JEKK(89u{{[kOl*,,,  !7!7!=!=>AA$//BXBXB_B_`+
,'
  ;;ts{3L/4yya**1-|A/H/H/KLa0bm |, 04/A/A,/O/R/RSWSbSbSiSiSpSp/qD|,!!,/55a8D<M<Ml<[<a<abc<dd \]i\jjtuy  vE  vE  uF  G  	22<@--nMr!   c                 \    [         R                  R                  U R                  U   5        g N)r   initzeros_r	   )r   r6   s     r   reset_shira_parameters!ShiraLayer.reset_shira_parametersf   s    
t((67r!   c                 @    XR                   ;  a  g X R                   U'   g r>   )r   )r   adapterscales      r   	set_scaleShiraLayer.set_scalei   s    ,,& %Wr!   )r   r   r   r   r   r   r
   r   r   r	   r   N)TF)__name__
__module____qualname____firstlineno__adapter_layer_namesother_param_namesr   Moduler   boolr;   rA   rF   __static_attributes__ r!   r   r   r      sM    +9299 6 "$.N
 .N .N`8&r!   r   c                      ^  \ rS rSr   SS\S\S\S\SS4
U 4S jjjrSS	\S
\\	\      SS4S jjr
SS jrS\R                  4S jrS\R                  S\R                  4S jrS\4U 4S jjrSrU =r$ )r   p   r6   r
   fan_in_fan_outr"   returnNc                    > [         TU ]  5         [        R                  " X40 UD6  XPl        U R                  U R                  5       La  [        S5      eX0l        U R                  X2XFS9  g )Nz)SHiRA does not support nested base layers)r"   )	superr   r   rT   r   r   r'   _active_adapterr;   )	r   r   r7   r6   r
   rT   r"   r   	__class__s	           r   r   Linear.__init__r   sc     	D77,??$"5"5"77HII+,aKr!   
safe_mergeadapter_namesc                 >   [        X5      nU(       d  gU GH  nX0R                  R                  5       ;   d  M#  U R                  5       nU(       a  UR                  R
                  R                  5       nXPR                  U5      -  n[        R                  " U5      R                  5       (       d  [        SU S35      eXTR                  l        O.UR                  =R
                  U R                  U5      -  sl        U R                  R                  U5        GM     g)a  
Merge the active adapter weights into the base weights

Args:
    safe_merge (`bool`, *optional*):
        If True, the merge operation will be performed in a copy of the original weights and check for NaNs
        before merging the weights. This is useful if you want to check if the merge operation will produce
        NaNs. Defaults to `False`.
    adapter_names (`List[str]`, *optional*):
        The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
        to `None`.
Nz1NaNs detected in the merged weights. The adapter z seems to be broken)r   r	   keysr   r   datacloneget_delta_weightr(   isfiniteallr'   r   append)r   r[   r\   active_adapterr   orig_weightss         r   mergeLinear.merge   s     0D+N!2!2!7!7!99!002
 $.#4#4#9#9#?#?#AL $9$9.$IIL >>,7;;==(OP^O__rs  .:%%*%%**d.C.CN.SS*$$++N;% ,r!   c                    U R                   (       d  [        R                  " S5        g [        U R                  5      S:  a  U R                  R                  5       nXR                  R                  5       ;   a<  U R                  5       R                  =R                  U R                  U5      -  sl
        [        U R                  5      S:  a  M  g g )Nz Already unmerged. Nothing to do.r   )mergedwarningswarnlenr   popr	   r^   r   r   r_   ra   )r   re   s     r   unmergeLinear.unmerge   s    {{MM<=$&&'!+!11557N!2!2!7!7!99##%,,11T5J5J>5ZZ1 $&&'!+r!   c                    U R                   U   R                  U R                  U   R                  5      U R                   U'   [        R
                  " U R                   U   U R                  U   U R                  U   -  U R                  5      $ )z
Compute the delta weight for the given adapter.

Args:
    adapter (str):
        The name of the adapter for which the delta weight should be computed.
)r   r,   r	   r.   r(   sparse_coo_tensorr   r   )r   rD   s     r   ra   Linear.get_delta_weight   s     '+&8&8&A&D&DTEVEVW^E_EfEf&g7#&&w'):):7)CdllSZF[)[]a]n]n
 	
r!   xc                 0   U R                   (       a9  U R                  (       a  U R                  5         U R                  " U/UQ70 UD6nU$ U R                  (       a  U R                  " U/UQ70 UD6nU$ [        R
                  " U R                  R                  R                  5      nU R                   H5  nX`R                  R                  5       ;  a  M"  XPR                  U5      -  nM7     [        R                  " XU R                  R                  S9nU$ )N)bias)disable_adaptersrj   ro   r   copydeepcopyr   r_   r5   r	   r^   ra   Flinearrv   )r   rt   argsr   result
new_weightre   s          r   forwardLinear.forward   s      {{__Q888F  [[__Q888F  t'='='B'BCJ"&"6"6!):):)?)?)AA33NCC
 #7
 XXa$//2F2FGFr!   c                 *   > [         TU ]  5       nSU-   $ )Nzshira.)rW   __repr__)r   reprY   s     r   r   Linear.__repr__   s    g #~r!   )rX   rT   )r   FT)FN)rU   N)rH   rI   rJ   rK   strr2   rO   r   r   listrg   ro   r(   Tensorra   r   r   rP   __classcell__)rY   s   @r   r   r   p   s     $!L 	L
 L L L 
L L&%< %<Xd3i=P %<\` %<N[
5<< 
 5<< $#  r!   r   )rx   rk   typingr   r(   torch.nnr   torch.nn.functional
functionalrz   peft.tuners.tuners_utilsr   r   r   rN   r   rQ   r!   r   <module>r      sB           LS& S&liRYY
 ir!   