
    oi.                        S SK Jr  S SKrS SKJr  S SKrS SKJr  S SK	J
r
  S SKJ
s  Jr  S SKJr  S SKJrJrJr  S SKJrJr   " S S\
R.                  \5      rg)	    )annotationsN)Optional)
BufferDict)BaseTunerLayer_get_in_out_featurescheck_adapters_to_merge)check_deepspeed_zero3_enabledgather_params_ctxc                     ^  \ rS rSrSrSr S         SU 4S jjjr\S 5       rSS jr	S r
S rSSS	 jjrSS
 jrSS jrSS jrSS jrSrU =r$ )TrainableTokensLayer   )trainable_tokens_delta)token_indicestrainable_tokens_originalc                  > [         TU ]  5         Xl        X l        XPl        U(       a  U/O/ U l        U R                  (       d3  [        R                  " 0 5      U l	        [        0 5      U l        0 U l        OQU R                  R                  U l	        U R                  R                  U l        U R                  R                  U l        / U l        [        U R                  5       5      u  pgX`l        Xpl        g N)super__init__
base_layer_active_adapterkwargs_tied_adaptertied_adapternnParameterDictr   r   r   r   merged_adaptersr   get_base_layerin_featuresout_features)	selfr   adapter_namer   r   r   r   r   	__class__s	           \/home/james-whalen/.local/lib/python3.13/site-packages/peft/tuners/trainable_tokens/layer.pyr   TrainableTokensLayer.__init__%   s     	$+ 0<l^   *,*:*:2*>D'-7^D*!#D*.*;*;*R*RD'-1->->-X-XD*!%!2!2!@!@D  "$89L9L9N$O!&(    c                D    U R                   (       a  U R                   S   $ g )Nr   )r   )r    s    r#   r   !TrainableTokensLayer.tied_adapterL   s    %%a((r%   c                   Sn[         R                  " S[         R                  R                  5       5      n[	        U/SS9   [
        R                  " 5       (       aF  [
        R                  " 5       (       a,  [
        R                  " 5       U:X  a  X   R                  5       nO+[         R                  " [        U5      U4UR                  US9nSSS5        [
        R                  " WUS9  U$ ! , (       d  f       N%= f)zDeepSpeed zero3 specific code to initialize trainable tokens.

Ensures that only the necessary weights are collected to a single rank, initialized, and then shared with all
ranks.
r   cudaN)modifier_rankdtypedevice)src)torchr-   r)   current_devicer
   distis_availableis_initializedget_rankcloneemptylenr,   	broadcast)r    weightrows	embed_dimsrc_rankr-   token_weightss          r#   _collect_token_weights+TrainableTokensLayer._collect_token_weightsR   s     fejj&?&?&ABxt<  ""t':':'<'<T\A\ & 2 2 4 !&Y	* ,,!! = 	}(3 =<s   BC,,
C:c                >   UR                  SS 5      (       a  g US   U R                  U'   UR                  SS5      nU R                  5       R                  n[	        U R                  5       S5      (       a  U R                  5       R
                  nOU R                  5       R                  nU(       aL  [        5       (       a   U R                  X@R                  U   U5      nO_U R                  U R                  U      nOB[        R                  " [        U R                  U   5      U4UR                  UR                  S9n[        R                  " UR!                  5       SS9U R"                  U'   UR!                  5       U R$                  U'   U R'                  U5        g )Nr   r   init_weightsTembedding_dimr+   )requires_grad)getr   r   r9   hasattrrB   r   r	   r>   r/   randnr7   r,   r-   r   	Parameterr5   r   r   %_move_adapter_to_device_of_base_layer)r    r!   r   rA   r9   r;   valuess          r#   update_layer!TrainableTokensLayer.update_layerk   sS   ::nd+++1/+B<(zz.$7 $$&--4&&(/::++-;;I ++-99I,..44V=O=OP\=]_hiT%7%7%EF [[T''56	Bll}}F 57LL_c4d##L17=||~&&|422<@r%   c                "   [        U5      S::  a  g[        5       n[        XR                  -   5       HZ  n[        U R                  U   5      n[        UR	                  U5      5      (       a  [        SU S35      eUR                  U5        M\     g)zRaises an error if the token indices of the given adapter names are overlapping.
This is currently not supported and can lead to undefined behavior of the model if no specific merging between
the overlapping indices' values is applied.
   NzToken indices of adapter zy are already defined and would result in undefined merging behavior. Only disjunct token indices are currently supported.)r7   setr   r   intersection
ValueErrorupdate)r    adapter_namesindicesr!   	index_sets        r#   _check_overlapping_tokens.TrainableTokensLayer._check_overlapping_tokens   s    
 }"%  0D0D DELD..|<=I7''	233 /~ >g g  NN9% Fr%   c                ^   [        X5      nU(       d  g U R                  U5        U R                  R                  R                  nU H  n[
        R                  " U R                  U   5      R                  UR                  5      nU R                  U   R                  U5      nUR                  SXVS9nU(       d  Mv  [
        R                  " U5      R                  5       (       a  M  [        SU S35      e   X0R                  R                  l        U R                  R!                  U5        g )Nr   dimindexsourcez1NaNs detected in the merged weights. The adapter z seems to be broken)r   rU   r   r9   datar/   tensorr   tor-   r   
index_copyisfiniteallrP   r   extend)r    
safe_mergerR   mergedr!   rZ   deltass          r#   mergeTrainableTokensLayer.merge   s    /D&&}5'',,)LLL!3!3L!ABEEfmmTE00>AA&IF&&1E&IFz%.."8"<"<">"> #TUaTbbu!vww * '-###M2r%   c                R   U R                   (       d  [        R                  " S5        g [        U R                  5      S:  a  U R                  R                  5       n[        R                  " U R                  U   5      R                  U R                  R                  R                  5      nU R                  U   R                  U R                  R                  5      nU R                  R                  R                  R                  SX#S9  [        U R                  5      S:  a  M  g g )Nz Already unmerged. Nothing to do.r   rX   )rd   warningswarnr7   r   popr/   r]   r   r^   r   r9   r-   r   r\   index_copy_)r    r!   rZ   	originalss       r#   unmergeTrainableTokensLayer.unmerge   s    {{MM<=$&&'!+//335LLL!3!3L!ABEEdooF\F\FcFcdE66|DGGH^H^_IOO""''333Y $&&'!+r%   c                   U R                   R                  nU Hm  n[        R                  " U R                  U   5      R                  UR                  5      nU R                  U   R                  U5      nUR                  SXES9nMo     U$ )Nr   rX   )	r   r9   r/   r]   r   r^   r-   r   r_   )r    active_adaptersWr!   rZ   re   s         r#   get_merged_weights'TrainableTokensLayer.get_merged_weights   sw    OO""+LLL!3!3L!ABEEahhOE00>AA!DF%?A , r%   c           
        U R                   (       d  U(       d9  U R                  (       a  U R                  5         U R                  " U/UQ70 UD6nU$ U R                  (       a  U R                  " U/UQ70 UD6nU$ U R	                  U5        U R                  U5      n[        U R                  [        R                  R                  5      (       a  [        R                  " UUU R                  R                  U R                  R                  U R                  R                  U R                  R                  U R                  R                   S9nU R#                  5       nUb  XWR%                  UR&                  5      -  nU$ [        U R                  [        R                  R(                  5      (       a  [        R*                  " UUS9nU$ [-        S5      e)N)inputr9   padding_idxmax_norm	norm_typescale_grad_by_freqsparse)rv   r9   zZTrainableTokensLayer wraps an unknown layer type, maybe you are targeting the wrong layer?)disable_adaptersrd   rn   r   rU   rs   
isinstancer/   r   	EmbeddingF	embeddingrw   rx   ry   rz   r{   _get_embed_scaler^   r,   LinearlinearrP   )r    xrq   argsr   resultrr   embed_scales           r#   forward_adapters%TrainableTokensLayer.forward_adapters   st     {{__Q888FP O [[__Q888FL I **?;''8A $//588+=+=>> $ ; ;!__55"oo77'+'I'I??11 #335*#nnV\\&BBF  DOOUXX__== 	 !p r%   c                B    U R                   " XR                  /UQ70 UD6$ r   )r   rq   )r    r   r   r   s       r#   forwardTrainableTokensLayer.forward  s#    $$Q(<(<NtNvNNr%   )
r   r   r   r   r   r   r   r   r   r   r   )
r   z	nn.Moduler!   strr   z	list[int]r   zOptional[TrainableTokensLayer]returnNone)r9   torch.Tensorr:   r   r;   intr   r   )FN)rc   boolrR   zOptional[list[str]]r   r   )r   r   )r   r   )r   r   r   r   )__name__
__module____qualname____firstlineno__adapter_layer_namesother_param_namesr   propertyr   r>   rJ   rU   rf   rn   rs   r   r   __static_attributes____classcell__)r"   s   @r#   r   r      s    5 G 8<%)%) %) !	%)
 5%) 
%) %)N  
2'AR&(3,
Z,\O Or%   r   )
__future__r   ri   typingr   r/   torch.distributeddistributedr1   torch.nnr   torch.nn.functional
functionalr   peft.tuners._buffer_dictr   peft.tuners.tuners_utilsr   r   r   peft.utils.integrationsr	   r
   Moduler    r%   r#   <module>r      sA    #         / b b ThO299n hOr%   