import copy

import torch
from torch.nn import CrossEntropyLoss

from peft.utils.integrations import gather_params_ctx


class CPTEmbedding(torch.nn.Module):
    """
    CPTEmbedding is a custom embedding layer designed for Context-aware Prompt Tuning (CPT) in PEFT. It initializes
    embeddings, applies prompt-specific projections, and computes loss using label masks.
    """

    def __init__(self, config, word_embeddings):
        """
        Initializes the CPTEmbedding module.

        Args:
            config (Namespace):
                Configuration object containing model hyperparameters and CPT-specific settings.
            word_embeddings (torch.nn.Embedding):
                The base word embedding layer used to initialize CPT embeddings.
        """
        super().__init__()
        self.config = copy.deepcopy(config)
        num_virtual_tokens = config.num_virtual_tokens

        # Initialize the prompt embedding over the virtual tokens
        self.embedding = torch.nn.Embedding(num_virtual_tokens, config.token_dim)

        # Outside inference mode, seed the prompt with the configured token embeddings
        if not config.inference_mode:
            assert config.num_virtual_tokens == len(config.cpt_token_ids)

            init_token_ids = torch.LongTensor(config.cpt_token_ids).to(word_embeddings.weight.device)
            with gather_params_ctx(word_embeddings.parameters()):
                word_embedding_weights = word_embeddings(init_token_ids).detach().clone()
            word_embedding_weights = word_embedding_weights.to(torch.float32)
            self.embedding.weight = torch.nn.Parameter(word_embedding_weights)

        # The base prompt embedding stays frozen; only the delta below is trained
        self.embedding.requires_grad_(False)

        # Initialize the delta embedding with zero weights
        self.delta_embedding = torch.nn.Embedding(num_virtual_tokens, config.token_dim)
        self.delta_embedding.weight.data = torch.zeros_like(self.delta_embedding.weight).to(torch.float32)

        # Register a backward hook so only selected token types receive gradient updates
        self.set_updated_tokens()

    def forward(self, indices):
        """
        Computes the prompt embeddings and applies delta adjustments.

        Args:
            indices (torch.Tensor):
                Indices of the tokens to be embedded.

        Returns:
            torch.Tensor:
                Sum of prompt embeddings and delta embeddings.
        """
        with torch.no_grad():
            prompt_embeddings = self.embedding(indices)

        # Project the delta weights back into their epsilon-balls before using them
        self.delta_embedding.weight.data = self.get_projection()

        delta_prompt_embeddings = self.delta_embedding(indices)

        return prompt_embeddings + delta_prompt_embeddings

    def set_updated_tokens(self):
        """
        Sets up a backward hook to selectively update token gradients based on the CPT token type mask.
        """
        tensor_ICL_mask = torch.Tensor(self.config.cpt_tokens_type_mask).long()
        mask_input_template = torch.remainder(tensor_ICL_mask, 4) == 1
        mask_input = torch.remainder(tensor_ICL_mask, 4) == 2
        mask_output_template = torch.remainder(tensor_ICL_mask, 4) == 3
        mask = mask_input_template | mask_input | mask_output_template
        mask = mask.view(-1, 1)

        def backward_hook(grad):
            grad = grad * mask.to(grad.device)  # Zero out gradients of label tokens
            return grad

        self.delta_embedding.weight.register_hook(backward_hook)

    def get_epsilon(self):
        cpt_tokens_type_mask = self.config.cpt_tokens_type_mask

        MIN_VALUE = 1e-10

        # Scale the projection radii with the embedding width (2048 is the reference dimension)
        normalized_format_eps = self.config.opt_projection_format_epsilon * torch.sqrt(
            torch.Tensor([self.config.token_dim / 2048])
        )
        normalized_input_eps = self.config.opt_projection_epsilon * torch.sqrt(
            torch.Tensor([self.config.token_dim / 2048])
        )

        epsilon = torch.ones_like(torch.Tensor(cpt_tokens_type_mask)).to(torch.float32) * MIN_VALUE
        cpt_tokens_type_mask = torch.Tensor(cpt_tokens_type_mask).long()

        # Template (format) tokens use the format epsilon; input tokens use the input epsilon
        epsilon[(cpt_tokens_type_mask > 0) & (torch.remainder(cpt_tokens_type_mask, 4) == 1)] = normalized_format_eps
        epsilon[(cpt_tokens_type_mask > 0) & (torch.remainder(cpt_tokens_type_mask, 4) == 3)] = normalized_format_eps
        epsilon[(cpt_tokens_type_mask > 0) & (torch.remainder(cpt_tokens_type_mask, 4) == 2)] = normalized_input_eps

        return epsilon

    def get_projection(self):
        """
        Applies epsilon-based projection to the delta embeddings to control their norm.
        """
        with torch.no_grad():
            new_embeddings_weights = self.delta_embedding.weight.detach().clone().to(self.delta_embedding.weight.device)
            token_norm = torch.norm(new_embeddings_weights, p=2, dim=1)
            projection_mask = token_norm > 0
            if torch.any(projection_mask):
                epsilon = self.get_epsilon().to(self.delta_embedding.weight.device)
                new_embeddings_weights[projection_mask] *= (
                    epsilon[projection_mask] / token_norm[projection_mask].clamp(min=epsilon[projection_mask])
                ).view(-1, 1)
            return new_embeddings_weights

    @staticmethod
    def calculate_loss(base_model_output, labels, cpt_type_mask, config):
        """
        Computes the loss for CPT models with optional exponential decay.

        Args:
            base_model_output (ModelOutput):
                Output from the base model containing logits.
            labels (torch.Tensor):
                Ground-truth labels for the input tokens.
            cpt_type_mask (torch.Tensor):
                Token type mask used for filtering valid loss terms.
            config (Namespace):
                Configuration object containing loss-related hyperparameters.

        Returns:
            ModelOutput:
                The base model output with computed loss.
        """
        device = base_model_output.logits.device

        lm_logits = base_model_output.logits
        labels = labels.to(device)

        # Shift logits and labels so each position predicts the next token
        shift_logits = lm_logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        shift_cpt_type_mask = cpt_type_mask[..., 1:].contiguous()

        shift_labels_bool = (shift_labels.clone().detach() != -100).bool()
        batch_size, seq_length, vocab_size = shift_logits.shape

        # Per-token cross-entropy; the reduction is applied manually below
        loss_fct = CrossEntropyLoss(reduction="none", ignore_index=-100)
        loss = loss_fct(
            shift_logits.view(batch_size * seq_length, vocab_size), shift_labels.view(batch_size * seq_length)
        )
        loss = loss.view(batch_size, seq_length)

        shift_labels_weights = shift_labels_bool.clone().detach().float()
        for i in range(batch_size):
            # Label tokens carry a type-mask value that is a positive multiple of 4
            idx_labels = (shift_cpt_type_mask[i] > 0) & (shift_cpt_type_mask[i] % 4 == 0)
            labels_ids = shift_cpt_type_mask[i][idx_labels].unique()

            # Walk the examples from last to first, decaying the weight once per example
            exponential_decay = torch.ones_like(shift_cpt_type_mask[i]).to(device=device).float()
            decay_value = 1
            for label_mask_idx in torch.flip(labels_ids, [0]):
                exponential_decay[shift_cpt_type_mask[i] == label_mask_idx] = decay_value
                decay_value *= config.opt_loss_decay_factor
            if config.opt_weighted_loss_type == "decay":
                shift_labels_weights[i] *= exponential_decay

        # Weighted mean over the valid (non-ignored) label positions
        loss = (loss[shift_labels_bool] * shift_labels_weights[shift_labels_bool]).mean()
        base_model_output.loss = loss

        return base_model_output

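    # Illustrative note (not from the original file): with opt_loss_decay_factor=0.5 and three
    # in-context examples, the loop above weights the label tokens of the last example by 1.0,
    # the middle one by 0.5, and the first by 0.25, since decay_value starts at 1 and is
    # multiplied by the factor once per example while iterating backwards.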