
    hI                         S SK r S SKJr  S SKJrJrJrJr  SSKJ	r	   " S S\R                  5      r " S S\	5      r " S	 S
\	5      rg)    N)AutoModelForCausalLMAutoModelForSeq2SeqLMis_torch_npu_availableis_torch_xpu_available   )PreTrainedModelWrapperc                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )	ValueHead   z]
The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
c                   > [         TU ]  5         [        US5      (       d  UR                  SS5      nOUR                  nU(       a  [
        R                  " U5      O[
        R                  " 5       U l        [        US5      (       a  UR                  n[        US5      (       a  UR                  nOd[        US5      (       aS  UR                  (       aB  [        US5      (       a1  [        UR                  S5      (       a  UR                  R                  n[
        R                  " WS5      U l        [
        R                  " 5       U l        g )Nsummary_dropout_probg?hidden_sizeword_embed_proj_dimis_encoder_decoderdecoderr   )super__init__hasattrpopr   nnDropoutIdentitydropoutr   r   r   r   LinearsummaryFlattenflatten)selfconfigkwargsr   r   	__class__s        X/home/james-whalen/.local/lib/python3.13/site-packages/trl/models/modeling_value_head.pyr   ValueHead.__init__   s    v566#)::.Dc#J #)#>#> ;Orzz"67UWU`U`Ub 6=)) ,,K6011 44KV122((WVY-G-G6>>=99"(.."<"<Kyya0zz|    c                    U R                  U5      nUR                  U R                  R                  R                  :w  a/  UR	                  U R                  R                  R                  5      nU R                  U5      nU$ N)r   dtyper   weightto)r   hidden_statesoutputs      r"   forwardValueHead.forward2   s_    m, <<4<<..444YYt||22889Ff%r$   )r   r   r   )	__name__
__module____qualname____firstlineno____doc__r   r,   __static_attributes____classcell__r!   s   @r"   r
   r
      s    $.	 	r$   r
   c                   d   ^  \ rS rSrSr\rSrU 4S jrS r	    SS jr
S rS rS	 rS
 rSrU =r$ )!AutoModelForCausalLMWithValueHead>   a  
An autoregressive model with a value head in addition to the language model head. This class inherits from
`~trl.PreTrainedModelWrapper` and wraps a `transformers.PreTrainedModel` class. The wrapper class supports classic
functions such as `from_pretrained`, `push_to_hub` and `generate`. To call a method of the wrapped model, simply
manipulate the `pretrained_model` attribute of this class.

Class attributes:
    - **transformers_parent_class** (`transformers.PreTrainedModel`) -- The parent class of the wrapped model. This
        should be set to `transformers.AutoModelForCausalLM` for this class.
    - **supported_args** (`tuple`) -- A tuple of strings that are used to identify the arguments that are supported
        by the `ValueHead` class. Currently, the supported args are:
        - **summary_dropout_prob** (`float`, `optional`, defaults to `None`) -- The dropout probability for the
            `ValueHead` class.
        - **v_head_initializer_range** (`float`, `optional`, defaults to `0.2`) -- The initializer range for the
            `ValueHead` if a specific initialization strategy is selected.
        - **v_head_init_strategy** (`str`, `optional`, defaults to `None`) -- The initialization strategy for the
            `ValueHead`. Currently, the supported strategies are:
            - **`None`** -- Initializes the weights of the `ValueHead` with a random distribution. This is the
              default strategy.
            - **"normal"** -- Initializes the weights of the `ValueHead` with a normal distribution.
r   v_head_initializer_rangev_head_init_strategyc                    > [         TU ]  " U40 UD6  U R                  U5      u  n  n[        U R                  R
                  40 UD6U l        U R                  " S0 UD6  g)aZ  
Initializes the model.

Args:
    pretrained_model (`transformers.PreTrainedModel`):
        The model to wrap. It should be a causal language model such as GPT2. or any model mapped inside the
        `AutoModelForCausalLM` class.
    kwargs (`dict`, `optional`):
        Additional keyword arguments, that are passed to the `ValueHead` class.
N )r   r   _split_kwargsr
   pretrained_modelr   v_head_init_weightsr   r?   r    v_head_kwargs_r!   s        r"   r   *AutoModelForCausalLMWithValueHead.__init__\   s]     	)4V4"008q! 5 5 < <NN+]+r$   c                 B   UR                  SS5      nUR                  SS5      nUc  gUS:X  aq  U R                  R                  R                  R                  R                  SUS9  U R                  R                  R                  R                  R                  5         gg)a.  
Initializes the weights of the value head. The default initialization strategy is random. Users can pass a
different initialization strategy by passing the `v_head_init_strategy` argument when calling
`.from_pretrained`. Supported strategies are:
- `normal`: initializes the weights with a normal distribution.

Args:
    **kwargs (`dict`, `optional`):
        Additional keyword arguments, that are passed to the `ValueHead` class. These arguments can contain the
        `v_head_init_strategy` argument as well as the `v_head_initializer_range` argument.
r:   皙?r;   Nnormal        meanstdr   r@   r   r(   datanormal_biaszero_r   r    initializer_rangeinit_strategys       r"   rA   /AutoModelForCausalLMWithValueHead._init_weightsl   s     #JJ'A3G

#94@ h&KK&&++33BS3TKK$$))//1 'r$   c                    SUS'   X%S'   U R                   (       a5  U R                  R                  R                  S:X  a  UR	                  S5        U R                  " SUUS.UD6nUR
                  S   nUR                  nUR                  n	UR                  U R                  R                  R                  R                  :w  a9  UR                  U R                  R                  R                  R                  5      nU R                  U5      R                  S5      n
UR                  [        R                   :w  a  UR#                  5       nU(       a  XXR$                  4$ XU
4$ )a  
Applies a forward pass to the wrapped model and returns the logits of the value head.

Args:
    input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
        Indices of input sequence tokens in the vocabulary.
    past_key_values (`tuple(tuple(torch.FloatTensor))`, `optional`):
        Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
        (see `past_key_values` input) to speed up sequential decoding.
    attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
        Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.
    return_past_key_values (bool): A flag indicating if the computed hidden-states should be returned.
    kwargs (`dict`, `optional`):
        Additional keyword arguments, that are passed to the wrapped model.
Toutput_hidden_statespast_key_valuesPREFIX_TUNING)	input_idsattention_maskr=   )is_peft_modelr?   active_peft_config	peft_typer   r*   logitslossdevicer@   r   r(   r)   squeezer'   torchfloat32floatrX   r   rZ   rX   r[   return_past_key_valuesr    base_model_outputlast_hidden_state	lm_logitsra   values              r"   r,   )AutoModelForCausalLMWithValueHead.forward   s9   2 *.%&$3 !$"7"7"J"J"T"TXg"gJJ() 11 
)
 
 .;;B?%,,	 %%##t{{':':'A'A'H'HH 1 4 4T[[5H5H5O5O5V5V W-.66r: ??emm+!)I!U,M,MNNU++r$   c                 :    U R                   R                  " U0 UD6$ )a  
A simple wrapper around the `generate` method of the wrapped model. Please refer to the
[`generate`](https://huggingface.co/docs/transformers/internal/generation_utils) method of the wrapped model
for more information about the supported arguments.

Args:
    *args (`list`, *optional*):
        Positional arguments passed to the `generate` method of the wrapped model.
    **kwargs (`dict`, *optional*):
        Keyword arguments passed to the `generate` method of the wrapped model.
r?   generater   argsr    s      r"   rp   *AutoModelForCausalLMWithValueHead.generate   s      $$--t>v>>r$   c                     U R                   (       d  U R                  R                  " U0 UD6nO0 nU R                  R                  " U0 UD6nUR	                  5        H  u  pVXcSU 3'   M     U$ z
Returns the state dictionary of the model. We add the state dictionary of the value head to the state
dictionary of the wrapped model by prepending the key with `v_head.`.
v_head.r]   r?   
state_dictr@   itemsr   rr   r    pretrained_model_state_dictv_head_state_dictkvs          r"   rx   ,AutoModelForCausalLMWithValueHead.state_dict   u    
 !!*.*?*?*J*JD*[TZ*[' +-' KK22DCFC%++-DA9:'!6 .**r$   c                 p    U R                   U R                  l         U R                  R                  " U0 UD6$ r&   r@   r?   push_to_hubrq   s      r"   r   -AutoModelForCausalLMWithValueHead.push_to_hub   0    '+{{$$$00$A&AAr$   c                 8  ^ [        UR                  5       5       H.  nSU;   d  M  UR                  U5      XR                  SS5      '   M0     U R                  R                  USS9  A[        U R                  S5      (       Ga  SU R                  R                  R                  5       ;   d(  SU R                  R                  R                  5       ;   a  [        S5      e[        [        U R                  R                  R                  5       5      5      S	   m[        T[        5      (       a/  [        5       (       a  S
T 3mO[        5       (       a  ST 3mOST 3mU R                  R!                  T5      U l        U4S jnU R#                  U5        SU l        gg)
We add the state dictionary of the value head to the state dictionary of the wrapped model by prepending the
key with `v_head.`. This function removes the `v_head.` prefix from the keys of the value head state
dictionary.
rv    Fstricthf_device_mapcpudiskdThe model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models.r   znpu:zxpu:zcuda:c                    > SnU H=  n[        U[        R                  5      (       a  X4R                  T5      4-  nM8  X44-  nM?     U$ )Nr=   
isinstancerd   Tensorr)   )moduleinputoutputs
new_outputr+   first_devices        r"   set_device_hookDAutoModelForCausalLMWithValueHead.post_init.<locals>.set_device_hook   sI    
%F!&%,,77"yy'>&@@
"i/
	 &
 "!r$   TN)listkeysr   replacer@   load_state_dictr   r?   r   values
ValueErrorsetr   intr   r   r)   register_forward_hookis_sequential_parallel)r   rx   r}   r   r   s       @r"   	post_init+AutoModelForCausalLMWithValueHead.post_init   s\    joo'(AA~7A~~a7H
99Y34 ) 	##Ju#=4((/::..<<CCEET22@@GGII z   D$9$9$G$G$N$N$P QRSTUL,,,)++%),#8L+--%),#8L%*<.#9L++..6DK" &&7*.D'= ;r$   )r   r@   NNNF)r.   r/   r0   r1   r2   r   transformers_parent_classsupported_argsr   rA   r,   rp   rx   r   r   r3   r4   r5   s   @r"   r7   r7   >   sO    , !5N, 20 $5,n?+ B
*/ */r$   r7   c                   r   ^  \ rS rSrSr\r/ SQrSrU 4S jr	S r
S rS rS	 rS
 r    SS jrS rSrU =r$ )"AutoModelForSeq2SeqLMWithValueHeadi	  a}  
A seq2seq model with a value head in addition to the language model head. This class inherits from
`~trl.PreTrainedModelWrapper` and wraps a `transformers.PreTrainedModel` class. The wrapper class supports classic
functions such as `from_pretrained` and `push_to_hub` and also provides some additional functionalities such as
`generate`.

Args:
    pretrained_model (`transformers.PreTrainedModel`):
        The model to wrap. It should be a causal language model such as GPT2. or any model mapped inside the
        `AutoModelForSeq2SeqLM` class.
    kwargs:
        Additional keyword arguments passed along to the `ValueHead` class.
)lm_head	embed_outoutput_projectionr9   c                   > [         TU ]  " U40 UD6  U R                  U5      u  n  nSU l        U R	                  5       (       d  [        S5      e[        U R                  R                  40 UD6U l	        U R                  " S0 UD6  g )NTzOThe model does not have a language model head, please use a model that has one.r=   )r   r   r>   r   _has_lm_headr   r
   r?   r   r@   rA   rB   s        r"   r   +AutoModelForSeq2SeqLMWithValueHead.__init__   s~    )4V4"008q!"&  ""noo 5 5 < <NN+]+r$   c                    ^ U R                   R                  5        H,  u  mn[        U4S jU R                   5       5      (       d  M,    g   g)Nc              3   ,   >#    U  H	  oT;   v   M     g 7fr&   r=   .0	attributenames     r"   	<genexpr>BAutoModelForSeq2SeqLMWithValueHead._has_lm_head.<locals>.<genexpr>/  s     K6J$6J   TF)r?   named_modulesanylm_head_namings)r   _moduler   s     @r"   r   /AutoModelForSeq2SeqLMWithValueHead._has_lm_head,  s@    !22@@BMD'Kd6J6JKKK C r$   c                   ^^ [        UR                  5       5       H.  nSU;   d  M  UR                  U5      XR                  SS5      '   M0     U R                  R                  USS9  A[        U R                  S5      (       a  SU R                  R                  R                  5       ;   d(  SU R                  R                  R                  5       ;   a  [        S5      eU R                  R                  5        HB  u  mn[        U4S	 jU R                   5       5      (       d  M,  UR                  R                  m  O   U R                  R!                  T5      U l        U4S
 jnU R#                  U5        SU l        gg)r   rv   r   Fr   r   r   r   r   c              3   ,   >#    U  H	  oT;   v   M     g 7fr&   r=   r   s     r"   r   ?AutoModelForSeq2SeqLMWithValueHead.post_init.<locals>.<genexpr>J  s     O:NYD(:Nr   c                    > SnU H=  n[        U[        R                  5      (       a  X4R                  T5      4-  nM8  X44-  nM?     U$ )a-  
A hook that sets the device of the output of the model to the device of the first parameter of the
model.

Args:
    module (`nn.Module`):
        The module to which the hook is attached.
    input (`tuple`):
        The input to the module.
    outputs (`tuple`):
        The output of the module.
r=   r   )r   r   r   r   r+   lm_head_devices        r"   r   EAutoModelForSeq2SeqLMWithValueHead.post_init.<locals>.set_device_hookQ  sK      
%F!&%,,77"yy'@&BB
"i/
	 &
 "!r$   TN)r   r   r   r   r@   r   r   r?   r   r   r   r   r   r   r(   rb   r)   r   r   )r   rx   r}   r   r   r   r   s        @@r"   r   ,AutoModelForSeq2SeqLMWithValueHead.post_init3  s9    joo'(AA~7A~~a7H
99Y34 ) 	##Ju#=4((/::..<<CCEET22@@GGII z 
 !% 5 5 C C EfO$:N:NOOO%+]]%9%9N !F ++..8DK"* &&7*.D'Q ;r$   c                     U R                   (       d  U R                  R                  " U0 UD6nO0 nU R                  R                  " U0 UD6nUR	                  5        H  u  pVXcSU 3'   M     U$ ru   rw   rz   s          r"   rx   -AutoModelForSeq2SeqLMWithValueHead.state_dicti  r   r$   c                 p    U R                   U R                  l         U R                  R                  " U0 UD6$ r&   r   rq   s      r"   r   .AutoModelForSeq2SeqLMWithValueHead.push_to_huby  r   r$   c                 B   UR                  SS5      nUR                  SS5      nUc  gUS:X  aq  U R                  R                  R                  R                  R                  SUS9  U R                  R                  R                  R                  R                  5         gg)z.
We initialize the weights of the value head.
r:   rG   r;   NrH   rI   rJ   rM   rR   s       r"   rA   0AutoModelForSeq2SeqLMWithValueHead._init_weights~  s     #JJ'A3G

#94@ h&KK&&++33BS3TKK$$))//1 'r$   c                    X%S'   U R                   (       a5  U R                  R                  R                  S:X  a  UR	                  S5        U R                  " SUUSS.UD6nUR
                  S   nUR                  nUR                  n	U R                  U5      R                  S5      n
UR                  [        R                  :w  a  UR                  5       nU(       a  XXR                  4$ XU
4$ )NrX   rY   T)rZ   r[   rW   r\   r=   )r]   r?   r^   r_   r   decoder_hidden_statesr`   ra   r@   rc   r'   rd   re   rf   rX   rg   s              r"   r,   *AutoModelForSeq2SeqLMWithValueHead.forward  s     %4 !$"7"7"J"J"T"TXg"gJJ() 11 
)!%
 	
 .CCBG%,,	 %%-.66r: ??emm+!)I!U,M,MNNU++r$   c                 :    U R                   R                  " U0 UD6$ )z*
We call `generate` on the wrapped model.
ro   rq   s      r"   rp   +AutoModelForSeq2SeqLMWithValueHead.generate  s      $$--t>v>>r$   )r   r   r@   r   )r.   r/   r0   r1   r2   r   r   r   r   r   r   r   rx   r   rA   r,   rp   r3   r4   r5   s   @r"   r   r   	  sZ     !6CON
,4/l+ B
2  $ ,D? ?r$   r   )rd   torch.nnr   transformersr   r   r   r   modeling_baser   Moduler
   r7   r   r=   r$   r"   <module>r      sF      t t 1%		 %PH/(> H/Vi?)? i?r$   