
    h@~                        S SK r S SKrS SKJrJr  S SKrS SKrS SKJ	r	  S SK
Jr  S SKJrJr  S SKJrJrJrJrJrJrJrJr  S SKJr  S SKJr  S	S
KJr  S	SKJr  S	SK J!r!J"r"J#r#  S	SK$J%r%  SSK&J'r'  SSK(J)r)  \" 5       (       a  S SK*J+r+J,r,  S SK-J.r.  S SK/J0r0  S SK1J2r2  \" 5       (       a  S SK3r3\ Rh                  " \55      r6 S-S\7\8   S\S\S\	S\\   S\9S\7\8   4S jjr: " S S\5      r; " S S \5      r<S!\S\7\8   S"\7\8   S#\7\8   S\Rz                  4
S$ jr> " S% S&\5      r? " S' S(\5      r@ " S) S*\5      rA " S+ S,\5      rBg).    N)OptionalUnion)Accelerator)AcceleratorState)gather_objectis_wandb_available)GenerationConfigPreTrainedModelPreTrainedTokenizerBaseTrainerTrainerCallbackTrainerControlTrainerStateTrainingArguments)
has_length)is_rich_available   )maybe_apply_chat_template)is_mergekit_available)MergeConfigmerge_modelsupload_model_to_hf)unwrap_model_for_generation   )BasePairwiseJudge)log_table_to_comet_experiment)ConsoleGroup)Live)Panel)Progresspromptsmodel	tokenizeracceleratorgeneration_config
batch_sizereturnc           	         / n[        X5       n[        S[        U 5      U5       H  nXX-    n	U" U	SSSS9R                  UR                  5      n
UR
                  " S0 U
DSU0D6n[        U
R                  U5       H4  u  pU[        U5      S nUR                  USS9nUR                  U5        M6     M     SSS5        U$ ! , (       d  f       U$ = f)	a  
Generates completions for a list of pre-formatted prompts from the given model.

Args:
    prompts (list[str]): A list of input prompts for which completions are to be generated.
    model (PreTrainedModel): The pre-trained model to be used for generation.
    tokenizer (PreTrainedTokenizerBase): The tokenizer to be used for encoding and decoding.
    accelerator (Accelerator): The accelerator to be used for model execution.
    generation_config (GenerationConfig): Configuration for text generation.
    batch_size (int, optional): The number of prompts to process in each batch. Default is 1.

Returns:
    list[str]: A list of generated text completions corresponding to the input prompts.
r   ptT)return_tensorspadding
truncationr&   N)skip_special_tokens )
r   rangelentodevicegeneratezip	input_idsdecodeappend)r"   r#   r$   r%   r&   r'   completionsunwrapped_modelidxbatchtokenized_batchgenerationsprompt
generation
completions                  O/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/callbacks.py_generate_completionsrC   ;   s    , K	$U	8OCL*5C#"23E'dD]abeefkfrfrsO)22 !"3K '*/*C*C[&Q"'F6
&--jd-S
"":.	 'R 6 
9  
9	8 s   B*C
Cc                       \ rS rSrSrS\\\R                  R                  4   S\
\   4S jr\S 5       r\S 5       rS rS	rg
)SyncRefModelCallbackb   z;
Callback to synchronize the model with a reference model.
	ref_modelr%   c                     X l         Xl        g Nr%   rG   )selfrG   r%   s      rB   __init__SyncRefModelCallback.__init__g   s    
 '"    c                     [        UR                  5       U R                  5       5       H;  u  p4UR                  R                  SU-
  5      R	                  UR                  US9  M=     g )N      ?alpha)r5   
parametersdatamul_add_)r#   target_modelrR   target_param
copy_params        rB   _sync_target_model'SyncRefModelCallback._sync_target_modelo   sU    (+L,C,C,EuGWGWGY(Z$L""3;/44Z__E4R )[rN   c                    [        5       R                  nUb  UR                  S:X  a  SS KnUR                  R                  [        U R                  5       5      [        UR                  5       5      -   SS9   UR                  R                  5       S:X  a  [        R                  XU5        S S S 5        g [        R                  XU5        g ! , (       d  f       g = f)N   r   )modifier_rank)r   deepspeed_plugin
zero_stage	deepspeedzeroGatheredParameterslistrS   commget_rankrE   rZ   )r#   rW   rR   r_   ra   s        rB   sync_target_model&SyncRefModelCallback.sync_target_modelt   s    +->>',<,G,G1,L22U%%'(40G0G0I+JJZ[ 3  >>**,1(;;EQVW	  !33EO s   75C
Cc                 
   US   nU R                   bq  UR                  UR                  -  S:X  aS  U R                  (       a  U R                  R	                  U5      nU R                  XPR                   UR                  5        g g g )Nr#   r   )rG   global_stepref_model_sync_stepsr%   unwrap_modelrg   ref_model_mixup_alpha)rK   argsstatecontrolkwargsr#   s         rB   on_step_end SyncRefModelCallback.on_step_end   sp    !'>>%%*;*;d>W>W*W[\*\((55e<""5..$:T:TU +]%rN   rJ   N)__name__
__module____qualname____firstlineno____doc__r   r
   torchnnModuler   r   rL   staticmethodrZ   rg   rr   __static_attributes__r/   rN   rB   rE   rE   b   sg    #%((//9:# k*# S S P PVrN   rE   c                   P    \ rS rSrSrS rS rS rSS jrS r	S	 r
SS
 jrS rSrg)RichProgressCallback   zX
A [`TrainerCallback`] that displays the progress of training or evaluation using Rich.
c                     [        5       (       d  [        S5      eS U l        S U l        S U l        S U l        S U l        S U l        S U l        S U l	        g )NzSRichProgressCallback requires the `rich` extra. To install, run `pip install rich`.)
r   ImportErrortraining_barprediction_bartraining_task_idprediction_task_id
rich_grouprich_consoletraining_statuscurrent_step)rK   s    rB   rL   RichProgressCallback.__init__   sV     ""stt " $"& # rN   c           	         UR                   (       a  [        5       U l        [        5       U l        [	        5       U l        U R
                  R                  S5      U l        [        [        [        U R                  U R                  U R                  5      5      5      U l        U R                  R                  5         U R                  R                  SUR                  S9U l        SU l        g g )NzNothing to log yet ...z[blue]Training the modeltotalr   )is_world_process_zeror!   r   r   r   r   statusr   r   r    r   r   startadd_task	max_stepsr   r   rK   rn   ro   rp   rq   s        rB   on_train_begin#RichProgressCallback.on_train_begin   s    && (
D"**D '	D#'#4#4#;#;<T#UD "5t/@/@$BUBUW[WkWk)l#mnDOOO!!#$($5$5$>$>?Yafapap$>$qD! !D 'rN   c                     UR                   (       aN  U R                  R                  U R                  UR                  U R
                  -
  SS9  UR                  U l        g g )NTadvanceupdate)r   r   r   r   rj   r   r   s        rB   rr    RichProgressCallback.on_step_end   sR    &&$$T%:%:EDUDUX\XiXiDirv$w % 1 1D 'rN   Nc                     UR                   (       al  [        U5      (       a[  U R                  c(  U R                  R	                  S[        U5      S9U l        U R                  R                  U R                  SSS9  g g g )Nz*[blue]Predicting on the evaluation datasetr   r   Tr   )r   r   r   r   r   r1   r   )rK   rn   ro   rp   eval_dataloaderrq   s         rB   on_prediction_step'RichProgressCallback.on_prediction_step   sw    &&:o+F+F&&.*.*=*=*F*F@OH\ +G +' &&t'>'>RV&W ,G&rN   c                     UR                   (       a;  U R                  b-  U R                  R                  U R                  5        S U l        g g g rI   r   r   r   remove_taskr   s        rB   on_evaluate RichProgressCallback.on_evaluate   C    &&&&2##//0G0GH*.' 3 'rN   c                     UR                   (       a;  U R                  b-  U R                  R                  U R                  5        S U l        g g g rI   r   r   s        rB   
on_predictRichProgressCallback.on_predict   r   rN   c                     UR                   (       aH  U R                  b:  UR                  SS 5      nU R                  R	                  S[        U5       35        g g g )N
total_flosz[bold green]Status = )r   r   popr   r   str)rK   rn   ro   rp   logsrq   _s          rB   on_logRichProgressCallback.on_log   sO    &&4+<+<+Ht,A  ''*?D	{(KL ,I&rN   c                     UR                   (       aS  U R                  R                  5         S U l        S U l        S U l        S U l        S U l        S U l        S U l        S U l	        g g rI   )
r   r   stopr   r   r   r   r   r   r   r   s        rB   on_train_end!RichProgressCallback.on_train_end   s_    &&OO  " $D"&D$(D!&*D#"DO $D#'D  $D 'rN   )r   r   r   r   r   r   r   r   rI   )rt   ru   rv   rw   rx   rL   r   rr   r   r   r   r   r   r}   r/   rN   rB   r   r      s1    ! "2
X//M
%rN   r   ro   r9   winner_indicesc           	          [        U R                  5      /[        U5      -  n[        [	        XAX#5      5      nU Vs/ s H  ofS   US   US   S   US   S   US   4PM      nn[
        R                  " U/ SQS9$ s  snf )Nr   r   r   r]   )stepr?   reference_modelpolicywinner_index)columns)r   rj   r1   rd   r5   pd	DataFrame)ro   r"   r9   r   rj   rT   item
split_datas           rB   _win_rate_completions_dfr      s     u(()*S\9KK+FGDRVWRV$7DGT!WQZaT!WERVJW<<
,kll Xs   %A:c                   ~    \ rS rSrSr    SS\S\S\\   S\\	   S\
S	\
4S
 jjrS\S\S\4S jrS\S\S\4S jrSrg)WinRateCallback   a|  
A [`~transformers.TrainerCallback`] that computes the win rate of a model based on a reference.

It generates completions using prompts from the evaluation dataset and compares the trained model's outputs against
a reference. The reference is either the initial version of the model (before training) or the reference model, if
available in the trainer. During each evaluation step, a judge determines how often the trained model's completions
win against the reference using a judge. The win rate is then logged in the trainer's logs under the key
`"eval_win_rate"`.

Usage:
```python
trainer = DPOTrainer(...)
judge = PairRMJudge()
win_rate_callback = WinRateCallback(judge=judge, trainer=trainer)
trainer.add_callback(win_rate_callback)
```

Args:
    judge (`BasePairwiseJudge`):
        The judge to use for comparing completions.
    trainer (`Trainer`):
        Trainer to which the callback will be attached. The trainer's evaluation dataset must include a `"prompt"`
        column containing the prompts for generating completions. If the `Trainer` has a reference model (via the
        `ref_model` attribute), it will use this reference model for generating the reference completions;
        otherwise, it defaults to using the initial model.
    generation_config (`GenerationConfig`, *optional*):
        The generation config to use for generating completions.
    num_prompts (`int` or `None`, *optional*, defaults to `None`):
        The number of prompts to generate completions for. If not provided, defaults to the number of examples in
        the evaluation dataset.
    shuffle_order (`bool`, *optional*, defaults to `True`):
        Whether to shuffle the order of the completions before judging.
    use_soft_judge (`bool`, *optional*, defaults to `False`):
        Whether to use a soft judge that returns a win probability between 0 and 1 for the first completion vs the
        second.
Njudgetrainerr&   num_promptsshuffle_orderuse_soft_judgec                 "   Xl         X l        XPl        X0l        / U l        X`l        U R                  R                  c  [        S5      eU R                  R                  U l        Ub*  U R                  R                  [        U5      5      U l        g g )NzCTrainer must have an evaluation dataset to use the WinRateCallback.)
r   r   r   r&   ref_completionsr   eval_dataset
ValueErrorselectr0   )rK   r   r   r&   r   r   r   s          rB   rL   WinRateCallback.__init__  s     
*!2!,<<$$,bcc $ 9 9D" $ 1 1 8 8{9K LD #rN   rn   ro   rp   c           
      H   US   nSUl         U R                  R                  n[        U R                  SS 5      nUc  U R                  R                  nUR                  U R                  S   5       n[        UUUUU R                  UR                  S9U l
        [        [        U R                  U R                  5      5      n	U R                  (       aJ  U R                  R                  XU R                  SS9n
U
 Vs/ s H  oS:  a  S	OS
PM     nn[!        U
5      n
O&U R                  R                  XU R                  5      n[!        U5      n[!        U	5      n	[!        U5      nS S S 5        U R                  R                  R"                  (       a  [%        S W 5       5      ['        U5      -  nU R                  (       a8  S[%        W
5      ['        U
5      -  -
  nU R                  R)                  XS.5        OU R                  R)                  SU05        SUR*                  ;   a?  S	S KnUR.                  b.  [1        UWW	US9nUR(                  " SUR2                  " US905        SUR*                  ;   a  [1        UWW	US9n[5        SUS9  g g g s  snf ! , (       d  f       GN5= f)Nprocessing_classleftrG   r?   r#   r$   r%   r&   r'   Treturn_scores      ?r   r   c              3   *   #    U  H	  oS :H  v   M     g7fr   Nr/   .0
winner_idxs     rB   	<genexpr>1WinRateCallback.on_train_begin.<locals>.<genexpr>J       L^z?^   rP   eval_avg_win_probeval_win_rater   wandbro   r"   r9   r   win_rate_completions	dataframecomet_mlwin_rate_completions.csvnametable)padding_sider   r%   getattrmodel_wrappedsplit_between_processesr   rC   r&   per_device_eval_batch_sizer   rd   r5   r   r   r   r   is_main_processsumr1   log	report_tor   runr   Tabler   rK   rn   ro   rp   rq   r$   r%   r#   r"   r9   ref_win_probsscorer   win_rateavg_win_probr   dfs                    rB   r   WinRateCallback.on_train_begin#  sZ   -.	!'	ll..k48 =LL..E001B1B81LMQX#8#'"&"8"8::$D  s4#7#79M9MNOK"" $

 0 0tGYGYim 0 nGT!U}es{!"9}!U -m <!%!1!1'HZHZ![#G,G'4K*>:N' N, <<##33L^LLsSaObbH"""S%7#m:L%LL  |!_`  /8!<=$..(99(1# '$/'5	B II5u{{R7PQRT^^+-# +#1	 .3 ,) 4 "V NMs    1BJ<JAJJ
J!c           
         US   nSUl         U R                  R                  nU R                  R                  nUR	                  U R
                  S   5       n[        UUUUU R                  UR                  S9n	[        [        U R                  U	5      5      n	U R                  (       aJ  U R                  R                  XU R                  SS9n
U
 Vs/ s H  oS:  a  SOS	PM     nn[        U
5      n
O&U R                  R                  XU R                  5      n[        U5      n[        U	5      n	[        U5      nS S S 5        U R                  R                  R                   (       a  [#        S
 W 5       5      [%        U5      -  nU R                  (       a8  S[#        W
5      [%        U
5      -  -
  nU R                  R'                  XS.5        OU R                  R'                  SU05        SUR(                  ;   a?  SS KnUR,                  b.  [/        UWW	US9nUR&                  " SUR0                  " US905        SUR(                  ;   a  [/        UWW	US9n[3        SUS9  g g g s  snf ! , (       d  f       GN5= f)Nr   r   r?   r   Tr   r   r   r   c              3   *   #    U  H	  oS :H  v   M     g7fr   r/   r   s     rB   r   .WinRateCallback.on_evaluate.<locals>.<genexpr>  r   r   rP   r   r   r   r   r   r   r   r   r   )r   r   r%   r   r   r   rC   r&   r   rd   r5   r   r   r   r   r   r   r   r1   r   r   r   r   r   r   r   r   s                    rB   r   WinRateCallback.on_evaluatei  s:    -.	!'	ll..**001B1B81LMQX/#'"&"8"8::K s4#7#7EFK"" $

 0 0tGYGYim 0 nGT!U}es{!"9}!U -m <!%!1!1'HZHZ![#G,G'4K*>:N) N. <<##33L^LLsSaObbH"""S%7#m:L%LL  |!_`  /8!<=$..(99(1# '$/'5	B II5u{{R7PQRT^^+-# +#1	 .3 ,) 4 "V NMs    A<I)I$&AI)$I))
I8)r   r&   r   r   r   r   r   )NNTF)rt   ru   rv   rw   rx   r   r   r   r	   intboolrL   r   r   r   r   r   r}   r/   rN   rB   r   r      s    #R 9=%)"$M M M $$45	M
 c]M M M0D#4 D\ DTb DL= 1 =, =Q_ =rN   r   c            
       T    \ rS rSrSr   SS\S\\   S\\   S\\   4S jjr	S	 r
S
rg)LogCompletionsCallbacki  a  
A [`~transformers.TrainerCallback`] that logs completions to Weights & Biases and/or Comet.

Usage:
```python
trainer = DPOTrainer(...)
completions_callback = LogCompletionsCallback(trainer=trainer)
trainer.add_callback(completions_callback)
```

Args:
    trainer (`Trainer`):
        Trainer to which the callback will be attached. The trainer's evaluation dataset must include a `"prompt"`
        column containing the prompts for generating completions.
    generation_config (`GenerationConfig`, *optional*):
        The generation config to use for generating completions.
    num_prompts (`int` or `None`, *optional*):
        The number of prompts to generate completions for. If not provided, defaults to the number of examples in
        the evaluation dataset.
    freq (`int` or `None`, *optional*):
        The frequency at which to log completions. If not provided, defaults to the trainer's `eval_steps`.
Nr   r&   r   freqc                    Xl         X l        X@l        / U l        SU l        U R                   R
                  c  [        S5      eU R                   R
                  U l        Ub*  U R
                  R                  [        U5      5      U l        g g )NzJTrainer must have an evaluation dataset to use the LogCompletionsCallback.)	r   r&   r  r   _last_logged_stepr   r   r   r0   )rK   r   r&   r   r  s        rB   rL   LogCompletionsCallback.__init__  sz     !2	
!#<<$$,ijj $ 9 9D" $ 1 1 8 8{9K LD #rN   c           
         UR                   U R                  :X  a  g U R                  =(       d    UR                  nUR                   U-  S:w  a  g US   nSUl        U R
                  R                  nU R
                  R                  nUR                  U R                  S   5       n	U	 V
s/ s H  n
[        SU
0U5      S   PM     n	n
[        U	UUUU R                  UR                  S9n[        U5      n[        U	5      n	S S S 5        U R
                  R                  R                  (       a  [!        UR                   5      /[#        W	5      -  n[%        ['        XW5      5      nU R(                  R+                  U5        [,        R.                  " / SQU R(                  S9nSUR0                  ;   a  [2        R4                  " S	U05        S
UR0                  ;   a
  [7        SUS9  UR                   U l        g s  sn
f ! , (       d  f       N= f)Nr   r   r   r?   r   )r   r?   rA   )r   rT   r   r9   r   zcompletions.csvr   )rj   r	  r  
eval_stepsr   r   r%   r   r   r   r   rC   r&   r   r   r   r   r1   rd   r5   r   extendr   r   r   r   r   r   )rK   rn   ro   rp   rq   r  r$   r%   r#   r"   r?   r9   rj   rT   r   s                  rB   rr   "LogCompletionsCallback.on_step_end  s    6 66 yy,E,,t#q(-.	!'	ll..**001B1B81LMQXhopho^d0(F1CYOPXYhoGp/#'"&"8"8::K (4K#G,G N <<##33u0012S\AKK+>?DJJd#LL)IPTPZPZ[E$..(		=%01T^^+-* "'!2!2; q NMs   %G8*G3:G83G88
H)r	  r   r  r&   r   r   )NNN)rt   ru   rv   rw   rx   r   r   r	   r  rL   rr   r}   r/   rN   rB   r  r    sU    4 9=%)"MM $$45M c]	M
 smM*,3rN   r  c                   X    \ rS rSrSr   SS\S   S\S\4S jjrS	 rSS
 jr	SS jr
Srg)MergeModelCallbacki  a$  
A [`~transformers.TrainerCallback`] that merges the policy model (the model being trained) with another model based
on a merge configuration.

Args:
    merge_config ([`MergeConfig`], *optional*, defaults to `None`):
        Configuration used for the merging process. If not provided, the default [`MergeConfig`] is used.
    merge_at_every_checkpoint (`bool`, *optional*, defaults to `False`):
        Whether to merge the model at every checkpoint.
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether to push the merged model to the Hub after merging.

Example:

```python
from trl.mergekit_utils import MergeConfig
from trl import MergeModelCallback

config = MergeConfig()
merge_callback = MergeModelCallback(config)
trainer = DPOTrainer(..., callbacks=[merge_callback])
```
Nmerge_configr   merge_at_every_checkpointpush_to_hubc                     [        5       (       d  [        S5      eU=(       d
    [        5       U l        X l        X0l        g )NzYMergeModelCallback requires the `mergekit` extra. To install, run `pip install mergekit`.)r   r   r   r  r  r  )rK   r  r  r  s       rB   rL   MergeModelCallback.__init__  s:     %&&k  )9KM)B&&rN   c                    [         R                  R                  USU 35      nX@R                  l        U R                  R
                  c%  UR                  R                  U R                  l        [         R                  R                  US5      n[        U R                  R                  5       U5        U R                  (       a  U SU S3n[        XV5        g g )Nzcheckpoint-mergedz_checkpoint-_merged)ospathjoinr  policy_model_pathtarget_model_pathconfig_name_or_pathr   creater  r   )rK   
output_dirrj   r#   checkpoint_path
merge_path	repo_names          rB   _merge_and_maybe_push(MergeModelCallback._merge_and_maybe_push,  s    '',,z[3NO.=+..627,,2L2LD/WW\\/8<
T&&--/<%,l;-wGIz5 rN   c                 v    U R                   (       a(  U R                  UR                  UR                  U5        g g rI   r  r%  r!  rj   rK   rn   ro   rp   r#   rq   s         rB   on_saveMergeModelCallback.on_save9  s,    ))&&t8I8I5Q *rN   c                 v    U R                   (       d(  U R                  UR                  UR                  U5        g g rI   r(  r)  s         rB   r   MergeModelCallback.on_train_end=  s,    --&&t8I8I5Q .rN   )r  r  r  )NFFrI   )rt   ru   rv   rw   rx   r   r  rL   r%  r*  r   r}   r/   rN   rB   r  r    sI    4 15*/!	'}-' $(' 	'6RRrN   r  c                   Z   \ rS rSrSr        SS\S\S\S\S\S\S	\S
\4S jjr\	S 5       r
\R                  " 5       S\S\S\S\4S j5       rS\S\4S jrS\S\4S jrS\4S jr\R                  " 5       S\S\S\S\4S j5       r\R                  " 5       S\S\S\4S j5       rSrg)BEMACallbackiB  a	  
A [`~transformers.TrainerCallback`] that implements [BEMA](https://huggingface.co/papers/2508.00180)
(Bias-Corrected Exponential Moving Average) by [Adam Block](https://huggingface.co/abblock) and [Cyril
Zhang](https://huggingface.co/cyrilzhang). Code from https://github.com/abblock/bema under MIT license.

BEMA computes model weights that scale like:

$$
\theta_t' = \alpha_t \cdot (\theta_t - \theta_0) + \text{EMA}_t
$$

where  \\( \theta_t \\) is the current model weights,  \\( \theta_0 \\) is a snapshot of the model weights at the
first `update_after` step,  \\( \text{EMA}_t  \\) is the exponential moving average of the model weights, and
 \\( \alpha_t \\) is a scaling factor that decays with the number of steps  \\( t \\) as

$$
\alpha_t = (\rho + \gamma \cdot t)^{-\eta}.
$$

The EMA is computed as:

$$
\text{EMA}_t = (1 - \beta_t) \cdot \text{EMA}_{t-1} + \beta_t \cdot \theta_t
$$

where  \\( \beta_t \\) is a decay factor that decays with the number of steps  \\( t \\) as

$$
\beta_t = (\rho + \gamma \cdot t)^{-\kappa}.
$$

Args:
    update_freq (`int`, *optional*, defaults to `400`):
        Update the BEMA weights every X steps. Denoted this as  \\( \phi \\) in the paper.
    ema_power (`float`, *optional*, defaults to `0.5`):
        Power for the EMA decay factor. Denoted  \\( \kappa \\) in the paper. To disable EMA, set this to `0.0`.
    bias_power (`float`, *optional*, defaults to `0.2`):
        Power for the BEMA scaling factor. Denoted  \\( \eta \\) in the paper. To disable BEMA, set this to `0.0`.
    lag (`int`, *optional*, defaults to `10`):
        Initial offset in the weight decay schedule that controls early-stage smoothness by acting as a virtual
        starting age for the updates. Denoted as  \\( \rho \\) in the paper.
    update_after (`int`, *optional*, defaults to `0`):
        Burn-in time before starting to update the BEMA weights. Denoted  \\( \tau \\) in the paper.
    multiplier (`float`, *optional*, defaults to `1.0`):
        Initial value for the EMA decay factor. Denoted as  \\( \gamma \\) in the paper.
    min_ema_multiplier (`float`, *optional*, defaults to `0.0`):
        Minimum value for the EMA decay factor.
    device (`str`, *optional*, defaults to `"cpu"`):
        Device to use for the BEMA buffers, e.g. `"cpu"` or `"cuda"`. Note that in most cases, this device SHOULD
        BE DIFFERENT from the device used for training in order to avoid OOM.

Example:

```python
from trl import BEMACallback

trainer = Trainer(..., callbacks=[BEMACallback()])
```
update_freq	ema_power
bias_powerlagupdate_after
multipliermin_ema_multiplierr3   c	                     Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        / U l        / U l	        / U l
        / U l        S U l        g rI   )r0  r1  r2  r3  r4  r5  r6  r3   param_namesthetat_paramstheta0_params
ema_paramsrunning_model)	rK   r0  r1  r2  r3  r4  r5  r6  r3   s	            rB   rL   BEMACallback.__init__  sX     '"$($"4 !rN   c                     [        U S5      (       a  [        U S5      (       a  U R                  $ [        U S5      (       a  U R                  $ [        U S5      (       a  U R                  $ U $ )z}
Helper function to unwrap model from various wrappers including DataParallel, DistributedDataParallel,
DeepSpeed, and FSDP.
moduleengine_fsdp_wrapped_module)hasattrr?  rA  )r#   s    rB   _unwrap_modelBEMACallback._unwrap_model  sa     5(##x(@(@<< 5011--- 5(##<<rN   rn   ro   rp   r#   c                    U R                  U5      n[        U5      " UR                  5      R                  U R                  5      U l        U R
                  R                  UR                  5       5        UR                  5        H  u  pgUR                  (       d  M  U R                  R                  U5        U R                  R                  U5        UR                  5       R                  5       R                  U R                  5      nU R                  R                  U5        U R                   R                  UR                  5       5        M     g rI   )rC  typer  r2   r3   r<  load_state_dict
state_dictnamed_parametersrequires_gradr8  r8   r9  detachcloner:  r;  )	rK   rn   ro   rp   r#   rq   r   paramtheta0s	            rB   r   BEMACallback.on_train_begin  s     ""5) "%[699$++F**5+;+;+=> !113KD&&##D)%%e, \\^))+..t{{;F%%f-OO""6<<>2 4rN   r   r(   c                     U R                   U R                  U-  -   U R                  * -  n[        X R                  5      $ )uD   Compute the EMA decay factor βₜ = (ρ + γ·t)⁻ᵏᵃᵖᵖᵃ.)r3  r5  r1  maxr6  )rK   r   betas      rB   	_ema_betaBEMACallback._ema_beta  s5    4??T11G40011rN   c                 V    U R                   U R                  U-  -   U R                  * -  $ )uA   Compute the BEMA scaling factor αₜ = (ρ + γ·t)⁻ᵉᵗᵃ.)r3  r5  r2  )rK   r   s     rB   _bema_alphaBEMACallback._bema_alpha  s&    4??T117GHHrN   c                    U R                  U5      nU R                  U5      n[        U R                  U R                  U R
                  U R                  R                  5       5       Hh  u  pEpgUR                  5       R                  U R                  5      nUR                  SU-
  5      R                  XBS9  UR                  XcXE-
  -  -   5        Mj     g )Nr   rQ   )rS  rV  r5   r9  r:  r;  r<  rS   rK  r2   r3   rU   rV   copy_)rK   r   rR  rR   thetatrN  ema	run_params           rB   _update_bema_weights!BEMACallback._update_bema_weights  s    ~~d#  & /2 2 2DOOTEWEWEbEbEd/
*FC ]]_''4FHHQX##F#7OOC6?";;</
rN   c                    UR                   nX`R                  :  a  g X`R                  :X  aX  [        U R                  U R                  U R
                  5       H(  u  pxn	UR                  U5        U	R                  U5        M*     g X`R                  -
  U R                  -  S:X  a*  U R                  U5        [        R                  SU 35        g g )Nr   zUpdated BEMA weights at step )rj   r4  r5   r9  r:  r;  rY  r0  r]  loggerinfo)
rK   rn   ro   rp   r#   rq   r   thetat_paramtheta0_param	ema_params
             rB   rr   BEMACallback.on_step_end  s        ### $$$9<T=O=OQUQcQceietet9u5I""<0- :v
 &&&$*:*::a?%%d+KK7v>? @rN   c                     UR                   (       aC  UR                   S3nU R                  R                  U5        [        R                  SU 35        g g )Nz/bemazSaved BEMA model to )r   r!  r<  save_pretrainedr`  ra  )rK   rn   ro   rp   rq   save_directorys         rB   r   BEMACallback.on_train_end  sJ    && $06N..~>KK.~.>?@ 'rN   )r2  r3   r;  r1  r3  r6  r5  r8  r<  r:  r9  r4  r0  N)i  r   g?
   r   rP   g        cpu)rt   ru   rv   rw   rx   r  floatr   rL   r|   rC  ry   no_gradr   r   r   r
   r   rS  rV  r]  rr   r   r}   r/   rN   rB   r/  r/  B  sn   :| $'"" " 	"
 " " " "" "8  * ]]_3%3.:3ES3\k3 3*2c 2e 2
I I I
= 
= ]]_@%@.:@ES@\k@ @( ]]_A!2 A< AR` A ArN   r/  )r   )Cloggingr  typingr   r   pandasr   ry   
accelerater   accelerate.stater   accelerate.utilsr   r   transformersr	   r
   r   r   r   r   r   r   transformers.trainer_utilsr   transformers.utilsr   
data_utilsr   import_utilsr   mergekit_utilsr   r   r   models.utilsr   judgesr   utilsr   rich.consoler   r   	rich.liver   
rich.panelr    rich.progressr!   r   	getLoggerrt   r`  rd   r   r  rC   rE   r   r   r   r   r  r  r/  r/   rN   rB   <module>r     s{    	 "   " - >	 	 	 2 0 2 0 J J 6 % 0 + & 
		8	$ $#Y$$ '$ 	$
   01$ $ 
#Y$N&V? &VRM%? M%`mm"&s)m:>s)mUYZ]U^m\\mAo AHY3_ Y3x:R :RzuA? uArN   