
    h -                    B   S SK r S SKJr  S SKrS SKrS SKJr  S SKJ	r	J
r
  S SK JrJr  S SKJr  S SKJrJrJrJr  S SKrS SKrS SKrS SKJs  Jr  S SKrS SKJrJ r J!r!  S SK"J#r#  S S	K$J%r%J&r&  S S
K'J(r(  S SKJ)r)  S SK*J+r+J,r,J-r-J.r.J/r/J0r0J1r1  S SK2J3r3J4r4J5r5J6r6J7r7J8r8  SSK9J:r:  \5" 5       (       a  S SK;J<r<  S SK=J>r>  S SK?J@r@  S SKAJBrB  \1" 5       (       a  S SKCrC\4" 5       (       a  S SKDJErEJFrF  \!R                  " \H5      rI\ " S S5      5       rJ\ " S S5      5       rK   SS\L\R                     S\NS\OS\\N   S\R                  4
S jjrP\ " S S 5      5       rQ\ " S! S"5      5       rR\R                  " 5        SS#\R                  S\T\R                  \R                  \N4   4S$ jj5       rUS%\,S\V\O\W4   4S& jrXSS'\R                  S(\NS)\\N\W4   S*\NS\R                  4
S+ jjrYS,\R4                  R                  SS4S- jr[SS. jr\ " S/ S05      r]S1 r^S2\:S\\+   4S3 jr_S\\V\O\N4      4S4 jr`S2\:SS54S6 jraSS7 jrbSS8 jrcS9\R                  SS4S: jreS;rfS<rg\ " S= S>\/5      5       rh\ " S? S@\05      5       ri\R                  4SA\R                  S\R                  4SB jjrkS,\R4                  R                  SC\R                  SD\NSE\NS\T\R                  \R                  \R                  4   4
SF jrlS,\R4                  R                  SC\R                  SD\NS\34SG jrm SS,\R4                  R                  SH\NSI\nSJ\nS\R4                  R                  4
SK jjroSL\NSD\NSM\R                  S\R                  4SN jrpSO\R4                  R                  SP\R                  SD\NSQ\-S\T\R                  \R                  4   4
SR jrq\R                  " 5       S,\R4                  R                  SP\R                  SS\NSD\NSQ\-4
ST j5       rrSU\\N   SV\NSW\V\O\L\N   4   SX\NSY\V\O\L\N   4   SZ\NS[\V\O\L\N   4   4S\ jrsS]\NSY\V\O\L\N   4   S[\V\O\L\N   4   4S^ jrtS_\R                  SL\NSD\NS\T\R                  \R                  4   4S` jruSSa jrvSb\R                  Sc\.S\L\O   4Sd jrw    SSe\\O   Sf\OSg\OSh\\O   Si\L\O   Sj\\O   Sk\OSl\\O   Sm\\O   Sn\\O   So\\O   S\%4Sp jjrxS\\O   4Sq jrySr\OSs\R                  SS4St jrzSu\R                  S\R                  S\\R                  \T\R                  Sv4   4   4Sw jr{Su\R                  S\R                  S\\R                  \T\R                  Sv4   4   4Sx jr|S\R                  4Sy jr}SSz\R                  S{\NS\R                  4S| jjr~ SS}\L\O   S~\L\O   S\V\O\L\W   4   S\L\W   S\NS\NSS4S jjr " S S\)5      rS'\R                  S\R                  4S jrS\V\O\\R                     4   S\NS\L\V\O\\R                     4      4S jrS\V\O\\	   4   S\V\O\\	   4   4S jrS'\R                  S\R                  4S jrS'\R                  S\R                  4S jrS rS\V\O\R                  4   S\V\O\\R                  \L\R                     4   4   4S jrS\V\O\\R                  \L\R                     4   4   S\V\O\R                  4   4S jrS\R                  Su\R                  S\NS\L\N   S\T\R                  \R                  4   4
S jrg)    N)deque)SequenceSized)	dataclassfield)version)AnyLiteralOptionalUnion)AcceleratorPartialStatelogging)AcceleratorState)	ModelCardModelCardData)pad_sequence)Sampler)BitsAndBytesConfigEvalPredictionGenerationConfigPreTrainedTokenizerBaseTrainerStateTrainingArgumentsis_comet_available)ModelOutputis_peft_availableis_rich_availableis_torch_mlu_availableis_torch_npu_availableis_torch_xpu_available   )ModelConfig)Console)Panel)Table)Text)
LoraConfig
PeftConfigc                       \ rS rSr% Sr\\S'   Sr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   S rS\\\\4      S\\\R$                  4   4S jrSrg)DataCollatorForChatMLH   z+
Data collator for ChatML format datasets.
	tokenizerignore_indexN
max_lengthprompt
prompt_keymessagesmessages_keyc                     U R                   R                  c  [        S5      eU R                  c&  [	        U R                   R
                  S5      U l        g g )NzTThe tokenizer does not have a pad token. Please set `pad_token_id` in the tokenizer.i   )r-   pad_token_id
ValueErrorr0   minmodel_max_lengthselfs    K/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/utils.py__post_init__#DataCollatorForChatML.__post_init__T   sE    >>&&.stt??"!$.."A"A4HDO #    examplesreturnc           
         / n/ n/ n/ n/ nU GH  nUR                  U R                  S 5      nUc,  XpR                     S S n	U R                  R	                  U	SSS9nSU;  a  XpR                     n
U R                  R	                  U
SSS9nU R                  USU R
                  SS SS9nUR                  US   5        SU;   a  UR                  US   5        OrUR                  S/[        US   5      -  5        OPUR                  US   5        SU;   a  UR                  US   5        O!UR                  S/[        US   5      -  5        U R                  US[        US   5      SS SS9nUR                  US   5        UR                  US   5        U R                  /[        US   5      -  n[        US   5      nUS   US  XS & UR                  U5        GM     U Vs/ s H&  n[        R                  " U[        R                  S	9PM(     nnU Vs/ s H&  n[        R                  " U[        R                  S	9PM(     nnU Vs/ s H&  n[        R                  " U[        R                  S	9PM(     nn[        US
U R                  R                  S9n[        US
SS9n[        US
U R                  S9nU Vs/ s H&  n[        R                  " U[        R                  S	9PM(     nnU Vs/ s H&  n[        R                  " U[        R                  S	9PM(     nn[        US
U R                  R                  S9n[        US
SS9nUUUUUS.$ s  snf s  snf s  snf s  snf s  snf )NFT)tokenizeadd_generation_prompt	input_ids)
truncationr0   paddingreturn_tensorsadd_special_tokensattention_mask   dtypeleft)padding_sidepadding_valuer   )rF   rK   labelspromptsprompt_attention_mask)getr2   r4   r-   apply_chat_templater0   appendlenr/   torchtensorlongpadr6   )r;   r@   rF   rK   prompts_input_idsrT   rR   exampleformatted_promptr1   messageformatted_messagetokenized_messagetokenized_promptlabelcompletion_start_idxidsmasks                     r<   __call__DataCollatorForChatML.__call__[   s   	 "G&{{4??DA' !2!23CR8#'>>#E#EU$ $F $  ')!"3"34$(NN$F$Fe5 %G %! %)NN%##!#'', %3 %!   !2;!?@#w."))*;<L*MN"))1#4Ek4R0S*ST  !56#w."))'2B*CD"))1#GK4H0I*IJ#~~ y}-##(  .   $$%5k%BC!(()9:J)KL &&'#im*<<E#&'7'D#E +4R=9M9N+OE'(MM% c  h ENNISU\\#UZZ8I	NKYZ>4%,,t5::>>ZEKLVE%,,uEJJ7VL	dnnFaFab	^&PQRV&@Q@QRL]^L]SU\\#UZZ@L]^Rg hRg$d%**!ERg h 1VZVdVdVqVqr #$9^_ ` #,(%:
 	
 OZL
 _ hs    -M-M-M8-M+-M )r0   )__name__
__module____qualname____firstlineno____doc__r   __annotations__r/   intr0   r2   strr4   r=   listdictr	   rY   Tensorrh   __static_attributes__ r?   r<   r+   r+   H   sn     '&L#JJ"L#"IM
d38n!5 M
$sELL?P:Q M
r?   r+   c                       \ rS rSr% Sr\\S'   Sr\\	\
4   \S'   Sr\\   \S'   Sr\
\S	'   S
\\\
\4      S\\
\4   4S jrSrg)RewardDataCollatorWithPadding   a0  
Reward DataCollator class that pads the inputs to the maximum length of the batch.

Args:
    tokenizer (`PreTrainedTokenizerBase`):
        The tokenizer used for encoding the data.
    padding (`Union[bool, str, `PaddingStrategy`]`, `optional`, defaults to `True`):
        padding_strategy to pass to the tokenizer.
    pad_to_multiple_of (`int` or `None`, `optional`, defaults to `None`):
        If set will pad the sequence to a multiple of the provided value.
    return_tensors (`str`, `optional`, defaults to `"pt"`):
        The tensor type to use.
r-   TrH   Npad_to_multiple_ofptrI   featuresrA   c                    / n/ n/ nSUS   ;   nU Hw  nSU;  d  SU;  d  SU;  d  SU;  a  [        S5      eUR                  US   US   S.5        UR                  US   US   S.5        U(       d  Mc  UR                  US   5        My     U R                  R                  UU R                  U R
                  U R                  S	9nU R                  R                  UU R                  U R
                  U R                  S	9nUS
   US   US
   US   SS.n	U(       a'  [        R                  " U[        R                  S9nXIS'   U	$ )Nmarginr   input_ids_choseninput_ids_rejectedattention_mask_chosenattention_mask_rejectedz{The features should include `input_ids_chosen`, `attention_mask_chosen`, `input_ids_rejected` and `attention_mask_rejected`)rF   rK   )rH   rz   rI   rF   rK   T)r   r   r   r   return_lossrM   )
r7   rW   r-   r\   rH   rz   rI   rY   rZ   float)
r;   r|   features_chosenfeatures_rejectedr~   
has_marginfeaturebatch_chosenbatch_rejectedbatchs
             r<   rh   &RewardDataCollatorWithPadding.__call__   s   !,
G #'1'w6*'9,G;  R  ""!();!<&-.E&F $$!()=!>&-.G&H zgh/03  4 ~~))LL#66..	 * 
 ++LL#66..	 , 
 !-[ 9%12B%C"0"='56F'G
 \\&<F$(Or?   rv   )rj   rk   rl   rm   rn   r   ro   rH   r   boolrq   rz   r   rp   rI   rr   rs   r	   rh   ru   rv   r?   r<   rx   rx      sc     '& $GU49$(,,NC6d38n!5 6$sCx. 6r?   rx   tensorsrQ   rP   rz   rA   c                 Z   [         R                  " U  Vs/ s H  oDR                  PM     snS5      R                  5       nUb  US   U-  nUS:w  a  US==   X6-
  -  ss'   [        R
                  " [        U 5      /UQ7XS   R                  U S   R                  S9n[        U 5       Hz  u  pUS:X  a  US   UR                  S   -
  n	OUS:X  a  Sn	O[        S5      e[        XUR                  S   -   5      n
U
4[        S UR                  SS  5       5      -   nXGU   U'   M|     U$ s  snf )	aB  
Pads a list of tensors to the same shape along the first dimension.

Args:
    tensors (`list[torch.Tensor]`):
        List of input tensors to pad.
    padding_value (`int`):
        Value to use for padding. Default is 0.
    padding_side (`str`):
        Side on which to add padding. Must be 'left' or 'right'. Default is 'right'.
    pad_to_multiple_of (`int`, *optional*, defaults to `None`):
        If set will pad the sequence to a multiple of the provided value.

Returns:
    `torch.Tensor`:
        A single tensor containing the padded tensors.

Examples:
```python
>>> import torch

>>> pad([torch.tensor([1, 2, 3]), torch.tensor([4, 5])])
tensor([[1, 2, 3],
        [4, 5, 0]])

>>> pad([torch.tensor([[1, 2], [3, 4]]), torch.tensor([[5, 6]])])
tensor([[[1, 2],
        [3, 4]],
        [[5, 6],
        [0, 0]]])
```
r   NrN   devicerO   rightz&padding_side must be 'left' or 'right'c              3   :   #    U  H  n[        S U5      v   M     g7f)r   N)slice).0ss     r<   	<genexpr>pad.<locals>.<genexpr>5  s     %G;aeAqkk;s   rL   )npmaxshapetolistrY   fullrX   rN   r   	enumerater7   r   tuple)r   rQ   rP   rz   toutput_shape	remainderoutputi	seq_start	seq_sliceslicess               r<   r\   r\      s1   N 66G4Gq77G4a8??AL % O&88	>O1==O ZZW55}TUJL\L\elmneoevevwF'"6!$Q!''!*4IW$IEFF )%;<	%G17712;%G GGq	& # M1 5s   D(c                   v    \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\
\   \S'   S	\\\\4      S
\\\4   4S jrSrg)DPODataCollatorWithPaddingi;  a  
DPO DataCollator class that pads the tokenized inputs to the maximum length of the batch.

Args:
    pad_token_id (`int` defaults to 0):
        The tokenizer's pad_token_id.
    label_pad_token_id (`int`, defaults to -100):
        The label used for masking.
    is_encoder_decoder (`bool` or `None`, `optional`, defaults to `None`):
        Whether you model has an encoder_decoder architecture.
r   r6   r.   label_pad_token_idFis_encoder_decoderr|   rA   c           	      R   0 nUS   R                  5        GHx  nUR                  S5      (       Ga  U R                  (       a  U Vs/ s H  n[        R                  " XC   5      PM     nnUR                  S5      (       a;  UR                  S5      (       a%  U R                  c  [        S5      eU R                  nOQUR                  S5      (       a  SnO8UR                  S5      (       d  SU;   a  U R                  nO[        S	U S
35      e[        USUS9X#'   GM  UR                  S5      (       a%  U R                  c  [        S5      eU R                  nOdUR                  S5      (       a  U R                  nOAUR                  S5      (       a  SnO(UR                  S5      (       a  SnO[        S	U S
35      eUS;   a  SnOSnUR                  S5      (       a  [        R                  nO[        R                  nU Vs/ s H  n[        R                  " XC   US9PM     nn[        XVUS9X#'   GM  UR                  S5      (       a.  [        R                  " U Vs/ s H  oDU   PM	     sn5      X#'   GMa  U Vs/ s H  oDU   PM	     snX#'   GM{     U$ s  snf s  snf s  snf s  snf )Nr   )
_input_ids_attention_mask_labels_pixel_valuesr1   rF   zPadding is enabled, but the tokenizer is not configured with a padding token. Explicitly set `tokenizer.pad_token` (e.g. `tokenizer.pad_token = tokenizer.eos_token`) before calling the trainer.r   )chosenrejected
completiondecoderzUnexpected key in batch ''T)batch_firstrQ   r   r   r   )prompt_input_idsrT   rO   r   rM   rQ   rP   _logps)keysendswithr   rY   
LongTensor
startswithr6   r7   r   r   float32int64rZ   r\   )	r;   r|   padded_batchkexto_padrQ   rP   rN   s	            r<   rh   #DPODataCollatorWithPadding.__call__M  sh   !!!#AzzWXX**@HI"e..ru5FIX..QZZ5L5L,,4",!?# 
 )-(9(9$566()&JKKPY]^P^(,(?(?(+DQCq)IJJ&26t[h&iLO zz,//,,4",!?# 
 )-(9(9I..(,(?(?$566()O44()(+DQCq)IJJ II'-'. zz/22 % % JRR2ell25>FR&)&\h&iLOH%%"',,/I"1/I"J3;"<8Ra58"<u $x s Jb S 0J"<s   "J!JJ
;J$rv   N)rj   rk   rl   rm   rn   r6   rp   ro   r   r   r   r   rr   rs   rq   r	   rh   ru   rv   r?   r<   r   r   ;  sU    
 L#"")..?d38n!5 ?$sCx. ?r?   r   c                       \ rS rSr% Sr\\S'   Sr\\S'   Sr	\\S'   Sr
\\S'   S	r\\S
'   \R                  " 5       S\R                  S\\\4   4S j5       rS\4S jr\S\S\4S j5       rSrg)RunningMomentsi  z
Calculates the running mean and standard deviation of a data stream. Reference:
https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L75
acceleratorr   meanrL   stdvargW:countxsrA   c                 .   U R                   R                  (       a  [        U R                   U5      u  p#nO'UR                  5       n[        R
                  " USS9u  p2UR                  5       UR                  5       p2X R                  -
  nU R                  U-   nX4-  nU R                  U R                  -  US-  U R                  -  U-  U-  -   nX-   n	U =R                  XT-  U-  R                  5       -  sl        X-  n
X-  US-
  -  R                  5       R                  5       R                  5       U l        U
R                  5       U l	        X`l        UR                  5       X4-  US-
  -  R                  5       R                  5       R                  5       4$ )zD
Updates running moments from batch's moments computed across ranks
F)unbiasedr"   rL   )r   use_distributedget_global_statisticsnumelrY   var_meanr   r   r   r   itemsqrtr   )r;   r   xs_meanxs_varxs_countdelta	tot_countnew_sumold_sumtot_sumnew_vars              r<   updateRunningMoments.update  sR   
 ++(=d>N>NPR(S%GXxxzH#nnR%@OF!--/6<<>))#JJ)	#((TZZ'%(TZZ*?(*JY*VV#		e&288::	%'9q=9@@BGGINNP<<>
||~ 1X\ BIIKPPRWWYYYr?   	json_pathc                    U R                   R                  (       aU  [        R                  " U S S9n[        R
                  " USSS9S-   n[        USSS	9 nUR                  U5        S
S
S
5        g
g
! , (       d  f       g
= f)zDSave the content of this instance in JSON format inside `json_path`.c                 L    U  VVs0 s H  u  pUS:w  d  M  X_M     snn$ s  snnf )Nr   rv   )xr   vs      r<   <lambda>-RunningMoments.save_to_json.<locals>.<lambda>  s*    \]Ht\]RXSTabfsas\]HtHts     )dict_factoryr"   T)indent	sort_keys
wutf-8encodingN)r   is_main_processdataclassesasdictjsondumpsopenwrite)r;   r   	save_dictjson_stringfs        r<   save_to_jsonRunningMoments.save_to_json  sm     ++#**4>tuI**YqDIDPKiw71$ 87 , 87s   A22
B c                     [        USS9 nUR                  5       nSSS5        U " SSU0[        R                  " W5      D6$ ! , (       d  f       N,= f)z3Create an instance from the content of `json_path`.r   r   Nr   rv   )r   readr   loads)clsr   r   r   texts        r<   load_from_jsonRunningMoments.load_from_json  sE     )g.!668D /?{?djj.>?? /.s   A
A)r   r   r   N)rj   rk   rl   rm   rn   r   ro   r   r   r   r   r   rY   no_gradrt   r   r   rq   r   classmethodr   ru   rv   r?   r<   r   r     s    
 D%OCNCNE5
]]_Z Z%u*= Z Z6%c % @ @ @ @r?   r   r   c                    UR                  U R                  5      n[        R                  " UR	                  5       Uc  UR                  5       OUR	                  5       /UR                  S9nU R                  U5      nUu  pVXV-  n[        R                  " X-
  S-  R                  Uc  SOU5      5      nU R                  U5      nX-  n	UR                  U5      U	R                  U5      UR                  5       4$ )z
Computes element-wise mean and variance of the tensor across processes. Reference:
https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L57C1-L73C75
r   r"   rL   )	tor   rY   rZ   sumr   reducemulr   )
r   r   rg   r   sum_and_count
global_sumr   global_meansum_var
global_vars
             r<   r   r     s     
{!!	"BLL"&&(4<RXXZTXXZ!YbdbkbklM&&}5M%J$Kii"*q0554<aTRSG  )GJ>>&!:==#8%**,FFr?   	eval_predc                    U u  pUR                   S:X  a  [        R                  " USS9n[        R                  " [	        X5       VVVVs/ s H$  u  p4[	        X45        H  u  pVUS:w  d  M  UPM     M&     snnnn5      n[        R                  " U VVs/ s H  oD  H  ofS:w  d  M
  UPM     M     snn5      nOUS S 2S4   US S 2S4   :H  n[        UR                  5       5      nUS:  a6  [        5         [        R                  SU S[        US S 2S4   5       S	35        X)    nX')    n[        R                  " USS9n[        R                  " X:H  [        S
9R                  5       R                  5       n	SU	0$ s  snnnnf s  snnf )N   r"   )axisr.   r   rL   z
There are z out of zu instances where the predictions for both options are equal. These instances are ignored in the accuracy computation.rM   accuracy)ndimr   argmaxarrayziprp   r  r   loggerwarningrX   r   r   r   )
r  predictionsrR   
predictionrd   plbl
equal_maskequal_predictions_countr  s
             r<   compute_accuracyr     st   #K1 ii!4 hh(+K(@w(@$:QTU_QgXaknrvkvQQgQ(@w
 FSF5esd{3e3FST
 !A&+ad*;;
"%jnn&6"7"Q& NNN45Xc+aQRdBS>T=U Vg g "+.$ ii!4xx-U;@@BGGIH!!9 xSs   
E;+
E;F
&
F
rZ   length	pad_valuedimc           
         U R                  U5      U:  a  U $ [        U R                  5      nXR                  U5      -
  XC'   [        R                  " U U[        R
                  " X@R                  U R                  S.6-  /US9$ )Nr   r#  )sizerr   r   rY   catonesrN   r   )rZ   r!  r"  r#  pad_sizes        r<   pad_to_lengthr*    sr    {{36!%S!11yyEJJV]][[ 
 	
r?   modelc                     U R                  5        H5  n[        U[        R                  R                  5      (       d  M.  SUl        M7     g )Nr   )modules
isinstancerY   nnDropoutr  )r+  modules     r<   disable_dropout_in_modelr2    s0    --/fehh..//FH "r?   c           
      N    X-  nXU-  :w  a  [        U SU  SU SX-   35      eU$ )Nz, inexact division: z / z = )r7   )abcustom_error_messageqs       r<   	exact_divr8    sA    	AEz011EaSA3cRSRWQXYZZHr?   c                   *    \ rS rSrSrS rS rS rSrg)PerPromptStatTrackeri$  a-  
Class for tracking statistics per prompt. Mainly used to calculate advantage for the DPPO algorithm

Args:
    buffer_size (`int`):
        Size of the buffer to keep for each prompt.
    min_count (`int`):
        Minimum number of samples to keep in the buffer before calculating the mean and std.
c                 *    Xl         X l        0 U l        g Nbuffer_size	min_countstats)r;   r>  r?  s      r<   __init__PerPromptStatTracker.__init__/  s    &"
r?   c                    [         R                  " U5      n[         R                  " U5      n[         R                  " U5      n[         R                  " U5      nU GH  nX!U:H     nXPR                  ;  a   [        U R                  S9U R                  U'   U R                  U   R                  U5        [        U R                  U   5      U R                  :  a0  [         R                  " U5      n[         R                  " U5      S-   nOI[         R                  " U R                  U   5      n[         R                  " U R                  U   5      S-   nXg-
  U-  XAU:H  '   GM     U$ )N)maxlengư>)r   r  unique
empty_liker@  r   r>  extendrX   r?  r   r   )	r;   rS   rewardsrE  
advantagesr1   prompt_rewardsr   r   s	            r<   r   PerPromptStatTracker.update4  s   ((7#((7#7#]]7+
F$%67NZZ'%*$2B2B%C

6"JJv%%n54::f%&7www'ffWo,wwtzz&12ffTZZ/047-;-Bc,IJ&()  r?   c           
          U R                   R                  5        VVs0 s H=  u  pU[        R                  " U5      [        R                  " U5      [        U5      S._M?     snn$ s  snnf )N)r   r   r   )r@  itemsr   r   r   rX   )r;   r   r   s      r<   	get_statsPerPromptStatTracker.get_statsI  sM    W[WaWaWgWgWijWitqBGGAJrvvay3q6JJWijjjs   AA&r=  N)	rj   rk   rl   rm   rn   rA  r   rN  ru   rv   r?   r<   r:  r:  $  s    
*kr?   r:  c                   ^ U R                  5        H  u  mn[        U[        R                  R                  5      (       d  ST;   a!  UR                  [        R                  5      nMV  [        U4S jS 5       5      (       d  Mr  [        US5      (       d  M  UR                  R                  [        R                  :X  d  M  UR                  [        R                  5      nM     g )Nnormc              3   ,   >#    U  H	  oT;   v   M     g 7fr<  rv   )r   r   names     r<   r   .peft_module_casting_to_bf16.<locals>.<genexpr>Q  s     N$Mqd$Ms   )lm_headembed_tokenswtewpeweight)named_modulesr.  rY   r/  	LayerNormr  r   anyhasattrrY  rN   bfloat16)r+  r1  rS  s     @r<   peft_module_casting_to_bf16r_  M  s    ++-ffehh0011Vt^YYu}}-FN$MNNNvx((==&&%--7#YYu~~6F .r?   
model_argsc                     U R                   (       a7  [        SU R                  U R                  U R                  U R                  S9nU$ U R
                  (       a  [        SS9nU$ S nU$ )NT)load_in_4bitbnb_4bit_compute_dtypebnb_4bit_quant_typebnb_4bit_use_double_quantbnb_4bit_quant_storage)load_in_8bit)rb  r   rN   rd  use_bnb_nested_quantrg  )r`  quantization_configs     r<   get_quantization_configrj  W  st    0#-#3#3 * > >&0&E&E#-#3#3
  
	 	 0
  #r?   c                      [         R                  R                  5       (       d  [        5       (       a  S[	        5       R
                  0$ g )N )rY   cudais_availabler!   r   local_process_indexrv   r?   r<   get_kbit_device_maprp  j  s4    zz  $:$<$<LN6677r?   zOptional[PeftConfig]c                    U R                   SL a  g [        5       (       d  [        S5      e[        U R                  U R
                  U R                  U R                  U R                  SU R                  U R                  U R                  S9	nU$ )NFzYou need to have PEFT library installed in your environment, make sure to install `peft`. Make sure to run `pip install -U peft`.none)		task_typertarget_modules
lora_alphalora_dropoutbias
use_rslorause_doramodules_to_save)use_peftr   r7   r(   lora_task_typelora_rlora_target_modulesrv  rw  ry  rz  lora_modules_to_save)r`  peft_configs     r<   get_peft_configr  q  s    e#6
 	

 ++


!55((,,(($$"77
K r?   c                 j   [         R                  " S/5      R                  U R                  5      [         R                  " U R                  5      R
                  -   n[         R                  " U5      R                  U R                  5      nUS:  a"  [         R                  " USU-  -  5      SU-  -  $ U$ )a  
Get the exponent cap of a value. This is used to cap the exponent of a value to avoid overflow. The formula is :
log(value.dtype.max) E.g. for float32 data type, the maximum exponent value is 88.7228 to 4 decimal points.

Args:
    value (`torch.Tensor`):
        The input tensor to obtain the data type
    decimal (`int`):
        The number of decimal points of the output exponent cap. eg: direct calling exp(log(torch.float32.max))
        will result in inf so we cap the exponent to 88.7228 to avoid overflow.
rL   r   
   )	rY   zerosr  rN   finfor   logr   floor)valuedecimal
vdtype_maxvdtype_log_maxs       r<   get_exp_capr    s     aS!$$U[[1EKK4L4P4PPJYYz*--ell;NFMPQk5;;~G34r7{BeWeer?   c                 x    US:  a  [        U 5      OUn[        R                  " [        R                  " XS95      $ )Nr   )r   )r  rY   expclamp)r  caps     r<   cap_expr    s,     #a+e
SC99U[[011r?   dfc                 Z   [        5       (       d  [        S5      e[        5       n[        SS9nU R                   H  nUR                  U5        M     U R                  5        H5  u  pEUR                  " UR                  [        5      R                  5       6   M7     UR                  U5        g )NzgThe function `print_rich_table` requires the `rich` library. Please install it with `pip install rich`.T)
show_lines)r   ImportErrorr$   r&   columns
add_columniterrowsadd_rowastyperq   r   print)r  consoletablecolumn_rows         r<   print_rich_tabler    s    u
 	
 iGT"E**  ++-szz#--/0  MM%r?   zT{% for message in messages %}{{' ' + message['content']}}{% endfor %}{{ eos_token }}z{% for message in messages %}{{message['role'].capitalize() + ': ' + message['content'] + '

'}}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}c                   $    \ rS rSr% Sr\\S'   Srg)OnlineTrainerStatei  r   episoderv   N)rj   rk   rl   rm   r  rp   ro   ru   rv   r?   r<   r  r    s    GSr?   r  c                     ^  \ rS rSr% Sr\" SSS0S9r\\S'   \" SSS	0S9r	\
\S
'   \" SSS0S9r\\
   \S'   \" SSS0S9r\\   \S'   \" SSS0S9r\\   \S'   \" SSS0S9r\\S'   \" SSS0S9r\\   \S'   \" SSS0S9r\\S'   \" SSS0S9r\\S'   \" SSS0S9r\\S'   \" SSS0S9r\\S       \S!'   \" SSS"0S9r\\   \S#'   \" S$SS%0S9r\\S&'   \" SSS'0S9r\\   \S('   \" S)SS*0S9r\\S+'   \" SSS,0S9r\\   \S-'   \" SSS.0S9r\\   \S/'   \" SSS00S9r\\   \S1'   \" SSS20S9r\\   \S3'   \" SSS40S9r \\   \S5'   \" SSS60S9r!\\   \S7'   \" SSS80S9r"\\   \S9'   \" S:SS;0S9r#\
\S<'   U 4S= jr$S>r%U =r&$ )?OnPolicyConfigi  a9  
Base configuration class for on-policy trainers.

This class includes only the parameters that are specific to some on-policy training. For a full list of training
arguments, please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this
class may differ from those in [`~transformers.TrainingArguments`].

Using [`~transformers.HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.

Parameters:
    run_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the run.
    dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
        Number of processes to use for processing the dataset.
    num_mini_batches (`int`, *optional*, defaults to `1`):
        Number of minibatches to split a batch into.
    total_episodes (`int` or `None`, *optional*, defaults to `None`):
        Total number of episodes in the dataset.
    local_rollout_forward_batch_size (`int`, *optional*, defaults to `64`):
        Per rank no grad forward pass in the rollout phase.
    num_sample_generations (`int`, *optional*, defaults to `10`):
        Number of debugging samples generations (i.e., `generate_completions` calls) throughout training.
    response_length (`int`, *optional*, defaults to `53`):
        Length of the response.
    stop_token (`str` or `None`, *optional*, defaults to `None`):
        Specifies the stop token to use for text generation. This parameter is mutually exclusive with
        `stop_token_id`.

        - `None`: No stop token is applied, unless `stop_token_id` is specified.
        - `'eos'`: Uses the tokenizer's `eos_token`.

    stop_token_id (`int` or `None`, *optional*, defaults to `None`):
        Specifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied,
        unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.
    temperature (`float`, *optional*, defaults to `0.7`):
        Sampling temperature.
    missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
        Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage to
        generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
        value.
    sft_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
        Path to the SFT model.
    world_size (`int` or `None`, *optional*, defaults to `None`):
        Number of processes (GPUs) to use for the training.
    num_total_batches (`int` or `None`, *optional*, defaults to `None`):
        Number of total batches to train.
    micro_batch_size (`int` or `None`, *optional*, defaults to `None`):
        Micro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).
    local_batch_size (`int` or `None`, *optional*, defaults to `None`):
        Batch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).
    batch_size (`int` or `None`, *optional*, defaults to `None`):
        Batch size across devices (HF's `per_device_train_batch_size` * `world_size` *
        `gradient_accumulation_steps`).
    local_mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
        Mini batch size per GPU.
    mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
        Mini batch size across GPUs.
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether to push the model to the Hub after training.
r  helpzLog every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.)defaultmetadatalogging_stepsTzZIf True, use gradient checkpointing to save memory at the expense of slower backward pass.gradient_checkpointingNzWhether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if `fp16` is not set.bf16zName of the run.run_namez6Number of processes to use for processing the dataset.dataset_num_procrL   z,Number of minibatches to split a batch into.num_mini_batchesz(Total number of episodes in the dataset.total_episodes@   z3Per rank no grad forward pass in the rollout phase. local_rollout_forward_batch_sizezaNumber of debugging samples generations (i.e., `generate_completions` calls) throughout training.num_sample_generations5   zLength of the response.response_lengthzoSpecifies the stop token to use for text generation. This parameter is mutually exclusive with `stop_token_id`.eos
stop_tokenzSpecifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied, unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.stop_token_idgffffff?zSampling temperature.temperaturezPenalty applied to the score when the model fails to generate an EOS token. This is useful to encourage to generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive value.missing_eos_penaltyzEleutherAI/pythia-160mzPath to the SFT model.sft_model_pathz3Number of processes (GPUs) to use for the training.
world_sizez!Number of total batches to train.num_total_batcheszTMicro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).micro_batch_sizezXBatch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).local_batch_sizeznBatch size across devices (HF's `per_device_train_batch_size` * `world_size` * `gradient_accumulation_steps`).
batch_sizezMini batch size per GPU.local_mini_batch_sizezMini batch size across GPUs.mini_batch_sizeFz4Whether to push the model to the Hub after training.push_to_hubc                    > U R                   c  U R                  (       + OU R                   U l         [        TU ]  5         g r<  )r  fp16superr=   )r;   	__class__s    r<   r=   OnPolicyConfig.__post_init__r  s*    '+yy'8Odii	r?   )r  )'rj   rk   rl   rm   rn   r   r  r   ro   r  r   r  r   r  rq   r  rp   r  r  r  r  r  r  r
   r  r  r  r  r  r  r  r  r  r  r  r  r=   ru   __classcell__)r  s   @r<   r  r    sA   =@ ! D
M5  $)p
$D  ! !
D(4.  $,-Hhsm  ',RS'hsm  "HIc  %*DE%NHSM  -2OP-$c  #(w
#C  !34OS  ,1 
,J(  $) q
$M8C=  12K  ,1  
,%   (23NC  !&OP!J  (-=>(x}  ',pq'hsm  ',tu'hsm  !& .
!J  ,145,8C=  &+89&OXc]  PQK 
   r?   r  boolsc                     U R                  S5      nX ) R                  U5      -  [        R                  " X!U R                  S9-   n[        R
                  " USS9R                  $ )as  
Takes an N-dimensional bool tensor and returns an (N-1)-dimensional tensor of integers giving the position of the
first True in each "row".

Returns the length of the rows (bools.size(-1)) if no element is True in a given row.

Args:
    bools (`torch.Tensor`):
        An N-dimensional boolean tensor.
    dtype (`torch.dtype`, optional):
        The desired data type of the output tensor. Defaults to `torch.long`.

Returns:
    `torch.Tensor`:
        An (N-1)-dimensional tensor of integers indicating the position of the first True in each row. If no True
        value is found in a row, returns the length of the row.
rC   r   r%  )r&  typerY   aranger   r8   values)r  rN   row_lenzero_or_indexs       r<   first_true_indicesr  x  sR    $ jjnGvmmE22U\\'_d_k_k5llM99]+222r?   query_responsesr6   context_lengthc           	         X:g  nUR                  S5      UR                  5       -
  n[        X R                  5      n[        R
                  " X) S5      nU" UUUSSSS9nU R                  UR                  S   5      n	[        USS2US24   U:H  5      S-
  U-   n
U	U	[        R                  " U	R                  S5      U	R                  S9U
4   R                  S5      U
4$ )	a  
Computes the reward logits and the rewards for a given model and query responses.

Args:
    model (`torch.nn.Module`):
        The model used to compute the reward logits.
    query_responses (`torch.Tensor`):
        The tensor containing the query responses.
    pad_token_id (`int`):
        The token ID representing the pad token.
    context_length (`int`):
        The length of the context in the query responses.

Returns:
    tuple:
        - `reward_logits` (`torch.Tensor`):
            The logits for the reward model.
        - `final_rewards` (`torch.Tensor`):
            The final rewards for each query response.
        - `sequence_lengths` (`torch.Tensor`):
            The lengths of the sequences in the query responses.
rL   r   TF)rF   rK   position_idsreturn_dictoutput_hidden_states	use_cacherC   Nr  )cumsumr[   getattrbase_model_prefixrY   masked_fillscorehidden_statesr  r  r&  r   squeeze)r+  r  r6   r  rK   r  lm_backbonerF   r   reward_logitssequence_lengthss              r<   
get_rewardr    s    2 %4N!((+n.A.A.CCL%!8!89K!!/?AFI%!!F KK 4 4R 89M)/!^_:L*MQ]*]^abbess 	LL++A.}7K7KL	
 '"+ r?   c                     X:g  nUR                  S5      UR                  5       -
  n[        R                  " X) S5      nU " UUUSSS9$ )a  
Performs a forward pass through the model with the given query responses and pad token ID.

Args:
    model (`torch.nn.Module`):
        The model to perform the forward pass.
    query_responses (`torch.Tensor`):
        The tensor containing the query responses.
    pad_token_id (`int`):
        The token ID representing the pad token.

Returns:
    `ModelOutput`:
        The output of the model, including hidden states.
rL   r   T)rF   rK   r  r  r  )r  r[   rY   r  )r+  r  r6   rK   r  rF   s         r<   forwardr    sZ    ( %4N!((+n.A.A.CCL!!/?AFI%!! r?   per_device_train_batch_sizer  r  c                    SSK n[        5       R                  nUR                  nUS   S   S:w  a+  XS'   US   SSS.nU(       a  S	S
0US'   OU(       a  S	S
0US'   O[	        U S5      (       a{  [        U R                  SS5      (       a  [        U R                  R                  5      O[        U R                  SS5      nUb&  US   S   S:X  a  UR                  Xw-  SU-  SS.5        UR                  XS9tpU R                  5         U $ )aO  
Prepares the model for training with DeepSpeed (both for stage 2 and 3), configuring the appropriate settings based
on the model and batch size.

Args:
    model (`torch.nn.Module`):
        The model to be prepared for DeepSpeed training.
    per_device_train_batch_size (`int`):
        The training batch size per device.
    fp16 (`bool`, defaults to `False`):
        Whether to use FP16 precision.
    bf16 (`bool`, defaults to `False`):
        Whether to use BF16 precision.

Returns:
    `torch.nn.Module`:
        The model initialized and configured with DeepSpeed for training.
r   Nzero_optimizationstager  train_micro_batch_size_per_gpuF)r  prescale_gradientswall_clock_breakdownenabledTr  r  confighidden_sizeshidden_sizer  )z$zero_optimization.reduce_bucket_sizez4zero_optimization.stage3_param_persistence_thresholdz-zero_optimization.stage3_prefetch_bucket_size)r+  r  )	deepspeedr   deepspeed_plugindeepspeed_configr]  r  r  r   r  r   
initializeeval)	r+  r  r  r  r  r  config_kwargsr  r  s	            r<   prepare_deepspeedr    s,   * ')::$55M()'2a7:U67.;<\.]"'$)

 %.$5M&!%.$5M&!5(## 5<<>> ELL--.U\\=$? 
 &=9L+Mg+VZ[+[ $$@K@YPRU`P`IJ $$5$GIE	JJLLr?   r  	responsesc                 F   [        X :H  5      R                  S5      nS/[        UR                  5       5      S-
  -  UR                  S   /-   n[
        R                  " UR                  S   UR                  S9R                  " U6 n[
        R                  " X%U:  U5      nU$ )a  
Truncates the responses at the first occurrence of the stop token, filling the rest with pad tokens.

Args:
    stop_token_id (`int`):
        The token ID representing the stop token where truncation occurs.
    pad_token_id (`int`):
        The token ID representing the pad token used to fill the truncated responses.
    responses (`torch.Tensor`):
        The tensor containing the responses to be truncated.

Returns:
    `torch.Tensor`:
        The truncated responses tensor with pad tokens filled after the stop token.
rC   rL   r  )
r  	unsqueezerX   r&  r   rY   r  r   viewr  )r  r6   r  
trunc_idxsnew_sizeidxspostprocessed_responsess          r<   truncate_responser    s      $I$>?II"MJsc)..*+a/0IOOA4F3GGH<<	*93C3CDII8TD#//	*;Ll[""r?   r  queriesgeneration_configc                    UR                   S   nX:g  n[        R                  " X) S5      nU R                  UUUSSS9n[        R                  " UR
                  S5      n[        R                  " XR                  SS2US24   4SS9U4$ )a  
Generates sequences from the language model backbone in a way that does not affect padding tokens.

Args:
    lm_backbone (`torch.nn.Module`):
        The language model backbone used for generation.
    queries (`torch.Tensor`):
        The tensor containing the input queries.
    pad_token_id (`int`):
        The token ID representing the pad token.
    generation_config (`GenerationConfig`):
        The configuration for the generation process.

Returns:
    tuple:
        - `generated_sequences` (`torch.Tensor`):
            The concatenated tensor of input queries and generated sequences.
        - `logits` (`torch.Tensor`):
            The logits output from the generation process.
rL   r   T)rF   rK   r  return_dict_in_generateoutput_scoresNr%  )r   rY   r  generatestackscoresr'  	sequences)	r  r  r6   r  r  rK   rF   r   logitss	            r<   r  r  2  s    . ]]1%N,N!!'?A>I!!% , $ " F [[*F99g//>?0BCD!LfTTr?   r  c                 x   / n/ nUR                   S   n[        SXr5       H;  nXX-    n	[        U U	UU5      u  pUR                  U
5        UR                  U5        M=     [	        XSSS9n[	        USSS9nUR                  SUR                   S   5      S U nUR
                  " S/UR                   SS  Q76 S U nX4$ )Nr   r   r   rC   r"   )r   ranger  rW   r\   r  )r+  r  r  r6   r  r  logitssr  r   queryquery_responser  padded_query_responsespadded_logitsss                 r<   batch_generationr  Y  s     OGq!J1jCA@A!)	"
 	~.v D ![bcHN 488=S=Y=YZ\=]^_j`jk#((Gn.B.B12.FGTN!11r?   bos_token_idprompt_len_input_idsprompt_tokenschosen_prompt_len_input_idschosen_tokensrejected_prompt_len_input_idsrejected_tokensc                    U b{  US:X  d  XS   S   :w  a  U /US   -   US'   S/US   -   US'   US:X  d  XS   S   :w  a  U /US   -   US'   S/US   -   US'   US:X  d  XS   S   :w  a  U /US   -   US'   S/US   -   US'   X$U4$ )Nr   r   rL   rT   rv   )r  r  r  r  r  r  r  s          r<   add_bos_token_if_neededr  z  s     1$FX8YZ[8\(\1=OaAb0bM,-67S=I`;a5aM12&!+|M_?`ab?c/c1=OaAb0bM,-67S=I`;a5aM12(A-QcAdefAg1g3?.?SeCf2fO./89s_Md=e7eO3488r?   eos_token_idc                    [        US   5      S:X  d  XS   S   :w  a(  US   R                  U 5        US   R                  S5        [        US   5      S:X  d  XS   S   :w  a(  US   R                  U 5        US   R                  S5        X4$ )NrF   r   rC   rK   rL   )rX   rW   )r  r  r  s      r<   add_eos_token_if_neededr    s     =%&!+|[?YZ\?]/]k")),7&'..q1
?;'(A-Q\A]^`Aa1a$++L9()003))r?   rF   c                    [        X:H  5      R                  S5      nS/[        U R                  5       5      S-
  -  U R                  S   /-   n[
        R                  " U R                  S   U R                  S9R                  " U6 n[
        R                  " XU:  U5      n[
        R                  " [
        R                  " U 5      XS:  S5      nXg4$ )a  
Truncates the input tensor from the right side after the first occurrence of the stop token.

Args:
    input_ids (`torch.Tensor`):
        The tensor containing the responses to be truncated
    stop_token_id (`int`):
        The token ID representing the stop token where truncation occurs
    pad_token_id (`int`):
        The token ID representing the pad token used to fill the truncated responses

Returns:
    tuple:
        - `output_ids` (`torch.Tensor`):
            The truncated responses tensor with pad tokens filled after the stop token
        - `mask` (`torch.Tensor`):
            The mask tensor to indicate the padding tokens
rC   rL   r  r   )r  r  rX   r&  r   rY   r  r   r  r  	ones_like)rF   r  r6   r  r  r  
output_idsrg   s           r<   truncate_rightr#    s    * $I$>?II"MJsc)..*+a/0IOOA4F3GGH<<	*93C3CDII8TD""9Z.?NJU__Y79JANDr?   c                  T   [        5       (       a  [        R                  R                  5         g[	        5       (       a  [        R
                  R                  5         g[        5       (       a  [        R                  R                  5         g[        R                  R                  5         g)a#  Empties the cache of the available torch device.

This function checks for the availability of different torch devices (XPU, MLU, NPU, CUDA) and empties the cache of
the first available device it finds.

If none of the specific devices are available, it defaults to emptying the CUDA cache.
N)	r!   rY   xpuempty_cacher   mlur    npurm  rv   r?   r<   r&  r&    s_     				!	!				!	!		

 r?   inputsr-   c                     UR                  U SS9nU Vs/ s H  o3R                  UR                  S5      PM      sn$ s  snf )aQ  
Decodes the input tensor and strips the padding tokens.

Args:
    inputs (`torch.Tensor`):
        The input tensor to be decoded.
    tokenizer (`transformers.PreTrainedTokenizerBase`):
        The tokenizer used to decode the input tensor.

Returns:
    `list[str]`:
        The list of decoded strings with padding tokens stripped.
F)skip_special_tokensrl  )batch_decodereplace	pad_token)r)  r-   decodedds       r<   decode_and_strip_paddingr1    s@     $$V$GG8?@1IIi))2.@@@s   %=
base_model
model_namehub_model_iddataset_nametags	wandb_urltrainer_nametrainer_citationpaper_titlepaper_id	comet_urlc           
      d   [        U USSUS/UQS9n[        R                  " U40 S[        [        R
                  " S5      R                  S5      5      _SU _S	U_S
U_SU_SU_SU
_SU_SU_SU_SU	_S[        S5      _S[        S5      _S[        S5      _S[        S5      _S[        S5      _6nU$ )a7  
Generate a `ModelCard` from a template.

Args:
    base_model (`str` or `None`):
        Base model name.
    model_name (`str`):
        Model name.
    hub_model_id (`str`):
        Hub model ID as `username/model_id`.
    dataset_name (`str` or `None`):
        Dataset name.
    tags (`list[str]`):
        Tags.
    wandb_url (`str` or `None`):
        Weights & Biases run URL.
    comet_url (`str` or `None`):
        Comet experiment URL.
    trainer_name (`str`):
        Trainer name.
    trainer_citation (`str` or `None`, defaults to `None`):
        Trainer citation as a BibTeX entry.
    paper_title (`str` or `None`, defaults to `None`):
        Paper title.
    paper_id (`str` or `None`, defaults to `None`):
        ArXiv paper ID as `YYMM.NNNNN`.

Returns:
    `ModelCard`:
        A ModelCard object.
transformerslicensegenerated_from_trainer)r2  datasetslibrary_namelicencer3  r6  template_pathtrlztemplates/lm_model_card.mdr2  r3  r4  r5  r7  r<  r8  r9  r:  r;  trl_versiontransformers_versionpytorch_versionrY   datasets_versionrA  tokenizers_version
tokenizers)r   r   from_templaterq   pkg_resourcesfilesjoinpathr   )r2  r3  r4  r5  r6  r7  r8  r9  r:  r;  r<  	card_datacards                r<   generate_model_cardrR    s   X #&..I ""---e4==>Z[\  	
 " "   " *    EN %^4  (  !,!" #<0#D& Kr?   c                      [        5       (       d  g[        R                  " 5       b  [        R                  " 5       R                  $ g)zl
If Comet integration is enabled, return the URL of the current Comet experiment; otherwise, return `None`.
N)r   comet_mlget_running_experimenturlrv   r?   r<   get_comet_experiment_urlrW  '  s7     &&(4..0444r?   rS  r  c                     [        5       (       d  [        S5      e[        R                  " 5       nUb  UR	                  XS9  gg)z
If Comet integration is enabled logs a table to the Comet experiment if it is currently running.

Args:
    name (`str`):
        Table name.
    table (`pd.DataFrame`):
        The Pandas DataFrame containing the table to log.
zLThe comet-ml is not installed. Please install it first: pip install comet-mlN)tabular_datafilename)r   ModuleNotFoundErrorrT  rU  	log_table)rS  r  
experiments      r<   log_table_to_comet_experimentr^  4  sC     !"pqq002J%? r?   rg   .c                    U R                   u  p#U R                  5       nU Vs/ s H  oUR                  5       PM     nnUR                  SS9n[        R                  " X4R
                  S9R                  S5      nXvR                  S5      -   U-  nUR                  SU5      n	U Vs/ s H  oUR                  SU5      PM     n
nU	R                  SS9nUS:H  nUR                  5       (       a6  [        UR                  [        R                  5      R                  5       5      OUnU	SS2SU24   nU
 Vs/ s H  oUSS2SU24   PM     nnU(       d  U$ U/UQ7$ s  snf s  snf s  snf )aQ  
Shift non-zero elements in the mask and corresponding tensors to the left.

This function operates on a binary mask and any number of additional tensors with the same dimensions as the mask.
For each row, non-zero values are shifted to the leftmost positions. Then, columns that contain only zeros across
all rows are truncated from the mask and tensors. Visually, this operation can be represented as follows:

```
[[0, 0, x, x, x, x],  ->  [[x, x, x, x],
 [0, x, x, x, 0, 0]]       [x, x, x, 0]]
```

Args:
    mask (`torch.Tensor`):
        2D tensor (binary mask) with shape `(N, M)`.
    *tensors (`torch.Tensor`):
        One or more 2D tensors with the same shape as `mask`. These tensors will be processed alongside `mask`,
        with non-zero values shifted and excess zero columns truncated in the same manner.

Returns:
    `torch.Tensor`:
        Updated binary mask with non-zero values flushed to the left and trailing zero columns removed.
    `*torch.Tensor`
        Updated tensors, processed in the same way as the mask.

Example:
```python
>>> mask = torch.tensor([[0, 0, 1, 1, 1], [0, 1, 1, 0, 0]])
>>> tensor = torch.tensor([[9, 9, 2, 3, 4], [9, 5, 6, 9, 9]])
>>> new_mask, new_tensor = flush_left(mask, tensor)
>>> print(new_mask)
tensor([[1, 1, 1],
        [1, 1, 0]])

>>> print(new_tensor)
tensor([[2, 3, 4],
        [5, 6, 0]])
```
rL   r%  r  r   N)r   cloner  rY   r  r   r  gatherr  r\  rp   r  int8)rg   r   r  M	mask_copyr   first_non_zeroposidx_roll	mask_rollrolled_tensorscol_sums
empty_colsfirst_empty_colflushed_maskflushed_tensorss                   r<   
flush_leftro  F  sX   P ::DA 

I")*'Qwwy'G* %%!%,N
,,q!1!1
2
<
<Q
?C..q11Q6H  H-I5<=Whhq(+WN= }}}#HQJAKAQAQc*--

3::<=WXOQ 0 001L7EF~!,_,,-~OF)/))% + > Gs   E&E3E"c                    U R                   u  p#U R                  5       nU Vs/ s H  oUR                  5       PM     nn[        R                  " U5      nUR	                  SS9n[        R
                  " X4R                  S9R                  S5      nXR                  S5      -
  U-  n	UR                  SU	5      n
U Vs/ s H  oUR                  SU	5      PM     nnU
R                  SS9nUS:g  nUR                  5       (       a6  [        UR                  [        R                  5      R	                  5       5      OUnU
SS2US24   nU Vs/ s H  oUSS2US24   PM     nnU(       d  U$ U/UQ7$ s  snf s  snf s  snf )zk
Shift non-zero elements in the mask and corresponding tensors to the right. See `flush_left` for details.
rL   r%  r  r   N)r   r`  rY   fliplrr  r  r   r  ra  r  r\  rp   r  rb  )rg   r   r  rc  rd  r   flipped_maskre  rf  rg  rh  ri  rj  non_empty_colsfirst_non_empty_colrm  rn  s                    r<   flush_rightru    sf    ::DA 

I")*'Qwwy'G* <<	*L!((Q(/N
,,q!1!1
2
<
<Q
?C..q11Q6H  H-I5<=Whhq(+WN= }}}#H]NIWI[I[I]I]#n//

;BBDEcdQ 3 445L;IJ>a/001>OJ)/))' + > Ks   E.<E3	E8c           
      T   U R                   [        R                  [        R                  4;   at  [        R                  " U SUR                  S5      S9R                  S5      n[        R                  " U  Vs/ s H  n[        R                  " USS9PM     sn5      nX$-
  nU$ / n[        X5       HY  u  pg[        R                  " USS9nUR	                  SUR                  S5      S9R                  S5      n	UR                  U	5        M[     [        R                  " U5      nU$ s  snf )a?  
A memory-efficient implementation of the common `log_softmax -> gather` operation.

This function is equivalent to the following naive implementation:
```python
logps = torch.gather(logits.log_softmax(-1), dim=-1, index=index.unsqueeze(-1)).squeeze(-1)
```

Args:
    logits (`torch.Tensor`):
        Logits tensor of shape `(..., num_classes)`.
    index (`torch.Tensor`):
        Index tensor of shape `(...)`, specifying the positions to gather from the log-softmax output.

Returns:
    `torch.Tensor`:
        Gathered log probabilities with the same shape as `index`.
rC   )r#  indexr%  )rN   rY   r   float64ra  r  r  r  	logsumexpr  Flog_softmaxrW   )
r  rw  selected_logitslglogsumexp_valuesper_token_logps
row_logits
row_labels	row_logpsrow_per_token_logpss
             r<   selective_log_softmaxr    s    & ||u}}55,,v2U__R=PQYYZ\] ;;f'Uf(Cf'UV)<  &)&&8"Jjb9I"+"2"2rAUAUVXAY"2"Z"b"bce"f""#67 '9  ++o6 (Vs   7D%r  
chunk_sizec                 ~   U R                   SS nU R                   S   nU R                  SU5      n/ nUR                  USS9 HR  n[        R                  " USS9n[
        R                  " U5      U-  R                  S5      * nUR                  U5        MT     [
        R                  " USS9nUR                  U5      $ )a  
Compute the Shannon entropy (in nats) for each row of *logits* in a memory-efficient way.

Instead of materializing the full softmax for all rows at once, the logits are flattened to shape (N, num_classes),
where N is the product of all leading dimensions. Computation is then performed in chunks of size `chunk_size`
along this flattened dimension, reducing peak memory usage. The result is reshaped back to match the input's
leading dimensions.

Args:
    logits (`torch.Tensor`):
        Logits tensor of shape `(..., num_classes)`. Entropy is taken along the last axis; all leading dimensions
        are preserved in the output.
    chunk_size (`int`, *optional*, defaults to `128`):
        Number of rows from the flattened logits to process per iteration. Smaller values reduce memory usage at
        the cost of more iterations.

Returns:
    `torch.Tensor`:
        Entropy values with shape `logits.shape[:-1]`.
NrC   r   r%  )
r   reshapesplitrz  r{  rY   r  r  rW   r'  )	r  r  original_shapenum_classesflat_logits	entropieschunklogpschunk_entropys	            r<   entropy_from_logitsr    s    * \\#2&N,,r"K ..[1KI"":1"5e,))E*U277;;' 6
 		)+I^,,r?   rS   completionsrH  rI  stepnum_samplesc                    [        5       (       d  [        S5      e[        5       n[        SSSS9nUR	                  SSS9  UR	                  SS	S9  UR                  5        H  nUR	                  US
SS9  M     UR	                  SSSS9  Ub  U[        U 5      :  a  SnOUS::  a  gUb  [        R                  " [        [        U 5      5      U5      n	U	 V
s/ s H  oU
   PM	     n n
U	 V
s/ s H  oU
   PM	     nn
UR                  5        VVV
s0 s H  u  pX V
s/ s H  oU
   PM	     sn
_M     nnnn
U	 V
s/ s H  oU
   PM	     nn
[        [        U 5      5       Hj  n
UR                  5        Vs/ s H  oU   U
   S PM     nnUR                  " [        X
   5      [        X   5      /UQX:   S P76   UR                  5         Ml     [        USSU 3SS9nUR                  U5        gs  sn
f s  sn
f s  sn
f s  sn
nnf s  sn
f s  snf )u
  
Print out a sample of model completions to the console with multiple reward metrics.

This function creates a nicely formatted table showing prompt-completion pairs, useful for monitoring model outputs
during training. It requires the `rich` library to be installed.

Args:
    prompts (`list[str]`):
        List of prompts.
    completions (`list[str]`):
        List of completions corresponding to the prompts.
    rewards (`dict[str, list[float]]`):
        Dictionary where keys are reward names and values are lists of rewards.
    advantages (`list[float]`):
        List of advantages corresponding to the prompts and completions.
    step (`int`):
        Current training step number, used in the output title.
    num_samples (`int` or `None`, *optional*, defaults to `None`):
        Number of random samples to display. If `None` (default), all items will be displayed.

Example:
```python
>>> from trl.trainer.utils import print_prompt_completions_sample

>>> prompts = ["The sky is", "The sun is"]
>>> completions = [" blue.", " in the sky."]
>>> rewards = {"Correctness": [0.123, 0.456], "Format": [0.789, 0.101]}
>>> advantages = [0.987, 0.654]
>>> print_prompt_completions_sample(prompts, completions, rewards, advantages, 42)
╭──────────────────────────── Step 42 ─────────────────────────────╮
│ ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┓ │
│ ┃ Prompt     ┃ Completion   ┃ Correctness ┃ Format ┃ Advantage ┃ │
│ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━┩ │
│ │ The sky is │  blue.       │        0.12 │   0.79 │      0.99 │ │
│ ├────────────┼──────────────┼─────────────┼────────┼───────────┤ │
│ │ The sun is │  in the sky. │        0.46 │   0.10 │      0.65 │ │
│ └────────────┴──────────────┴─────────────┴────────┴───────────┘ │
╰──────────────────────────────────────────────────────────────────╯
```
zvThe function `print_prompt_completions_sample` requires the `rich` library. Please install it with `pip install rich`.Tz
bold white)show_headerheader_styleexpandPromptbright_yellow)style
Completionbright_greenz	bold cyanr   )r  justify	Advantagezbold magentaNr   z.2fFzStep )r  titleborder_style)r   r  r$   r&   r  r   rX   randomsampler  rM  r  r'   add_sectionr%   r  )rS   r  rH  rI  r  r  r  r  reward_nameindicesr   keyvalreward_valuespanels                  r<   print_prompt_completions_sampler    s   ` "
 	
 iGddKE 
X_5	\8||~KI &	[H #g,&KA --c'l 3[A'./w!1:w//67w!1~w7BI--/R/hc31AQ11/R-45WmW
53w< =D\\^L^cCLOC01^Ld7:&[^(<e}eQ[Q^_bPce !
 %uTF^,WEMM% 071R5 Ms0   G))G.G8G3'G86G?/H3G8c                   `    \ rS rSrSr    SS\S\S\S\S\S	\\   4S
 jjr	S r
S\4S jrSrg)RepeatSampleriD  au  
Sampler that repeats the indices of a dataset in a structured manner.

Args:
    data_source (`Sized`):
        Dataset to sample from.
    mini_repeat_count (`int`):
        Number of times to repeat each index per batch.
    batch_size (`int`, *optional*, defaults to `1`):
        Number of unique indices per batch.
    repeat_count (`int`, *optional*, defaults to `1`):
        Number of times to repeat the full sampling process.
    shuffle (`bool`, *optional*, defaults to `True`):
        Whether to shuffle the dataset.
    seed (`int` or `None`, *optional*, defaults to `None`):
        Random seed for reproducibility (only affects this sampler).

Example:
```python
>>> sampler = RepeatSampler(["a", "b", "c", "d", "e", "f", "g"], mini_repeat_count=2, batch_size=3, repeat_count=4)
>>> list(sampler)
[4, 4, 3, 3, 0, 0,
 4, 4, 3, 3, 0, 0,
 4, 4, 3, 3, 0, 0,
 4, 4, 3, 3, 0, 0,
 1, 1, 2, 2, 6, 6,
 1, 1, 2, 2, 6, 6,
 1, 1, 2, 2, 6, 6,
 1, 1, 2, 2, 6, 6]
```

```txt
mini_repeat_count = 3
      -   -   -
     [0,  0,  0,  1,  1,  1,  2,  2,  2,  3,  3,  3,      |
      4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,      |
      8,  8,  8,  9,  9,  9, 10, 10, 10, 11, 11, 11,      |
                                                            repeat_count = 2
      0,  0,  0,  1,  1,  1,  2,  2,  2,  3,  3,  3,      |
      4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,      |
      8,  8,  8,  9,  9,  9, 10, 10, 10, 11, 11, 11, ...] |
      ---------   ---------   ---------   ---------
       ---------   ---------   ---------   ---------
        ---------   ---------   ---------   ---------
                     batch_size = 12
```
Ndata_sourcemini_repeat_countr  repeat_countshuffleseedc                     Xl         X l        X0l        X@l        [	        U5      U l        XPl        X`l        U(       a:  [        R                  " 5       U l
        Ub  U R                  R                  U5        g g g r<  )r  r  r  r  rX   r  r  r  rY   	Generator	generatormanual_seed)r;   r  r  r  r  r  r  s          r<   rA  RepeatSampler.__init__u  se     '!2$({+	"__.DN**40   r?   c              #   `  #    U R                   (       a8  [        R                  " U R                  U R                  S9R                  5       nO[        [        U R                  5      5      n[        S[        U5      U R                  5       Vs/ s H  o!X"U R                  -    PM     nnU Vs/ s H   n[        U5      U R                  :X  d  M  UPM"     nnU HH  n[        U R                  5       H,  nU H#  n[        U R                  5       H  nUv   M	     M%     M.     MJ     g s  snf s  snf 7f)N)r  r   )r  rY   randpermr  r  r   rr   r  rX   r  r  r  )r;   indexesr   r  r  rw  s         r<   __iter__RepeatSampler.__iter__  s     <<nnT%5%5PWWYG5!1!123G >C1c'lTXTcTc=de=d14??23=de '.OgUUt1N5gOE4,,-"E"4#9#9:# ; # .  f Ps%   B
D.D$&D.,D)D)AD.rA   c                     U R                   U R                  -  U R                  -  U R                  -  U R                  -  $ r<  )r  r  r  r  r:   s    r<   __len__RepeatSampler.__len__  s7      DOO3tFI_I__bfbsbsssr?   )r  r  r  r  r  r  r  r  )rL   rL   TN)rj   rk   rl   rm   rn   r   rp   r   r   rA  r  r  ru   rv   r?   r<   r  r  D  sl    .h "11 1 	1
 1 1 sm1,$*t tr?   r  c                     [         R                  " U [         R                  " U SS9-
  S-  5      n[         R                  " [         R                  " U 5      ) 5      nXUS-
  -  -  n[         R                  " U5      $ )a  
Compute the standard deviation of a tensor, ignoring NaNs. This function only supports 1D tensors.

Args:
    tensor (`torch.Tensor`):
        Input tensor of shape `(N,)`.

Returns:
    `torch.Tensor`:
        Standard deviation of the tensor, ignoring NaNs.
T)keepdimr"   rL   )rY   nanmeanr  isnanr   )rZ   variancer   s      r<   nanstdr    sa     }}fu}}VT'JJqPQHIIu{{6**+E##H::hr?   tensor_dict
num_chunksc                    [        S U R                  5        5       5      nUR                  S   U-  n/ n[        U5       H  n0 nU R	                  5        H[  u  pxUb6  [        U[        5      (       d  UR                  S:  a  XU-  US-   U-   Xg'   M>  Ub  UR                  S:X  a  XU'   MW  SXg'   M]     UR                  U5        M     U$ )a  
Splits a dictionary of tensors along the first dimension into `num_chunks` equal parts.

Example:
```python
>>> x = torch.arange(12).reshape(6, 2)
>>> y = torch.arange(6).reshape(6, 1)
>>> tensor_dict = {"x": x, "y": y}
>>> split_tensor_dict(tensor_dict, 3)
[
    {"x": tensor([[0, 1], [2, 3]]), "y": tensor([[0], [1]])},
    {"x": tensor([[4, 5], [6, 7]]), "y": tensor([[2], [3]])},
    {"x": tensor([[ 8,  9], [10, 11]]), "y": tensor([[4], [5]])}
]
```
c              3   .   #    U  H  oc  M  Uv   M     g 7fr<  rv   )r   rZ   s     r<   r   $split_tensor_dict.<locals>.<genexpr>  s     X-A6-A   	r   NrL   )	nextr  r   r  rM  r.  rr   r  rW   )	r  r  first_tensorr  chunksr   
chunk_dictr  rZ   s	            r<   split_tensor_dictr    s    & X[-?-?-AXXL##A&*4JF:
&,,.KC!z&$'?'?6;;QR?"(Z1q5J:N"O
#q(8"(3"&
 / 	j!  Mr?   seq_dictc                 (  ^ [        [        S U R                  5        5       5      5      n[        R                  " U5      mS[
        [           S[
        [           4U4S jjnU R                  5        VVs0 s H  u  p4X2" U5      _M     snn$ s  snnf )aM  
Shuffles all sequence-like values in a dictionary along the first dimension in unison.

Example:
```python
>>> x = torch.arange(6).reshape(3, 2)
>>> y = ["a", "b", "c"]
>>> seq_dict = {"x": x, "y": y}
>>> shuffle_sequence_dict(seq_dict)
{'x': tensor([[2, 3],
              [0, 1],
              [4, 5]]),
 'y': ['b', 'a', 'c']}
```
c              3   .   #    U  H  oc  M  Uv   M     g 7fr<  rv   )r   r   s     r<   r   (shuffle_sequence_dict.<locals>.<genexpr>  s     H%6!!%6r  r   rA   c                 
  > U c  g [        U [        R                  5      (       a  U R                  S:X  a  U $ [        U [        R                  5      (       a  U R                  S:  a  U T   $ T Vs/ s H  oU   PM	     sn$ s  snf )Nr   rL   )r.  rY   rt   r  )r   r   permutations     r<   permute&shuffle_sequence_dict.<locals>.permute  sj    9a&&166Q;Ha&&166Q;[>!)*k!k***s   /B )rX   r  r  rY   r  r   r   rM  )r  r  r  r  r  r  s        @r<   shuffle_sequence_dictr    sz    " THX__%6HHIJ..,K+8H% +(8*< + /7nn.>?.>(#C.>???s   6Bc                    [         R                  " U 5      R                  5       (       a3  [         R                  " [	        S5      U R
                  U R                  S9$ [         R                  " U [         R                  " U 5      )    5      $ )a  
Compute the minimum value of a tensor, ignoring NaNs. This function only supports 1D tensors.

Args:
    tensor (`torch.Tensor`): Input tensor of shape `(N,)`.

Returns:
    `torch.Tensor`: Minimum value of the tensor, ignoring NaNs. Returns NaN if all values are NaN.
nanr   )rY   r  allrZ   r   rN   r   r8   rZ   s    r<   nanminr    [     {{6  ||E%LV]]SS99VU[[00122r?   c                    [         R                  " U 5      R                  5       (       a3  [         R                  " [	        S5      U R
                  U R                  S9$ [         R                  " U [         R                  " U 5      )    5      $ )a  
Compute the maximum value of a tensor, ignoring NaNs. This function only supports 1D tensors.

Args:
    tensor (`torch.Tensor`): Input tensor of shape `(N,)`.

Returns:
    `torch.Tensor`: Maximum value of the tensor, ignoring NaNs. Returns NaN if all values are NaN.
r  r   )rY   r  r  rZ   r   rN   r   r   r  s    r<   nanmaxr  	  r  r?   c                     U $ )z Do we really need docs for this?rv   )r   s    r<   identityr    s    Hr?   r   c                 J   SU ;  d  SU ;  a  U $ U S   R                  SS9R                  5       nU S   n[        U5      UR                  S5      :w  a)  [	        S[        U5       SUR                  S5       35      e[        [        R                  " U S   USS95      n0 U ESU0E$ )z
Splits `batch["pixel_values"]` into a list of tensors based on the product of each row in
`batch["image_grid_thw"]`, while keeping other entries unchanged.
image_grid_thwpixel_valuesrL   r%  r   zMismatch: sum(lengths) = z != pixel_values.size(0) = )prodr   r  r&  r7   rr   rY   r  )r   lengthsr  split_valuess       r<   split_pixel_values_by_gridr    s    
 u$e(C$%**q*188:G(L
7||((++4S\NB]^j^o^opq^r]stuuE.$97JKL2e2^\22r?   c                     U R                  S5      n[        U[        5      (       a  [        R                  " USS9n0 U ESU0E$ U $ )z
Opposite of `split_pixel_values_by_grid`. Merges a list of tensors in `batch["pixel_values"]` back into a single
tensor along the first dimension.
r  r   r%  )rU   r.  rr   rY   r'  )r   r  mergeds      r<   unsplit_pixel_values_by_gridr  /  sF    
 99^,L,%%<Q/0%000r?   rf   target_lengthprotected_tokensc                 z  ^^ [        U5      n[        R                  " [        U5      U R                  S9mUU4S jn/ n/ n[        U R                  S   5       H4  nU" X   X   5      u  pUR                  U	5        UR                  U
5        M6     [        R                  " U5      [        R                  " U5      4$ )a  
Truncate tensors to target length while preserving protected tokens.

Args:
    ids (`torch.Tensor`):
        Input tensor of token IDs, shape (batch_size, sequence_length).
    mask (`torch.Tensor`):
        Input tensor of attention masks, shape (batch_size, sequence_length).
    target_length (`int`):
        Desired length of the output sequences.
    protected_tokens (`list[int]`):
        List of token IDs that should be preserved in the output.
r  c           	      H  > [         R                  " U T
5      nU) nUR                  5       R                  5       nTU-
  nUS:  a  [	        ST SU SU S35      e[         R
                  " U5      S   n[         R                  " U5      nUS:  a	  Xe* S  nSXx'   X'-  n	X	   X   4$ )Nr   ztarget_length (z)) is too small for the protected tokens (z4 tokens). Please increase target length to at least z or disable truncation.T)rY   isinr  r   r7   where
zeros_like)rf   rg   is_protectedis_non_protectednum_protectednum_non_protected_needednon_protected_indiceskeep_non_protectedkeep_indices	keep_maskprotected_tokens_tensorr  s             r<   process_sequence8truncate_with_protected_tokens.<locals>.process_sequenceQ  s    zz#'>?(= %((*//1#0=#@ #a'!-0YZgYh i==JOKbd  !&,< =a @"--.>?#a'01J1KLL/3, !5	~t..r?   r   )	setrY   rZ   rr   r   r  r   rW   r  )rf   rg   r  r  protected_setr  truncated_seqtruncated_maskr   new_idsnew_maskr  s     `        @r<   truncate_with_protected_tokensr  =  s      ()M#ll4+>szzR/8 MN399Q< ,SVTW=W%h' !
 ;;}%u{{>'BBBr?   )r   r   N)Ncpu)rC   )rl  )   )FF)rA   N)NNNN)   r<  )r   importlib.resources	resourcesrM  r   r  collectionsr   collections.abcr   r   r   r   importlib.metadatar   typingr	   r
   r   r   numpyr   pandaspdrY   torch.nn.functionalr/  
functionalrz  torch.utils.data
accelerater   r   r   accelerate.stater   huggingface_hubr   r   torch.nn.utils.rnnr   r   r>  r   r   r   r   r   r   r   transformers.utilsr   r   r   r   r    r!   trainer.model_configr#   rich.consoler$   
rich.panelr%   
rich.tabler&   	rich.textr'   rT  peftr(   r)   
get_loggerrj   r  r+   rx   rr   rt   rp   rq   r\   r   r   r  r   r   rs   r   r   r*  Moduler2  r8  r:  r_  rj  rp  r  r  r  	DataFramer  SIMPLE_SFT_CHAT_TEMPLATESIMPLE_CHAT_TEMPLATEr  r  r[   r  r  r  r   r  r  r  r  r  r  r#  r&  r1  rR  rW  r^  ro  ru  r  r  r  r  r  r  r  r  r  r  r  r  r  rv   r?   r<   <module>r%     s	    +    + ( & 0 0       9 9 - 4 + $    / $  + 
		H	% _
 _
 _
D J J J^ (,	?%,,?? ? !	?
 \\?D P P Pf 7@ 7@ 7@t 5:G\\G
5<<s*+G G(%" %"4U
3C %"P
%,, 
 
c5j@Q 
X[ 
ejeqeq 
EHHOO  &k &kR7 AS8T &Xd38n5  0F 2f"2 $  r  C     z & z  z z 38** 3ell 3 3./88??/-2\\/IL/^a/
5<<u||34/d88??\\  	B `e788??79<7DH7X\7
XX__7t#S # # #Y^YeYe #.$U$U+0<<$UGJ$U_o$U
5<<%&$UN 288??2\\2 '*2 	2
 (2 2@93-99 T#Y'9 "%	9
 T#Y'9 $'9 #tCy.)9,	*	*&*3S	>&:	*MQRUW[\_W`R`Ma	*||,/?B
5<<%&:!$AU\\ A>U AZ^_bZc A4 '+!%"#GGG G 3-	G
 s)G }G G smG #G smG }G GT
(3- 
@ @BLL @T @$>*U\\ >*U\\ >*eELLRWX]XdXdfiXiRjDj>k >*B*ell *ell *uU\\SXY^YeYegjYjSkEk?l *< ELL  F"- "-# "- "-V T#YTcT #tE{"#T U	T
 T T 
Tn]tG ]tB 5<<  ELL  $ c8ELL112 @C 	$sHU\\**
+, F@Dhx.@)@$A @d3PXYaPbKbFc @@35<< 3ELL 335<< 3ELL 3
3d3+<&= 3$sERWR^R^`dejeqeq`rRrLsGsBt 3$S%d5<<FX8X2Y-Y(Z _cdginiuiudu_v 8C	8C"\\8C:=8CQUVYQZ8C
5<<%&8Cr?   