
    Y:i              
          S r SSKJr  SSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJ
r
JrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)JrJ*r*J+r+J,r,JrJ-r-J.r.  SSK*r*SSK7  SSK/J0r0J1r1  SS	K2J3r3  SSKrSSK4r5SS
K6J7r7  SSKJr  SSK8J9r9J:r;  SSK<J=r=  SSK>r>SSK?J@r@  S rA SSSSSS.rB\R                  " SS\BS9S 5       rDS\R                  S\ES\ES\R                  4S jrFS\R                  S\R                  S\ES\ES\R                  4
S jrGS\R                  S\ES\R                  4S jrH\0 " S  S!\5      5       rI  " S" S#\5      rJ " S$ S%\J5      rKg)&z;
2025.10.10
2025.10.9
4.56.2
0.23.0
__UNSLOTH_VERSIONING__
    )TensorN)
functional)AnyListOptionalTupleUnionDictSetCallable)%r   BaseImageProcessorBasePairwiseJudger   DatasetEvalPredictionFFeatureExtractionMixinIterableDatasetOnlineDPOTrainerOptimizerNamesr   	PeftModelPreTrainedModelPreTrainedTokenizerBaseProcessorMixinSIMPLE_CHAT_TEMPLATETrainerCallbackr	   	XPOConfig
XPOTrainerempty_cachegenerate_model_cardget_comet_experiment_url
get_rewardis_conversationalis_peft_availableis_wandb_availablejinja2maybe_apply_chat_templatennosselective_log_softmaxtextwraptorchtruncate_rightunwrap_model_for_generation)*)	dataclassfield)Version)nullcontext)DataCollatorForSeq2SeqDataCollatorForLanguageModeling)ParallelMode)
MethodTypec                 F   ^  [         R                  " T 5      U 4S j5       nU$ )Nc                 8  > [        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         T" U /UQ70 UD6n[        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         U$ )Nmodelfor_trainingfor_inference)hasattrr9   r:   r;   )selfargskwargsoutputfs       H/home/james-whalen/llama.cpp/unsloth_compiled_cache/UnslothXPOTrainer.pywrapper*prepare_for_training_mode.<locals>.wrapper0   sx     4!!gdjj.&I&IJJ##%4)$)&)4!!gdjj/&J&JJJ$$&    )	functoolswraps)rA   rC   s   ` rB   prepare_for_training_moderH   /   s%    __Q  NrE   TF)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphs)dynamic	fullgraphoptionsc                 d   [         R                  " U R                  SU R                  S   5      SSS9n[         R                  " UR                  S5      SSS9n/ n[	        X#5       H  u  pVUR                  [         R                  5      n[         R                  " USUR                  S5      S9R                  S5      n[         R                  " USS9nXx-
  n	UR                  U	5        M      [         R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )N   r   )chunksdim)rS   indexrS      )r+   chunkreshapeshapeziptofloat32gather	unsqueezesqueeze	logsumexpappendconcat)
logitsrT   chunked_logitschunked_indexall_per_token_logpschunk_logitschunk_indexselected_logitslogsumexp_valuesper_token_logpss
             rB   chunked_selective_log_softmaxrl   E   s    [[FLL4D!EPQYZ[N[[r!2QaHM%(%G!#u}}5,,|2{G\G\]_G`aiijlm ??<rB)<""?3 &H 	,,':;-55v||AUV6XYrE   	input_idslogits_to_keeppad_token_idreturnc                 ~    XR                   S   :  a  [        S5      eU SS2SU* 24   nX2:H  nUR                  SS9nU$ )zr
Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens 
rV   z8logits_to_keep must be smaller than the sequence length.NrU   )rY   
ValueErrorsum)rm   rn   ro   prompt_sectionpadding_maskpad_token_countss         rB   calculate_pad_tokens_in_promptrw   W   sX     ++STTq"2N?"223N"2L#''A'.rE   completion_input_idsleft_pad_tokens_per_promptmax_left_padc                     U R                   u  pEU R                  nX!-
  n[        R                  " XVS9R	                  S5      nXR	                  S5      :  n	X:g  n
X-  nU$ )a)  
Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]

Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
and pad are pad tokens, this function would make a completion mask that would 0 out the pad
and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
devicer   rV   )rY   r}   r+   aranger^   )rx   ry   rz   ro   
batch_sizecompletion_lenr}   num_tokens_to_maskindices
shift_masknon_padding_mask
final_masks               rB    create_completion_attention_maskr   j   si     "6!;!;J!((F%Bll>9CCAFG88;;J,<.JrE   tensorpad_idc                 l    X:g  n[         R                  " USSSS9n[         R                  " U SU5      nU$ )zD
Moves all padding tokens in each sequence of a batch to the right.
rV   T)rS   
descendingstable)r+   argsortr]   )r   r   masksorted_indicespacked_tensors        rB   left_pack_paddingr      s8     D]]4Q4MNLLN;MrE   c                     ^  \ rS rSr% Sr\" SSS0S9r\\   \	S'   \" SSS	0S9r
\\   \	S
'   \" SSS0S9r\\   \	S'   SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS SSS!SSSSSSSSS"S"SSSSS#S$SSSSSSSS%SSSSSSSSSSSSSS%SSSSSSS&S'SSSS(SSSSSSSSSSSS)SSSSSSSSS$SSSS%SSSS*S+SSSSSSSSSSSSSSSS,S-SSSSS0 SSSS.SSS/SS0S1SS2S3S4S SSSSSSSS4U 4S5 jjrS6rU =r$ )7UnslothXPOConfig   a  
    
Configuration class for the [`XPOTrainer`].

Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following:

Parameters:
    alpha (`float` or `list[float]`, *optional*, defaults to `1e-5`):
        Weight of the XPO loss term. If a list of floats is provided then the alpha is selected for each new epoch
        and the last alpha is used for the rest of the epochs.

    NhelpzvLLM SamplingParams)defaultmetadatavllm_sampling_paramsrP   z8Chunk size to reduce memory usage. -1 is most efficient.unsloth_num_chunksz'Maximum sequence length to truncate to.max_seq_lengthFnorQ      r      g-C6
?g{Gz?g?g+?g:0yE>g      ?g      @linear皙?passivewarningTstepsrV     iO  O1auto         
adamw_8bitlength
every_savelasti  @   i   sigmoidvllmg?colocatez0.0.0.0i@  g      n@c                   > US:  a  [        SU S35        US:  a  [        SU S35        Uc  U#S:X  a
  U$S:X  a  SnS	n#Wc$  S
SKJn  [        [	        U" 5       S-   S5      S5      nWS
::  a  [        S5      eWS:  a  [        S5      e[        TU ]  " S0 SU_SU_SU_SU_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_S U_S!U_S"U_S#U_S$U_S%U_S&U_S'U_S(U_S)U_S*U_S+U_S,U_S-U_S.U_S/U_S0U_S1U _S2U!_S3U"_S4U#_S5U$_S6U%_S7U&_S8U'_S9U(_S:U)_S;U*_S<U+_S=U,_S>U-_S?U._S@U/_SAU0_SBU1_SCU2_SDU3_SEU4_SFU5_SGU6_SHU7_SIU8_SJU9_SKU:_SLU;_SMU<_SNU=_SOU>_SPU?_SQW@_SRWA_SSWB_STWC_SUWD_SVWE_SWWF_SXWG_SYWH_SZWI_S[WJ_S\WK_S]WL_S^WM_S_WN_S`WO_SaWP_SbWQ_ScWR_SdWS_SeWT_SfWU_SgWV_ShWW_SiWX_SjWY_SkWZ_SlW[_SmW\_SnW]_SoW^_SpW__SqW`_SrWa_SsWb_StWc_SuWd_SvWe_SwWf_SxWg_SyWh_SzWi_S{Wj_S|Wk_S}Wl_S~Wm_SWn_SWo_SWp_SWq_SWr_SWs_SWt_SWu_SWv_SWw_SWx_SWy_SWz_SW{_SW|_SW}_SW~_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_WD6  WU l        WU l	        WU l
        g )NgHz>z Unsloth: Your learning rate of `zi` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!rV   za` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!r   r   unsloth_training_checkpointsr   r   )	cpu_countrQ   r   r   zUUnsloth: Please set a positive non-zero temperature since your results will be wrong.
   zgUnsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.
output_diroverwrite_output_dirdo_traindo_eval
do_predicteval_strategyprediction_loss_onlyper_device_train_batch_sizeper_device_eval_batch_sizeper_gpu_train_batch_sizeper_gpu_eval_batch_sizegradient_accumulation_stepseval_accumulation_steps
eval_delaytorch_empty_cache_stepslearning_rateweight_decay
adam_beta1
adam_beta2adam_epsilonmax_grad_normnum_train_epochs	max_stepslr_scheduler_typewarmup_ratiowarmup_steps	log_levellog_level_replicalog_on_each_nodelogging_dirlogging_strategylogging_first_steplogging_stepslogging_nan_inf_filtersave_strategy
save_stepssave_total_limitsave_safetensorssave_on_each_nodesave_only_model'restore_callback_states_from_checkpointno_cudause_cpuuse_mps_deviceseed	data_seedjit_mode_evaluse_ipexbf16fp16fp16_opt_levelhalf_precision_backendbf16_full_evalfp16_full_evaltf32
local_rankddp_backendtpu_num_corestpu_metrics_debugdebugdataloader_drop_last
eval_stepsdataloader_num_workersdataloader_prefetch_factor
past_indexrun_namedisable_tqdmremove_unused_columnslabel_namesload_best_model_at_endmetric_for_best_modelgreater_is_betterignore_data_skipfsdpfsdp_min_num_paramsfsdp_config"fsdp_transformer_layer_cls_to_wrapaccelerator_configparallelism_config	deepspeedlabel_smoothing_factoroptim
optim_args	adafactorgroup_by_lengthlength_column_name	report_toddp_find_unused_parametersddp_bucket_cap_mbddp_broadcast_buffersdataloader_pin_memorydataloader_persistent_workersskip_memory_metricsuse_legacy_prediction_looppush_to_hubresume_from_checkpointhub_model_idhub_strategy	hub_tokenhub_private_repohub_always_pushhub_revisiongradient_checkpointinggradient_checkpointing_kwargsinclude_inputs_for_metricseval_do_concat_batchesfp16_backendpush_to_hub_model_idpush_to_hub_organizationpush_to_hub_tokenmp_parametersauto_find_batch_sizefull_determinismtorchdynamo	ray_scopeddp_timeouttorch_compiletorch_compile_backendtorch_compile_modeinclude_tokens_per_secondinclude_num_input_tokens_seenneftune_noise_alphaoptim_target_modulesbatch_eval_metricseval_on_startuse_liger_kernelliger_kernel_configeval_use_gather_objectaverage_tokens_across_devicesreward_model_pathjudgemax_new_tokens
max_lengthtemperaturetop_ptop_kmin_prepetition_penaltygeneration_kwargsuse_transformers_pagedcache_implementationmissing_eos_penalty	loss_typedisable_dropoutuse_vllmvllm_model_implvllm_guided_decoding_regexvllm_gpu_memory_utilization	vllm_modevllm_server_base_urlvllm_server_hostvllm_server_portvllm_server_timeoutvllm_tensor_parallel_sizeds3_gather_for_generationmodel_init_kwargsreward_weightsdataset_num_procgpu_memory_utilization )printmultiprocessingr   minmax	MathErrorsuper__init__r   r   r   )r=   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rE  rF  rG  rH  rI  rJ  rK  rL  rM  rN  rO  rP  rQ  r   r   r   r?   r   	__class__s                                                                                                                                                                        rB   rY  UnslothXPOConfig.__init__   sr	   L 4)I-  YB  (C  "D1e&F}o  Vw  %x  y-7":zS?P7J M#1"3y{1}a#8"=!sttB  F  G  G 	 _	F#_	F#7_	F  _	F 	_	F
 $_	F *_	F $8_	F +F_	F *D_	F (@_	F '>_	F +F_	F '>_	F $_	F '>_	F  *!_	F" (#_	F$ $%_	F& $'_	F( ()_	F* *+_	F,  0-_	F. "/_	F0 !21_	F2 (3_	F4 (5_	F6 "7_	F8 !29_	F:  0;_	F< &=_	F>  0?_	F@ "4A_	FB *C_	FD &<E_	FF *G_	FH $I_	FJ  0K_	FL  0M_	FN !2O_	FP .Q_	FR 7^S_	FT U_	FV W_	FX ,Y_	FZ [_	F\ "]_	F^ *__	F`  a_	Fb c_	Fd e_	Ff ,g_	Fh &<i_	Fj ,k_	Fl ,m_	Fn o_	Fp $q_	Fr &s_	Ft *u_	Fv !2w_	Fx y_	Fz $8{_	F| $}_	F~ &<_	F@ *DA_	FB $C_	FD  E_	FF (G_	FH %:I_	FJ &K_	FL &<M_	FN %:O_	FP !2Q_	FR  0S_	FT U_	FV #6W_	FX &Y_	FZ 2T[_	F\ "4]_	F^ "4__	F` "a_	Fb &<c_	Fd e_	Ff $g_	Fh "i_	Fj .k_	Fl "4m_	Fn "o_	Fp *Dq_	Fr !2s_	Ft %:u_	Fv %:w_	Fx -Jy_	Fz #6{_	F| *D}_	F~ &_	F@ &<A_	FB (C_	FD (E_	FF "G_	FH  0I_	FJ .K_	FL (M_	FN &<O_	FP -JQ_	FR *DS_	FT &<U_	FV (W_	FX $8Y_	FZ (@[_	F\ !2]_	F^ *__	F` $8a_	Fb  0c_	Fd &e_	Ff "g_	Fh &i_	Fj *k_	Fl %:m_	Fn "4o_	Fp )Bq_	Fr -Js_	Ft #6u_	Fv $8w_	Fx "4y_	Fz *{_	F|  0}_	F~ #6_	F@ &<A_	FB -JC_	FD !2E_	FF G_	FH ,I_	FJ $K_	FL &M_	FN O_	FP Q_	FR S_	FT "4U_	FV !2W_	FX &<Y_	FZ $8[_	F\ #6]_	F^ "__	F` .a_	Fb  c_	Fd .e_	Ff *Dg_	Fh +Fi_	Fj "k_	Fl $8m_	Fn  0o_	Fp  0q_	Fr #6s_	Ft )Bu_	Fv )Bw_	Fx !2y_	Fz ,{_	F|  0}_	F~ &<f_	F@ %9!"4,rE   )r   r   r   )__name__
__module____qualname____firstlineno____doc__r0   r   r   r   __annotations__r   intr   rY  __static_attributes____classcell__rZ  s   @rB   r   r      sh    +012+(3-  */VW*#  &+EF&NXc]  #$&'%&#'"&&'"#"%$%""!&!27!'!$!"%) $!& $  -1!!!$%%)  $ $(-"%*!%#!%(,%*!%##' $  $!$)(-"#" "!&(,  !&#" %)&*#$#$%$( !%#GV- V-rE   r   c            $       X  ^  \ rS rSrSrSS/r                S)S\\\R                  4   S\\\R                  4   S	\
\R                     S
\
\   S\
\   S\
\   S\
\\\4      S\
\\\\\4   4      S\
\\\\\4      S\
\\\\   4      S\
\   S\
\\/\4      S\
\\      S\\R6                  R8                  \R6                  R:                  R<                  4   S\
\\R>                  \R>                  /\R>                  4      S\
\\\R                  4      SS4"U 4S jjjr \!S 5       r"S r#S r$S r%S r&S r'S r(  SS  jr) S*S\R                  S!\\\\R>                  \*4   4   S"\
\+   S\R>                  4S# jjr,   S+S$\
\   S%\
\   S&\\\\   S4   4S' jjr-S(r.U =r/$ ),_UnslothXPOTraineri  aK  
Initialize XPOTrainer as a subclass of [`OnlineDPOConfig`].

Args:
    model (`transformers.PreTrainedModel`):
        The model to train, preferably an `AutoModelForCausalLM`.
    ref_model (`PreTrainedModelWrapper`):
        Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
        and loss. If no reference model is provided, the trainer will create a reference model with the same
        architecture as the model to be optimized.
    reward_funcs (`transformers.PreTrainedModel`):
        The reward model to score completions with, preferably an `AutoModelForSequenceClassification`.
    judge (`BasePairwiseJudge`):
        The judge to use for pairwise comparison of model completions.
    args (`XPOConfig`):
        The XPO config arguments to use for training.
    data_collator (`transformers.DataCollator`):
        The data collator to use for training. If None is specified, the default data collator
        (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
        sequences in the batch, given a dataset of paired sequences.
    train_dataset (`datasets.Dataset`):
        The dataset to use for training.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If provided, will be used to automatically process the inputs
        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
        reuse the fine-tuned model.
    peft_config (`dict`):
        The peft config to use for training.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.
    callbacks (`list[transformers.TrainerCallback]`):
        The callbacks to use for training.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.

.. deprecated:: 0.22.0
    The following parameters are deprecated and will be removed in a future version:

    * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
    * `reward_processing_class`: Use `reward_processing_classes` instead. For example, change
      `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.
trlxpoNNNr9   	ref_modelreward_funcsr5  r>   data_collatortrain_dataseteval_datasetprocessing_classreward_processing_classespeft_configcompute_metrics	callbacks
optimizerspreprocess_logits_for_metricsreward_modelrp   c                   > [         TU ]  UUUUUUUUUU	U
UUUUUS9  U R                  R                  U l        / / / / / / / / / / / / / / S.U l        U R                  bf  [        U R                  5      S:w  a  [        S5      eU R                  S   U l        / U R
                  S'   / U R
                  S'   / U R
                  S'   g g )	N)r9   rk  r5  rl  rw  r>   rm  rn  ro  rp  rq  rr  rs  rt  ru  rv  )loss/dpoloss/xpoobjective/klobjective/entropyrewards/chosenrewards/rejectedrewards/accuraciesrewards/marginslogps/chosenlogps/rejectedval/model_contain_eos_tokenval/ref_contain_eos_tokenalphabetarV   z3XPOTrainer only supports one reward function/model.r   objective/model_scoresobjective/ref_scoresobjective/scores_margin)	rX  rY  r>   r  _alphastatsrl  lenrr   )r=   r9   rk  rl  r5  r>   rm  rn  ro  rp  rq  rr  rs  rt  ru  rv  rw  rZ  s                    rB   rY  _UnslothXPOTrainer.__init__4  s    , 	%%''%-&?#+!*G! 	 	
& iioo !#  ""$! +-)+#

& (4$$%* !VWW $ 1 1! 4D35DJJ/013DJJ-.46DJJ01 )rE   c                     [        U R                  [        5      (       aM  U R                  R                  nU[        U R                  5      :  a  U R                  U   $ U R                  S   $ U R                  $ )NrP   )
isinstancer  liststateepochr  )r=   r  s     rB   r  _UnslothXPOTrainer.alpha{  sY    dkk4((JJ$$E).T[[1A)A4;;u%Vt{{SUV;;rE   c                 Z   [        X R                  5       nUR                  US   US   U R                  S9nS S S 5        U R                  cS  U R                  R                  U5      n[        5       (       a&  [        U[        5      (       a  UR                  5       nO(UnO%U R                  R                  U R                  5      n[        X`R                  5       nUR                  US   US   U R                  S9nS S S 5        WU4$ ! , (       d  f       N= f! , (       d  f       WW4$ = f)Nrm   attention_mask)rm   r  generation_config)
r-   acceleratorgenerater  rk  unwrap_modelr#   r  r   get_base_model)	r=   promptsr9   unwrapped_policy_model_for_genmodel_output"unwrapped_main_model_for_ref_logicactual_model_for_ref_generationfinal_ref_model_for_gen
ref_outputs	            rB   _generate_completions(_UnslothXPOTrainer._generate_completions  s(   (0@0@AEc9BB!+.&'78"&"8"8 C L B >>!151A1A1N1Nu1U. ""z2TV_'`'`2T2c2c2e/2T/.2.>.>.K.KDNN.[+()HJZJZ[_v099!+.&'78"&"8"8 : J \ Z''3 BA$ \[ Z''s   "D"D
D
D*c                    US   R                   S   nUS S 2US 24   n[        XPR                  R                  U R                  R                  5      u  pV[
        R                  " US   U4SS9[
        R                  " US   U4SS9US   S.nUS S 2US 24   n[        XR                  R                  U R                  R                  5      u  p[
        R                  " US   U4SS9[
        R                  " US   U	4SS9US   S.n
Xz4$ )Nrm   rV   rU   r  rawrm   r  r  )rY   r,   rp  eos_token_idro   r+   cat)r=   r  r  r  context_lengthmodel_completion_idsmodel_completion_mask
model_dataref_completion_idsref_completion_maskref_datas              rB   _process_completions'_UnslothXPOTrainer._process_completions  s*    -33A6  ,A~,>?6D "7"7"D"DdF[F[FhFh7
3 GK$8:N#OUVW#ii1A)BDY(Z`ab5>

 (>?(:;2@ 5 5 B BDDYDYDfDf3
/ GK$8:L#MSTU#ii1A)BDW(X^_`5>
 ##rE   c                    [         R                  " 5          [        U R                  US   U R                  R
                  U5      u  pEn[        U R                  US   U R                  R
                  U5      u  pFnS S S 5        U R                  R                  b  [         R                  " US   U R                  R                  :H  SS9n[         R                  " US   U R                  R                  :H  SS9nWU) ==   U R                  R                  -  ss'   WU) ==   U R                  R                  -  ss'   WW4$ ! , (       d  f       N= f)Nrm   rP   rU   )
r+   no_gradr!   rl  rp  ro   r>   r@  anyr  )	r=   r  r  r  _model_scores
ref_scoresmodel_contain_eosref_contain_eoss	            rB   _compute_rewards#_UnslothXPOTrainer._compute_rewards  s!   ]]_!+!!:k#:D<Q<Q<^<^`n"AQ  *!!8K#8$:O:O:\:\^l A1	  99((4 %		*[*ATEZEZEgEg*gmo p#ii(=AVAVAcAc(ciklO++,		0M0MM,'(DII,I,II(Z'' _s   A%E  
Ec           	         US   nU R                   R                  US   S S 2US 24   SS9nU Vs/ s H  ofR                  5       PM     nnU R                   R                  US   S S 2US 24   SS9nU Vs/ s H  ofR                  5       PM     nn[        SUS   05      (       a  U Vs/ s H	  nSUS./PM     nn[        R
                  " 5       nUR                  [        5      n	U V
s/ s H  oR                  U
S	9PM     nn
U Vs/ s H  oiR                  US	9PM     nnU Vs/ s H	  nSUS./PM     nnU Vs/ s H  oiR                  US	9PM     nnU R                  R                  U[        [        XW5      5      5      n[        R                  " U Vs/ s H  oS:H  PM	     snUS   R                  S
9$ s  snf s  snf s  snf s  sn
f s  snf s  snf s  snf s  snf )Nr  rm   T)skip_special_tokenspromptr   	assistant)rolecontent)messagesr|   )rp  batch_decodestripr"   r%   Environmentfrom_stringr   renderr5  r  rZ   r+   r   r}   )r=   r  r  r  r  model_data_completions
completionref_data_completionsenvironmenttemplatemessageranks_of_first_completionranks                rB   _compute_judge!_UnslothXPOTrainer._compute_judge  s   U#!%!6!6!C!C{#A~$67T "D "
 H^!^G]"2"2"4G]!^#44AA[!!^_"454  B  
 FZZEYz 0 0 2EYZh
344Qg&Qg:+*=>Qg # & !,,.K"../CDHHOPW8GP]s%t]szoozo&J]s"%t Rf$Qe:+*=>Qe ! $ \p#p[oZOOZO$H[o #p$(JJ$4$4+BC%
! ||3LM3L4QY3LMV`alVmVtVtuu9 "_
  [&
 Q%t$ $q Ns/   F85F=)G)GG%G;GGc                 F  ^ U4S jnU" X5      nU" X5      n[         R                  " 5          U R                  c*  UR                  5          U" X5      nU" X5      n	S S S 5        O&U" U R                  U5      nU" U R                  U5      n	S S S 5        US   S S 2TS 24   S:H  n
US   S S 2TS 24   S:H  nUR	                  U
S5      nUR	                  US5      nW	R	                  US5      n	WR	                  U
S5      nXgX4$ ! , (       d  f       N= f! , (       d  f       N= f)Nc                    > U " US   US   S9nUR                   S S 2TS-
  S24   n[        X1S   S S 2TS 24   5      nU$ )Nrm   r  )r  rV   rP   )rc   r)   )mdatar@   rc   token_logprobsr  s        rB   compute_logprobs_for_dataG_UnslothXPOTrainer._compute_logprobs.<locals>.compute_logprobs_for_data  sZ    tK(>N9OPF]]1nq&82&=#=>F26;LQP^P_M_;`aN!!rE   r  r   r   )r+   r  rk  disable_adaptermasked_fill)r=   r9   r  r  r  r  model_logprobs_model_datamodel_logprobs_ref_dataref_logprobs_model_dataref_logprobs_ref_datamodel_padding_maskref_padding_masks       `       rB   _compute_logprobs$_UnslothXPOTrainer._compute_logprobs  s9   	" %>e$P!";E"L ]]_~~%**,.G.Z+,Ee,V) -, +DDNNT^*_'(A$..RZ([%  ((89!^_:LMQRR#$45a6HIQN$=$I$IJ\^a$b!"9"E"EFVX["\ 5 A ABRTW X"9"E"EFXZ]"^(CXqq -, _s#   DD/D
D	D
D c                    UR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      n	[        R                  " XVU5      n
[        R                  " XYU5      nX-
  n[        R                  " U) Xg5      n[        R                  " U) X5      nX-
  nX-
  nU R                  R                  S:X  a%  [
        R                  " U R                  U-  5      * nOUU R                  R                  S:X  a  USSU R                  -  -  -
  S-  nO"[        SU R                  R                   35      eU R                  U-  nUU-   R                  5       nUUU4$ )NrV   r   ipor   zinvalid loss type )rs   r+   wherer>   rA  r   
logsigmoidr  NotImplementedErrorr  mean)r=   r  r  r  r  chosen_maskmodel_logprobs_model_data_summodel_logprobs_ref_data_sumref_logprobs_ref_data_sumref_logprobs_model_data_sumchosen_model_logprobschosen_ref_logprobschosen_log_ratiosrejected_model_logprobsrejected_ref_logprobsrejected_log_ratiosrc   
dpo_losses
xpo_losseslosss                       rB   _compute_losses"_UnslothXPOTrainer._compute_losses  s\    )B(E(Ea(H%&=&A&A!&D#$9$=$=a$@!&=&A&A!&D# %KXs t#kk+Tmn1G"'++{l<Y"w %[L:U q5M #899)+,,tyy6'9::JYY  E) 1DII#661<J%(:499;N;N:O&PQQ ZZ"==
 Z'--/Z++rE   c                 	  ^  U 4S jnT R                   S   R                  U" U5      5        T R                   S   R                  U" U	5      5        T R                  bn  T R                   S   R                  U" U5      5        T R                   S   R                  U" U5      5        T R                   S   R                  U" X-
  5      5        UR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      n[        R
                  " X~U5      n[        R
                  " UUU5      nUU-
  n[        R
                  " U) X5      n[        R
                  " U) UU5      nUU-
  nT R                   S   R                  U" UR                  5       UR                  5       -   5      5        T R                   S	   R                  U" UR                  5       UR                  5       -   5      5        UT R                  -  nUT R                  -  nT R                   S
   R                  U" UR                  5       5      5        T R                   S   R                  U" UR                  5       5      5        X6-
  nXE-
  nUR                  S5      UR                  S5      -   R                  5       S-  nT R                   S   R                  U" U5      5        UR                  S5      * nUR                  S5      * nUR                  5       UR                  5       -   S-  nT R                   S   R                  U" U5      5        UU-
  n T R                   S   R                  U" U R                  5       5      5        U S:  R                  5       n!T R                   S   R                  U" U!R                  5       5      5        US   S S 2U
S 24   T R                  R                  :H  R                  SS9n"US   S S 2U
S 24   T R                  R                  :H  R                  SS9n#T R                   S   R                  U" U"R                  5       5      5        T R                   S   R                  U" U#R                  5       5      5        T R                   S   R                  T R                  5        T R                   S   R                  T R                  5        g )Nc                 r   > TR                   R                  U 5      R                  5       R                  5       $ N)r  gather_for_metricsr  item)r   r=   s    rB   gather_mean7_UnslothXPOTrainer._log_statistics.<locals>.gather_meanG  s,    ##66v>CCEJJLLrE   ry  rz  r  r  r  rV   r  r  r}  r~  r   r{  r|  r  r   r  rm   rU   r  r  r  r  )r  ra   rl  rs   r+   r  r  r  floatrp  r  r  r  )$r=   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  chosen_rewardsrejected_rewardskl_model_datakl_ref_datamean_klentropy_model_dataentropy_ref_datamean_entropymarginaccuracy	model_eosref_eoss$   `                                   rB   _log_statistics"_UnslothXPOTrainer._log_statistics7  sJ    	M 	

:%%k*&=>

:%%k*&=> (JJ/077L8QRJJ-.55k*6MNJJ0188\E^9_` )B(E(Ea(H%&=&A&A!&D#$9$=$=a$@!&=&A&A!&D# %KXs t#kk+7RTmn14GG"'++{l<Y"w %[L:UWp q58MM

>"))+6K6P6P6RUhUmUmUo6o*pq

#$++K8O8T8T8VYnYsYsYu8u,vw +TYY6.:

#$++K8K8K8M,NO

%&--k:J:O:O:Q.RS 2K-E $$Q'+//!*<<BBDqH

>"))+g*>? 8;;A>>377::*//14D4I4I4KKqP

&'..{</HI  "22

$%,,[-GH QJ%%'

'(//HMMO0LM  ,Q-?@DDYDYDfDffkkpqkr	K(NO);<@U@U@b@bbgglmgn

0188Y__EV9WX

./66{7==?7ST 	

7""4::.

6!!$)),rE   inputsnum_items_in_batchc                 <   UR                  5         [        [        [        UR	                  5       5      5      5      nUS   n[        U5       VVVs/ s H*  obR                  5        VVs0 s H
  u  pxXxU   _M     snnPM,     nnnnU V	s/ s H  n	[        XR                  5      PM     nn	U V	s/ s H<  oR                  XR                  R                  R                  U R                  5      PM>     nn	U R                  U5      nU R                  U5      nUS   R                  S   n
US   US   US.nAU R!                  XQ5      u  pU R#                  XU5      u  pU R$                  b  U R'                  XU
5      u  nnUU:  nOSu  nnU R)                  XU
5      nU R+                  XX5      u  nnnnU R-                  UUUUU5      u  nnnU R/                  UUUR1                  5       UR1                  5       UUUUR1                  5       UR1                  5       U
UU5        U R2                  R4                  b;  U R6                  R8                  U R2                  R4                  -  S:X  a
  [;        5         0 nU R2                  R<                  [>        R@                  [>        RB                  4;   a  U RE                  5       US'   U R2                  RF                  S:  a  URI                  5       nU RJ                  (       a:  [L        RO                  UU RP                  5       nURS                  5         S S S 5        OU RT                  RR                  " U40 UD6  UR1                  5       U R2                  RV                  -  $ s  snnf s  snnnf s  sn	f s  sn	f ! , (       d  f       NL= f)	Nr  prompt_input_idsrV   prompt_attention_maskr  rj  r   r   ),trainr  nextitervaluesrangeitemsr&   rp  tokenize_rowr9   configis_encoder_decoderrm  _prepare_inputsrY   r  r  rl  r  r  r  r  r  detachr>   r   r  global_stepr   r  r   LOMOADALOMO_get_learning_raten_gpur  use_apexamp
scale_loss	optimizerbackwardr  r   )r=   r9   r  r  r   r  ikvxr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r?   scaled_losss                              rB   training_step _UnslothXPOTrainer.training_step  sA    	 d6==?345
"@Ej@QR@Q1||~6~tq1d7~6@QROUVv!+A/D/DEvVmstmshi##Azz'8'8'K'KTMbMbcmst##F+ %%f- 2399!< 23$%<=

  $(#=#=g#M   $88SZ[
 ('+'<'<ZSa'b$L*&*4K'1$L*--jNSK ""5hO 	k!#:<QSj
 (,';';%#!#(
$j* 	%,,.#**,!#	
  II--9

&&)J)JJaOM99??~22N4J4JKK&*&=&=&?F?#99??Q99;D==dnn5$$& 65 %%d5f5{{}tyyDDDD_ 7RVtP 65s1   M<(M69M<	N.ANN6M<
N
model_namedataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
N_name_or_pathunsloth_versionunslothJOB_IDhf_jobsa          @article{jung2024binary,
            title        = {{Exploratory Preference Optimization: Harnessing Implicit Q*-Approximation for Sample-Efficient RLHF}},
            author       = {Tengyang Xie and Dylan J. Foster and Akshay Krishnamurthy and Corby Rosset and Ahmed Awadallah and Alexander Rakhlin},
            year         = 2024,
            eprint       = {arXiv:2405.21046}
        }XPOzcExploratory Preference Optimization: Harnessing Implicit Q*-Approximation for Sample-Efficient RLHFz
2405.21046)
base_modelr0  r  r1  r2  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror<   r9   r  r(   pathisdirr4  setr  straddenvironupdate
_tag_namesr*   dedentr   r  r$   wandbrunurlr    savejoinr>   r   )r=   r0  r1  r2  r:  citation
model_cards          rB   create_model_card$_UnslothXPOTrainer.create_model_card  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ?? $  )!!**%'9';';		@Ueiimm[_.0%}!

 	TYY%9%9;GHrE   )r  rl  r  )NNNNNNNNNNNNNrj  NNr  )NNN)0r\  r]  r^  r_  r`  rI  r	   r   r'   Moduler   r   r   r   r   r   dictrE  r   r   r   r   r  r   r   tupler+   r  	Optimizerlr_schedulerLambdaLRr   rY  propertyr  r  r  r  r  r  r  r  r   rb  r.  rR  rc  rd  re  s   @rB   rg  rg    s
   .` J 487;,0-1$(,0CGEI mq&*FJ59VbhlDH)E7_bii/0E7 "))34E7 ryy)	E7
 )*E7 y!E7  )E7  g&> ?@E7 uWd3<.@%@ABE7 #)+=?UWeef
E7 $,E2I4PgKh2h,i#jE7 d^E7 "(N+;T+A"BCE7  D12!E7" %++//1I1I1R1RRS#E7$ (0%,,9UW\WcWc9c0d'e%E7( u_bii%?@A)E7* 
+E7 E7N  (8$6($!vFr@&,h Q-h rvWEYYWE(,S%c8I2J-J(KWEaijmanWE	WEv %)&*,0	@ISM@I sm@I CcD()	@I @IrE   rg  c                   N   ^  \ rS rSrSr               SU 4S jjrSrU =r$ )UnslothXPOTraineri$  aU  
    
Initialize XPOTrainer as a subclass of [`OnlineDPOConfig`].

Args:
    model (`transformers.PreTrainedModel`):
        The model to train, preferably an `AutoModelForCausalLM`.
    ref_model (`PreTrainedModelWrapper`):
        Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
        and loss. If no reference model is provided, the trainer will create a reference model with the same
        architecture as the model to be optimized.
    reward_funcs (`transformers.PreTrainedModel`):
        The reward model to score completions with, preferably an `AutoModelForSequenceClassification`.
    judge (`BasePairwiseJudge`):
        The judge to use for pairwise comparison of model completions.
    args (`XPOConfig`):
        The XPO config arguments to use for training.
    data_collator (`transformers.DataCollator`):
        The data collator to use for training. If None is specified, the default data collator
        (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
        sequences in the batch, given a dataset of paired sequences.
    train_dataset (`datasets.Dataset`):
        The dataset to use for training.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If provided, will be used to automatically process the inputs
        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
        reuse the fine-tuned model.
    peft_config (`dict`):
        The peft config to use for training.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.
    callbacks (`list[transformers.TrainerCallback]`):
        The callbacks to use for training.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.

.. deprecated:: 0.22.0
    The following parameters are deprecated and will be removed in a future version:

    * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
    * `reward_processing_class`: Use `reward_processing_classes` instead. For example, change
      `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.

    c                 8  > Uc
  [        5       n[        USS5      n[        U5      [        La  Sn[        USS5      n[        U5      [        La  SnSn[        R
                  R                  SS5      S:H  nU(       d1  [        R
                  R                  SS5      S:X  a  [        S5        S	n[        R
                  R                  S
S5      n[        UR                  SS 5      =(       d    [        UR                  SS 5      nUc  UR                  5       R                  nSSKJn  U" U5      nU[        R                  :H  nU(       d  U(       a  U(       a  [        S5      eU(       d  U(       d  U(       a  [        S5      eU(       a"  SUl        SUl        S[        R
                  S'   OCU(       d<  U(       d5  US:X  a/  UUl        U(       + Ul        U(       a  SOS[        R
                  S'   [        USS 5      b-  [        USS5      S:X  a  SUl        [        USS 5      c  SUl        [        USS 5      nUb/  US:  a)  SSKJn  [-        U5      [-        S5      ::  a  [        S5        [        USS5      S:w  aL  [        USS5      nUS:X  a!  UR.                  U:  a  UR.                  Ul        [        US S 5      c
  Ub  UUl        [        US!S5      n[        U5      [        La  Sn[        US"S5      n[        U5      [        La  SnUR                   (       a  U(       a  SUl        S	Ul        UR"                  (       a  U(       a  S	Ul        SUl        U(       a  SUl        SUl        Oc[        R
                  R                  S
S5      S#:X  a  S	Ul        SUl        O0U(       d)  U(       d"  UR"                  Ul        UR                   Ul        Sn[9        5       R                  S$S 5      b  S	n[9        5       R                  S%S 5      b  S	nU(       a  S[        R
                  S&'   S'[9        5       ;  a  [;        US'5      (       d  OD[        US'S 5      n[        US'S 5      n U c'  Ub$  UR<                  n![;        US'5      (       a  U!Ul        Ub!  [;        US(5      (       a  UR?                  5         S)[9        5       ;   a   [;        [@        S*5      (       a  S+[@        l!        S,[9        5       ;   aU  [;        U	S*5      (       a  S+U	l!        [;        U	S)5      (       a,  [;        U	R@                  S*5      (       a  S+U	R@                  l!        S,[9        5       ;   a  U	O[@        n"SS-K"J#n#  [I        UU#5      (       dx  [I        U[J        5      (       a(  S.URL                  ;  a  [O        U"SS/[        US0S 5      S19nO[I        U[N        5      (       a%  S.URL                  ;   a  [K        U"[        US0S 5      S29nOJ[;        US35      (       a  SUl(        [;        US45      (       a  S5Ul)        [;        US65      (       a	  S7S	0Ul*        [I        UU#5      (       dx  [;        U"S85      (       dg  [;        U"S)5      (       aV  [I        U[J        5      (       a   [K        U"R@                  [        US0S 5      S29nO![O        U"R@                  SS/[        US0S 5      S19n/ n$SS9K+J,n%  U%" S:U$5        [        US;S 5      [Z        R\                  :X  a(  UR^                  S:  a  [        US<S5      S:w  a  SUl0        S=[9        5       ;   a!  [;        US(5      (       a  UR?                  5         [b        T(U ]  " SDUUUUUUUUU	U
UUUUUS>.UD6  S=[9        5       ;   a!  [;        US?5      (       a  URg                  5         [;        U S@5      (       a-  U Rh                  Rk                  5         [;        U S@5      (       a  U ?4[        USAS 5      b  U Rl                  UR                  5       l6         [;        U SB5      (       aV  U Rn                  Rp                  n&Un'[;        U'S=5      (       a&  U&U'l9        U'Rt                  n'[;        U'S=5      (       a  M&  U&U'l9         [;        U SC5      (       a.  [w        [y        U Rz                  R|                  5      U 5      U l>        g )ENr   Fr   UNSLOTH_ENABLE_FULL_FINETUNING01UNSLOTH_FORCE_FLOAT32zKUnsloth: Switching to float32 training since model cannot work with float16TUNSLOTH_MIXED_PRECISIONr\   dtypetorch_dtyper   )
_get_dtypezuUnsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`zuUnsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`r   ACCELERATE_MIXED_PRECISIONro  r   r   r   r   r   rV   )__version__z4.45.2z**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`r      r   r   r   bfloat16rs  rv  UNSLOTH_RETURN_LOGITSr   r:   	tokenizerpadding_siderightrp  )UnslothVisionDataCollatorlabelsr   pad_to_multiple_of)mlmmlm_probabilityrp  )rp  r   dataset_text_fieldr   dataset_kwargsskip_prepare_datasetpad)PatchRLStatisticsxpo_trainerparallel_mode_n_gpur9   )r9   rk  rl  r5  r>   rm  rn  ro  rp  rq  rr  rs  rt  rv  rw  r;   neftune_hook_handler,  r  r  rR  )?r   getattrtypeboolr(   rG  getrS  r  get_input_embeddingsrc  unsloth_zoo.utilsre  r+   float16	TypeErrorr   r   r   r   transformersrg  r1   r   r   r   r   r   localsr<   r   r:   rk  rl  unsloth_zoo.vision_utilsrn  r  r3   column_names+TransformersDataCollatorForLanguageModelingr   rs  rt  unsloth_zoo.logging_utilsrw  r5   NOT_DISTRIBUTEDr#  rz  rX  rY  r;   r{  remover,  r  scaleraccelerator_scalerr9   r6   rH   rZ  r  ))r=   r9   rk  rl  r5  r>   rm  rn  ro  rp  rq  rr  rs  rt  rv  rw  r?   use_bf16use_fp16force_float32full_finetuningmixed_precision_dtyperc  re  r  ga_stepstransformers_versioneval_bszr   r   _output_logitsmodel_max_seq_lengthargs_max_seq_lengthr   _UnslothXPOTrainer__tokenizerrn  other_metricsrw  r  current_modelrZ  s)                                           rB   rY  UnslothXPOTrainer.__init__V  s   & < 0 24/>%%x4/>%%x**..)I3OSVVBJJNN3JC$PTW$W_` M "

/H) Tgt4bm]a8b=%"<"<">"D"D%05!5==('hy  JA  @B  :Bg(9  NE  DF  >FDIDI7;BJJ3481F)1SDI#DIAHvfBJJ344.:wt_^b?cgk?k!(Dt\408C$/4!>EHqLH+,0AA @ A4$/47t%A1EH1}!A!AH!Lpt  qQ  qQdNmt6=E(J^  @H`d`| '7?t+e^ '7?t+e^99u)<\`dFY99t)<[`TEX"'D"'DZZ^^5yAZO"&D"'D"&))D"&))D8<<)40<tn8<<7>J]aN25BJJ./68+GDBR4S4S#*52BD#I #*42BD#I"*/C/O!&!5!54!122.D4G!?!? &("wy.'I'Idk9Ka)'88Za:J:W'55'BRB\B\^l:m:m  Zao  pJ  pJ  pW*<*H&iF-)BCC-)?@@XUbUoUoEo K&))07KT)R	! M+VWW\dhu  iC  iC  ]C 6)07KT)R!
 t455TYt7Qt122bD4Kt-..G]_cFd0C-)BCC;..7;3T3Tm-CDD$:#---4T;OQU-V%M
 %P#--#*--4T;OQU-V	%M ?-7 4$/<3O3OOTXT^T^abTbtXq)Q.fh75.#A#A  	2!'))'/(A%-!,I'	2 +1	2  fh75/#B#B!4.//$$++-t2339Q4.5A?C?W?WE&&(<4''%%,,F!M-11390 - 3 3 -11 06M,4!!#$=dnn>R>R$SUYZDJrE   )r  )NNNNNNNNNNNNNNN)r\  r]  r^  r_  r`  rY  rc  rd  re  s   @rB   r\  r\  $  sD    0f $((,!m mrE   r\  )Lr`  r+   r   torch.nnr'   r   r   typingr   r   r   r   r	   r
   r   r   trl.trainer.xpo_trainerr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r(   r)   r*   r,   r-   dataclassesr/   r0   packaging.versionr1   numpynp
contextlibr2   r  r3   r4   r  transformers.training_argsr5   rF   typesr6   rH   torch_compile_optionscompilerl   rb  rw   r   r   r   rg  r\  rR  rE   rB   <module>r     s  0    $ I I I X	  X	  X	  X	  X	  X	  X	  X	  X	  X	 
  ( %   " $  3      4;PR S"||  \\	&,, %  	
 \\6ell C ELL  o-y o- o-` bI) bIF_* _B rE   