
    Y:i             
          S r SSKJr  SSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ
r
JrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%JrJ&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=JrJ>r>J?r?J@r@JArAJBrBJCrCJDrDJErEJFrFJGrGJHrHJrJIrIJrJ
r
JrJrJ#r#J5r5J9r9J@r@Jr  SSK@r@SSK7  SSKJJKrKJLrL  SS	KMJNrN  SSKrSSKOr>SS
KPJ?r?  SSKJr  SSKQJRrRJSrT  SSKUJVrV  SSKWrWSSKXJYrY  S rZ SSSSSS.r[\R                  " SS\[S9S 5       r]S\R                  S\^S\^S\R                  4S jr_S\R                  S\R                  S\^S\^S\R                  4
S jr`S\R                  S\^S\R                  4S jra\K " S  S!\5      5       rb  " S" S#\#5      rc " S$ S%\c5      rd \e" \9S&5      (       a3  SSK:r: " S' S(\:R                  5      rg \9R                  " \g" S)5      5        gg)*z;
2025.10.10
2025.10.9
4.56.2
0.23.0
__UNSLOTH_VERSIONING__
    )TensorN)
functional)AnyListOptionalTupleUnionDictSetCallable)Ir   AutoModelForCausalLMBaseImageProcessorr   DPODataCollatorWithPaddingDataCollator
DataLoaderDatasetEvalLoopOutputFFeatureExtractionMixin	KTOConfig
KTOTrainerLiteralr   PartialStatePath	PeftModelPreTrainedModelPreTrainedTokenizerBaseProcessorMixinSequentialSamplerTrainerTrainerCallbackTrainingArgumentsr	   _get_kl_dataset_process_tokens	_tokenizeautocastconcatenate_datasetscontextmanagercreate_reference_modeldefaultdictdisable_dropout_in_modelgenerate_model_cardget_comet_experiment_url
has_lengthinspectis_comet_availableis_liger_kernel_availableis_peft_availableis_wandb_available
itemgetterlog_table_to_comet_experimentloggerloggingmaybe_apply_chat_templatemaybe_extract_promptmaybe_unpair_preference_datasetnnnpnullcontextospad_to_lengthpdpeft_module_casting_to_bf16prepare_deepspeedprepare_model_for_kbit_trainingrandomselective_log_softmaxtextwraptorchtqdmr   r   r   r   r    r2   r6   r>   rG   )*)	dataclassfield)Version)r=   )DataCollatorForSeq2SeqDataCollatorForLanguageModeling)ParallelMode)
MethodTypec                 F   ^  [         R                  " T 5      U 4S j5       nU$ )Nc                 8  > [        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         T" U /UQ70 UD6n[        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         U$ )Nmodelfor_trainingfor_inference)hasattrrS   rT   rU   )selfargskwargsoutputfs       H/home/james-whalen/llama.cpp/unsloth_compiled_cache/UnslothKTOTrainer.pywrapper*prepare_for_training_mode.<locals>.wrapper0   sx     4!!gdjj.&I&IJJ##%4)$)&)4!!gdjj/&J&JJJ$$&    )	functoolswraps)r[   r]   s   ` r\   prepare_for_training_moderb   /   s%    __Q  Nr_   TF)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphs)dynamic	fullgraphoptionsc                 d   [         R                  " U R                  SU R                  S   5      SSS9n[         R                  " UR                  S5      SSS9n/ n[	        X#5       H  u  pVUR                  [         R                  5      n[         R                  " USUR                  S5      S9R                  S5      n[         R                  " USS9nXx-
  n	UR                  U	5        M      [         R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )N   r   )chunksdim)rm   indexrm      )rG   chunkreshapeshapeziptofloat32gather	unsqueezesqueeze	logsumexpappendconcat)
logitsrn   chunked_logitschunked_indexall_per_token_logpschunk_logitschunk_indexselected_logitslogsumexp_valuesper_token_logpss
             r\   chunked_selective_log_softmaxr   E   s    [[FLL4D!EPQYZ[N[[r!2QaHM%(%G!#u}}5,,|2{G\G\]_G`aiijlm ??<rB)<""?3 &H 	,,':;-55v||AUV6XYr_   	input_idslogits_to_keeppad_token_idreturnc                 ~    XR                   S   :  a  [        S5      eU SS2SU* 24   nX2:H  nUR                  SS9nU$ )zr
Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens 
rp   z8logits_to_keep must be smaller than the sequence length.Nro   )rs   
ValueErrorsum)r   r   r   prompt_sectionpadding_maskpad_token_countss         r\   calculate_pad_tokens_in_promptr   W   sX     ++STTq"2N?"223N"2L#''A'.r_   completion_input_idsleft_pad_tokens_per_promptmax_left_padc                     U R                   u  pEU R                  nX!-
  n[        R                  " XVS9R	                  S5      nXR	                  S5      :  n	X:g  n
X-  nU$ )a)  
Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]

Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
and pad are pad tokens, this function would make a completion mask that would 0 out the pad
and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
devicer   rp   )rs   r   rG   arangerx   )r   r   r   r   
batch_sizecompletion_lenr   num_tokens_to_maskindices
shift_masknon_padding_mask
final_masks               r\    create_completion_attention_maskr   j   si     "6!;!;J!((F%Bll>9CCAFG88;;J,<.Jr_   tensorpad_idc                 l    X:g  n[         R                  " USSSS9n[         R                  " U SU5      nU$ )zD
Moves all padding tokens in each sequence of a batch to the right.
rp   T)rm   
descendingstable)rG   argsortrw   )r   r   masksorted_indicespacked_tensors        r\   left_pack_paddingr      s8     D]]4Q4MNLLN;Mr_   c                     ^  \ rS rSr% Sr\" SSS0S9r\\   \	S'   \" SSS	0S9r
\\   \	S
'   \" SSS0S9r\\   \	S'                                                                                                                                                          SU 4S jjrSrU =r$ )UnslothKTOConfig   u  
    
Configuration class for the [`KTOTrainer`].

This class includes only the parameters that are specific to KTO training. For a full list of training arguments,
please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
differ from those in [`~transformers.TrainingArguments`].

Using [`~transformers.HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.

Parameters:
    max_length (`int` or `None`, *optional*, defaults to `1024`):
        Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
        to use the default data collator.
    max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
        Maximum length of the prompt. This argument is required if you want to use the default data collator.
    max_completion_length (`int` or `None`, *optional*, defaults to `None`):
        Maximum length of the completion. This argument is required if you want to use the default data collator
        and your model is an encoder-decoder.
    beta (`float`, *optional*, defaults to `0.1`):
        Parameter controlling the deviation from the reference model. Higher β means less deviation from the
        reference model.
    loss_type (`str`, *optional*, defaults to `"kto"`):
        Type of loss to use. Possible values are:

            - `"kto"`: KTO loss from the [KTO](https://huggingface.co/papers/2402.01306) paper.
            - `"apo_zero_unpaired"`: Unpaired variant of APO-zero loss from the
              [APO](https://huggingface.co/papers/2408.06266) paper.

    desirable_weight (`float`, *optional*, defaults to `1.0`):
        Desirable losses are weighed by this factor to counter unequal number of desirable and undesirable paris.
    undesirable_weight (`float`, *optional*, defaults to `1.0`):
        Undesirable losses are weighed by this factor to counter unequal number of desirable and undesirable pairs.
    label_pad_token_id (`int`, *optional*, defaults to `-100`):
        Label pad token id. This argument is required if you want to use the default data collator.
    padding_value (`int` or `None`, *optional*, defaults to `None`):
        Padding value to use. If `None`, the padding value of the tokenizer is used.
    truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
        Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
        This argument is required if you want to use the default data collator.
    generate_during_eval (`bool`, *optional*, defaults to `False`):
        If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
        during evaluation.
    is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
        When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
        you need to specify if the model returned by the callable is an encoder-decoder model.
    precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
        Whether to precompute reference model log probabilities for training and evaluation datasets. This is
        useful when training without the reference model to reduce the total GPU memory needed.
    model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
        string.
    ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
        from a string.
    dataset_num_proc: (`int` or `None`, *optional*, defaults to `None`):
        Number of processes to use for processing the dataset.
    disable_dropout (`bool`, *optional*, defaults to `True`):
        Whether to disable dropout in the model and reference model.
    use_liger_loss (`bool`, *optional*, defaults to `False`):
        Whether to use Liger loss. It requires liger-kernel to be installed.
    base_model_attribute_name (`str`, *optional*, defaults to `"model"`):
        Name of the attribute in the model that contains the base model. This is used to get the base model from
        the model when the model does not have a `get_decoder` method in the case when `use_liger_loss` is `True`.

    NhelpzvLLM SamplingParams)defaultmetadatavllm_sampling_paramsrj   z8Chunk size to reduce memory usage. -1 is most efficient.unsloth_num_chunksz'Maximum sequence length to truncate to.max_seq_lengthc                   > US:  a  [        SU S35        US:  a  [        SU S35        Uc  U#S:X  a
  U$S:X  a  SnS	n#Wc$  S
SKJn  [        [	        U" 5       S-   S5      S5      n[
        TU ]  " S0 SU_SU_SU_SU_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_S U_S!U_S"U_S#U_S$U_S%U_S&U_S'U_S(U_S)U_S*U_S+U_S,U_S-U_S.U _S/U!_S0U"_S1U#_S2U$_S3U%_S4U&_S5U'_S6U(_S7U)_S8U*_S9U+_S:U,_S;U-_S<U._S=U/_S>U0_S?U1_S@U2_SAU3_SBU4_SCU5_SDU6_SEU7_SFU8_SGU9_SHU:_SIU;_SJU<_SKU=_SLU>_SMU?_SNW@_SOWA_SPWB_SQWC_SRWD_SSWE_STWF_SUWG_SVWH_SWWI_SXWJ_SYWK_SZWL_S[WM_S\WN_S]WO_S^WP_S_WQ_S`WR_SaWS_SbWT_ScWU_SdWV_SeWW_SfWX_SgWY_ShWZ_SiW[_SjW\_SkW]_SlW^_SmW__SnW`_SoWa_SpWb_SqWc_SrWd_SsWe_StWf_SuWg_SvWh_SwWi_SxWj_SyWk_SzWl_S{Wm_S|Wn_S}Wo_S~Wp_SWq_SWr_SWs_SWt_SWu_SWv_SWw_SWx_SWy_SWz_SW{_SW|_SW}_SW~_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_WD6  WU l        WU l        WU l	        g )NgHz>z Unsloth: Your learning rate of `zi` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!rp   za` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!steps  unsloth_training_checkpointsnor   )	cpu_countrk      @   
output_diroverwrite_output_dirdo_traindo_eval
do_predicteval_strategyprediction_loss_onlyper_device_train_batch_sizeper_device_eval_batch_sizeper_gpu_train_batch_sizeper_gpu_eval_batch_sizegradient_accumulation_stepseval_accumulation_steps
eval_delaytorch_empty_cache_stepslearning_rateweight_decay
adam_beta1
adam_beta2adam_epsilonmax_grad_normnum_train_epochs	max_stepslr_scheduler_typewarmup_ratiowarmup_steps	log_levellog_level_replicalog_on_each_nodelogging_dirlogging_strategylogging_first_steplogging_stepslogging_nan_inf_filtersave_strategy
save_stepssave_total_limitsave_safetensorssave_on_each_nodesave_only_model'restore_callback_states_from_checkpointno_cudause_cpuuse_mps_deviceseed	data_seedjit_mode_evaluse_ipexbf16fp16fp16_opt_levelhalf_precision_backendbf16_full_evalfp16_full_evaltf32
local_rankddp_backendtpu_num_corestpu_metrics_debugdebugdataloader_drop_last
eval_stepsdataloader_num_workersdataloader_prefetch_factor
past_indexrun_namedisable_tqdmremove_unused_columnslabel_namesload_best_model_at_endmetric_for_best_modelgreater_is_betterignore_data_skipfsdpfsdp_min_num_paramsfsdp_config"fsdp_transformer_layer_cls_to_wrapaccelerator_configparallelism_config	deepspeedlabel_smoothing_factoroptim
optim_args	adafactorgroup_by_lengthlength_column_name	report_toddp_find_unused_parametersddp_bucket_cap_mbddp_broadcast_buffersdataloader_pin_memorydataloader_persistent_workersskip_memory_metricsuse_legacy_prediction_looppush_to_hubresume_from_checkpointhub_model_idhub_strategy	hub_tokenhub_private_repohub_always_pushhub_revisiongradient_checkpointinggradient_checkpointing_kwargsinclude_inputs_for_metricseval_do_concat_batchesfp16_backendpush_to_hub_model_idpush_to_hub_organizationpush_to_hub_tokenmp_parametersauto_find_batch_sizefull_determinismtorchdynamo	ray_scopeddp_timeouttorch_compiletorch_compile_backendtorch_compile_modeinclude_tokens_per_secondinclude_num_input_tokens_seenneftune_noise_alphaoptim_target_modulesbatch_eval_metricseval_on_startuse_liger_kernelliger_kernel_configeval_use_gather_objectaverage_tokens_across_devices
max_lengthmax_prompt_lengthmax_completion_lengthbeta	loss_typedesirable_weightundesirable_weightlabel_pad_token_idpadding_valuetruncation_modegenerate_during_evalis_encoder_decoderdisable_dropoutprecompute_ref_log_probsmodel_init_kwargsref_model_init_kwargsdataset_num_procuse_liger_lossbase_model_attribute_name )
printmultiprocessingr   minmaxsuper__init__r   r   r   )rW   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rE  rF  rG  rH  rI  rJ  rK  rL  rM  rN  rO  r   r   r   rY   r   	__class__s                                                                                                                                                             r\   rV  UnslothKTOConfig.__init__   s   v 4)I-  YB  (C  "D1e&F}o  Vw  %x  y-7":zS?P7J M#1"3y{1}a#8"= T	L#T	L#7T	L  T	L 	T	L
 $T	L *T	L $8T	L +FT	L *DT	L (@T	L '>T	L +FT	L '>T	L $T	L '>T	L  *!T	L" (#T	L$ $%T	L& $'T	L( ()T	L* *+T	L,  0-T	L. "/T	L0 !21T	L2 (3T	L4 (5T	L6 "7T	L8 !29T	L:  0;T	L< &=T	L>  0?T	L@ "4AT	LB *CT	LD &<ET	LF *GT	LH $IT	LJ  0KT	LL  0MT	LN !2OT	LP .QT	LR 7^ST	LT UT	LV WT	LX ,YT	LZ [T	L\ "]T	L^ *_T	L`  aT	Lb cT	Ld eT	Lf ,gT	Lh &<iT	Lj ,kT	Ll ,mT	Ln oT	Lp $qT	Lr &sT	Lt *uT	Lv !2wT	Lx yT	Lz $8{T	L| $}T	L~ &<T	L@ *DAT	LB $CT	LD  ET	LF (GT	LH %:IT	LJ &KT	LL &<MT	LN %:OT	LP !2QT	LR  0ST	LT UT	LV #6WT	LX &YT	LZ 2T[T	L\ "4]T	L^ "4_T	L` "aT	Lb &<cT	Ld eT	Lf $gT	Lh "iT	Lj .kT	Ll "4mT	Ln "oT	Lp *DqT	Lr !2sT	Lt %:uT	Lv %:wT	Lx -JyT	Lz #6{T	L| *D}T	L~ &T	L@ &<AT	LB (CT	LD (ET	LF "GT	LH  0IT	LJ .KT	LL (MT	LN &<OT	LP -JQT	LR *DST	LT &<UT	LV (WT	LX $8YT	LZ (@[T	L\ !2]T	L^ *_T	L` $8aT	Lb  0cT	Ld &eT	Lf "gT	Lh &iT	Lj *kT	Ll %:mT	Ln "4oT	Lp )BqT	Lr -JsT	Lt #6uT	Lv $8wT	Lx "4yT	Lz *{T	L|  0}T	L~ #6T	L@ &<AT	LB -JCT	LD $ET	LF !2GT	LH %:IT	LJ KT	LL "MT	LN  0OT	LP "4QT	LR "4ST	LT *UT	LV .WT	LX $8YT	LZ "4[T	L\ .]T	L^ (@_T	L` !2aT	Lb %:cT	Ld  0eT	Lf ,gT	Lh )BFiT	Lj %9!"4,r_   )r   r   r   )NNFFFr   Frk   rk   NNr   r   r      g-C6
?g{Gz?g?g+?g:0yE>      ?g      @rj   linear皙?r   passivewarningTNr   Frp   Fr   r   NTFFFFFFO  r_  FFFFO1autoFFNrj   NNF FNr   Nrj   NNTNFNNFrb  r   NNNNN        
adamw_8bitNFFlengthNNNNTFTFFNN
every_saveNNFNTNFTra  NNNrb  FFNlasti  FNNFFNNFFFNFTi      Nr\  ktorZ  rZ  Nkeep_endFNTFNNNFrS   Nrj   N)__name__
__module____qualname____firstlineno____doc__rK   r   r   r   __annotations__r   intr   rV  __static_attributes____classcell__rW  s   @r\   r   r      sI   CH +012+(3-  */VW*#  &+EF&NXc]  #$&'%&#'"&&'"#"%$%""!&!27!'!$!"%) $!& $  -1!!!$%%)  $ $(-"%*!%#!%(,%*!%##' $  $!$)(-"#" "!&(, $ !$$!#(  $$+#q{- {-r_   r   c                    |  ^  \ rS rSrSrSS/r               SGS\\\R                  \
4   S\\\\R                  \
4      S\S	\\   S
\\\\\
\4   4      S\\\\\\4      S\\   S\\/ \4      S\\\      S\\R2                  R4                  \R2                  R6                  R8                  4   S\\\R:                  \R:                  /\R:                  4      S\\   S\\\/\4      S\\
   S\\
   4U 4S jjjr\ S 5       r!S\"4U 4S jjr#SHS
\\   S\"4U 4S jjjr$S\S\4S jr%\&   SIS\RN                  S\RP                  S\)S\*S \)S\RN                  4S! jj5       r+S\R                  S"\\
\\\RP                  4   4   S\\RN                  \RN                  \RN                  \RN                  4   4S# jr,S$\RN                  S%\RN                  S&\RN                  S'\RN                  S(\RN                  S)\RN                  S\\RN                  \RN                  \RN                  \RN                  4   4S* jr-S+ r.S, r/S"\\
\\\RP                  4   4   4S- jr0  SJS\\\R                  4   S.\\
\\R:                  \14   4   S\\R:                  \\R:                  \\
\R:                  4   4   4   4S/ jjr2SKS0\\
\34   S1\4S2   SS4S3 jjr5SHS4\\   S\\Rl                  Rn                  Rp                     4S5 jjr9S"\\
\RP                  4   S\\
\
4   4S6 jr: SHS\\\R                  4   S.\\
\\R:                  \14   4   S7\)S8\\\
      4S9 jjr;   SLS:\"S;\
S7\\)   S8\\\
      S<\
S\4U 4S= jjjr<SHS>\\
\34   S?\\3   SS4U 4S@ jjjr=U 4SA jr>   SMSB\\
   SC\\
   SD\\
\\
   S4   4SE jjr?SFr@U =rA$ )N_UnslothKTOTraineri  a
  
Initialize KTOTrainer.

Args:
    model (`transformers.PreTrainedModel`):
        The model to train, preferably an `AutoModelForSequenceClassification`.
    ref_model (`PreTrainedModelWrapper`):
        Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
        and loss. If no reference model is provided, the trainer will create a reference model with the same
        architecture as the model to be optimized.
    args (`KTOConfig`):
        The arguments to use for training.
    train_dataset (`datasets.Dataset`):
        The dataset to use for training.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If provided, will be used to automatically process the inputs
        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
        reuse the fine-tuned model.
    data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
        The data collator to use for training. If None is specified, the default data collator
        (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
        sequences in the batch, given a dataset of paired sequences.
    model_init (`Callable[[], transformers.PreTrainedModel]`):
        The model initializer to use for training. If None is specified, the default model initializer will be
        used.
    callbacks (`list[transformers.TrainerCallback]`):
        The callbacks to use for training.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.
    peft_config (`dict`, defaults to `None`):
        The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
        a PEFT model.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.
    model_adapter_name (`str`, defaults to `None`):
        Name of the train target PEFT adapter, when using LoRA with multiple adapters.
    ref_adapter_name (`str`, defaults to `None`):
        Name of the reference PEFT adapter, when using LoRA with multiple adapters.
trlri  NrS   	ref_modelrX   train_dataseteval_datasetprocessing_classdata_collator
model_init	callbacks
optimizerspreprocess_logits_for_metricspeft_configcompute_metricsmodel_adapter_nameref_adapter_namec                   > [        U5      [        L a  [        S5      e[        U[        5      (       d  X!L a  [        S5      eUR
                  c  0 nO[        U[        5      (       d  [        S5      eUR
                  nUR                  S5      nUbd  [        U[        5      (       a  US:w  a  [        [        U5      nUS:w  a.  [        U[        R                  5      (       d  [        SU S35      eUUS'   UR                  c  0 nO[        U[        5      (       d  [        S5      eUR                  nUR                  S5      nUbd  [        U[        5      (       a  US:w  a  [        [        U5      nUS:w  a.  [        U[        R                  5      (       d  [        SU S35      eUUS'   [        U[        5      (       a  [        R                  " U40 UD6n[        U[        5      (       a  [        R                  " U40 UD6nS	U l        [        5       (       d  Ub  [        S
5      e[        5       (       GaN  UGbJ  [        U[        5      (       a  UR!                  5       n[        USS	5      (       d  [        USS	5      (       at  [#        US5      =(       a0    S[%        [&        R(                  " [*        5      R,                  5      ;   nSUR.                  0nU(       a  UR0                  US'   [+        U40 UD6nOUUR.                  (       aD  [#        US5      (       a  UR3                  5         O"S nUR5                  5       R7                  U5        UnUR8                  (       a$  [        USS	5      (       a  [;        U5        SU l        OUUR.                  (       aD  [#        US5      (       a  UR3                  5         O"S nUR5                  5       R7                  U5        UR<                  (       a)  [?        5       (       d  [A        5       (       d  [        S5      eUb  URB                  RD                  U l"        O)URD                  c  [        S5      eURD                  U l"        [        5       =(       a    [        U[        5      U l#        Xl$        Xl%        U(       a  X l&        O:U RF                  (       d  URN                  (       a  S U l&        O[Q        U5      U l&        Uc  [        S5      eURR                  c  [T        RV                  " S5        SnURR                  b  URR                  nURX                  c  [T        RV                  " S5        SnURX                  b  URX                  nS nURZ                  c)  U RD                  (       a  [T        RV                  " S5        SnURZ                  b  U RD                  (       a  URZ                  nUc_  []        UR^                  UR`                  U RD                  S9nURb                  (       a  S	Ul1        [T        RV                  " S5        SU l2        OS	U l2        URf                  (       a-  [i        U5        U RL                  b  [i        U RL                  5        URj                  U l5        WU l)        UR<                  U l        UR`                  U l0        URl                  b  URl                  OUR^                  U l6        WU l,        URn                  U l7        UU l-        X`l8        URN                  U l'        SU l9        U Rj                  S;   a  S	U l9        S	U l:        S	U l;        [y        S 5      U l=        UR|                  U l>        UR~                  U l?        UR                  U l@        [        URB                  SS	5      U lA        [        URB                  S S!5      U lB        U R                  (       a&  U R                  S!:X  a  [T        RV                  " S"5        SUR                  S#'   [        5       R                  5          UR                  [        UR                  S$S%9n[        XCR                  S&S'9nUR                  [        S(U0UR                  S)S*9nUbU  UR                  [        UR                  S+S%9n[        XSR                  S,S'9nUR                  [        S(U0UR                  S-S*9nUR                  [        SS(U Rp                  0UR                  S.S/9nS0U RD                  U Rp                  U RR                  U Rn                  U R`                  U RX                  U RZ                  S1.nUR                  [        UUR                  S2S*9nUbM  UR                  [        S(U Rp                  0SUR                  S3S49nUR                  [        UUR                  S5S*9nU Rr                  (       Ga-  UR                  S6::  a  [        S75      eUR                  [        SUR                  UR                  S8S99nS:US;'   UR                  [        UUR                  UR                   Vs/ s H  nUUR                  ;   d  M  UPM     snS<S=9n[        UU/S6S>9nUb  UR                  [        SUR                  UR                  S?S99nUR                  [        UUR                  UR                   Vs/ s H  nUUR                  ;   d  M  UPM     snS@S=9n[        UU/S6S>9n[        [        USA   5      S65      n[        [        USA   5      U-
  S65      nUU:w  a  [        UU R                  -  U-  S6-  SB5      n[        UU R                  -  U-  SC-  SB5      n [        UU R~                  -  U-  SC-  SB5      n![        UU R~                  -  U-  S6-  SB5      n"UU R~                  s=:*  =(       a    U :*  Os  n#U!U R                  s=:*  =(       a    U":*  Os  n$U#(       d*  U$(       d#  [T        RV                  " SDU SEU  SFU! SEU" SG3	5        S S S 5        [        T%U G]Y  UUUUUUUUU	U
USH9  S	U lW        [#        U R                  SI5      (       a%  U R                  R                  U R                  5        [#        U SJ5      (       d  [        SK5      eU R                  (       aJ  U R                  R                  R                  R                  SL:X  a  U RN                  (       a  [        SM5      eU RL                  c.  U RF                  (       d  U RN                  (       d  [        SN5      eO`U R                  (       a&  [        U RL                  U R                  5      U l&        O)U R                  R                  U RL                  SSO9U l&        U R                  R                  (       a  [        5       (       d  [        SP5      eU Rj                  S;   a  [        SQ5      eU RN                  (       a  [        SR5      eU RF                  (       d  U RJ                  b  [        SS5      e[        U R`                  U R|                  U RL                  S LST9U lh        g g s  snf s  snf ! , (       d  f       GNQ= f)UNz1Please use `KTOConfig` instead TrainingArguments.z`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the same as `model`, you must mass a copy of it, or `None` if you use peft.zRYou passed model_kwargs to the KTOTrainer. But your model is already instantiated.dtypera  zhInvalid `dtype` passed to the KTOConfig. Expected a string with either `torch.dtype` or 'auto', but got .zZYou passed ref_model_kwargs to the KTOTrainer. But your ref_model is already instantiated.FzPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it with `pip install peft` to use the PEFT modelsis_loaded_in_8bitis_loaded_in_4bitr#  use_gradient_checkpointingenable_input_require_gradsc                 &    UR                  S5        g NTrequires_grad_moduleinputrZ   s      r\   make_inputs_require_grad=_UnslothKTOTrainer.__init__.<locals>.make_inputs_require_grad  s    --d3r_   Tc                 &    UR                  S5        g r  r  r  s      r\   r  r    s    ))$/r_   z`generate_during_eval=True` requires Weights and Biases or Comet to be installed. Please install `wandb` or `comet-ml` to resolve.zMWhen no model is provided, you need to pass the parameter is_encoder_decoder.zdmax_length or a processing_class must be specified when using the default DPODataCollatorWithPaddingzWhen using DPODataCollatorWithPadding, you should set `max_length` in the KTOTrainer's init it will be set to `512` by default, but you should do it yourself in the future.rh  zWhen using DPODataCollatorWithPadding, you should set `max_prompt_length` in the KTOTrainer's init it will be set to `128` by default, but you should do it yourself in the future.   zWhen using DPODataCollatorWithPadding with an encoder decoder architecture, you should set `max_completion_length` in the KTOTrainer's init it will be set to `128` by default, but you should do it yourself in the future.)r   rD  rH  zWhen using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your KTOConfig we have set it for you, but you should do it yourself in the future.)apo_zero_unpairedc                       [        [        5      $ N)r*   listrP  r_   r\   <lambda>-_UnslothKTOTrainer.__init__.<locals>.<lambda>7  s	    ;t3Dr_   output_router_logitsrouter_aux_loss_coefrc  a-  You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to `0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary loss.estimate_tokensz$Extracting prompt from train dataset)num_procdesczUnpairing train dataset)r  	tokenizerz'Applying chat template to train dataset)	fn_kwargsr  r  z#Extracting prompt from eval datasetzUnpairing eval datasetz&Applying chat template to eval datasetzTokenizing train dataset)batchedr  r  r  rb  )prefixrH  r  r=  rF  rD  r>  r?  z"Processing tokenized train datasetzTokenizing eval dataset)r  r  r  r  z!Processing tokenized eval datasetrp   zActual (not effective) batch size must be > 1. KTO will not work properly because the KL term will be equivalent to the implied reward.zExtracting KL train dataset)r  r   r  r  KL_r  z%Processing tokenized train KL dataset)r  r  remove_columnsr  )axiszExtracting eval KL datasetz$Processing tokenized eval KL datasetlabelr   gHzG?zYou have different amounts of desirable/positive and undesirable/negative examples but the weights on the desirable and undesirable losses don't seem to be in an ideal range. Based on your data, we recommend EITHER desirable_weight in [z, z] or undesirable_weight in [zN] (but NOT BOTH). See the documentation on how to optimally set these weights.)rS   rX   r}  rz  r{  r|  r~  r  r  r  r  add_model_tagsacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`.   zrYou cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`.z]No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`)evaluation_modezYou set `use_liger_loss=True` but the liger kernel is not available. Please install liger-kernel first: `pip install liger-kernel`znYou cannot set `loss_type='apo_zero_unpaired'` with liger-kernel.Only KTO loss is supported with liger-kernel.znYou cannot use `precompute_ref_log_probs=True` with liger kernel. Please set `precompute_ref_log_probs=False`.zYYou cannot use `use_liger_loss=True` with Peft models. Please set `use_liger_loss=False`.)ignore_indexr@  use_ref_model)ityper"   r   
isinstancestrrK  getgetattrrG   r  rL  r   from_pretrained_peft_has_been_casted_to_bf16r2   r   merge_and_unloadrV   r  r/   	signaturerC   
parametersr"  r#  r  get_input_embeddingsregister_forward_hookr   rA   rG  r3   r0   configrH  is_peft_modelr  r  ry  rJ  r)   r=  r6   r^  r>  r?  r   r   rD  r   use_dpo_data_collatorrI  r+   rA  rE  rF  r|  calculate_KL _precomputed_train_ref_log_probs_precomputed_eval_ref_log_probsr*   _stored_metricsr@  rB  rC  aux_loss_enabledaux_loss_coefwarnings_issuedr   main_process_firstmapr9   rM  r:   r8   r%   r$   r   r#   column_namesr'   rT  r   lenroundrU  rV  model_accepts_loss_kwargsrS   r  
_tag_namesAttributeErroris_deepspeed_enabledr  statedeepspeed_plugin
zero_stagerB   prepare_modelrX   rN  r1   ImportErrorLigerFusedLinearKTOLosskto_loss_fn)&rW   rS   ry  rX   rz  r{  r|  r}  r~  r  r  r  r  r  r  r  rK  r  rL  _support_gc_kwargsprepare_model_kwargsr  r=  r>  r?  r  train_kl_datasetceval_kl_datasetnum_desirablenum_undesirabledes_weight_lower_bounddes_weight_upper_boundund_weight_lower_boundund_weight_upper_bounddes_weight_in_rangeund_weight_in_rangerW  s&                                        r\   rV  _UnslothKTOTrainer.__init__N  s   ( :**PQQ%%%)*<Z 
 !!) "E3''qrr $ 6 6%))'2E eS))evo#E51EF?:eU[[+I+I$ C  DI  CJ  JK  L  .3!'*%%-$&!Is++l  %)$>$>!)--g6E eS))evo#E51EF?:eU[[+I+I$ C  DI  CJ  JK  L  27%g.eS!!(88TBSTEi%%,<<Y`J_`I .3* ""{'> a    [%<%++..0u1599WUL_af=g=g%,9& &5%%&EFQQ:  # )EdFaFa'b$%LPLnLn()HI7VAUV,,5">??4464 ..0FFG_` EyyWU,?GG+E2592
 ((u:;;0020 **,BBC[\$$.@.B.BFXFZFZD 
 &+ll&E&ED#$$,lmm&*&=&=D#.0QZy5Q"4 0&N4#@#@!DN3E:DN#v  ??"NNd J??&J!!)NNd !$!!- $ 6 6 $%%-$2I2INNd %(!%%1d6M6M$($>$>! 6-::#'#:#:#'#:#:M ))-2*\
 *.D&).D& $U+~~)(8$$($=$=!"&"9"9373E3E3QT//WgWtWt!2#33%:" 0(,(E(E% !>>22 %D 16-/4,  ++DE II	 $ 5 5"&"9"9 '6Le T$U\\3I3O  T%7%73%>NN 48/0 ^..0)--$t/D/DKq . M <44;TM *--)&(89..>	 . M '+//(43H3HOt  0    ? "7"7>V   ,//-*,<=!22A	  0   *--&(=(=>../ . M &*&=&=!22"oo#'#7#7&*&=&=%)%;%;)-)C)C	I *--#..9	 . M '+//*D,A,AB !222  0    ,//#'!22<	  0      33q8$ b  $1#4#4# #??!226 $5 $  ',	(##3#7#7#'!22/?/L/L#p/L!PQUbUoUoPoA/L#p@ $8 $  !5mEU5V]^ _+&2&6&6' $#'#C#C!%!6!69 '7 'O '6&9&9'"+!%!6!63B3O3O'r3OaSTXdXqXqSq3O'rC ': 'O $88W^_#`L  M'$: ;Q?M!#mG&<"="MqQO/).$BYBY0Y\i0imn/npq)r&).$BYBY0Y\i0imq/qst)u&).@U@U0UXg0gko/oqr)s&).@U@U0UXg0gkl/lno)p&&<@U@U&o&oYo&o#&<@W@W&q&q[q&q#+/BNN0 1G/GrJ`Ia b22H1ILbKc dW	WW 1h 	''%-!+!*G 	 	
" */& 4::/00JJ%%doo6t]++ j 
 $$%%66AAQF4KhKh  I  >>!&&$*G*G s  ((!24>>4CSCS!T!%!1!1!?!?`d!?!e 99##,..!T  ~~!66 D  ,, 8  !!T%:%:%F o   7!44499UYUcUckoUo D) $G $q* (sk 10s9   	H#y,yy
A*y4yyD6y
y
y!c              #     #    U R                   (       aD  U R                  (       d3  U R                  R                  U R                  5      R                  5       O	[        5          U R                  (       a%  U R                  R                  U R                  5        Sv   U R                  (       a.  U R                  R                  U R                  =(       d    S5        SSS5        g! , (       d  f       g= f7f)zWContext manager for handling null reference model (that is, peft adapter manipulation).Nr   )	r  r  r  unwrap_modelrS   disable_adapterr=   set_adapterr  )rW   s    r\   null_ref_context#_UnslothKTOTrainer.null_ref_context-  s     
 !!$*?*? ))$**5EEG $$

&&t'<'<=$$

&&t'>'>'K)L  s   A C6"A:C%	C6%
C3/C6r   c                 ,  > U R                   (       Ga  U R                  (       Gd  U R                  R                  U R                  U R                  R
                  U R                  R                  SS.nU R                  R                  [        U R                  40 UD65      n/ n/ n[        USS9 H  nU R                  U5      u  pgU R                  R                  U5      nUR                  UR                  5       5        U R                   (       d  Mc  U R                  R                  U5      nUR                  UR                  5       5        M     U R                  R#                  S[$        R&                  " U5      R)                  5       R+                  5       S9U l
        U R                   (       aO  U R                  R#                  S[$        R&                  " U5      R)                  5       R+                  5       S9U l
        SU l        [,        TU ]]  5       $ )	z
Returns the training [`~torch.utils.data.DataLoader`].

Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`.
Fr   
collate_fnnum_workers
pin_memoryshufflez!Train dataset reference log probsiterabler  reference_logpsnamecolumnreference_KL_logpsT)rJ  r  rX   r   r}  r   r  r  preparer   rz  rH   compute_reference_log_probsgather_for_metricsr{   cpur  
add_columnrG   catfloatnumpyrU  get_train_dataloader)	rW   dataloader_paramsdata_loaderreference_completion_logpsr  padded_batchreference_completion_logpreference_KL_logprW  s	           r\   r   '_UnslothKTOTrainer.get_train_dataloader;  s    (((1V1V1V"iiCC"00#yy??"ii== ! **22:d>P>P3fTe3fgK)+&!# $k@c d?C?_?_`l?m<),0,<,<,O,OPi,j)*112K2O2O2QR$$$(,(8(8(K(KL](^%&--.?.C.C.EF !e "&!3!3!>!>&uyy9S/T/Z/Z/\/b/b/d "? "D   %)%7%7%B%B-eii@R6S6Y6Y6[6a6a6c &C &" 59D1w+--r_   c                 X  > Uc  U R                   c  [        S5      eUb  UOU R                   nU R                  (       Ga  U R                  (       Gd  U R                  R
                  U R                  U R                  R                  U R                  R                  SS.nU R                  R                  [        U40 UD65      n/ n/ n[        USS9 H  nU R                  U5      u  pxU R                  R                  U5      nUR                  UR!                  5       5        U R"                  (       d  Mc  U R                  R                  U5      nUR                  UR!                  5       5        M     UR%                  S[&        R(                  " U5      R+                  5       R-                  5       S9nU R"                  (       a@  UR%                  S[&        R(                  " U5      R+                  5       R-                  5       S9nU R                   b  Xl         S	U l        [.        T	U ]a  US
9$ )a  
Returns the evaluation [`~torch.utils.data.DataLoader`].

Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`.

Args:
    eval_dataset (`torch.utils.data.Dataset`, *optional*):
        If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
        by the `model.forward()` method are automatically removed. It must implement `__len__`.
z-Trainer: evaluation requires an eval_dataset.Fr  z Eval dataset reference log probsr  r  r  r  T)r{  )r{  r   rJ  r  rX   r   r}  r   r  r  r  r   rH   r  r  r{   r  r  r  rG   r  r  r  rU  get_eval_dataloader)
rW   r{  r  r  r  r  r  r  r  rW  s
            r\   r	  &_UnslothKTOTrainer.get_eval_dataloaderg  s    D$5$5$=LMM'3'?|TEVEV(((1U1U1U"iiBB"00#yy??"ii== ! **22:l3`N_3`aK)+&!# $k@b c?C?_?_`l?m<),0,<,<,O,OPi,j)*112K2O2O2QR$$$(,(8(8(K(KL](^%&--.?.C.C.EF !d (22&uyy9S/T/Z/Z/\/b/b/d 3 L   +66-eii@R6S6Y6Y6[6a6a6c  7  
   ,$0!37D0w**EEr_   r  c           	         [         R                  " 5          U R                  c  U R                  5          U R                  (       az  U R                  US   US   UR                  S5      US   S9R                  nU R                  (       a4  U R                  US   US   UR                  S	5      US
   S9R                  nOQU R                  US   US   S9R                  nU R                  (       a   U R                  US   US   S9R                  nSSS5        OU R                  (       az  U R                  US   US   UR                  S5      US   S9R                  nU R                  (       a4  U R                  US   US   UR                  S	5      US
   S9R                  nOQU R                  US   US   S9R                  nU R                  (       a   U R                  US   US   S9R                  nSSS5        U R                  WUS   SU R                  U R                  S9nU R                  (       a-  U R                  WUS
   SU R                  U R                  S9nXE4$ SnXE4$ ! , (       d  f       N= f! , (       d  f       N= f)zfComputes log probabilities of the reference model for a single padded batch of a KTO specific dataset.Nprompt_input_idsprompt_attention_maskcompletion_decoder_input_idscompletion_labels)attention_maskdecoder_input_idslabelsKL_prompt_input_idsKL_prompt_attention_maskKL_completion_decoder_input_idsKL_completion_labelsr   completion_attention_mask)r  KL_completion_input_idsKL_completion_attention_maskFaverage_log_probrH  rD  )rG   no_gradry  r  rH  rS   r  r}   r  get_batch_logpsrD  )rW   r  completion_logits	KL_logitscompletion_logpsKL_logpss         r\   r  ._UnslothKTOTrainer.compute_reference_log_probs  s   ]]_~~%**,..,0JJ();<+78O+P.:.>.>?].^#/0C#D	 -7 -
 !& *  ,,(,

 ,-B C/;<V/W2>2B2BCd2e'34J'K	 )3 )
 %f & -1JJ()?@+78S+T -7 - !& *
  ,,(,

 ,-F G/;<Z/[ )3 ) %f &/ -,8 **(,$%78'34K'L*6*:*:;Y*Z+,?@	 )7 )
 f & (($(NN()>?+78R+S.:.>.>?`.a#/0F#G	 %3 %
 !& " )-$%;<\ZuMv )7 )f & (($(NN()BC+78V+W %3 % !& "g p  //,-"#66#66 0 
 ++34!&#'#:#:#'#:#: , H  )) H))S -, _s$   I<CI+C%I<+
I9	5I<<
J
r}   r  r  rD  rH  c                 z   U R                   SS UR                   :w  a  [        S5      eU(       d(  USS2SS24   R                  5       nU SS2SS2SS24   n OUR                  5       nX:g  nSXU:H  '   [        X5      nU(       a%  Xe-  R	                  S5      UR	                  S5      -  $ Xe-  R	                  S5      $ )aQ  Compute the log probabilities of the given labels under the given logits.

Args:
    logits:
        Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size)
    labels:
        Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are
        ignored. Shape: (batch_size, sequence_length)
    average_log_prob:
        If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the
        log probabilities of the (non-masked) tokens.
    label_pad_token_id:
        The label value to ignore when computing log probabilities.
    is_encoder_decoder:
        Whether the model is an encoder-decoder model. If True, the labels are not shifted and the logits are
        assumed to already be aligned with the labels. If False, the labels are shifted to the right by one
        position, and the logits are assumed to be aligned with the shifted labels.

Returns:
    A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the
    given logits.
Nrj   zKLogits (batch and sequence length dim) and labels must have the same shape.rp   r   )rs   r   clonerE   r   )r}   r  r  rD  rH  	loss_maskr   s          r\   r  "_UnslothKTOTrainer.get_batch_logps  s    < <<,jkk!AqrE]((*FAssAI&F \\^F0	 01++,/?#/44R89==;LLL#/44R88r_   batchc                    U R                  X5      nU R                  (       a  US   UR                  S5      S.O0 nU R                  (       a  SUS'   U" US   4SUS   0UD6nUR                  nU R                  UUS   S	U R                  U R                  S
9nUR                  S   [        US   5      :w  a  [        S5      e[        UR                  S   5       Vs/ s H  oS   U   SL d  M  UPM     n	n[        UR                  S   5       Vs/ s H  oS   U   S	L d  M  UPM     n
nXyS4   nXzS4   nXiS4   nXjS4   nU R                  (       a  XXX5R                  4$ XXU4$ s  snf s  snf )Nr  r  r  r  Tr  r   r  r  Fr  r   r  zThere is a mismatch between the number of examples in this batch and the number of examples for which an output sequence was predicted..)_compute_kl_logpsrH  r  r  r}   r  rD  rs   r  r   rangeaux_loss)rW   rS   r'  r!  model_kwargsoutputsr  r   i
chosen_idxrejected_idxchosen_logpsrejected_logpschosen_logitsrejected_logitss                  r\   forward_UnslothKTOTrainer.forward!  s    ))%7 &&   34%*YY/M%N
  	   37L/0()
 !<=
 

 $NN//%&"#66#66 0 
 !!!$E'N(;;G 
 "''7'='=a'@!A_!AA7^TUEVZ^E^a!A
_#()9)?)?)B#Cb#CaW~VWGX\aGa#Cb'C8)*;<)c/:+#,=>   -RZ\l\lmm -RZ[[ `bs   E(/E(E-#E-policy_chosen_logpspolicy_rejected_logpspolicy_KL_logpsreference_chosen_logpsreference_rejected_logpsr  c                 <   U R                   (       aW  X6-
  R                  5       R                  5       nU R                  R	                  U5      R                  5       R                  SS9nO/[        R                  " S5      R                  UR                  5      nUR                  S   S:w  d  UR                  S   S:w  a  X-
  nU R                  S:X  a)  S[        R                  " U R                  X-
  -  5      -
  n	O6U R                  S:X  a&  S[        R                  " U R                  U-  5      -
  n	U R                  UR                  5       -  n
Or[        R                  " / 5      R                  U R                  R                  5      n	[        R                  " / 5      R                  U R                  R                  5      n
UR                  S   S:w  d  UR                  S   S:w  a  X%-
  nU R                  S:X  a)  S[        R                  " U R                  X{-
  -  5      -
  nO3U R                  S:X  a#  [        R                  " U R                  U-  5      nU R                  UR                  5       -  nOr[        R                  " / 5      R                  U R                  R                  5      n[        R                  " / 5      R                  U R                  R                  5      n[        R                   " U R"                  W	-  U R$                  W-  4S5      nXX4$ )a  Compute the KTO loss for a batch of policy and reference model log probabilities.

Args:
    policy_chosen_logps:
        Log probabilities of the policy model for the chosen responses. Shape: (num(chosen) in batch_size,)
    policy_rejected_logps:
        Log probabilities of the policy model for the rejected responses. Shape: (num(rejected) in batch_size,)
    policy_KL_logps: Log probabilities of the policy model for the KL responses. Shape: (batch_size,)
    reference_chosen_logps:
        Log probabilities of the reference model for the chosen responses. Shape: (num(chosen) in batch_size,)
    reference_rejected_logps:
        Log probabilities of the reference model for the rejected responses. Shape: (num(rejected) in
        batch_size,)
    reference_KL_logps: Log probabilities of the reference model for the KL responses. Shape: (batch_size,)

Returns:
    A tuple of four tensors: (losses, chosen_rewards, rejected_rewards, KL). The losses tensor contains the KTO
    loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for
    the chosen and rejected responses, respectively. The KL tensor contains the detached KL divergence estimate
    between the policy and reference models.
r   rS  rp   ri  r  )r  meandetachr  r  clamprG   zerosru   r   rs   rA  r   sigmoidr@  r   r  rB  rC  )rW   r8  r9  r:  r;  r<  r  klchosen_logratioschosen_losseschosen_rewardsrejected_logratiosrejected_lossesrejected_rewardslossess                  r\   kto_loss_UnslothKTOTrainer.kto_lossT  s   < !6<<>EEGB!!44R8==?EE!ELBQ""#6#=#=>B $$Q'1,0F0L0LQ0OST0T2K~~& !AIIdii;K;P.Q$R R#66 !"AIIdii:J.J$K K!YY)9)@)@)BBN "LL,//0@0@0G0GHM"\\"-001A1A1H1HIN !&&q)Q.2J2P2PQR2SWX2X!6!Q~~&"#aii		R=T0U&V"V#66"#))DII8J,J"K#yy+=+D+D+FF $ll2.11$2B2B2I2IJO$||B/2243C3C3J3JK""]2D4K4Ko4]^

 '7;;r_   c                 z   SnU R                   (       a  U R                  (       a   US   US   US   UR                  S5      S.nOUS   US   S	.n[        R                  " 5          U" S0 UD6R
                  nSSS5        U R                  WUS   S
U R                  U R                  S9nU$ ! , (       d  f       N:= f)z/Compute KL log probabilities for a given batch.Nr  r  r  r  )r   r  r  r  r  r  )r   r  Fr  rP  )r  rH  r  rG   r  r}   r  rD  )rW   rS   r'  r!  KL_model_kwargsr  s         r\   r*  $_UnslothKTOTrainer._compute_kl_logps  s    &&!&'<!=&+,F&G#$:;).3T)U	# "''@!A&+,J&K#
 !4O4;;	 ! ++,-!&#'#:#:#'#:#: , H  !s   %B,,
B:c                    U R                  X5      nU R                  U R                  U5      nU R                  (       aW  X4-
  R                  5       R	                  5       nU R
                  R                  U5      R                  5       R                  SS9nO9[        R                  " S5      R                  U R
                  R                  5      nU R                  (       a  US   UR                  S5      S.O0 nU R                  (       a  SUS'   U R                  (       a  UR                  5       " US	   4US
   SS.UD6nUR!                  5       " SUS   UR"                  SS.UD6nU R                  R                  5       " US	   4US
   SS.UD6n	U R                  R!                  5       " SUS   U	R"                  SS.UD6n
GO [%        US5      (       a"  UR!                  5       b  UR!                  5       nO-['        USU R(                  R*                  5      n['        XU5      nU" US	   4US
   SS.UD6n[%        U R                  S5      (       a6  U R                  R!                  5       b  U R                  R!                  5       nOK['        U R                  SU R(                  R*                  5      n['        U R                  XR                  5      nU" US	   4US
   SS.UD6n
UR-                  5       nU R                  R-                  5       nU R/                  U R                  (       d  UR"                  SS2SS24   OUR"                  UR0                  US   SS2SS24   [%        US5      (       a  UR2                  OS[        R4                  " US   [        R6                  S9R                  U R
                  R                  5      U R                  (       d  U
R"                  SS2SS24   OUR"                  UR0                  [%        US5      (       a  UR2                  OSUS9	u  nu  nnnnnnUUUUUUUUS.nU R                  (       a  UR8                  US'   U$ )a  
Compute the KTO loss using the Liger-Kernel's LigerFusedLinearKTOLoss.

Args:
    model:
        The policy model used for generating log probabilities and outputs. It could be an encoder-decoder
        model or a regular language model.
    batch: A dictionary containing the input data and labels for the batch.

Returns:
    A dictionary containing the following keys:
        - "loss": The computed KTO loss for the batch.
        - "chosen_logits_sum": Sum of the logits for the chosen responses from the policy model.
        - "rejected_logits_sum": Sum of the logits for the rejected responses from the policy model.
        - "chosen_logps": Log probabilities of the chosen responses from the policy model.
        - "rejected_logps": Log probabilities of the rejected responses from the policy model.
        - "chosen_rewards": Rewards for the chosen responses.
        - "rejected_rewards": Rewards for the rejected responses.
        - "kl": The KL divergence between the policy and reference models (detached).

    If auxiliary loss is enabled, the dictionary will also include:
        - "aux_loss": The auxiliary loss from the model outputs.
r   r>  rp   r  r  r)  Tr  r   r  )r  return_dictr  F)r   encoder_hidden_states	use_cacheget_decoderNbase_model_prefix)r  rT  rj   biasr  )r  )	_input
lin_weighttargetrW  preference_labels	ref_input
ref_weightref_biasrD  )losschosen_logits_sumrejected_logits_sumchosen_logps_sumrejected_logps_sumchosen_rewards_sumrejected_rewards_sumrD  r,  rP  )r*  ry  r  r?  r@  r  r  rA  rG   rB  ru   r   rH  r  r  get_encoderrU  last_hidden_staterV   r  rX   rO  get_output_embeddingsr  weightrW  r   boolr,  )rW   rS   r'  r:  r  rD  r-  encoder_outputsr.  ref_encoder_outputsref_outputs
base_model	base_attrref_base_modelref_attrlm_headref_lm_headr_  rb  rc  r`  ra  rd  re  rZ   s                            r\   _compute_loss_liger&_UnslothKTOTrainer._compute_loss_liger  sX   0 00>!33DNNEJ!6<<>EEGB!!44R8==?EE!ELBQ""4#3#3#:#:;B &&   34%*YY/M%N
  	   37L/0""#//1,-$%@A  	O '') &':;&5&G&G 	G #'.."<"<">,-#$%@A # 	# ..446 &':;&9&K&K 	K um,,1B1B1D1P"..0
#E+>		@c@cd	$Uu=
 ,-$%@A 	G t~~}55$..:T:T:V:b!%!;!;!="4>>3F		HkHkl!(>>!R(,-$%@A 	K --/nn::< <@<S<S7,,QV4Y`YrYr~~,-ae4!(&!9!9t#ll5>LOOPTP`P`PgPgh** "33AssF;**")))0&)A)A[%%t  

	
 "!#"$" !2#6 0"4"4$8	
   !(!1!1F:r_   c           	         0 nUR                  5        VVs0 s HL  u  pEU[        U[        R                  5      (       a%  UR	                  U R
                  R                  5      OU_MN     nnn[        R                  " US   5      nUR                  5       R	                  U R
                  R                  5      n[        U5      U-
  R	                  U R
                  R                  5      nU R                  R                  (       aQ  U R                  X5      n	U	S   n
U	S   nU	S   nU	S   nU	S   nU	S   nU	S   nU	S	   nU R                  (       a  U	S
   nGOU R                  X5      nUSS u  nnnnnU R                  (       a  US   nSU;   a  [        US   R                   S   5       Vs/ s H  nUS   U   SL d  M  UPM     nn[        US   R                   S   5       Vs/ s H  nUS   U   SL d  M  UPM     nnUS   US4   nUS   US4   nU R"                  (       a  US   nOSnO[        R$                  " 5          U R&                  c?  U R)                  5          U R                  U R*                  U5      SS u  nnnnnSSS5        O%U R                  U R&                  U5      SS u  nnnnnSSS5        U R-                  UUUWWW5      u  pnnUR/                  5       US	'   U R
                  R1                  U5      R                  5       R/                  5       nU R
                  R1                  U5      R                  5       R/                  5       nUS:  a  U R
                  R1                  UR3                  5       5      R3                  5       R/                  5       US'   U R
                  R1                  UR3                  5       5      R3                  5       R/                  5       US'   U R
                  R1                  UR3                  5       5      R3                  5       R/                  5       US'   UUS'   US:  a  U R
                  R1                  UR3                  5       5      R3                  5       R/                  5       US'   U R
                  R1                  UR3                  5       5      R3                  5       R/                  5       US'   U R
                  R1                  UR3                  5       5      R3                  5       R/                  5       US'   UUS'   U
R5                  5       nU R                  (       a  UU R6                  W-  -  nUU4$ s  snnf s  snf s  snf ! , (       d  f       GN= f! , (       d  f       GN= f)zWCompute the KTO loss and other metrics for the given batch of inputs for train or test.r  r_  r`  ra  rb  rc  rd  re  rD  r,  N   r  r   TF.r  zrewards/chosen_sumzlogps/chosen_sumlogits/chosen_sumzcount/chosenzrewards/rejected_sumzlogps/rejected_sumlogits/rejected_sumzcount/rejected)itemsr  rG   r   ru   r  r   r   r   r  rX   rN  rt  r  r6  r+  rs   r  r  ry  r  rS   rL  itemr  nansumnanmeanr  )rW   rS   r'  metricskvr  
num_chosennum_rejectedmodel_outputrK  policy_chosen_logitspolicy_rejected_logitsr8  r9  rG  rJ  rD  r,  forward_outputr:  r/  r0  r1  r;  r<  r  _all_num_chosenall_num_rejectedr_  s                                  r\   get_batch_loss_metrics)_UnslothKTOTrainer.get_batch_loss_metricsJ  sP    fkfqfqfstfs^b^_jELL6Q6QQTT$**112WXXfsteGn-ZZ\__T%5%5%<%<=
Fj044T5E5E5L5LM99##33EAL!&)F#/0C#D %12G%H""./A"B$01E$F!)*>?N+,BCd#B$$'
3!\\%7N r"#%$&$$)!, !E)).u5F/G/M/Ma/P)Qo)QAUZ[bUcdeUfjnUna)Q
o+07H1I1O1OPQ1R+Sr+SaW\]dWefgWhlqWq+Sr)./@)A*c/)R&+01B+CLRUDU+V($$)./C)D&)-&]]_~~-!224 !%TZZ ? C 6 8 ! ! 2 54 !LL?C24.! %& <@==#%&("<8F$4b 	))<<ZHLLNSSU++>>|LPPRWWYA  33N4I4I4KLSSUZZ\ ()   334G4N4N4PQXXZ__a &'   334H4O4O4QRYY[``b '( '5GN#a  334D4K4K4MNUUW\\^ *+   334I4P4P4RSZZ\aac ()   334J4Q4Q4ST[[]bbd )* )9G$%~~  D&&11DW}O u@ pr 54 %_sB   AU*U=U"U5U U4&U".U4"
U1	,U44
Vinputsc                    U R                   (       a)  [        U R                  R                  R                  5      O	[        5       nU   U R                  X5      u  pgS S S 5        WR                  U R                  R                  5      nU R                  R                  (       a  U R                  WSS9  U(       a  UW4$ U$ ! , (       d  f       Nk= f)Ntrain
train_eval)r  r&   r  r   r  r=   r  ru   rX   is_main_processstore_metrics)rW   rS   r  return_outputsnum_items_in_batchcompute_loss_context_managerr_  r~  s           r\   compute_loss_UnslothKTOTrainer.compute_loss  s     7;6X6XHT%%,,112^i^k 	% * 77FMD * wwtyy''(++w7;'?" *)s   C  
Cr~  r  )r  evalc                 x    UR                  5        H&  u  p4U R                  U   U   R                  U5        M(     g r  )rz  r  r{   )rW   r~  r  keyvalues        r\   r   _UnslothKTOTrainer.store_metrics  s2    !--/JC  ,S188? *r_   datasetc                 ^    Uc  U R                   nUb  [        U5      (       d  g [        U5      $ r  )rz  r.   r   )rW   r  s     r\   _get_train_sampler%_UnslothKTOTrainer._get_train_sampler  s/    ?((G?*W"5"5 ))r_   c           
         U R                   (       a)  [        U R                  R                  R                  5      O	[        5       nU   UR                  US   US   U R                  SU R                  R                  S9nSU;   a  US   nOU R                  c[  U R                  5          U R                  R                  US   US   U R                  SU R                  R                  S9nSSS5        OAU R                  R                  US   US   U R                  SU R                  R                  S9nSSS5        [        WU R                  U R                  R                  5      nU R                  R                  USS9n[        WU R                  U R                  R                  5      nU R                  R                  USS9nXg4$ ! , (       d  f       N= f! , (       d  f       N= f)zRGenerate samples from the model and reference model for the given batch of inputs.r  r  T)r   r  r=  	do_sampler   reference_outputN)skip_special_tokens)r  r&   r  r   r  r=   generater=  r|  r   ry  r  rS   r?   batch_decode)rW   rS   r'  generate_context_managerpolicy_outputr  policy_output_decodedreference_output_decodeds           r\   generate_from_model_and_ref._UnslothKTOTrainer.generate_from_model_and_ref  s    7;6X6XHT%%,,112^i^k 	! &!NN 23$%<=??!22?? + M "U*#();#< >>)..0+/::+>+>&+,>&?+01H+I'+&*)-)>)>)K)K ,? ,( 10 (,~~'>'>"'(:";',-D'E#'??"&%)%:%:%G%G (? ($/ &> &mT__dF[F[FhFhi $ 5 5 B B=fj B k()94??DLaLaLnLno#'#8#8#E#EFVlp#E#q $>>/ 10 &%s'   A!G(AG	*A
G	
G	G
G(r   ignore_keysc                    Uc+  [        US5      (       a  [        UR                  S/ 5      nO/ nU R                  (       a)  [	        U R
                  R                  R                  5      O	[        5       n[        R                  " 5          U   U R                  X5      u  pgS S S 5        S S S 5        U R
                  R                  (       a  U R                  WSS9  U(       a  WR                  5       S S 4$ 0 nSW;   a  US   US'   SU;   a  US   US'   UR                  5        V	V
s/ s H  u  pX;  d  M  U
PM     nn	n
[        R                   " XR
                  R                  S	9n[        R"                  " UR$                  S
   U R
                  R                  S	9nWR                  5       X4$ ! , (       d  f       GN= f! , (       d  f       GN= fs  sn
n	f )Nr  keys_to_ignore_at_inferencer  r  rx  zeval_logits/chosenry  zeval_logits/rejectedr   r   )rV   r  r  r  r&   r  r   r  r=   rG   r  r  r  r  r@  rz  r   rB  rs   )rW   rS   r  r   r  prediction_context_managerr_  r~  logits_dictr  r  r}   r  s                r\   prediction_step"_UnslothKTOTrainer.prediction_step  s    uh''%ell4QSUV  7;6X6XHT%%,,112^i^k 	# ]]_8 77FMD 9_ ++w6:KKM4.. ')078K0LK,- G+29:O2PK./ + 1 1 3L 3q7K! 3Lf-=-=-D-DEV\\!_T5E5E5L5LMv..) 98__  Ms0   F?F-F?%G4G-
F<	7F??
G
dataloaderdescriptionmetric_key_prefixc                   > U R                   (       Ga  [        UR                  5      n[        R                  " [        U5      U R                  R                  S9nUR                  R                  U5      nU R                  U5      n	U R                  U	5      n	[        R                  " U	S   [        R                  U R                  R                  S9n
[        R                   " U
) 5      S   nU	S   U   U	S   U   [#        U6 " U	S   5      S.nU R%                  U R&                  U5      u  p[(        R*                  " / S	Q[-        US   X5       VVVs/ s H$  u  nnnUU[        U5      S
 U[        U5      S
 /PM&     snnnS9nSU R                  R.                  ;   a(  [0        R3                  S[0        R5                  US905        SU R                  R.                  ;   a
  [7        SUS9  [8        TU ]u  XX4U5      nU$ s  snnnf )z
Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by
`Trainer.evaluate()` and `Trainer.predict()`.

Works both with or without labels.
)r  r  )r  r   r   r  r  prompt)r  r  r  )PromptPolicyz	Ref ModelN)columnsdatawandbgame_log)r  comet_mlzgame_log.csv)r  table)rG  r  r  rD   sampler+  rX   eval_batch_sizeselectr}  _prepare_inputsrG   r   rj  r  r   wherer4   r  rS   r@   	DataFramert   r  r  logTabler5   rU  evaluation_loop)rW   r  r  r   r  r  num_samplesrandom_indicesrandom_batch_datasetrandom_batchtarget_labelstarget_indicestarget_batchr  ref_output_decodedr  polrefr  initial_outputrW  s                       r\   r  "_UnslothKTOTrainer.evaluation_loop3  s     $$$j001K#]]5+=AZAZ[N $.#5#5#<#<^#L --.BCL//=L!LLg)>ejjY]YiYiYpYpqM"[[-8;N$01C$D^$T)56M)N~)^$n5l86LML
 9=8X8XY]YcYceq8r5!LL9 -0X0FH],r,r(S SV/S[]1CD,rE $))---		:u{{{'>?@TYY000-' 0%9HY
 's   +G=logs
start_timec           	      D  > SU;   a  SOSnUS:X  a  SOSnS H  nSU 3U R                   U   ;   d  M  [        R                  " U R                   U   SU 3   5      R                  5       R	                  5       nS Hm  n[        R                  " U R                   U   U S	U S
3   5      R                  5       R	                  5       U-  X U S	U 3'   U R                   U   U S	U S
3	 Mo     U R                   U   SU 3	 M     U S3U;   a  U S3U;   a  X S3   X S3   -
  X S3'   U R                   U   R                  5        H=  u  p[        R                  " U	5      R                  5       R	                  5       X U 3'   M?     U R                   U	 [        T
U ]!  X5      $ )z
Log `logs` on the various objects watching training, including stored metrics.

Args:
    logs (`dict[str, float]`):
        The values to log.
    start_time (`float` or `None`, *optional*, defaults to `None`):
        Start time of the training.
r_  r  r  eval_rb  )chosenrejectedzcount/)rewardslogpsr}   /_sumzrewards/chosenzrewards/rejectedzrewards/margins)	r  rG   r   r   r{  rz  r?  rU  r  )rW   r  r  r  r  split	count_summetricr  r~  rW  s             r\   r  _UnslothKTOTrainer.logm  s    !'$WF
&&0b+Ew4#7#7
#CC!LL)=)=j)IFSXRYJZ)[\``bggi	<FT%9%9*%EPQRWQXX\F]%^_ccejjl#$ 8F81UG45
 ,,Z8F81UG49PQ = ((4veW5EF , X^$,F8;K1LPT1T/3hn4M/NQUX``pVqQr/rD8?+, 00<BBDLC%*\\'%:%?%?%A%F%F%HD8C5!" E  ,w{4,,r_   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )Nr  rj   )
model_name)	rX   r  r   r   r  r  create_model_cardrU  _save_checkpoint)rW   rS   trialr  rW  s       r\   r  #_UnslothKTOTrainer._save_checkpoint  sj    99!!)dii22388J//55c:2>J*5 .r_   r  dataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
N_name_or_pathunsloth_versionunslothJOB_IDhf_jobsaJ          @article{ethayarajh2024kto,
            title        = {{KTO: Model Alignment as Prospect Theoretic Optimization}},
            author       = {Kawin Ethayarajh and Winnie Xu and Niklas Muennighoff and Dan Jurafsky and Douwe Kiela},
            year         = 2024,
            eprint       = {arXiv:2402.01306},
        }KTOz7KTO: Model Alignment as Prospect Theoretic Optimizationz
2402.01306)rn  r  r  r  r  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zerorV   rS   r  r>   pathisdirr  setr  r  addenvironupdater  rF   dedentr,   r  r3   r  runurlr-   savejoinrX   r   )rW   r  r  r  rn  citation
model_cards          r\   r  $_UnslothKTOTrainer.create_model_card  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ?? $  )!!**%'9';';		@Ueiimm[_.0%Q!

 	TYY%9%9;GHr_   )r  r  r  r  r  r  r@  r  rB  r{  rG  rH  r  r  rD  rA  r?  r=  r>  r  r  rE  rJ  r|  r  ry  rz  rF  rC  r  )NNNNNNNNN)NNNNNNNr  )Frj  F)FNr  )NNr  )NNN)Brl  rm  rn  ro  rp  r  r	   r   r;   Moduler  r   r   r   dictr   r   r   r   r   r   r  r!   tuplerG   r  	Optimizerlr_schedulerLambdaLRr   r   rV  r(   r  r   r   r	  r  staticmethodFloatTensor
LongTensorrj  rr  r  r6  rL  r*  rt  r  r   r  r  r   r  utilsr  Samplerr  r  r  r  r  r  r  rs  rt  ru  s   @r\   rw  rw    s   +Z J 9=FJ+/EI 04>B59Vbhl&*FJ,0*.%]_bii45] E/299c"ABC] 	]
  (] uWd3<.@%@AB] #)+=?UWeef
]  -] Xb/&9:;] D12] %++//1I1I1R1RRS] (0%,,9UW\WcWc9c0d'e] d^]  "(N+;T+A"BC!]" %SM#]$ #3-%] ]~ M M*.j *.X5F0A 5FZ 5F 5FnM* M* M*^  "'"&#(19!!19  19 19  	19
 !19 
		19 19f1\YY1\'+CtU=M=M7M1N,N'O1\	u  %"3"3U5F5FHYHYY	Z1\fK<"..K<  %00K< **	K<
 !& 1 1K< #("3"3K< "--K< 
u  %"3"3U5F5FHYHYY	ZK<Z:JXn CtU%5%55667nh _bii/0 S%c 1223 
u||U5<<c5<<6G1H#HII	J0@T#u*%5 @7?C[ @jn @*(7*; *xPUP[P[P`P`PhPhGi *.?S%BRBR=R8S .?X]^acf^fXg .?j ,0$/_bii/0$/ S%c 1223$/ #	$/
 d3i($/T 04+/!'88 8 'tn	8
 d3i(8 8 
8 8t!-S%Z( !-huo !-QU !- !-H/ %)&*,0	@ISM@I sm@I CcD()	@I @Ir_   rw  c                   L   ^  \ rS rSrSr              SU 4S jjrSrU =r$ )UnslothKTOTraineri  a
  
    
Initialize KTOTrainer.

Args:
    model (`transformers.PreTrainedModel`):
        The model to train, preferably an `AutoModelForSequenceClassification`.
    ref_model (`PreTrainedModelWrapper`):
        Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
        and loss. If no reference model is provided, the trainer will create a reference model with the same
        architecture as the model to be optimized.
    args (`KTOConfig`):
        The arguments to use for training.
    train_dataset (`datasets.Dataset`):
        The dataset to use for training.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If provided, will be used to automatically process the inputs
        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
        reuse the fine-tuned model.
    data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
        The data collator to use for training. If None is specified, the default data collator
        (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
        sequences in the batch, given a dataset of paired sequences.
    model_init (`Callable[[], transformers.PreTrainedModel]`):
        The model initializer to use for training. If None is specified, the default model initializer will be
        used.
    callbacks (`list[transformers.TrainerCallback]`):
        The callbacks to use for training.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.
    peft_config (`dict`, defaults to `None`):
        The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
        a PEFT model.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.
    model_adapter_name (`str`, defaults to `None`):
        Name of the train target PEFT adapter, when using LoRA with multiple adapters.
    ref_adapter_name (`str`, defaults to `None`):
        Name of the reference PEFT adapter, when using LoRA with multiple adapters.

    c                 6  > Uc
  [        5       n[        USS5      n[        U5      [        La  Sn[        USS5      n[        U5      [        La  SnSn[        R
                  R                  SS5      S:H  nU(       d1  [        R
                  R                  SS5      S:X  a  [        S5        S	n[        R
                  R                  S
S5      n[        UR                  SS 5      =(       d    [        UR                  SS 5      nUc  UR                  5       R                  nSSKJn  U" U5      nU[        R                  :H  nU(       d  U(       a  U(       a  [        S5      eU(       d  U(       d  U(       a  [        S5      eU(       a"  SUl        SUl        S[        R
                  S'   OCU(       d<  U(       d5  US:X  a/  UUl        U(       + Ul        U(       a  SOS[        R
                  S'   [        USS 5      b-  [        USS5      S:X  a  SUl        [        USS 5      c  SUl        [        USS 5      nUb/  US:  a)  SSKJn  [-        U5      [-        S5      ::  a  [        S5        [        USS5      S:w  aL  [        USS5      nUS:X  a!  UR.                  U:  a  UR.                  Ul        [        US S 5      c
  Ub  UUl        [        US!S5      n[        U5      [        La  Sn[        US"S5      n[        U5      [        La  SnUR                   (       a  U(       a  SUl        S	Ul        UR"                  (       a  U(       a  S	Ul        SUl        U(       a  SUl        SUl        Oc[        R
                  R                  S
S5      S#:X  a  S	Ul        SUl        O0U(       d)  U(       d"  UR"                  Ul        UR                   Ul        Sn[9        5       R                  S$S 5      b  S	n[9        5       R                  S%S 5      b  S	nU(       a  S[        R
                  S&'   S'[9        5       ;  a  [;        US'5      (       d  OD[        US'S 5      n[        US'S 5      nUc'  Ub$  UR<                  n [;        US'5      (       a  U Ul        Ub!  [;        US(5      (       a  UR?                  5         S)[9        5       ;   a   [;        [@        S*5      (       a  S+[@        l!        S,[9        5       ;   aU  [;        US*5      (       a  S+Ul!        [;        US)5      (       a,  [;        UR@                  S*5      (       a  S+UR@                  l!        S,[9        5       ;   a  UO[@        n!SS-K"J#n"  [I        UU"5      (       dx  [I        U[J        5      (       a(  S.URL                  ;  a  [O        U!SS/[        US0S 5      S19nO[I        U[N        5      (       a%  S.URL                  ;   a  [K        U![        US0S 5      S29nOJ[;        US35      (       a  SUl(        [;        US45      (       a  S5Ul)        [;        US65      (       a	  S7S	0Ul*        [I        UU"5      (       dx  [;        U!S85      (       dg  [;        U!S)5      (       aV  [I        U[J        5      (       a   [K        U!R@                  [        US0S 5      S29nO![O        U!R@                  SS/[        US0S 5      S19n/ n#SS9K+J,n$  U$" S:U#5        [        US;S 5      [Z        R\                  :X  a(  UR^                  S:  a  [        US<S5      S:w  a  SUl0        S=[9        5       ;   a!  [;        US(5      (       a  UR?                  5         [b        T'U ]  " SDUUUUUUUUU	U
UUUUS>.UD6  S=[9        5       ;   a!  [;        US?5      (       a  URg                  5         [;        U S@5      (       a-  U Rh                  Rk                  5         [;        U S@5      (       a  U ?4[        USAS 5      b  U Rl                  UR                  5       l6         [;        U SB5      (       aV  U Rn                  Rp                  n%Un&[;        U&S=5      (       a&  U%U&l9        U&Rt                  n&[;        U&S=5      (       a  M&  U%U&l9         [;        U SC5      (       a.  [w        [y        U Rz                  R|                  5      U 5      U l>        g )ENr   Fr   UNSLOTH_ENABLE_FULL_FINETUNING01UNSLOTH_FORCE_FLOAT32zKUnsloth: Switching to float32 training since model cannot work with float16TUNSLOTH_MIXED_PRECISIONrv   r  torch_dtyper   )
_get_dtypezuUnsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`zuUnsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`r   ACCELERATE_MIXED_PRECISIONr{  r   r   r   r\  r   rp   )__version__z4.45.2z**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`r      r   r   r   bfloat16r  r  UNSLOTH_RETURN_LOGITSr   rT   r  padding_siderightr|  )UnslothVisionDataCollatorr  rc  pad_to_multiple_of)mlmmlm_probabilityr&  )r&  r   dataset_text_fieldrb  dataset_kwargsskip_prepare_datasetpad)PatchRLStatisticskto_trainerparallel_mode_n_gpurS   )rS   ry  rX   rz  r{  r|  r}  r~  r  r  r  r  r  r  rU   neftune_hook_handler5  r  r  rP  )?r   r  r  rj  r>   r  r  rQ  r  r  r  unsloth_zoo.utilsr  rG   float16	TypeErrorr   r   r   r   transformersr  rL   r   r   r   r   r   localsrV   r   rT   r  r#  unsloth_zoo.vision_utilsr%  r  rM   r  +TransformersDataCollatorForLanguageModelingr   r)  r*  unsloth_zoo.logging_utilsr-  rO   NOT_DISTRIBUTEDn_gpur0  rU  rV  rU   r1  remover5  r  scaleraccelerator_scalerrS   rP   rb   rW  r  )(rW   rS   ry  rX   rz  r{  r|  r}  r~  r  r  r  r  r  r  rY   use_bf16use_fp16force_float32full_finetuningmixed_precision_dtyper  r  r3  ga_stepstransformers_versioneval_bszr   r   _output_logitsmodel_max_seq_lengthargs_max_seq_lengthr   _UnslothKTOTrainer__tokenizerr%  other_metricsr-  r=  current_modelrW  s(                                          r\   rV  UnslothKTOTrainer.__init__	  s   $ < 0 24/>%%x4/>%%x**..)I3OSVVBJJNN3JC$PTW$W_` M "

/H) Tgt4bm]a8b=%"<"<">"D"D%05!5==('hy  JA  @B  :Bg(9  NE  DF  >FDIDI7;BJJ3481F)1SDI#DIAHvfBJJ344.:wt_^b?cgk?k!(Dt\408C$/4!>EHqLH+,0AA @ A4$/47t%A1EH1}!A!AH!Lpt  qQ  qQdNmt6=E(J^  @H`d`| '7?t+e^ '7?t+e^99u)<\`dFY99t)<[`TEX"'D"'DZZ^^5yAZO"&D"'D"&))D"&))D8<<)40<tn8<<7>J]aN25BJJ./68+GDBR4S4S#*52BD#I #*42BD#I"*/C/O!&!5!54!122.D4G!?!? &("wy.'I'Idk9Ka)'88Za:J:W'55'BRB\B\^l:m:m  Zao  pJ  pJ  pW*<*H&iF-)BCC-)?@@XUbUoUoEo K&))07KT)R	! M+VWW\dhu  iC  iC  ]C 6)07KT)R!
 t455TYt7Qt122bD4Kt-..G]_cFd0C-)BCC;..7;3T3Tm-CDD$:#---4T;OQU-V%M
 %P#--#*--4T;OQU-V	%M ?-7 4$/<3O3OOTXT^T^abTbtXq)Q.fh75.#A#A  	:!)'/)#!,I%-!3/	: 39	: fh75/#B#B!4.//$$++-t2339Q4.5A?C?W?WE&&(<4''%%,,F!M-11390 - 3 3 -11 06M,4!!#$=dnn>R>R$SUYZDJr_   r  )NNNNNNNNNNNNNN)rl  rm  rn  ro  rp  rV  rs  rt  ru  s   @r\   r  r    sA    -` (,!k kr_   r  	addFilterc                        \ rS rSrS rS rSrg)HideLoggingMessagei  c                     Xl         g r  text)rW   rS  s     r\   rV  HideLoggingMessage.__init__  s    d)r_   c                 <    U R                   UR                  5       ;  $ r  )rS  
getMessage)rW   xs     r\   filterHideLoggingMessage.filter  s    alln)DEr_   rR  N)rl  rm  rn  ro  rV  rX  rs  rP  r_   r\   rP  rP    s    2Er_   rP  z`use_cache=True`)irp  rG   r   torch.nnr;   r   r   typingr   r   r   r   r	   r
   r   r   trl.trainer.kto_trainerr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rH   dataclassesrJ   rK   packaging.versionrL   r  
contextlibr5  rM   rN   r8  transformers.training_argsrO   r`   typesrP   rb   torch_compile_optionscompiler   rr  r   r   r   r   rw  r  rV   FilterrP  rN  rP  r_   r\   <module>re     s   0    $ I I I a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a 
  ( %   " $  3      4;PR S"||  \\	&,, %  	
 \\6ell C ELL  L-y L- L-Z {I {Ix-Z* Zx  6;FW^^ F 	
'(:;<  r_   