
    f:i0             
       "   S r SSKJr  SSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J
r
J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+JrJ,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrCJrJDrDJErEJFrFJGrGJHrHJIrIJJrJJKrKJLrLJMrMJNrNJOrOJPrPJrJQrQJrJ
r
J$r$J%r%J*r*J=r=J@r@JErEJr  SSKErESSK7  SSKRJ0r0JSrS  SS	KTJUrU  SSKrSSKVrWSS
KXJDrD  SSKJr  SSKYJZrZJ[r\  SSK]J^r^  SSK_r_SSK`Jara  S rb SSSSSS.rc\R                  " SS\cS9S 5       reS\R                  S\fS\fS\R                  4S jrgS\R                  S\R                  S\fS\fS\R                  4
S jrhS\R                  S\fS\R                  4S jri\0 " S  S!\5      5       rj  " S" S#\*5      rk " S$ S%\k5      rl \m" \@S&5      (       a3  SSKArA " S' S(\AR                  5      ro \@R                  " \o" S)5      5        gg)*z;
2025.10.10
2025.10.9
4.56.2
0.23.0
__UNSLOTH_VERSIONING__
    )TensorN)
functional)AnyListOptionalTupleUnionDictSetCallable)Qr   AutoModelForCausalLMAutoTokenizerBaseImageProcessorr   	DPOConfig
DPOTrainerDataCollatorDataCollatorForPreference
DataLoaderDatasetEvalLoopOutputFFDivergenceConstantsFDivergenceTypeFeatureExtractionMixinIterableDatasetLiteral*MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMESr   PartialStatePath
PeftConfig	PeftModelPreTrainedModelPreTrainedTokenizerBaseProcessorMixinRunningMomentsSyncRefModelCallbackTrainerTrainerCallbackr	   autocastcap_expcontextmanagercreate_reference_model	dataclassdefaultdictdisable_dropout_in_modelempty_cache
flush_leftflush_rightgenerate_model_cardget_comet_experiment_urlget_peft_modelinspectis_comet_availableis_liger_kernel_availableis_mlflow_availableis_peft_availableis_wandb_availablelog_table_to_comet_experimentloggerloggingmaybe_apply_chat_templatemaybe_extract_promptnnnullcontextospadpad_to_lengthpdpeft_module_casting_to_bf16prepare_deepspeedprepare_fsdpprepare_model_for_kbit_trainingrandomselective_log_softmaxshift_tokens_righttextwraptorchtqdmr   r   r!   r"   r'   r:   r=   rC   rO   )*)r-   field)Version)rB   )DataCollatorForSeq2SeqDataCollatorForLanguageModeling)ParallelMode)
MethodTypec                 F   ^  [         R                  " T 5      U 4S j5       nU$ )Nc                 8  > [        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         T" U /UQ70 UD6n[        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         U$ )Nmodelfor_trainingfor_inference)hasattrrZ   r[   r\   )selfargskwargsoutputfs       >/home/james-whalen/unsloth_compiled_cache/UnslothDPOTrainer.pywrapper*prepare_for_training_mode.<locals>.wrapper0   sx     4!!gdjj.&I&IJJ##%4)$)&)4!!gdjj/&J&JJJ$$&    )	functoolswraps)rb   rd   s   ` rc   prepare_for_training_moderi   /   s%    __Q  Nrf   TF)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphs)dynamic	fullgraphoptionsc                 d   [         R                  " U R                  SU R                  S   5      SSS9n[         R                  " UR                  S5      SSS9n/ n[	        X#5       H  u  pVUR                  [         R                  5      n[         R                  " USUR                  S5      S9R                  S5      n[         R                  " USS9nXx-
  n	UR                  U	5        M      [         R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )N   r   )chunksdim)rt   indexrt      )rO   chunkreshapeshapeziptofloat32gather	unsqueezesqueeze	logsumexpappendconcat)
logitsru   chunked_logitschunked_indexall_per_token_logpschunk_logitschunk_indexselected_logitslogsumexp_valuesper_token_logpss
             rc   chunked_selective_log_softmaxr   E   s    [[FLL4D!EPQYZ[N[[r!2QaHM%(%G!#u}}5,,|2{G\G\]_G`aiijlm ??<rB)<""?3 &H 	,,':;-55v||AUV6XYrf   	input_idslogits_to_keeppad_token_idreturnc                 ~    XR                   S   :  a  [        S5      eU SS2SU* 24   nX2:H  nUR                  SS9nU$ )zr
Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens 
rw   z8logits_to_keep must be smaller than the sequence length.Nrv   )rz   
ValueErrorsum)r   r   r   prompt_sectionpadding_maskpad_token_countss         rc   calculate_pad_tokens_in_promptr   W   sX     ++STTq"2N?"223N"2L#''A'.rf   completion_input_idsleft_pad_tokens_per_promptmax_left_padc                     U R                   u  pEU R                  nX!-
  n[        R                  " XVS9R	                  S5      nXR	                  S5      :  n	X:g  n
X-  nU$ )a)  
Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]

Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
and pad are pad tokens, this function would make a completion mask that would 0 out the pad
and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
devicer   rw   )rz   r   rO   aranger   )r   r   r   r   
batch_sizecompletion_lenr   num_tokens_to_maskindices
shift_masknon_padding_mask
final_masks               rc    create_completion_attention_maskr   j   si     "6!;!;J!((F%Bll>9CCAFG88;;J,<.Jrf   tensorpad_idc                 l    X:g  n[         R                  " USSSS9n[         R                  " U SU5      nU$ )zD
Moves all padding tokens in each sequence of a batch to the right.
rw   T)rt   
descendingstable)rO   argsortr~   )r   r   masksorted_indicespacked_tensors        rc   left_pack_paddingr      s8     D]]4Q4MNLLN;Mrf   c                     ^  \ rS rSr% Sr\" SSS0S9r\\   \	S'   \" SSS	0S9r
\\   \	S
'   \" SSS0S9r\\   \	S'                                                                                                                                                                        SU 4S jjrSrU =r$ )UnslothDPOConfig   u,  
    
Configuration class for the [`DPOTrainer`].

This class includes only the parameters that are specific to DPO training. For a full list of training arguments,
please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
differ from those in [`~transformers.TrainingArguments`].

Using [`~transformers.HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.

Parameters:
    > Parameters that control the model and reference model

    model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `model` argument of the
        [`DPOTrainer`] is provided as a string.
    ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `ref_model` argument of the
        [`DPOTrainer`] is provided as a string.
    model_adapter_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the train target PEFT adapter, when using LoRA with multiple adapters.
    ref_adapter_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the reference PEFT adapter, when using LoRA with multiple adapters.
    force_use_ref_model (`bool`, *optional*, defaults to `False`):
        If you provide a PEFT model as the active model and wish to use a different model for the `ref_model`, set
        this flag to `True`.
    disable_dropout (`bool`, *optional*, defaults to `True`):
        Whether to disable dropout in the model and reference model.
    use_logits_to_keep (`bool`, *optional*, defaults to `False`):
        If `True`, only a specified number of logits are computed in the forward pass. This can be useful for
        saving memory and speeding up training by not computing the logits for all tokens, especially in scenarios
        when working with very long prompts where labels are ignored (-100).

    > Parameters that control the data preprocessing

    dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
        Number of processes to use for processing the dataset.
    padding_value (`int` or `None`, *optional*, defaults to `None`):
        Padding value to use. If `None`, the padding value of the tokenizer is used.
    label_pad_token_id (`int`, *optional*, defaults to `-100`):
        Padding value to use for labels.
    max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
        Maximum length of the prompt.
    max_completion_length (`int` or `None`, *optional*, defaults to `None`):
        Maximum length of the completion.
    max_length (`int` or `None`, *optional*, defaults to `1024`):
        Maximum length of the full sequence (prompt + completion).
    truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
        Truncation mode to use when the sequence exceeds `max_length`. Possible values are `"keep_end"` and
        `"keep_start"`.
    padding_free (`bool`, *optional*, defaults to `False`):
        Whether to perform forward passes without padding by flattening all sequences in the batch into a single
        continuous sequence. This reduces memory usage by eliminating padding overhead. Currently, this is only
        supported with the `flash_attention_2` attention implementation, which can efficiently handle the flattened
        batch structure.
    precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
        Whether to precompute the log probabilities from the reference model. Setting this to `True` allows
        training without needing the reference model during training, which can help reduce GPU memory usage. If
        set to `False` (default), the reference model will be used during training to compute log probabilities
        on-the-fly.
    precompute_ref_batch_size (`int` or `None`, *optional*, defaults to `None`):
        Batch size to use when precomputing reference model log probabilities. This can be set higher than the
        training batch size to speed up preprocessing. If `None`, defaults to `per_device_train_batch_size` for
        training and `per_device_eval_batch_size` for evaluation.
    tools (`Optional[list[Union[dict, Callable]]]`, *optional*, defaults to `None`):
        List of tools (callable functions) that will be accessible to the model. If the template does not support
        function calling, this argument will have no effect.

    > Parameters that control the training

    loss_type (`str` or `list[str]`, *optional*, defaults to `"sigmoid"`):
        Type of loss to use. Possible values are:

            - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
            - `"hinge"`: hinge loss on the normalized likelihood from the
              [SLiC](https://huggingface.co/papers/2305.10425) paper.
            - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
            - `"exo_pair"`: pairwise EXO loss from the [EXO](https://huggingface.co/papers/2402.00856) paper.
            - `"nca_pair"`: pairwise NCA loss from the [NCA](https://huggingface.co/papers/2402.05369) paper.
            - `"robust"`: unbiased estimate of the DPO loss that is robust to preference noise from the [Robust
              DPO](https://huggingface.co/papers/2403.00409) paper.
            - `"bco_pair"`: pairwise BCO loss from the [BCO](https://huggingface.co/papers/2404.04656) paper.
            - `"sppo_hard"`: SPPO loss with hard label from the [SPPO](https://huggingface.co/papers/2405.00675)
              paper.
            - `"aot"`: AOT loss for paired datasets from the [AOT](https://huggingface.co/papers/2406.05882) paper.
            - `"aot_pair"`: AOT loss for unpaired datasets from the [AOT](https://huggingface.co/papers/2406.05882)
              paper.
            - `"discopop"`: DiscoPOP (a.k.a Log-Ratio Modulated Loss, LRML) loss from the
              [DiscoPOP](https://huggingface.co/papers/2406.08414) paper.
            - `"apo_zero"`: APO-zero loss from the [APO](https://huggingface.co/papers/2408.06266) paper.
            - `"apo_down"`: APO-down loss from the [APO](https://huggingface.co/papers/2408.06266) paper.
            - `"sft"`: Negative log-likelihood loss (standard supervised fine-tuning loss).

        Multiple loss types can be combined using comma separation (e.g., `["sigmoid", "bco_pair", "sft"]` for
        [MPO](https://huggingface.co/papers/2411.10442)). The `loss_weights` parameter can be used to specify
        corresponding weights for each loss type.

    use_liger_loss (`bool`, *optional*, defaults to `False`):
        Whether to use Liger loss.
    base_model_attribute_name (`str`, *optional*, defaults to `"model"`):
        Name of the attribute in the model that contains the base model. This is used to get the base model from
        the model when the model does not have a `get_decoder` method in the case when `use_liger_loss` is `True`.
    beta (`float`, *optional*, defaults to `0.1`):
        Parameter controlling the deviation from the reference model. Higher β means less deviation from the
        reference model. For the IPO loss (`loss_type="ipo"`), β is the regularization parameter denoted by τ in
        the [paper](https://huggingface.co/papers/2310.12036).
    f_divergence_type (`str`, *optional*, defaults to `FDivergenceType.REVERSE_KL`):
        Type of f-divergence regularization function to compute divergence between policy and reference model.
    f_alpha_divergence_coef (`float`, *optional*, defaults to `1.0`):
        α coefficient in the α-divergence u^-α regularization function for DPO loss.
    reference_free (`bool`, *optional*, defaults to `False`):
        Whether to ignore the provided reference model and implicitly use a reference model that assigns equal
        probability to all responses.
    label_smoothing (`float`, *optional*, defaults to `0.0`):
        Robust DPO label smoothing parameter from the [cDPO report](https://ericmitchell.ai/cdpo.pdf) and [Robust
        DPO](https://huggingface.co/papers/2403.00409) paper that should be between `0.0` and `0.5`.
    use_weighting (`bool`, *optional*, defaults to `False`):
        Whether to weight the loss as done in the [WPO paper](https://huggingface.co/papers/2406.11827).
    rpo_alpha (`float`, *optional*, defaults to `None`):
        α parameter from the [RPO paper](https://huggingface.co/papers/2404.19733) (v3), which controls the
        weighting of the NLL term in the loss. If `None`, no weighting is applied and the loss is the same as the
        DPO loss. The paper recommends `rpo_alpha=1.0`.
    ld_alpha (`float` or `None`, *optional*, defaults to `None`):
        α parameter from the [LD-DPO paper](https://huggingface.co/papers/2409.06411), which controls the weighting
        of the verbose token log-probabilities in responses. If `None`, no weighting is applied to the verbose
        part, and the loss is equivalent to the standard DPO loss. The paper recommends setting `ld_alpha` between
        `0.0` and `1.0`.
    discopop_tau (`float`, *optional*, defaults to `0.05`):
        τ/temperature parameter from the [DiscoPOP](https://huggingface.co/papers/2406.08414) paper, which controls
        the shape of log ratio modulated loss. The paper recommends the default value `discopop_tau=0.05`.
    loss_weights (`list[float]` or `None`, *optional*, defaults to `None`):
        List of loss weights for multi-loss combinations. Used when combining multiple loss types. Example: `[0.8,
        0.2, 1.0]` for [MPO](https://huggingface.co/papers/2411.10442). If not provided, defaults to equal weights
        (`1.0`) for all loss types.
    sync_ref_model (`bool`, *optional*, defaults to `False`):
        Whether to synchronize the reference model with the active model every `ref_model_sync_steps` steps, using
        the `ref_model_mixup_alpha` parameter. This synchronization originates from the
        [TR-DPO](https://huggingface.co/papers/2404.09656) paper.
    ref_model_mixup_alpha (`float`, *optional*, defaults to `0.6`):
        α parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which controls the mix
        between the current policy and the previous reference policy during updates. The reference policy is
        updated according to the equation: `π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you
        must set `sync_ref_model=True`.
    ref_model_sync_steps (`int`, *optional*, defaults to `512`):
        τ parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which determines how
        frequently the current policy is synchronized with the reference policy. To use this parameter, you must
        set `sync_ref_model=True`.

    > Parameters that control the logging

    generate_during_eval (`bool`, *optional*, defaults to `False`):
        Whether to generate and log completions from both the model and the reference model to W&B or Comet during
        evaluation.

    NhelpzvLLM SamplingParams)defaultmetadatavllm_sampling_paramsrq   z8Chunk size to reduce memory usage. -1 is most efficient.unsloth_num_chunksz'Maximum sequence length to truncate to.max_seq_lengthc                   > US:  a  [        SU S35        US:  a  [        SU S35        Uc  U#S:X  a
  U$S:X  a  SnS	n#Wc$  S
SKJn  [        [	        U" 5       S-   S5      S5      n[
        TU ]  " S0 SU_SU_SU_SU_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_S U_S!U_S"U_S#U_S$U_S%U_S&U_S'U_S(U_S)U_S*U_S+U_S,U_S-U_S.U _S/U!_S0U"_S1U#_S2U$_S3U%_S4U&_S5U'_S6U(_S7U)_S8U*_S9U+_S:U,_S;U-_S<U._S=U/_S>U0_S?U1_S@U2_SAU3_SBU4_SCU5_SDU6_SEU7_SFU8_SGU9_SHU:_SIU;_SJU<_SKU=_SLU>_SMU?_SNW@_SOWA_SPWB_SQWC_SRWD_SSWE_STWF_SUWG_SVWH_SWWI_SXWJ_SYWK_SZWL_S[WM_S\WN_S]WO_S^WP_S_WQ_S`WR_SaWS_SbWT_ScWU_SdWV_SeWW_SfWX_SgWY_ShWZ_SiW[_SjW\_SkW]_SlW^_SmW__SnW`_SoWa_SpWb_SqWc_SrWd_SsWe_StWf_SuWg_SvWh_SwWi_SxWj_SyWk_SzWl_S{Wm_S|Wn_S}Wo_S~Wp_SWq_SWr_SWs_SWt_SWu_SWv_SWw_SWx_SWy_SWz_SW{_SW|_SW}_SW~_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_WD6  WU l        WU l        WU l	        g )NgHz>z Unsloth: Your learning rate of `zi` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!rw   za` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!steps  unsloth_training_checkpointsnor   )	cpu_countrr      @   
output_diroverwrite_output_dirdo_traindo_eval
do_predicteval_strategyprediction_loss_onlyper_device_train_batch_sizeper_device_eval_batch_sizeper_gpu_train_batch_sizeper_gpu_eval_batch_sizegradient_accumulation_stepseval_accumulation_steps
eval_delaytorch_empty_cache_stepslearning_rateweight_decay
adam_beta1
adam_beta2adam_epsilonmax_grad_normnum_train_epochs	max_stepslr_scheduler_typewarmup_ratiowarmup_steps	log_levellog_level_replicalog_on_each_nodelogging_dirlogging_strategylogging_first_steplogging_stepslogging_nan_inf_filtersave_strategy
save_stepssave_total_limitsave_safetensorssave_on_each_nodesave_only_model'restore_callback_states_from_checkpointno_cudause_cpuuse_mps_deviceseed	data_seedjit_mode_evaluse_ipexbf16fp16fp16_opt_levelhalf_precision_backendbf16_full_evalfp16_full_evaltf32
local_rankddp_backendtpu_num_corestpu_metrics_debugdebugdataloader_drop_last
eval_stepsdataloader_num_workersdataloader_prefetch_factor
past_indexrun_namedisable_tqdmremove_unused_columnslabel_namesload_best_model_at_endmetric_for_best_modelgreater_is_betterignore_data_skipfsdpfsdp_min_num_paramsfsdp_config"fsdp_transformer_layer_cls_to_wrapaccelerator_configparallelism_config	deepspeedlabel_smoothing_factoroptim
optim_args	adafactorgroup_by_lengthlength_column_name	report_toddp_find_unused_parametersddp_bucket_cap_mbddp_broadcast_buffersdataloader_pin_memorydataloader_persistent_workersskip_memory_metricsuse_legacy_prediction_looppush_to_hubresume_from_checkpointhub_model_idhub_strategy	hub_tokenhub_private_repohub_always_pushhub_revisiongradient_checkpointinggradient_checkpointing_kwargsinclude_inputs_for_metricseval_do_concat_batchesfp16_backendpush_to_hub_model_idpush_to_hub_organizationpush_to_hub_tokenmp_parametersauto_find_batch_sizefull_determinismtorchdynamo	ray_scopeddp_timeouttorch_compiletorch_compile_backendtorch_compile_modeinclude_tokens_per_secondinclude_num_input_tokens_seenneftune_noise_alphaoptim_target_modulesbatch_eval_metricseval_on_startuse_liger_kernelliger_kernel_configeval_use_gather_objectaverage_tokens_across_devicesmodel_init_kwargsref_model_init_kwargsmodel_adapter_nameref_adapter_nameforce_use_ref_modeldisable_dropoutuse_logits_to_keepdataset_num_procpadding_valuelabel_pad_token_idmax_prompt_lengthmax_completion_length
max_lengthtruncation_modepadding_freeprecompute_ref_log_probsprecompute_ref_batch_sizetoolsuse_liger_lossbase_model_attribute_namebetaf_alpha_divergence_coefreference_freelabel_smoothinguse_weighting	rpo_alphald_alphadiscopop_tauloss_weightssync_ref_modelref_model_mixup_alpharef_model_sync_stepsgenerate_during_eval )
printmultiprocessingr   minmaxsuper__init__r   r   r   )r^   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rE  rF  rG  rH  rI  rJ  rK  rL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  r   r   r   r`   r   	__class__s                                                                                                                                                                           rc   rk  UnslothDPOConfig.__init__9  sh	   R 4)I-  YB  (C  "D1e&F}o  Vw  %x  y-7":zS?P7J M#1"3y{1}a#8"= b	B#b	B#7b	B  b	B 	b	B
 $b	B *b	B $8b	B +Fb	B *Db	B (@b	B '>b	B +Fb	B '>b	B $b	B '>b	B  *!b	B" (#b	B$ $%b	B& $'b	B( ()b	B* *+b	B,  0-b	B. "/b	B0 !21b	B2 (3b	B4 (5b	B6 "7b	B8 !29b	B:  0;b	B< &=b	B>  0?b	B@ "4Ab	BB *Cb	BD &<Eb	BF *Gb	BH $Ib	BJ  0Kb	BL  0Mb	BN !2Ob	BP .Qb	BR 7^Sb	BT Ub	BV Wb	BX ,Yb	BZ [b	B\ "]b	B^ *_b	B`  ab	Bb cb	Bd eb	Bf ,gb	Bh &<ib	Bj ,kb	Bl ,mb	Bn ob	Bp $qb	Br &sb	Bt *ub	Bv !2wb	Bx yb	Bz $8{b	B| $}b	B~ &<b	B@ *DAb	BB $Cb	BD  Eb	BF (Gb	BH %:Ib	BJ &Kb	BL &<Mb	BN %:Ob	BP !2Qb	BR  0Sb	BT Ub	BV #6Wb	BX &Yb	BZ 2T[b	B\ "4]b	B^ "4_b	B` "ab	Bb &<cb	Bd eb	Bf $gb	Bh "ib	Bj .kb	Bl "4mb	Bn "ob	Bp *Dqb	Br !2sb	Bt %:ub	Bv %:wb	Bx -Jyb	Bz #6{b	B| *D}b	B~ &b	B@ &<Ab	BB (Cb	BD (Eb	BF "Gb	BH  0Ib	BJ .Kb	BL (Mb	BN &<Ob	BP -JQb	BR *DSb	BT &<Ub	BV (Wb	BX $8Yb	BZ (@[b	B\ !2]b	B^ *_b	B` $8ab	Bb  0cb	Bd &eb	Bf "gb	Bh &ib	Bj *kb	Bl %:mb	Bn "4ob	Bp )Bqb	Br -Jsb	Bt #6ub	Bv $8wb	Bx "4yb	Bz *{b	B|  0}b	B~ #6b	B@ &<Ab	BB -JCb	BD !2Eb	BF %:Gb	BH "4Ib	BJ  0Kb	BL #6Mb	BN .Ob	BP "4Qb	BR  0Sb	BT *Ub	BV "4Wb	BX !2Yb	BZ %:[b	B\ $]b	B^ ._b	B` (ab	Bb (@cb	Bd )Beb	Bf gb	Bh ,ib	Bj )Bkb	Bl mb	Bn '>ob	Bp ,qb	Br .sb	Bt *ub	Bv "wb	Bx  yb	Bz ({b	B| (}b	B~ ,b	B@ %:Ab	BB $8Cb	BD $8&Eb	BF %9!"4,rf   )r   r   r   )NNFFFr   Frr   rr   NNr   r   r      g-C6
?g{Gz?g?g+?g:0yE>      ?g      @rq   linear皙?r   passivewarningTNr   Frw   Fr   r   NTFFFFFFO  rt  FFFFO1autoFFNrq   NNF FNr   Nrq   NNTNFNNFrw  r   NNNNN        
adamw_8bitNFFlengthNNNNTFTFFNN
every_saveNNFNTNFTrv  NNNrw  FFNlasti  FNNFFNNFFFNFTNNNNFTFNNi   Ni   keep_endFFNNFrZ   rq  ro  Frx  FNNg?NFg333333?r}  FNrq   N)__name__
__module____qualname____firstlineno____doc__rR   r   r   r   __annotations__r   intr   rk  __static_attributes____classcell__rl  s   @rc   r   r      ss   \z +012+(3-  */VW*#  &+EF&NXc]  #$&'%&#'"&&'"#"%$%""!&!27!'!$!"%) $!& $  -1!!!$%%)  $ $(-"%*!%#!%(,%*!%##' $  $!$)(-"#" "!&(,  $!#"! $$#($($+"% #"$#MW- W-rf   r   c                   	  ^  \ rS rSrSrSS/r            SMS\\\R                  \
4   S\\\
\R                  \4      S\\   S	\\   S
\\\\4      S\\\\\\\\\4   4   4      S\\\\\\4      S\\\/\4      S\\\      S\\\R6                  R8                     \\R6                  R:                  R<                     4   S\\\\R6                  R8                     \\\ 4   4      S\\\RB                  \RB                  /\RB                  4      S\S   4U 4S jjjr"SNS\S\S\#S\
4S jjr$S\
S\
S\ S\S\
4
S jr%S\
S\4S jr&S\\\4   S\\\\\4   S\S\S\\\4   4
S jr'\(   SOS\\\4   S\S\\)   S \\)   S!\#S\\\\)   4   4S" jj5       r*\(   SOS\\\4   S\S\\)   S \\)   S!\#S\\\\)   4   4S# jj5       r+S$ r,S\-4U 4S% jjr.SPS\\   S\-4U 4S& jjjr/\0S' 5       r1S(\\\Rd                  4   S\\RB                  \RB                  4   4S) jr3\(S(\\\\\Rd                  4   4   S*\)S\\\Rd                  4   4S+ j5       r4  SQS,\Rj                  S-\Rj                  S.\Rj                  S/\Rj                  S0\S1\\\Rj                  4   S\\Rj                  \Rj                  \Rj                  4   4S2 jjr6S\R                  S(\\\\\Rd                  4   4   S\\\RB                  4   4S3 jr7 SNS\R                  S(\\\\\Rd                  4   4   S4\#S\\\RB                  4   4S5 jjr8 SRS\\
\R                  4   S(\\\\\Rd                  4   4   S6\9S7   S\\RB                  \\\:4   4   4S8 jjr;  SSS\\
\R                  4   S9\\\\RB                  \ 4   4   S\\RB                  \\RB                  \\\:4   4   4   4S: jjr<S(\\\Rd                  4   S\\\4   4S; jr= SPS\\
\R                  4   S9\\\\RB                  \ 4   4   S<\#S=\\\      S\\RB                  \\RB                     \\RB                     4   4
S> jjr>SRS?\\\:4   S6\9S7   SS4S@ jjr?   STSA\-SB\S<\\#   S=\\\      SC\S\4U 4SD jjjr@SPSE\\\:4   SF\\:   SS4U 4SG jjjrAU 4SH jrB   SUSI\\   S\\   SJ\\\\   S4   4SK jjrCSLrDU =rE$ )V_UnslothDPOTraineri  a  
Trainer for Direct Preference Optimization (DPO) method.

This class is a wrapper around the [`transformers.Trainer`] class and inherits all of its attributes and methods.

Args:
    model (`Union[str, PreTrainedModel]`):
        Model to be trained. Can be either:

        - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
          path to a *directory* containing model weights saved using
          [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
          using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
          `args.model_init_kwargs`.
        - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
    ref_model (`PreTrainedModelWrapper`):
        Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
        and loss. If no reference model is provided, the trainer will create a reference model with the same
        architecture as the model to be optimized.
    args ([`DPOConfig`], *optional*, defaults to `None`):
        Configuration for this trainer. If `None`, a default configuration is used.
    data_collator (`DataCollator`, *optional*):
        Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
        Will default to [`DataCollatorForPreference`].
    train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
        Dataset to use for training. DPO supports [preference](#preference) type and. The format of the samples can
        be either:

        - [Standard](dataset_formats#standard): Each sample contains plain text.
        - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
          and content).
    eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
        Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If `None`, the processing class is loaded from the model's name
        with [`~transformers.AutoTokenizer.from_pretrained`].
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function that will be used to compute metrics at evaluation. Must take a [`EvalPrediction`] and return
        a dictionary string to metric values. *Note* When passing TrainingArgs with `batch_eval_metrics` set to
        `True`, your compute_metrics function must take a boolean `compute_result` argument. This will be triggered
        after the last eval batch to signal that the function needs to calculate and return the global summary
        statistics rather than accumulating the batch-level statistics.
    callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
        List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
        in [here](https://huggingface.co/docs/transformers/main_classes/callback).

        If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
        method.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
        A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
        model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
    optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
        A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
        `args`. Incompatible with the `optimizers` argument.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
        A function that preprocess the logits right before caching them at each evaluation step. Must take two
        tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
        by this function will be reflected in the predictions received by `compute_metrics`.

        Note that the labels (second parameter) will be `None` if the dataset does not have them.
    peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
        PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
trldpoNrZ   	ref_modelr_   data_collatortrain_dataseteval_datasetprocessing_classcompute_metrics	callbacks
optimizersoptimizer_cls_and_kwargspreprocess_logits_for_metricspeft_configr    c                   > [        U[        5      (       a  UOUR                  R                  nUc"  UR	                  S5      S   n[        U S35      nUc  [        R                  " U5      nUR                  b  UR                  U l        O[        US5      (       a  UR                  b  UR                  U l        OO[        US5      (       a3  UR                  R                  b  UR                  R                  U l        O[        S5      e[        U[        5      (       d  X!L a  [        S5      eUR                  b+  [        U[        5      (       d  [        R                  " S5        [        U[        5      (       a  U R!                  X5      nUR"                  b+  [        U[        5      (       d  [        R                  " S	5        [        U[        5      (       a  U R!                  X#S
S9nU R%                  XX5      nUR&                  (       a8  [)        5       (       d)  [+        5       (       d  [-        5       (       d  [        S5      eUR                  R.                  U l        UR                  R0                  [2        R4                  " 5       ;   U l        [9        5       =(       a    [        U[:        5      U l        UR>                  U l        UR@                  U l         URB                  U l!        U(       a  X l"        O:U R<                  (       d  URF                  (       a  S U l"        O[I        U5      U l"        URJ                  (       a-  [M        U5        U RD                  b  [M        U RD                  5        URN                  (       at  [Q        5       (       d  [S        S5      eURT                  S;  a  [        S5      e[W        URX                  URZ                  URB                  (       + SURT                  S9U l.        S
UR^                  S'   Uc  [a        U R                  S9nUR&                  U l        URX                  U l,        URb                  U l1        URd                  U l2        URf                  U l3        URh                  U l4        URF                  U l#        URj                  U l5        URl                  (       aV  UR                  Rn                  S:w  a  [        R                  " S5        URp                  S:X  a  [        R                  " S5        URl                  U l6        SU l9        SU l:        URZ                  U l-        URv                  U l;        [        URT                  [x        5      (       a  URT                  OURT                  /U l*        URz                  U l=        [}        UR                  SS5      U l?        UR                  U l@        [}        UR                  SS5      U lA        U R~                  (       a&  U R                  S:X  a  [        R                  " S5        U RT                   HD  nUS;   a*  URv                  S:  a  [        R                  " SU S35        US :X  d  M;  [        S!5      e   [        S" 5      U lC        UR                  U lD        [        R                  UR                  0U lH        UR                  U lI        U R                  XWUS#5      nUb`  [        U[        5      (       a8  UR                  5        VVs0 s H  u  nnUU R                  UXsU5      _M     nnnOU R                  XgUS$5      n[        TU G]9  UUUUUUUU	U
UUS%9  SU lO        [        U R                  S&5      (       a%  U R                  R                  U R                  5        [        U S'5      (       d  [        S(5      eU R                  (       aJ  U R                  R                  R                  R                  S):X  a  U RF                  (       a  [        S*5      eU RD                  cJ  U R<                  (       d  U RF                  (       d  [        S+5      eUR                  (       a  [        S,5      eOU R                  (       a&  [        U RD                  U R                  5      U l"        O`U R                  (       a&  [        U RD                  U R                  5      U l"        O)U R                  R                  U RD                  S
S-9U l"        UR                  (       aI  U RF                  (       a  [        S.5      eU R                  [        U RD                  U R                  S/95        S0U RT                  ;   a  [        U R                  5      U la        g g s  snnf )1N/rq   z-DPOr   	tokenizera  `padding_value` is not specified in `DPOConfig`, and `pad_token_id` is missing in the `processing_class`. Please either set the `padding_value` argument in `DPOConfig`, or set `tokenizer.pad_token` (e.g., `tokenizer.pad_token = tokenizer.eos_token`) before instantiating the trainer.z`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the same as `model`, you must mass a copy of it, or `None` if you use peft.zYou passed model_init_kwargs to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.zYou passed ref_model_init_kwargs to the `DPOConfig`, but your ref_model is already instantiated. The `ref_model_init_kwargs` will be ignored.T)is_refz`generate_during_eval=True` requires Weights and Biases, MLFlow or Comet to be installed. Please install `wandb`, `mlflow` or `comet-ml` to resolve.zYou set `use_liger_loss=True` but the liger kernel is not available. Please install liger-kernel first: `pip install liger-kernel`)sigmoidapo_zeroapo_down	sppo_hardnca_pairzYou set `use_liger_loss=True` but the loss type is not from `[sigmoid, apo_zero, apo_down, sppo_hard, nca_pair`. Please set `loss_type='[sigmoid | apo_zero | apo_down | sppo_hard | nca_pair]'` to use the liger kernel.F)ignore_indexrX  use_ref_modelaverage_log_prob	loss_typeestimate_tokens)r   flash_attention_2a  Padding-free training is enabled, but the attention implementation is not set to 'flash_attention_2'. Padding-free training flattens batches into a single sequence, and 'flash_attention_2' is the only known attention mechanism that reliably supports this. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation='flash_attention_2'` in the model configuration, or verify that your attention mechanism can handle flattened sequences.rw   zYou are using a per_device_train_batch_size of 1 with padding-free training. Using a batch size of 1 anihilate the benefits of padding-free training. Please consider increasing the batch size to at least 2.output_router_logitsrouter_aux_loss_coefrx  a-  You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to `0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary loss.)hingeipobco_pairr  r  r  r  r   zYou are using the z loss type that does not support label smoothing. The `label_smoothing` parameter will be ignored. Set `label_smoothing` to `0.0` to remove this warning.kto_pairzKSupport for kto_pair has been removed in DPOTrainer. Please use KTOTrainer.c                       [        [        5      $ N)r.   listre  rf   rc   <lambda>-_UnslothDPOTrainer.__init__.<locals>.<lambda>  s	    ;t3Drf   traineval)rZ   r_   r  r  r  r  r  r  r  r  r  add_model_tagsacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`.   zrYou cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`.z]No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`zYYou currently cannot use `ref_model=None` with TR-DPO method. Please provide `ref_model`.)evaluation_modezoYou cannot use `precompute_ref_log_probs=True` with TR-DPO method. Please set `precompute_ref_log_probs=False`.)r  r  r  )b
isinstancestrconfig_name_or_pathsplitr   r   from_pretrainedrL  r]   r   r  r   rD  r=   rs  _create_model_from_pathrE  _prepare_peft_modelrd  r;   r7   r9   is_encoder_decoder
model_typer   keysis_vision_modelr:   r!   is_peft_modelrF  rG  rZ  r  rS  r,   rI  r/   rV  r8   ImportErrorr  LigerFusedLinearDPOLossrM  rX  dpo_loss_fnwarnings_issuedr   rN  rO  rP  rQ  rJ  rR  _attn_implementationr    _precomputed_train_ref_log_probs_precomputed_eval_ref_log_probsr[  r  r`  getattraux_loss_enabledr\  aux_loss_coefr.   _stored_metricsf_divergence_typer   ALPHA_DIVERGENCE_COEF_KEYrY  f_divergence_paramsrK  _prepare_datasetdictitemsrj  rk  model_accepts_loss_kwargsrZ   r  
_tag_namesAttributeErroris_deepspeed_enabledr  statedeepspeed_plugin
zero_stagera  rH   is_fsdp_enabledrI   prepare_modeladd_callbackr&   r%   running)r^   rZ   r  r_   r  r  r  r  r  r  r  r  r  r  model_id
model_namer  keydatasetrl  s                      rc   rk  _UnslothDPOTrainer.__init__  s   & 'uc2258R8R<!,R0J
|401D #,<<XF)!%!3!3D'88=M=Z=Z=f%5%B%B");77<L<V<V<c<c<o%5%?%?%L%L" #  %%%)*<Z 
 !!-j6L6LNN; eS!!00=E%%1*YPS:T:TNN? i%%44YT4RI ((;M$$.@.B.BFXFZFZ^q^s^sN 
 #(,,"A"A$||66:d:i:i:kk.0QZy5Q"&"9"9 $ 5 5"11&N4#@#@!DN3E:DN $U+~~)(8 ,..!T  ~~%aa    7!44YY"&"5"55!&.. D 48/0  54CUCUVM$($=$=!"&"9"9!%!7!7%)%?%?"//#33(,(E(E%"&"9"9||004GGJ //14%
 !-- 16-/4,II	#33+5dnnd+K+KRVR`R`Qa -- '6Le T!//$U\\3I3O  T%7%73%>NN Ijj((1,( 4 
 J& !noo (  ++DE!%!7!7$8$R$RTXTpTp#q  $ 5 5 --mtU\]#,-- )5(:(:(< (<W ..w8HPSTT(<   
  $44\UY[ab''%-+!%=*G 	 	
" */& 4::/00JJ%%doo6t]++ j 
 $$%%66AAQF4KhKh  I  >>!&&$*G*G s  "" o  #
 ((!24>>4CSCS!T%%!-dnnd>N>N!O!%!1!1!?!?`d!?!e,,  F  2T^^Y]YiYijk')$*:*:;DL (G s   >!f 
model_pathr  r   c                 ~   U(       d  UR                   =(       d    0 nOUR                  =(       d    0 nUR                  S5      n[        U[        R
                  5      (       d	  US:X  d  Uc  O9[        U[        5      (       a  [        [        U5      nXTS'   O[        SU S35      e[        R                  " U40 UD6nU$ )z0Creates a model from a path or model identifier.dtyperv  zInvalid `dtype` passed to `DPOConfig`. Expected either 'auto' or a string representing a `torch.dtype` (e.g., 'float32'), but got .)rD  rE  getr  rO   r  r  r  r   r   r  )r^   r  r_   r  rD  r  rZ   s          rc   r  *_UnslothDPOTrainer._create_model_from_path  s     $ 6 6 <" $ : : @b "%%g.eU[[))Uf_s##E5)E).g&>>CWAG  %44ZUCTUrf   c                    SU l         [        5       (       d  Ub  [        S5      e[        5       (       Ga3  UGb/  [        U[        5      (       a  UR                  5       nUb  UR                  (       d  [        S5      e[        USS5      (       d  [        USS5      (       at  [        US5      =(       a0    S[        [        R                  " [        5      R                  5      ;   nSUR                  0nU(       a  UR                  US'   [        U40 UD6nOU R!                  X5      n[#        X5      nUR$                  (       a$  [        USS5      (       a  ['        U5        SU l         U$ U R!                  X5      nU$ )	z#Prepares a model for PEFT training.FzvPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT modelsa8  You passed both a ref_model and a peft_config. For training PEFT adapters with DPO there is no need to pass a reference model. Please pass `ref_model=None` in case you want to train PEFT adapters, or pass a ref_model with `force_use_ref_model=True` in DPOTrainer's init. if you want to use a different ref_model.is_loaded_in_8bitis_loaded_in_4bitr*  use_gradient_checkpointingT)_peft_has_been_casted_to_bf16r:   r   r  r!   merge_and_unloadrH  r  r]   r  r6   	signaturerJ   
parametersr)  r*  _prepare_gradient_checkpointingr5   r   rG   )r^   rZ   r  r  r_   _support_gc_kwargsprepare_model_kwargss          rc   r  &_UnslothDPOTrainer._prepare_peft_model  sr    .3* ""{'> I    [%<%++..0$T-E-E A  u1599WUL_af=g=g%,9& &5%%&EFQQ:  # )EdFaFa'b$%LPLnLn()HI7VAUV <<UI #56EyyWU,?GG+E2592
  88EErf   c                     UR                   (       aE  [        US5      (       a  UR                  5         U$ S nUR                  5       R	                  U5        U$ )z4Prepare the gradienting checkpointing for the model.enable_input_require_gradsc                 &    UR                  S5        g )NT)requires_grad_)moduleinputra   s      rc   make_inputs_require_gradT_UnslothDPOTrainer._prepare_gradient_checkpointing.<locals>.make_inputs_require_grad8  s    ))$/rf   )r)  r]   r  get_input_embeddingsregister_forward_hook)r^   rZ   r_   r  s       rc   r  2_UnslothDPOTrainer._prepare_gradient_checkpointing-  sR    
 &&u:;;002 0 **,BBC[\rf   r  dataset_namec           
         0 n[        U[        5      (       a  UR                  US'   SUS'   [        5       R	                  5          [        U[        5      (       a	  SU S3US'   UR
                  " [        40 UD6n[        U[        5      (       a	  SU S3US'   UR
                  " [        4SX#R                  S	.0UD6n[        U[        5      (       a	  S
U S3US'   UR
                  " U R                  (       d  U R                  OU R                  4SS/UUR                  UR                  SS.S.UD6nS S S 5        U$ ! , (       d  f       U$ = f)Nnum_proc
   writer_batch_sizezExtracting prompt in z datasetdesczApplying chat template to 	fn_kwargs)r  rU  zTokenizing chosenrejectedF)r  rN  rO  add_special_tokens)remove_columnsr	  )r  r   rK  r   main_process_firstmapr@   r?   rU  r  tokenize_rowprocess_rowrN  rO  )r^   r  r  r_   r  
map_kwargss         rc   r  #_UnslothDPOTrainer._prepare_dataset?  sX    
gw''%)%:%:Jz".0J*+^..0'7++'<\N(%S
6"kk"6E*EG '7++'A,x%X
6"kk)CS^h^h5imwG
 '7++'2<.%I
6"kk)-)=)=!!4CSCS (*5(8)-)?)?-1-G-G*/ G# 1< = 10< s   C/D==
EfeaturesrN  rO  r  c                 F   UnU" U S   SS9S   nU" U S   SS9S   nU" U S   SS9S   nU(       a9  UR                   b  UR                   /U-   nUR                  b  XeR                  /-   nXuR                  /-   nXR                  /-   nUb  Xb* S nUb
  USU nUSU nUUUS.$ )	a  
Tokenize a row of the dataset.

Args:
    features (`dict[str, str]`):
        Row of the dataset, should contain the keys `"prompt"`, `"chosen"`, and `"rejected"`.
    processing_class (`PreTrainedTokenizerBase`):
        Processing class used to process the data.
    max_prompt_length (`int` or `None`):
        Maximum length of the prompt sequence. If `None`, the prompt sequence is not truncated.
    max_completion_length (`int` or `None`):
        Maximum length of the completion sequences. If `None`, the completion sequences are not truncated.
    add_special_tokens (`bool`):
        Whether to add special tokens to the sequences. Typically used for encoder-decoder models. If `True`,
        the prompt sequence will have a bos token prepended and an eos token appended. In any case, the
        completion sequences will have an eos token appended.

Returns:
    `dict[str, list[int]]`:
        Tokenized sequences with the keys `"prompt_input_ids"`, `"chosen_input_ids"`, and
        `"rejected_input_ids".

Example:
```python
>>> from transformers import GPT2Tokenizer

>>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
>>> features = {"prompt": "The sky is", "chosen": " blue", "rejected": " green"}
>>> DPOTrainer.tokenize_row(
...     features, tokenizer, max_prompt_length=3, max_completion_length=3, add_special_tokens=False
... )
{'prompt_input_ids': [464, 6766, 318], 'chosen_input_ids': [4171, 50256], 'rejected_input_ids': [4077, 50256]}
```
promptFr  r   r
  r  N)prompt_input_idschosen_input_idsrejected_input_ids)bos_token_ideos_token_id)	r  r  rN  rO  r  r  r  r  r  s	            rc   r  _UnslothDPOTrainer.tokenize_rowl  s   T %	$Xh%7ERS^_$Xh%7ERS^_&x
';PUVWbc %%1$-$:$:#;>N#N %%1#37M7M6N#N +/E/E.FF/3I3I2JJ (/0B0CD ,/0F1FG!34J5J!K !1 0"4
 	
rf   c                    XR                   peU" U S   U S   SS9nUS   S   nUS   S   n	U" U S   SS	9S   n
U" U S
   SS	9S   nU(       a9  UR                  b  UR                  /U-   nUR                  b  XR                  /-   nXR                  /-   n
XR                  /-   nUb  X* S nUb
  U
SU n
USU nUU	U
US.nSU;   a  US   S   US'   SU;   a  US   S   US'   U$ )zd
Same as `tokenize_row` but for vision models. Please refer to `tokenize_row` for more information.
imagesr  F)r  textr  r   r   pixel_valuesr
  r  r  N)r  r!  r  r  pixel_attention_maskimage_sizes)r  r  r  )r  r  rN  rO  r  	processorr  processed_featuresr  r!  r  r  ra   s                rc   r  _UnslothDPOTrainer.process_row  sv     01K1K9&hx.@xPXGYnst-k:1=).9!<$Xh%7ERS^_&x
';PUVWbc %%1$-$:$:#;>N#N %%1#37M7M6N#N +/E/E.FF/3I3I2JJ (/0B0CD ,/0F1FG!34J5J!K !1( 0"4	
 "%77-?@V-WXY-ZF)*..$6}$Ea$HF=!rf   c                 2    U R                   c
  / SQU l         g g )N)r  r  r  r#  ref_chosen_logpsref_rejected_logps)_signature_columnsr^   s    rc    _set_signature_columns_if_needed3_UnslothDPOTrainer._set_signature_columns_if_needed  s    
 ""*'D# +rf   c                 B  > U R                   (       Ga  U R                  (       Gd  U R                  R                  =(       d    U R                  R                  nUU R
                  U R                  R                  U R                  R                  SS.nU R                  R                  [        U R                  40 UD65      n/ n/ n[        USS9 H  nU R                  U5      u  pxU R                  R                  Xx45      u  pxUR                  UR!                  5       5        UR                  UR!                  5       5        [#        5         U R                  R%                  5         M     [&        R(                  " U5      R+                  5       R-                  5       n	[&        R(                  " U5      R+                  5       R-                  5       n
U R                  R/                  SU	S9U l        U R                  R/                  SU
S9U l        SU l        [0        TU ]e  5       $ )	z
Returns the training [`~torch.utils.data.DataLoader`].

Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`.
Fr   
collate_fnnum_workers
pin_memoryshufflez!Train dataset reference log probsiterabler  r(  namecolumnr)  T)rS  r  r_   rT  r   r  r  r  r  preparer   r  rP   compute_ref_log_probsgather_for_metricsr   cpur0   free_memoryrO   catfloatnumpy
add_columnrj  get_train_dataloader)r^   r   dataloader_paramsdata_loaderr(  r)  padded_batchref_chosen_logpref_rejected_logpall_ref_chosen_logpsall_ref_rejected_logpsrl  s              rc   rB  '_UnslothDPOTrainer.get_train_dataloader  s    (((1V1V1V<<e		@e@eJ("00#yy??"ii== ! **22:d>P>P3fTe3fgK!!# $k@c d595O5OP\5]2595E5E5X5X$862 !''(;(;(=>"))*;*?*?*AB   ,,. !e $)99-=#>#D#D#F#L#L#N %*YY/A%B%H%H%J%P%P%R"!%!3!3!>!>DV_s!>!tD!%!3!3!>!>)2H "? "D 59D1w+--rf   c                 &  > Uc  U R                   c  [        S5      eUb  UOU R                   nU R                  (       Ga  U R                  (       Gd  U R                  R
                  =(       d    U R                  R                  nUU R                  U R                  R                  U R                  R                  SS.nU R                  R                  [        U40 UD65      n/ n/ n[        USS9 Hr  nU R                  U5      u  pU R                  R                  X45      u  pUR!                  UR#                  5       5        UR!                  U	R#                  5       5        Mt     [$        R&                  " U5      R)                  5       R+                  5       n
[$        R&                  " U5      R)                  5       R+                  5       nUR-                  SU
S9nUR-                  SUS9nU R                   b  Xl         S	U l        [.        TU ]a  US
9$ )a  
Returns the evaluation [`~torch.utils.data.DataLoader`].

Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`.

Args:
    eval_dataset (`torch.utils.data.Dataset`, *optional*):
        If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
        by the `model.forward()` method are automatically removed. It must implement `__len__`.
z-Trainer: evaluation requires an eval_dataset.Fr/  z Eval dataset reference log probsr4  r(  r6  r)  T)r  )r  r   rS  r  r_   rT  r   r  r  r  r  r9  r   rP   r:  r;  r   r<  rO   r>  r?  r@  rA  rj  get_eval_dataloader)r^   r  r   rC  rD  r(  r)  rE  rF  rG  rH  rI  rl  s               rc   rL  &_UnslothDPOTrainer.get_eval_dataloader  s    D$5$5$=LMM'3'?|TEVEV(((1U1U1U<<d		@d@dJ("00#yy??"ii== ! **22:l3`N_3`aK!!# $k@b c595O5OP\5]2595E5E5X5X$862 !''(;(;(=>"))*;*?*?*AB !d $)99-=#>#D#D#F#L#L#N %*YY/A%B%H%H%J%P%P%R"'228JSg2hL'228LUk2lL   ,$0!37D0w**EErf   c              #     #    U R                   (       aD  U R                  (       d3  U R                  R                  U R                  5      R                  5       O	[        5          U R                  (       a%  U R                  R                  U R                  5        Sv   U R                  (       a.  U R                  R                  U R                  =(       d    S5        SSS5        g! , (       d  f       g= f7f)zWContext manager for handling null reference model (that is, peft adapter manipulation).Nr   )	r  rG  r  unwrap_modelrZ   disable_adapterrB   set_adapterrF  r+  s    rc   null_ref_context#_UnslothDPOTrainer.null_ref_contextR  s     
 !!$*?*? ))$**5EEG $$

&&t'<'<=$$

&&t'>'>'K)L  s   A C6"A:C%	C6%
C3/C6batchc           	         U R                   (       a)  [        U R                  R                  R                  5      O	[        5       n[        R                  " 5          U   U R                  c5  U R                  5          U R                  U R                  USS9nSSS5        OU R                  U R                  USS9nSSS5        SSS5        WS   US   4$ ! , (       d  f       N(= f! , (       d  f       N1= f! , (       d  f       N:= f)zfComputes log probabilities of the reference model for a single padded batch of a DPO specific dataset.NT)is_ref_modelchosen_logpsrejected_logps)r  r)   r  r   typerB   rO   no_gradr  rR  concatenated_forwardrZ   )r^   rT  compte_ref_context_managerref_model_outputs       rc   r:  (_UnslothDPOTrainer.compute_ref_log_probs`  s     7;6X6XHT%%,,112^i^k 	# ]]_8~~%**,'+'@'@Uae'@'f$ -, $(#<#<T^^Uae#<#f  9_  /1ABR1SSS	 -, 98__s<   C7C&;C$C&;C7
C#C&&
C4	0C77
DrL  c           	         0 n[         R                  " U S   U S   /SS9US'   [         R                  " U S   U S   /SS9US'   SU ;   a   [         R                  " U S   U S   /SS9US'   SU ;   a   [         R                  " U S   U S   /SS9US'   SU ;   a   [         R                  " U S   U S   /SS9US'   [        U S   R                  S	   U S
   R                  S	   5      n[         R                  " [	        U S   X1S9[	        U S
   X1S945      US'   [         R                  " [	        U S   USS9[	        U S   USS945      US'   U$ )a  
Concatenate the `chosen` and `rejected` inputs from the batch into a single tensor for both the prompt and
completion sequences.

Args:
    batch (`dict[str, Union[list, torch.LongTensor]]`):
        A batch of input data. The batch must contain the following keys:

        - `"prompt_input_ids"`: Tensor of shape `(batch_size, prompt_length)` representing the prompt input
          IDs.
        - `"chosen_input_ids"`: Tensor of shape `(batch_size, chosen_length)` representing the chosen
          completion input IDs.
        - `"rejected_input_ids"`: Tensor of shape `(batch_size, rejected_length)` representing the rejected
          completion input IDs.
        - `"prompt_pixel_values"` (optional): Tensor for pixel values, if available.
        - `"prompt_pixel_attention_mask"` (optional): Tensor for pixel attention masks, if available.

    padding_value (`int`):
        The padding value to use for the concatenated completion sequences (`chosen_input_ids` and
        `rejected_input_ids`).

Returns:
    `dict[str, torch.LongTensor]`: A dictionary containing:

        - `"prompt_input_ids"`: Concatenated prompt input IDs of shape `(2 * batch_size, prompt_length)`.
        - `"completion_input_ids"`: Concatenated chosen and rejected completion input IDs of shape `(2 *
          batch_size, max_completion_length)`.
        - `"prompt_attention_mask"`: Concatenated prompt attention masks of shape `(2 * batch_size,
          prompt_length)`.
        - `"completion_attention_mask"`: Concatenated chosen and rejected attention masks of shape `(2 *
          batch_size, max_completion_length)`.
        - `"pixel_values"` (optional): Concatenated pixel values if `"prompt_pixel_values"` are present.
        - `"pixel_attention_mask"` (optional): Concatenated pixel attention masks if
          `"prompt_pixel_attention_mask"` are present.

Notes:
    The completion input IDs and attention masks are padded to the maximum completion length of the chosen or
    rejected sequences.
r  r   rv   prompt_attention_maskr!  r"  r#  r  rw   r  )	pad_valuer   chosen_attention_maskrejected_attention_maskcompletion_attention_mask)rO   r>  ri  rz   rE   )rT  rL  ra   rO  s       rc   concatenated_inputs&_UnslothDPOTrainer.concatenated_inputsm  s   V  &+YY6H0I5QcKd/ekl%m!"*/))*+U3J-KLRS+
&' U"%*YYn0Eu^G\/]cd%eF>"!U*-2YY-.6L0MNTU.F)* E!$)IIu]/CU=EY.Z`a$bF=! !$E*<$=$C$CA$FNbHcHiHijkHl m).e$679Nhe$89;Pj*
%& /4iie$;<>S_`ae$=>@Uabc/
*+ rf   rW  rX  r(  r)  r  model_outputc                    U R                   R                  nUR                  U5      U R                  (       + UR                  U5      -  -
  nUR                  U5      U R                  (       + UR                  U5      -  -
  n	U R                  [
        R                  R                  :X  a  [        R                  n
U R                  (       aD  [        R                  U R                  ;   a&  [        U R                  [        R                     5      n
[        X* -  5      [        X* -  5      -
  U
-  nOX-
  nU R                  (       a,  [        R                  " S/UR                   UR                  S9nOX4-
  nUR                  U R                   R                  5      nUR                  U R                   R                  5      nX-
  nU R                  [
        R"                  R                  :X  a0  U[$        R&                  " U5      [$        R&                  " U	5      -
  -  nUS:X  ah  [$        R(                  " U R*                  U-  5      * SU R,                  -
  -  [$        R(                  " U R*                  * U-  5      U R,                  -  -
  nGODUS:X  a{  [$        R(                  " U R*                  U-  5      * SU R,                  -
  -  [$        R(                  " U R*                  * U-  5      U R,                  -  -   SSU R,                  -  -
  -  nGOUS:X  a  SSKnU R,                  S:X  a  S	U l        U R*                  U-  R1                  5       [$        R(                  " U R*                  U-  5      UR3                  SU R,                  -
  5      -
  -  U R*                  * U-  R1                  5       [$        R(                  " U R*                  * U-  5      UR3                  U R,                  5      -
  -  -   nGOUS
:X  a(  [        R4                  " SU R*                  U-  -
  5      nGOUS:X  a  USSU R*                  -  -  -
  S-  nGOUS:X  a  X-
  nX$-
  n	U R*                  U-  nU R*                  U	-  n[        R6                  " UU4S5      R9                  5       R;                  5       nU R<                  R?                  U5        U R<                  R8                  n[$        R(                  " U R*                  U-  U-
  5      * [$        R(                  " U R*                  U	-  U-
  * 5      -
  nGOUS:X  a5  X-
  nX$-
  nUSU R*                  -  -
  S-  USU R*                  -  -   S-  -   nGOtUS:X  aq  X-
  U R*                  -  nX$-
  U R*                  -  n[$        R(                  " U5      * S[$        R(                  " U* 5      -  -
  S[$        R(                  " U* 5      -  -
  nGOUS:X  a  X-
  nX$-
  n	[        R@                  " USS9u  nn[        R@                  " U	SS9u  nnUU-
  n[$        R(                  " U R*                  U-  5      * SU R,                  -
  -  [$        R(                  " U R*                  * U-  5      U R,                  -  -
  nGORUS:X  a  X-
  nX4-
  n[        R@                  " USS9u  nn[        R@                  " USS9u  nnUU-
  n[$        R(                  " U R*                  U-  5      * SU R,                  -
  -  [$        R(                  " U R*                  * U-  5      U R,                  -  -
  nGOUS:X  aP  S[$        R0                  " U R*                  U-  5      -
  n[$        R0                  " U R*                  U	-  5      nUU-   nGOQUS:X  aQ  [$        R0                  " U R*                  U-  5      nS[$        R0                  " U R*                  X-
  -  5      -
  nUU-   nOUS:X  a  X-
  nX4-
  nX-
  nXR*                  -  n[        R0                  " XRB                  RD                  -  5      n[$        R(                  " U5      * n[        RF                  " U* 5      nUSU-
  -  UU-  -   nOqUS:X  aR  US   n URH                  S   n!U RK                  U!5      n[        RL                  " U5      n[        RL                  " U5      nO[O        SU RP                   S35      eU R*                  UR                  U5      UR                  U5      -
  R;                  5       -  nU R*                  UR                  U5      UR                  U5      -
  R;                  5       -  nUUU4$ )a  
Compute the DPO loss for a batch of policy and reference model log probabilities.

Args:
    chosen_logps (`torch.FloatTensor`):
        Log probabilities of the model for the chosen responses. Shape: `(batch_size,)`.
    rejected_logps (`torch.FloatTensor`):
        Log probabilities of the model for the rejected responses. Shape: `(batch_size,)`.
    ref_chosen_logps (`torch.FloatTensor`):
        Log probabilities of the reference model for the chosen responses. Shape: `(batch_size,)`.
    ref_rejected_logps (`torch.FloatTensor`):
        Log probabilities of the reference model for the rejected responses. Shape: `(batch_size,)`.
    loss_type (`str`, defaults to `"sigmoid"`):
        The type of loss to compute. One of:
        - `"sigmoid"`: Sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
        - `"hinge"`: Hinge loss on the normalized likelihood from the
          [SLiC](https://huggingface.co/papers/2305.10425) paper.
        - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
        - `"exo_pair"`: Pairwise EXO loss from the [EXO](https://huggingface.co/papers/2402.00856) paper.
        - `"nca_pair"`: Pairwise NCA loss from the [NCA](https://huggingface.co/papers/2402.05369) paper.
        - `"robust"`: Unbiased estimate of the DPO loss that is robust to preference noise from the [Robust
          DPO](https://huggingface.co/papers/2403.00409) paper.
        - `"bco_pair"`: Pairwise BCO loss from the [BCO](https://huggingface.co/papers/2404.04656) paper.
        - `"sppo_hard"`: SPPO loss with hard label from the [SPPO](https://huggingface.co/papers/2405.00675)
          paper.
        - `"aot"`: AOT loss for paired datasets from the [AOT](https://huggingface.co/papers/2406.05882) paper.
        - `"aot_pair"`: AOT loss for unpaired datasets from the [AOT](https://huggingface.co/papers/2406.05882)
          paper.
        - `"discopop"`: DiscoPOP (a.k.a Log-Ratio Modulated Loss, LRML) loss from the
          [DiscoPOP](https://huggingface.co/papers/2406.08414) paper.
        - `"apo_zero"`: APO-zero loss from the [APO](https://huggingface.co/papers/2408.06266) paper.
        - `"apo_down"`: APO-down loss from the [APO](https://huggingface.co/papers/2408.06266) paper.
        - `"sft"`: Negative log-likelihood loss (standard supervised fine-tuning loss).
    model_output (`dict[str, torch.FloatTensor]`, *optional*):
        The output of the model's forward pass. This is used to compute auxiliary losses if enabled.

Returns:
    A tuple of three tensors: `(losses, chosen_rewards, rejected_rewards)`. The losses tensor contains the DPO
    loss for each example in the batch. The `chosen_rewards` and `rejected_rewards` tensors contain the rewards
    for the chosen and rejected responses, respectively.
r   )r  r   r  rw   robustr   exo_pairNgMbP?r  r  r  r  g      ?r  aot_pairrv   aotr  r  discopopsftnll_losszUnknown loss type: z. Should be one of ['sigmoid', 'hinge', 'ipo', 'exo_pair', 'nca_pair', 'robust', 'bco_pair', 'sppo_hard', 'aot', 'aot_pair', 'discopop', 'apo_zero', 'apo_down', 'sft']))r  r   r|   rZ  r  r   ALPHA_DIVERGENCEvaluer   ALPHA_DIVERGENCE_COEF_DEFAULTr  r  r?  r*   rO   r   r  JS_DIVERGENCEr   softplus
logsigmoidrX  r[  mathr  logrelur>  meandetachr  updatesortr_   r_  exprz   expand
zeros_liker   r  )"r^   rW  rX  r(  r)  r  rg  r   chosen_logratiosrejected_logratios
alpha_coefr   	logratiosref_logratioslossesrv  chosen_rewardsrejected_rewardsrewardsdeltaabchosen_logratios_sorted_rejected_logratios_sortedlogratios_sortedref_logratios_sortedlosses_chosenlosses_rejectedlog_ratio_modulationlogistic_componentexp_componentsft_lossr   s"                                     rc   dpo_loss_UnslothDPOTrainer.dpo_loss  sm   d !!(( (??62$:M:M6MQaQdQdekQl5ll+..v6d>Q>Q:QUgUjUjkqUr9rr!!_%E%E%K%KK .KKJ'',@,Z,Z^b^v^v,v"4#;#;<P<j<j#kl
0;>?'JZ]hJhBiimwwF$5I"" %aS	PYP`P` a 0 E!T%5%5%<%<=I),,T-=-=-D-DEM.F%%)F)F)L)LL !**%56DV9WWW
 	!dii&011Q9M9M5MN,,		zF23d6J6JJK 
 ("dii&011Q9M9M5MN,,		zF23d6J6JJKQ----/F
 *$##q('+$ii&(113TYY/0488A@T@T<T3UU))f$--/1<<
V@S3TW[W_W_`d`t`tWu3uvwF '!ZZDII$6 67F%qA		M22q8F*$+>!/!D!YY)99N#yy+==ii1A BAFKKMTTVGLL(LL%%EllDII0@$@E#IJJQ\\))00589N F +%
 /A3A#		/)a/1sTYY3F12LLF*$*=JN . CtyyPn--n_556&6%6778  *$+>!/!D).4D!)L&#Q+0::6Ha+P(%q+.GGEdii%/00A8L8L4LM,,		zE12T5I5IIJ 
 %$5I,AM"'**YA">a&+jjA&F# !$';;Edii%/00A8L8L4LM,,		zE12T5I5IIJ 
 *$ 		$))6F*F GGMii		4F(FGO"_4F*$ IIdii2B&BCM!))DII9I9^,_"``O"_4F*$ %5I,AM.Fii'F#(==)):P:P1P#Q "#,,v"6!6!IIvg.M'1/C+CD}WkGkkF% $J/H%++A.J__Z0F"--l;N$//? %dnn%5 6% %  loof&=@P@S@STZ@[&[%c%c%ee99(9(9&(ADVDYDYZ`Da(a'i'i'kk~'777rf   c           
         U R                   R                  U5      nU R                  X R                  S9n0 nU R                  (       a  SUS'   SU;   a  US   US'   SU;   a  US   US'   SU;   a  US   US'   US   nUS   nU R
                  (       Ga  UR                  5       " US	   US   SS
9n[        US   UR                  R                  5      n	UR                  5       " U	US   UR                  US   SS9n
U
R                  nS nU R                  (       d  U R                  bv  U R                   R                  U R                  5      nUR                  5       " US	   US   SS
9nUR                  5       " U	US   UR                  US   SS9nUR                  nOzU R                  (       di  U R                  5          UR                  5       " US	   US   SS
9nUR                  5       " U	US   UR                  US   SS9nUR                  nS S S 5        US   nUR                  5       nGO[         R"                  " US	   US   4SS9n[         R"                  " US   US   4SS9n[         R"                  " [         R$                  " U5      U4SS9nU R&                  Gb  U R&                  UR)                  S5      :  a  U R*                  S:X  aQ  [-        UUU5      u  nnnUS S 2S U R&                  24   nUS S 2S U R&                  24   nUS S 2S U R&                  24   nOU R*                  S:X  ae  [/        UUU5      u  nnnUS S 2U R&                  * S 24   nUS S 2U R&                  * S 24   nUS S 2U R&                  * S 24   n[-        UUU5      u  nnnO*[1        SU R*                   S35      e[-        UUU5      u  nnnU R2                  (       aH  UR5                  SS9S   R7                  5       nUR8                  S   U-
  R;                  5       S-   nUUS'   SUS'   U R<                  (       a~  UUR                  5          R?                  S5      nUUR                  5          R?                  S5      nURA                  S5      UR                  5          R?                  S5      S-
  nUUS'   OUUS'   [C        US5      (       a"  UR                  5       b  UR                  5       nO.[E        USU RF                  RH                  5      n[E        UUU5      nU" U4SS0UD6nUR                  S S 2S S24   nS nU R                  (       d  U R                  b  U R                   R                  U R                  5      n[C        US5      (       a"  UR                  5       b  UR                  5       nO.[E        USU RF                  RH                  5      n[E        UUU5      nU" U4SS0UD6nUR                  S S 2S S24   nOU R                  (       d  [C        US5      (       a"  UR                  5       b  UR                  5       nO.[E        USU RF                  RH                  5      n[E        UUU5      nU R                  5          U" U4SS0UD6nUR                  S S 2S S24   nS S S 5        [         RJ                  " US:g  UU RL                  5      nUS S 2SS 24   nURO                  5       nS nS n U R                  (       d  U R                  b6  U R                   R                  U R                  5      nURO                  5       n!O)U R                  5          URO                  5       n!S S S 5        W!RP                  n[C        U!S5      (       a  U!RR                  OS n U RU                  URP                  UU[C        US5      (       a  URR                  OS U R                  (       d  UOS U R                  (       d  UOS U R                  (       d  U OS S9n"U"u  n#tn$n%n&n'n(n)U#U$U%U&U'U(U)S   U)S   S .n*U R                  (       a  WRV                  U*S!'   U*$ ! , (       d  f       GN<= f! , (       d  f       GN= f! , (       d  f       GN= f)"NrL  Tr  r!  r"  r#  r`  rd  r  )attention_maskreturn_dictr   F)r   r  encoder_hidden_statesencoder_attention_mask	use_cacherw   rv   
keep_startr~  Unknown truncation mode: '/'. Should be one of ['keep_end', 'keep_start'].as_tupler   output_hidden_statesr   position_idsr  get_decoderbase_model_prefixr  rq   bias)r  	ref_input
ref_weightref_bias)lossrW  rX  mean_chosen_logitsmean_rejected_logitsro  r  r  aux_loss),r  rO  re  rL  r  r  get_encoderrM   r  decoder_start_token_idr  last_hidden_staterZ  r  rR  boolrO   r>  r  rP  sizerQ  r1   r2   r   rJ  nonzerorh  rz   itemrR  r   cumsumr]   r  r_   rW  whererM  get_output_embeddingsweightr  r  r  )+r^   rZ   rT  unwrapped_modelconcatenated_batchmodel_kwargsr`  rd  encoder_outputsdecoder_input_idsdecoder_outputshidden_statesref_hidden_statesunwrapped_ref_modelref_encoder_outputsref_decoder_outputslabels	loss_maskr   r  first_compute_indexr   r  
base_model	base_attroutputsref_base_modelref_attrref_outputsmasked_input_idslm_headr  r  ref_lm_headloss_outputr  rW  rX  chosen_logits_meanrejected_logits_meanro  aux_outputsra   s+                                              rc   _compute_loss_liger&_UnslothDPOTrainer._compute_loss_liger  s	    **77>!55eK]K]5^  37L/0 //+=n+ML(!%773EF\3]L/0..*<]*KL' 23J K$67R$S!"""-99;"#5612IJ O !3"#9:&&==!
 .99;+12MN&5&G&G'9:Q'RO ,==M $&&4>>+E&*&6&6&C&CDNN&S#&9&E&E&G&'9:#56M#N $'#
 ':&E&E&G/#56Q#R*=*O*O+=>U+V#'# %8$I$I!((**,*9*E*E*G*+=>'9:Q'R$(+'
 +:*E*E*G"3'9:U'V.A.S.S/ABY/Z"'+' )<(M(M% - ((>?F1668I 		#$679KLb9cdjkI #YY#$;<>PQl>mnN
 		!!"78:STI *tATATUVAW/W''<7 <FnV_aj;k8NIy%3A7H7H4H%IN )!->t->*> ?I )!->t->*> ?I))Z7 <G~W`bk;l8NIy )!doo-=-?*? @I%3A7G7I4I%JN )!doo-=-?*? @I;EnV_aj;k8NIy$4T5I5I4J K) )  8B.R[]f7g4	9 &&&/&7&7&7&Fq&I&M&M&O#"+//!"47J"J!P!P!RUV!V1?-.37L/0   %n&9&9&;<FFqI	%n&9&9&;<FFqI	-44Q78K8K8MNXXYZ[^__/;^,1?-. 66?;V;V;X;d,88:
#O5H$))JmJmn	$_iQ
  G
 $55a"f=M !%&&4>>+E&*&6&6&C&CDNN&S#.>>CVCbCbCdCp%8%D%D%FN&':<OQUQZQZQtQtuH%,-@(L_%`N,# #
 %0$A$A!SbS&$I!((?M::?Z?Z?\?h%4%@%@%BN&8KTYYMpMpqH%,_h%XN**,"0!#"'# '#K
 )4(E(Ea"f(M% -  %{{9>9dF]F]^%ae,F "779 
""~~)&*&6&6&C&CDNN&S#1GGI**,"1"G"G"IK -$++J+2;+G+G{''TH &&NN!(&!9!9t/3/B/B')-)<)<z$%)%8%8Xd ' 
 	
l\>+=?SU]`k (,"4$8 )!n +A	
   !(!1!1F:U -,f -,, -,s%   =Aa;!aa'
a
a$'
a6rV  c                    US   R                   S   nU R                  X R                  S9nSS0nU R                  (       a  SUS'   SU;   a  US   US'   S	U;   a  US	   US	'   S
U;   a  US
   US
'   US   nUS   nUS   n	US   n
U R                  (       a=  U	nU R
                  XS:H  '   U" S,UUUS.UD6nUR                  nU
R                  5       nGO[        R                  " Xy4SS9n[        R                  " X4SS9n[        R                  " [        R                  " U5      U
4SS9nU R                  Gb  U R                  UR                  S5      :  a  U R                  S:X  aO  [        UX5      u  npUSS2SU R                  24   nUSS2SU R                  24   nUSS2SU R                  24   nOU R                  S:X  aa  [        UX5      u  npUSS2U R                  * S24   nUSS2U R                  * S24   nUSS2U R                  * S24   n[        UX5      u  npO([!        SU R                   S35      e[        UX5      u  npU R"                  (       aH  UR%                  SS9S   R'                  5       nUR                   S   U-
  R)                  5       S-   nUUS'   SUS'   U R*                  (       a~  UUR                  5          R-                  S5      nUUR                  5          R-                  S5      nUR/                  S5      UR                  5          R-                  S5      S-
  nUUS'   OUUS'   U" U40 UD6nUR                  n[        R0                  " USSS9n[        R0                  " USSS9R                  5       nU R"                  (       a  USS2W* S24   nUSS2U* S24   nUR                   SS UR                   SS :w  a  UR                   S   nUSS2U* S24   nSX) '   [3        X5      nSUU) '   [        R0                  " USSS9nU R*                  (       ac  WR                   u  nn[        R4                  " UUUR                  R6                  UR                  R8                  S9nUUUR                  5       '   UnUSS2SS24   R;                  S5      n0 nU R<                  (       a  [        R>                  " 5          [@        RB                  " USS9n[        RD                  " SU-  SS9nUU-
  nUU-  R;                  S5      UR;                  S5      -  nUSU nUUS n[        RF                  " [        RH                  " UU-   5      SS9US '   SSS5        U RJ                  RL                  c  S!U RN                  ;   a  U R                  (       d  USU2SS24   OUSU n U R                  (       d  USU2SS24   OUSU n![@        RP                  " [        RR                  " U SS"9[        RR                  " U!SS"9SS#9US$'   S%U RN                  ;   a  UUR;                  S5      -  nU RJ                  RT                  Gb  U(       Gd  UR;                  SS9n"U"SU n#U"US n$[        R&                  " U#U$5      n%[        R                  " U%U%/SS9n%UR                  S5      n[        RV                  " UUR6                  S&9RY                  U5      nUU%R-                  S5      :  n&UU"R-                  S5      :  n'U&U'-  R[                  5       n(U&) U'-  R[                  5       n)UU(-  R;                  SS9n*UU)-  R;                  SS9n+U*U RJ                  RT                  U+-  -   nUSU US''   UUS US('   U R*                  (       a[  WS:H  R%                  SS9S   U   n,USSU,24   USSU,24      R]                  5       n-USU,S24   USU,S24      R]                  5       n.O0USU USU    R]                  5       n-XS XS    R]                  5       n.U-US)'   U.US*'   U R                  (       a  UR^                  US+'   U$ ! , (       d  f       GN= f)-a  
Runs the given model on the given batch of inputs, concatenating the chosen and rejected inputs together.

We do this to avoid doing two forward passes, because it's faster for FSDP.

Args:
    model:
        Model to run the forward pass on.
    batch:
        Batch of input data.
    is_ref_model:
        Whether this method is being called for the reference model. If `True`, length desensitization is not
        applied.
r  r   r  r  FTr  r!  r"  r#  r`  r   rd  )r   r  r  rw   rv   Nr  r~  r  r  r  r   r  r  r  rq   )shiftsdimsr   )r   r  )ri  policy_weightsrn  )end_dim)r  ro  r  r   rW  rX  r  r  r  re  )0rz   re  rL  r  r  rM  r   r  rO   r>  r  rP  r  rQ  r1   r2   r   rJ  r  rh  r  rR  r   r  rollrL   zerosr   r  r   r\  rZ  r   log_softmaxr   clampr}  r_   r]  r  cross_entropyflattenr^  r   	expand_asr?  ry  r  )/r^   rZ   rT  rV  num_examplesr  r  r  r`  r   rd  r  r  r   r  r   r  r  r   r  seq_lenr   r   per_token_logps_	all_logpsra   logprobsweights_adjustment_factorper_token_logps_adjustedall_weightschosen_weightsrejected_weightschosen_logitschosen_labelscompletion_lengthschosen_lengthsrejected_lengthspublic_lengthsld_maskr   
front_mask	rear_maskfront_logps
rear_logps	split_idxr  r  s/                                                  rc   r[  '_UnslothDPOTrainer.concatenated_forward  s   " /066q9!55eK]K]5^#U+  37L/0 //+=n+ML(!%773EF\3]L/0..*<]*KL'-.@A 23J K12HI$67R$S!"")F595L5LF12 *4 	G ^^F1668I 		#3"JPQRI"YY(='Y_`aN		!!"78:STI *tATATUVAW/W''<7 <FnV_;k8NI%3A7H7H4H%IN )!->t->*> ?I )!->t->*> ?I))Z7 <G~W`;l8NI )!doo-=-?*? @I%3A7G7I4I%JN )!doo-=-?*? @I;EnV_;k8NIy$4T5I5I4J K) )  8B.R[7g4	&&
 '0&7&7&7&Fq&I&M&M&O#"+//!"47J"J!P!P!RUV!V1?-.37L/0   &n&9&9&;<FFqI	%n&9&9&;<FFqI	-44Q78K8K8MNXXYZ[^__/;^,1?-.I66G^^F ZZ	"1=F

9Ra@EEGI&&  N?#3 34%a.)9&9:	<<v||BQ//ll1oGAxyL)F z/?&'
#**_QQG"0"6"6J${{GGNN,A,AI]I]  7F^0023.O#AqrE*..r2	==R8,1OOALb,Q)+:=V+V(7)CHHLy}}]_O``!,]l!;#.|}#= +0;;uyyRbAb7cij+k'( ! 99*et~~.E>B>U>UF=L=#2##56[aboco[pM>B>U>UF=L=#2##56[aboco[pM "#mQ7}^_9`op"F: DNN"!IMM"$55I99),!*1!5/>N1,-@"YY~7GHN"YY'GQON%**1-G <<8N8NOYYZijL"^%=%=a%@@G"4">">q"AAD!D.//1J!D//1I*Z7<<<CK)I5::q:AJ#dii&8&8:&EEI!*=L!9~#,\]#;  
 &*33T3B1ElSI!':I:!6yJYJ7O!P!U!U!W#)!YZ-#81ij=9Q#R#W#W#Y !'!6y,7O!P!U!U!W#)-#8=9Q#R#W#W#Y '9#$)=%&  !(!1!1F:G !s   Ba
a
train_eval)r  r  c           	      ,	   0 nU R                   R                  (       a!  U R                  X5      nUS   nUS   nUS   nOU R                  X5      nSU;   a  SU;   a  US   n	US   n
OU R	                  U5      u  pSnSnSn[        U R                  5       H[  u  pU R                  US   US   U	U
UU5      u  pnU R                  (       a  U R                  U   OS	nXmU-  -   nX~U-  -   nXU-  -   nM]     Xx:  R                  5       nU R                   R                  b  X`R                   R                  US
   -  -   nU R                  (       a  XeS   -  nU R                  (       a  X`R                  US   -  -   nUS:X  a  SOSnU R                  R                  U5      R!                  5       R#                  5       UU S3'   U R                  R                  U5      R!                  5       R#                  5       UU S3'   U R                  R                  U5      R!                  5       R#                  5       UU S3'   U R                  R                  Xx-
  5      R!                  5       R#                  5       UU S3'   U R                  R                  US   5      R%                  5       R!                  5       R#                  5       UU S3'   U R                  R                  US   5      R%                  5       R!                  5       R#                  5       UU S3'   U R                  R                  US   5      R%                  5       R!                  5       R#                  5       UU S3'   U R                  R                  US   5      R%                  5       R!                  5       R#                  5       UU S3'   U R                   R                  c  SU R                  ;   aN  U R                  R                  US
   5      R%                  5       R!                  5       R#                  5       UU S
3'   U R                  (       aN  U R                  R                  US   5      R%                  5       R!                  5       R#                  5       UU S3'   UR!                  5       U4$ )zWCompute the DPO loss and other metrics for the given batch of inputs for train or test.r  r  r  r(  r)  r   rW  rX  ro  ro  r  r  r  eval_rw  zrewards/chosenzrewards/rejectedzrewards/accuracieszrewards/marginszlogps/chosenzlogps/rejectedr  zlogits/chosenr  zlogits/rejectedrn  )r_   rV  r  r[  r:  	enumerater  r  r`  r?  r]  r\  r  r  r  r;  ry  r  rz  )r^   rZ   rT  r  metricsrg  r  r  r  r(  r)  idxr  _losses_chosen_rewards_rejected_rewardsr  reward_accuraciesprefixs                      rc   get_batch_loss_metrics)_UnslothDPOTrainer.get_batch_loss_metrics^  sy    99##33EAL!&)F)*:;N+,>?44UBL "U*/Cu/L#();#< %*+?%@"7;7Q7QRW7X4  FN  #,DNN";>Bmm 0 !12$& ?;*; 483D3D**3/#F"22!/F2J!J#3&6P#P  #<" ,>EEG99*ii11L4LLLF+;<<F  00<
3KKKF&&0b-1-=-=-P-PQ_-`-e-e-g-l-l-n6(.)*/3/?/?/R/RSc/d/i/i/k/p/p/r6(*+,151A1A1T1TUf1g1l1l1n1s1s1u6(,-.//0QRWWY^^` 	6(/*+ //^0LMTTV[[]bbd 	6(,'( //=M0NOVVX]]_ddf 	6(.)* //=Q0RSZZ\aachhj 	6(-() //=S0TU\\^ccejjl 	6(/*+ 99*et~~.E  33L4LMTTV[[]bbd vhh'(     33L4LMTTV[[]bbd vhh'( {{}g%%rf   inputsc                 h   U R                   (       a)  [        U R                  R                  R                  5      O	[        5       nU   U R                  XSS9u  pgS S S 5        WR                  U R                  R                  5      nU R                  WSS9  U(       a  Xg4$ U$ ! , (       d  f       NO= f)Nr  r  )
r  r)   r  r   rY  rB   r  r|   r_   store_metrics)r^   rZ   r	  return_outputsnum_items_in_batchcompute_loss_context_managerr  r   s           rc   compute_loss_UnslothDPOTrainer.compute_loss  s     7;6X6XHT%%,,112^i^k 	% * 77RY7ZMD * wwtyy''(7w7=  *)s   B##
B1c           
      r   U R                   (       a)  [        U R                  R                  R                  5      O	[        5       nU   UR                  US   US   U R                  SU R                  S9nSU;   a  US   nOU R                  cQ  U R                  5          U R                  R                  US   US   U R                  SU R                  S9nSSS5        O7U R                  R                  US   US   U R                  SU R                  S9nSSS5        [        WU R                  U R                  5      nU R                  R                  USS9n[        WU R                  U R                  5      nU R                  R                  USS9nXg4$ ! , (       d  f       N= f! , (       d  f       N= f)zRGenerate samples from the model and reference model for the given batch of inputs.r  r`  T)r   r  rP  	do_sampler   
ref_outputN)skip_special_tokens)r  r)   r  r   rY  rB   generaterP  rL  r  rR  rZ   rE   r  batch_decode)r^   rZ   rT  generate_context_managerpolicy_outputr  policy_output_decodedref_output_decodeds           rc   generate_from_model_and_ref._UnslothDPOTrainer.generate_from_model_and_ref  s    7;6X6XHT%%,,112^i^k 	! &!NN 23$%<=??!// + M u$"<0
>>)..0%)ZZ%8%8&+,>&?+01H+I'+&*)-);); &9 &
 10 "&!8!8"'(:";',-D'E#'??"&%)%7%7 "9 "J/ &> &mT__dFXFXY $ 5 5 B B=fj B k":t@R@RS
!22??
`d?e$88/ 10 &%s&   AF(8FA F(
F%	!F((
F6r   ignore_keysc                 L   Uc+  [        US5      (       a  [        UR                  S/ 5      nO/ nU R                  (       a)  [	        U R
                  R                  R                  5      O	[        5       n[        R                  " 5          U   U R                  XSS9u  pgS S S 5        S S S 5        U R                  WSS9  U(       a  WR                  5       S S 4$ US   US   S.nUR                  5        V	V
s/ s H  u  pX;  d  M  U
PM     nn	n
[        R                  " XR
                  R                  S9n[        R                   " UR"                  S	   U R
                  R                  S9nWR                  5       X4$ ! , (       d  f       N= f! , (       d  f       N= fs  sn
n	f )
Nr  keys_to_ignore_at_inferencer  r  eval_logits/choseneval_logits/rejected)r!  r"  r   r   )r]   r  r  r  r)   r  r   rY  rB   rO   rZ  r  r  rz  r  r   r  rz   )r^   rZ   r	  r   r  prediction_context_managerr  r   logits_dictkvr   r  s                rc   prediction_step"_UnslothDPOTrainer.prediction_step  s\    uh''%ell4QSUV  7;6X6XHT%%,,112^i^k 	# ]]_8 77RX7YMD 9_ 	7v6;;=$,, #**>"?$+,B$C
 !, 1 1 3L 3q7K! 3Lf-=-=-D-DEV\\!_T5E5E5L5LMv..% 98__ Ms0   FE>F6F F >
F	F
Fr   c                 x    UR                  5        H&  u  p4U R                  U   U   R                  U5        M(     g r  )r  r  r   )r^   r   r  r  rq  s        rc   r   _UnslothDPOTrainer.store_metrics"	  s2    !--/JC  ,S188? *rf   
dataloaderdescriptionmetric_key_prefixc                   > U R                   (       Ga  [        UR                  5      n[        R                  " [        U5      U R                  R                  S9nUR                  R                  U5      nU R                  U5      n	U R                  U	5      n	U R                  U R                  U	5      u  p[        R                  " / SQ[        US   X5       VVVs/ s H"  u  pnX[        U5      S U[        U5      S /PM$     snnnS9nSU R                  R                   ;   aC  U R"                  R$                  (       a(  [&        R)                  S[&        R+                  US905        S	U R                  R                   ;   a
  [-        S
US9  SU R                  R                   ;   a/  U R"                  R$                  (       a  [.        R1                  USS9  [2        TU ]i  XX4U5      nU$ s  snnnf )z
Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by
`Trainer.evaluate()` and `Trainer.predict()`.

Works both with or without labels.
)r%  )PromptPolicyz	Ref Modelr  N)columnsdatawandbgame_log)r2  comet_mlzgame_log.csv)r7  tablemlflowzgame_log.json)r2  artifact_file)rd  lenr  rK   sampleranger_   eval_batch_sizeselectr  _prepare_inputsr  rZ   rF   	DataFramer{   r  r  is_main_processr3  rw  Tabler<   r7  	log_tablerj  evaluation_loop)r^   r+  r,  r   r  r-  num_samplesrandom_indicesrandom_batch_datasetrandom_batchr  r  r  polrefr6  initial_outputrl  s                    rc   rC  "_UnslothDPOTrainer.evaluation_loop&	  s     $$$j001K#]]5+=AZAZ[N $.#5#5#<#<^#L --.BCL//=L8<8X8XY]YcYceq8r5!LL9 -0,X68M--(S V/S[]1CD-E $))---$2B2B2R2R		:u{{{'>?@TYY000-'
 499...43C3C3S3S  e? K 0%9HY
 1s   )G)logs
start_timec                   > SU;   a  SOSnU R                   U   R                  5        H9  u  pE[        R                  " U5      R	                  5       R                  5       X'   M;     U R                   U	 [        TU ]  X5      $ )z
Log `logs` on the various objects watching training, including stored metrics.

Args:
    logs (`dict[str, float]`):
        The values to log.
    start_time (`float` or `None`, *optional*, defaults to `None`):
        Start time of the training.
r  r  r  )r  r  rO   r   ry  r  rj  rw  )r^   rL  rM  r  r  r   rl  s         rc   rw  _UnslothDPOTrainer.log^	  sr     !'$WF
 00<BBDLCW-22499;DI E  ,w{4,,rf   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )Nr  rq   )r  )	r_   r#  r   r   r7  r  create_model_cardrj  _save_checkpoint)r^   rZ   trialr  rl  s       rc   rR  #_UnslothDPOTrainer._save_checkpointq	  sj    99!!)dii22388J//55c:2>J*5 .rf   r  tagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
Nr  unsloth_versionunslothJOB_IDhf_jobsaO              @inproceedings{rafailov2023direct,
                title        = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
                author       = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
                year         = 2023,
                booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
                url          = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
                editor       = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
            }DPOzNDirect Preference Optimization: Your Language Model is Secretly a Reward Modelz
2305.18290)r  r  r#  r  rU  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror]   rZ   r  rC   pathisdirr  setr  r  addenvironr{  r  rN   dedentr3   r#  r;   r3  runurlr4   savejoinr_   r   )r^   r  r  rU  r  citation
model_cards          rc   rQ  $_UnslothDPOTrainer.create_model_cardy	  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ??

 )!!**%'9';';		@Ueiimm[_.0%h!

 	TYY%9%9;GHrf   )%r  r  r  r*  r  r  r  rX  rK  r  r  r  r  rd  r  r  r  rM  r[  r  r`  rO  rP  rN  r  rF  rR  rL  rS  rG  r  rZ  r  r  rQ  rJ  r\  )NNNNNNNN)NNNNN)F)NNTr  )r  Nr  )FN)NNr  )NNN)Fr  r  r  r  r  r  r	   r  rA   Moduler"   r   r   r   r   r   r  r#   r   r   r$   r   r   r  r(   tuplerO   r  	Optimizerlr_schedulerLambdaLRrY  r   r   rk  r  r  r  r  r  staticmethodr  r  r  r,  r   rB  rL  r+   rR  
LongTensorr:  re  FloatTensorr  r  r[  r   r?  r  r  r  r'  r  rC  rw  rR  rQ  r  r  r  s   @rc   r  r    s   >@ J
 GK$(04CGnr FJ59jvaehl.2!H<S"))_45H< E/299c"ABCH< y!	H<
  -H<  g&> ?@H< uWotCwXgOgIhDh?i%ijkH< #)+=?UWeef
H< "(N+;T+A"BCH< D12H< (5;;#8#898EKKD\D\DeDe;ffgH< #+5ekk6K6K1LdSVX[S[n1\+]"^H< (0%,,9UW\WcWc9c0d'eH<  l+!H< H<T# Y PT ap 03$31@3OR3Zc3	3j_ I $+w/0+   79KMces st+ 	+
 + 
w'	(+Z  ,0/3#'B
sCx.B
1B
 $C=B
  (}	B

 !B
 
c49n	B
 B
H  ,0/3#'.sCx..1. $C=.  (}	.
 !. 
c49n	. .`,.j ,.\1F0A 1FZ 1F 1Ff M MT4U5E5E0E+F T5QVQ]Q]_d_k_kQkKl T JCtU%5%55667JHKJ	c5###	$J Jd #59]8'']8 ))]8  ++	]8
 "--]8 ]8 3 1 112]8 
u  %"3"3U5F5FF	G]8~fYYf'+CtU=M=M7M1N,N'Of	c5<<	 fR gl[YY['+CtU=M=M7M1N,N'O[_c[	c5<<	 [B 07	V&_bii/0V& CtU%5%55667V& O,	V&
 
u||T#u*--	.V&x _bii/0 S%c 1223 
u||U5<<c5j1A#ABB	C..9S%BRBR=R8S .9X]^acf^fXg .9j ,0#/_bii/0#/ S%c 1223#/ #	#/
 d3i(#/ 
u||Xell3Xell5KK	L#/J@T#u*%5 @7?C[ @jn @ 04+/!'66 6 'tn	6
 d3i(6 6 
6 6p-S%Z( -huo -QU - -&/ %)&*,0	DISMDI smDI CcD()	DI DIrf   r  c                   F   ^  \ rS rSrSr           SU 4S jjrSrU =r$ )UnslothDPOTraineri	  a	  
    
Trainer for Direct Preference Optimization (DPO) method.

This class is a wrapper around the [`transformers.Trainer`] class and inherits all of its attributes and methods.

Args:
    model (`Union[str, PreTrainedModel]`):
        Model to be trained. Can be either:

        - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
          path to a *directory* containing model weights saved using
          [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
          using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
          `args.model_init_kwargs`.
        - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
    ref_model (`PreTrainedModelWrapper`):
        Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
        and loss. If no reference model is provided, the trainer will create a reference model with the same
        architecture as the model to be optimized.
    args ([`DPOConfig`], *optional*, defaults to `None`):
        Configuration for this trainer. If `None`, a default configuration is used.
    data_collator (`DataCollator`, *optional*):
        Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
        Will default to [`DataCollatorForPreference`].
    train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
        Dataset to use for training. DPO supports [preference](#preference) type and. The format of the samples can
        be either:

        - [Standard](dataset_formats#standard): Each sample contains plain text.
        - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
          and content).
    eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
        Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If `None`, the processing class is loaded from the model's name
        with [`~transformers.AutoTokenizer.from_pretrained`].
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function that will be used to compute metrics at evaluation. Must take a [`EvalPrediction`] and return
        a dictionary string to metric values. *Note* When passing TrainingArgs with `batch_eval_metrics` set to
        `True`, your compute_metrics function must take a boolean `compute_result` argument. This will be triggered
        after the last eval batch to signal that the function needs to calculate and return the global summary
        statistics rather than accumulating the batch-level statistics.
    callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
        List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
        in [here](https://huggingface.co/docs/transformers/main_classes/callback).

        If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
        method.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
        A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
        model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
    optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
        A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
        `args`. Incompatible with the `optimizers` argument.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
        A function that preprocess the logits right before caching them at each evaluation step. Must take two
        tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
        by this function will be reflected in the predictions received by `compute_metrics`.

        Note that the labels (second parameter) will be `None` if the dataset does not have them.
    peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
        PEFT configuration used to wrap the model. If `None`, the model is not wrapped.

    c                   >^& Uc
  [        5       n[        USS5      n[        U5      [        La  Sn[        USS5      n[        U5      [        La  SnSn[        R
                  R                  SS5      S:H  nU(       d1  [        R
                  R                  SS5      S:X  a  [        S5        S	n[        R
                  R                  S
S5      n[        UR                  SS 5      =(       d    [        UR                  SS 5      nUc  UR                  5       R                  nSSKJn  U" U5      nU[        R                  :H  nU(       d  U(       a  U(       a  [        S5      eU(       d  U(       d  U(       a  [        S5      eU(       a"  SUl        SUl        S[        R
                  S'   OCU(       d<  U(       d5  US:X  a/  UUl        U(       + Ul        U(       a  SOS[        R
                  S'   [        USS 5      b-  [        USS5      S:X  a  SUl        [        USS 5      c  SUl        [        USS 5      nUb/  US:  a)  SSKJn  [-        U5      [-        S5      ::  a  [        S5        [        USS5      S:w  aL  [        USS5      nUS:X  a!  UR.                  U:  a  UR.                  Ul        [        US S 5      c
  Ub  UUl        [        US!S5      n[        U5      [        La  Sn[        US"S5      n[        U5      [        La  SnUR                   (       a  U(       a  SUl        S	Ul        UR"                  (       a  U(       a  S	Ul        SUl        U(       a  SUl        SUl        Oc[        R
                  R                  S
S5      S#:X  a  S	Ul        SUl        O0U(       d)  U(       d"  UR"                  Ul        UR                   Ul        Sn[9        5       R                  S$S 5      b  S	n[9        5       R                  S%S 5      b  S	nU(       a  S[        R
                  S&'   S'[9        5       ;  a  [;        US'5      (       d  OD[        US'S 5      n[        US'S 5      nUc'  Ub$  UR<                  n[;        US'5      (       a  UUl        Ub!  [;        US(5      (       a  UR?                  5         S)[9        5       ;   a   [;        [@        S*5      (       a  S+[@        l!        S,[9        5       ;   aU  [;        US*5      (       a  S+Ul!        [;        US)5      (       a,  [;        UR@                  S*5      (       a  S+UR@                  l!        S,[9        5       ;   a  UO[@        nSS-K"J#n   [I        UU 5      (       dx  [I        U[J        5      (       a(  S.URL                  ;  a  [O        USS/[        US0S 5      S19nO[I        U[N        5      (       a%  S.URL                  ;   a  [K        U[        US0S 5      S29nOJ[;        US35      (       a  SUl(        [;        US45      (       a  S5Ul)        [;        US65      (       a	  S7S	0Ul*        [I        UU 5      (       dx  [;        US85      (       dg  [;        US)5      (       aV  [I        U[J        5      (       a   [K        UR@                  [        US0S 5      S29nO![O        UR@                  SS/[        US0S 5      S19n/ n!SS9K+J,n"  U"" S:U!5        [;        US;5      (       aH  [[        URL                  5      m&/ S<Qn#[]        U&4S= jU# 5       5      (       a  UR_                  / S>Q5      nA#@&[        US?S 5      [`        Rb                  :X  a(  URd                  S:  a  [        US@S5      S:w  a  SUl3        SA[9        5       ;   a!  [;        US(5      (       a  UR?                  5         [h        T'U ]  " SHUUUUUUUUU	U
UUSB.UD6  SA[9        5       ;   a!  [;        USC5      (       a  URm                  5         [;        U SD5      (       a-  U Rn                  Rq                  5         [;        U SD5      (       a  U ?7[        USES 5      b  U Rr                  UR                  5       l9         [;        U SF5      (       aV  U Rt                  Rv                  n$Un%[;        U%SA5      (       a&  U$U%l<        U%Rz                  n%[;        U%SA5      (       a  M&  U$U%l<         [;        U SG5      (       a.  [}        [        U R                  R                  5      U 5      U lA        g )INr   Fr   UNSLOTH_ENABLE_FULL_FINETUNING01UNSLOTH_FORCE_FLOAT32zKUnsloth: Switching to float32 training since model cannot work with float16TUNSLOTH_MIXED_PRECISIONr}   r  torch_dtyper   )
_get_dtypezuUnsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`zuUnsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`r   ACCELERATE_MIXED_PRECISIONr  r   r   r   rq  r   rw   )__version__z4.45.2z**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`r      r   r   r   bfloat16r  r  UNSLOTH_RETURN_LOGITSr   r[   r  padding_siderightr  )UnslothVisionDataCollatorr  rx  pad_to_multiple_of)mlmmlm_probabilityr  )r  r  dataset_text_fieldrw  dataset_kwargsskip_prepare_datasetrD   )PatchRLStatisticsdpo_trainercolumn_names)r
  r  r  r  rb  r  r  rc  rejected_labelsr  r`  c              3   ,   >#    U  H	  oT;   v   M     g 7fr  re  ).0xr  s     rc   	<genexpr>-UnslothDPOTrainer.__init__.<locals>.<genexpr>
  s     4e$es   )r
  r  r  parallel_mode_n_gpurZ   )rZ   r  r_   r  r  r  r  r  r  r  r  r  r\   neftune_hook_handler<  r  r  re  )Br   r  rY  r  rC   rg  r  rf  r  r   r  unsloth_zoo.utilsr  rO   float16	TypeErrorr   r   r   r   transformersr  rS   r   r   r   r   r   localsr]   r   r[   r  r  unsloth_zoo.vision_utilsr  r  rT   r  +TransformersDataCollatorForLanguageModelingr  r  r  unsloth_zoo.logging_utilsr  re  allr  rV   NOT_DISTRIBUTEDn_gpur  rj  rk  r\   r  remover<  r  scaleraccelerator_scalerrZ   rW   ri   rl  r  )(r^   rZ   r  r_   r  r  r  r  r  r  r  r  r  r`   use_bf16use_fp16force_float32full_finetuningmixed_precision_dtyper  r  r  ga_stepstransformers_versioneval_bszr   r   _output_logitsmodel_max_seq_lengthargs_max_seq_lengthr   _UnslothDPOTrainer__tokenizerr  other_metricsr  checkr  current_modelr  rl  s(                                         @rc   rk  UnslothDPOTrainer.__init__ 
  s     < 0 24/>%%x4/>%%x**..)I3OSVVBJJNN3JC$PTW$W_` M "

/H) Tgt4bm]a8b=%"<"<">"D"D%05!5==('hy  JA  @B  :Bg(9  NE  DF  >FDIDI7;BJJ3481F)1SDI#DIAHvfBJJ344.:wt_^b?cgk?k!(Dt\408C$/4!>EHqLH+,0AA @ A4$/47t%A1EH1}!A!AH!Lpt  qQ  qQdNmt6=E(J^  @H`d`| '7?t+e^ '7?t+e^99u)<\`dFY99t)<[`TEX"'D"'DZZ^^5yAZO"&D"'D"&))D"&))D8<<)40<tn8<<7>J]aN25BJJ./68+GDBR4S4S#*52BD#I #*42BD#I"*/C/O!&!5!54!122.D4G!?!? &("wy.'I'Idk9Ka)'88Za:J:W'55'BRB\B\^l:m:m  Zao  pJ  pJ  pW*<*H&iF-)BCC-)?@@XUbUoUoEo K&))07KT)R	! M+VWW\dhu  iC  iC  ]C 6)07KT)R!
 t455TYt7Qt122bD4Kt-..G]_cFd0C-)BCC;..7;3T3Tm-CDD$:#---4T;OQU-V%M
 %P#--#*--4T;OQU-V	%M ?-7=.11}99:LBE 4e444 - < <=] ^| 4$/<3O3OOTXT^T^abTbtXq)Q.fh75.#A#A  	0!))'/-!'?,I%	0 )/	0 fh75/#B#B!4.//$$++-t2339Q4.5A?C?W?WE&&(<4''%%,,F!M-11390 - 3 3 -11 06M,4!!#$=dnn>R>R$SUYZDJrf   rp  )NNNNNNNNNNN)r  r  r  r  r  rk  r  r  r  s   @rc   rz  rz  	  s9    @H #'(,o orf   rz  	addFilterc                        \ rS rSrS rS rSrg)HideLoggingMessagei
  c                     Xl         g r  r   )r^   r   s     rc   rk  HideLoggingMessage.__init__
  s    d)rf   c                 <    U R                   UR                  5       ;  $ r  )r   
getMessage)r^   r  s     rc   filterHideLoggingMessage.filter
  s    alln)DErf   r  N)r  r  r  r  rk  r  r  re  rf   rc   r  r  
  s    2Erf   r  z`use_cache=True`)qr  rO   r   torch.nnrA   r   r   typingr   r   r   r   r	   r
   r   r   trl.trainer.dpo_trainerr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rP   dataclassesrR   packaging.versionrS   r@  np
contextlibr  rT   rU   r  transformers.training_argsrV   rg   typesrW   ri   torch_compile_optionscompiler   r  r   r   r   r   r  rz  r]   Filterr  r  re  rf   rc   <module>r     s,  0    $ I I I e  e  e  e  e  e  e  e  e  e  e  e  e  e  e  e  e  e  e  e  e 
  ( %   " $  3      4;PR S"||  \\	&,, %  	
 \\6ell C ELL  A-y A- A-D jI jIV9q* qf  6;FW^^ F 	
'(:;<  rf   