
    f:iQS             
          S r SSKJr  SSKrSSKJr  SSKJr  SSKJrJ	r	J
r
JrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ
r
JrJrJrJrJ r J!r!J"r"J#r#JrJ$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4JrJ5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=J>r>JrJrJ
r
JrJrJ"r"J-r-J1r1J7r7Jr  SSK7r7SSK7  SSK?J@r@JArA  SS	KBJCrC  SSKrSSKDr5SS
KEJ6r6  SSKJr  SSKFJGrGJHrI  SSKJJKrK  SSKLrLSSKMJNrN  S rO SSSSSS.rP\R                  " SS\PS9S 5       rRS\R                  S\SS\SS\R                  4S jrTS\R                  S\R                  S\SS\SS\R                  4
S jrUS\R                  S\SS\R                  4S jrV\@ " S  S!\5      5       rW  " S" S#\"5      rX " S$ S%\X5      rY \Z" \1S&5      (       a3  SSK2r2 " S' S(\2R                  5      r\ \1R                  " \\" S)5      5        gg)*z;
2025.10.10
2025.10.9
4.56.2
0.23.0
__UNSLOTH_VERSIONING__
    )TensorN)
functional)AnyListOptionalTupleUnionDictSetCallable)>r   AutoModelForCausalLMBaseImageProcessor	CPOConfig
CPOTrainerr   DPODataCollatorWithPaddingDataCollator
DataLoaderDatasetEvalLoopOutputFFeatureExtractionMixinLiteralr   PartialStatePath	PeftModelPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTrainerTrainerCallbackr	   add_bos_token_if_neededadd_eos_token_if_neededautocastdefaultdictdisable_dropout_in_modelgenerate_model_cardget_comet_experiment_urlinspectis_comet_availableis_peft_availableis_torch_fx_proxyis_wandb_availablelog_table_to_comet_experimentloggerloggingmaybe_apply_chat_templatemaybe_extract_promptnnnpnullcontextospad_to_lengthpdpeft_module_casting_to_bf16prepare_model_for_kbit_trainingrandomselective_log_softmaxtextwraptorchr   r   r   r   r   r*   r.   r5   r=   )*)	dataclassfield)Version)r4   )DataCollatorForSeq2SeqDataCollatorForLanguageModeling)ParallelMode)
MethodTypec                 F   ^  [         R                  " T 5      U 4S j5       nU$ )Nc                 8  > [        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         T" U /UQ70 UD6n[        U S5      (       a5  [        U R                  S5      (       a  U R                  R                  5         U$ )Nmodelfor_trainingfor_inference)hasattrrH   rI   rJ   )selfargskwargsoutputfs       >/home/james-whalen/unsloth_compiled_cache/UnslothCPOTrainer.pywrapper*prepare_for_training_mode.<locals>.wrapper0   sx     4!!gdjj.&I&IJJ##%4)$)&)4!!gdjj/&J&JJJ$$&    )	functoolswraps)rP   rR   s   ` rQ   prepare_for_training_moderW   /   s%    __Q  NrT   TF)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphs)dynamic	fullgraphoptionsc                 d   [         R                  " U R                  SU R                  S   5      SSS9n[         R                  " UR                  S5      SSS9n/ n[	        X#5       H  u  pVUR                  [         R                  5      n[         R                  " USUR                  S5      S9R                  S5      n[         R                  " USS9nXx-
  n	UR                  U	5        M      [         R                  " U5      nUR                  U R                  S   U R                  S   45      nU$ )N   r   )chunksdim)rb   indexrb      )r=   chunkreshapeshapeziptofloat32gather	unsqueezesqueeze	logsumexpappendconcat)
logitsrc   chunked_logitschunked_indexall_per_token_logpschunk_logitschunk_indexselected_logitslogsumexp_valuesper_token_logpss
             rQ   chunked_selective_log_softmaxr{   E   s    [[FLL4D!EPQYZ[N[[r!2QaHM%(%G!#u}}5,,|2{G\G\]_G`aiijlm ??<rB)<""?3 &H 	,,':;-55v||AUV6XYrT   	input_idslogits_to_keeppad_token_idreturnc                 ~    XR                   S   :  a  [        S5      eU SS2SU* 24   nX2:H  nUR                  SS9nU$ )zr
Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens 
re   z8logits_to_keep must be smaller than the sequence length.Nrd   )rh   
ValueErrorsum)r|   r}   r~   prompt_sectionpadding_maskpad_token_countss         rQ   calculate_pad_tokens_in_promptr   W   sX     ++STTq"2N?"223N"2L#''A'.rT   completion_input_idsleft_pad_tokens_per_promptmax_left_padc                     U R                   u  pEU R                  nX!-
  n[        R                  " XVS9R	                  S5      nXR	                  S5      :  n	X:g  n
X-  nU$ )a)  
Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]

Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
and pad are pad tokens, this function would make a completion mask that would 0 out the pad
and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
devicer   re   )rh   r   r=   arangerm   )r   r   r   r~   
batch_sizecompletion_lenr   num_tokens_to_maskindices
shift_masknon_padding_mask
final_masks               rQ    create_completion_attention_maskr   j   si     "6!;!;J!((F%Bll>9CCAFG88;;J,<.JrT   tensorpad_idc                 l    X:g  n[         R                  " USSSS9n[         R                  " U SU5      nU$ )zD
Moves all padding tokens in each sequence of a batch to the right.
re   T)rb   
descendingstable)r=   argsortrl   )r   r   masksorted_indicespacked_tensors        rQ   left_pack_paddingr      s8     D]]4Q4MNLLN;MrT   c                     ^  \ rS rSr% Sr\" SSS0S9r\\   \	S'   \" SSS	0S9r
\\   \	S
'   \" SSS0S9r\\   \	S'                                                                                                                                                        SU 4S jjrSrU =r$ )UnslothCPOConfig   u  
    
Configuration class for the [`CPOTrainer`].

This class includes only the parameters that are specific to CPO training. For a full list of training arguments,
please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
differ from those in [`~transformers.TrainingArguments`].

Using [`~transformers.HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.

Parameters:
    max_length (`int` or `None`, *optional*, defaults to `1024`):
        Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
        to use the default data collator.
    max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
        Maximum length of the prompt. This argument is required if you want to use the default data collator.
    max_completion_length (`int` or `None`, *optional*, defaults to `None`):
        Maximum length of the completion. This argument is required if you want to use the default data collator
        and your model is an encoder-decoder.
    beta (`float`, *optional*, defaults to `0.1`):
        Parameter controlling the deviation from the reference model. Higher β means less deviation from the
        reference model. For the IPO loss (`loss_type="ipo"`), β is the regularization parameter denoted by τ in
        the [paper](https://huggingface.co/papers/2310.12036).
    label_smoothing (`float`, *optional*, defaults to `0.0`):
        Label smoothing factor. This argument is required if you want to use the default data collator.
    loss_type (`str`, *optional*, defaults to `"sigmoid"`):
        Type of loss to use. Possible values are:

            - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
            - `"hinge"`: hinge loss on the normalized likelihood from the
              [SLiC](https://huggingface.co/papers/2305.10425) paper.
            - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
            - `"simpo"`: SimPO loss from the [SimPO](https://huggingface.co/papers/2405.14734) paper.
            - `"alphapo"`: AlphaPO loss from the [AlphaPO](https://huggingface.co/papers/2501.03884) paper. This
              automatically sets `loss_type="simpo"` and `cpo_alpha=0.0`.

    disable_dropout (`bool`, *optional*, defaults to `True`):
        Whether to disable dropout in the model.
    cpo_alpha (`float`, *optional*, defaults to `1.0`):
        Weight of the BC regularizer in CPO training.
    simpo_gamma (`float`, *optional*, defaults to `0.5`):
        Target reward margin for the SimPO loss, used only when the `loss_type="simpo"`.
    alpha (`float`, *optional*, defaults to `0.0`):
        Alpha parameter that controls reward function shape across all loss types. When alpha=0 (default), uses
        standard log probability rewards. When `alpha != 0`, applies AlphaPO transformation: `r = (1 - p^(-alpha))
        / alpha` from the [AlphaPO paper](https://huggingface.co/papers/2501.03884). This parameter works with all
        loss types.
    label_pad_token_id (`int`, *optional*, defaults to `-100`):
        Label pad token id. This argument is required if you want to use the default data collator.
    padding_value (`int` or `None`, *optional*, defaults to `None`):
        Padding value to use. If `None`, the padding value of the tokenizer is used.
    truncation_mode (`str`,*optional*,  defaults to `"keep_end"`):
        Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
        This argument is required if you want to use the default data collator.
    generate_during_eval (`bool`, *optional*, defaults to `False`):
        If `True`, generates and logs completions from the model to W&B or Comet during evaluation.
    is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
        When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
        you need to specify if the model returned by the callable is an encoder-decoder model.
    model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
        string.
    dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
        Number of processes to use for processing the dataset.

    NhelpzvLLM SamplingParams)defaultmetadatavllm_sampling_paramsr_   z8Chunk size to reduce memory usage. -1 is most efficient.unsloth_num_chunksz'Maximum sequence length to truncate to.max_seq_lengthc                   > US:  a  [        SU S35        US:  a  [        SU S35        Uc  U#S:X  a
  U$S:X  a  SnS	n#Wc$  S
SKJn  [        [	        U" 5       S-   S5      S5      n[
        TU ]  " S0 SU_SU_SU_SU_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_S U_S!U_S"U_S#U_S$U_S%U_S&U_S'U_S(U_S)U_S*U_S+U_S,U_S-U_S.U _S/U!_S0U"_S1U#_S2U$_S3U%_S4U&_S5U'_S6U(_S7U)_S8U*_S9U+_S:U,_S;U-_S<U._S=U/_S>U0_S?U1_S@U2_SAU3_SBU4_SCU5_SDU6_SEU7_SFU8_SGU9_SHU:_SIU;_SJU<_SKU=_SLU>_SMU?_SNW@_SOWA_SPWB_SQWC_SRWD_SSWE_STWF_SUWG_SVWH_SWWI_SXWJ_SYWK_SZWL_S[WM_S\WN_S]WO_S^WP_S_WQ_S`WR_SaWS_SbWT_ScWU_SdWV_SeWW_SfWX_SgWY_ShWZ_SiW[_SjW\_SkW]_SlW^_SmW__SnW`_SoWa_SpWb_SqWc_SrWd_SsWe_StWf_SuWg_SvWh_SwWi_SxWj_SyWk_SzWl_S{Wm_S|Wn_S}Wo_S~Wp_SWq_SWr_SWs_SWt_SWu_SWv_SWw_SWx_SWy_SWz_SW{_SW|_SW}_SW~_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_SW_WD6  WU l        WU l        WU l	        g )NgHz>z Unsloth: Your learning rate of `zi` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!re   za` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!steps  unsloth_training_checkpointsnor   )	cpu_countr`      @   
output_diroverwrite_output_dirdo_traindo_eval
do_predicteval_strategyprediction_loss_onlyper_device_train_batch_sizeper_device_eval_batch_sizeper_gpu_train_batch_sizeper_gpu_eval_batch_sizegradient_accumulation_stepseval_accumulation_steps
eval_delaytorch_empty_cache_stepslearning_rateweight_decay
adam_beta1
adam_beta2adam_epsilonmax_grad_normnum_train_epochs	max_stepslr_scheduler_typewarmup_ratiowarmup_steps	log_levellog_level_replicalog_on_each_nodelogging_dirlogging_strategylogging_first_steplogging_stepslogging_nan_inf_filtersave_strategy
save_stepssave_total_limitsave_safetensorssave_on_each_nodesave_only_model'restore_callback_states_from_checkpointno_cudause_cpuuse_mps_deviceseed	data_seedjit_mode_evaluse_ipexbf16fp16fp16_opt_levelhalf_precision_backendbf16_full_evalfp16_full_evaltf32
local_rankddp_backendtpu_num_corestpu_metrics_debugdebugdataloader_drop_last
eval_stepsdataloader_num_workersdataloader_prefetch_factor
past_indexrun_namedisable_tqdmremove_unused_columnslabel_namesload_best_model_at_endmetric_for_best_modelgreater_is_betterignore_data_skipfsdpfsdp_min_num_paramsfsdp_config"fsdp_transformer_layer_cls_to_wrapaccelerator_configparallelism_config	deepspeedlabel_smoothing_factoroptim
optim_args	adafactorgroup_by_lengthlength_column_name	report_toddp_find_unused_parametersddp_bucket_cap_mbddp_broadcast_buffersdataloader_pin_memorydataloader_persistent_workersskip_memory_metricsuse_legacy_prediction_looppush_to_hubresume_from_checkpointhub_model_idhub_strategy	hub_tokenhub_private_repohub_always_pushhub_revisiongradient_checkpointinggradient_checkpointing_kwargsinclude_inputs_for_metricseval_do_concat_batchesfp16_backendpush_to_hub_model_idpush_to_hub_organizationpush_to_hub_tokenmp_parametersauto_find_batch_sizefull_determinismtorchdynamo	ray_scopeddp_timeouttorch_compiletorch_compile_backendtorch_compile_modeinclude_tokens_per_secondinclude_num_input_tokens_seenneftune_noise_alphaoptim_target_modulesbatch_eval_metricseval_on_startuse_liger_kernelliger_kernel_configeval_use_gather_objectaverage_tokens_across_devices
max_lengthmax_prompt_lengthmax_completion_lengthbetalabel_smoothing	loss_typedisable_dropout	cpo_alphasimpo_gammaalphalabel_pad_token_idpadding_valuetruncation_modegenerate_during_evalis_encoder_decodermodel_init_kwargsdataset_num_proc )
printmultiprocessingr   minmaxsuper__init__r   r   r   )rL   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  r   r   r   rN   r   	__class__s                                                                                                                                                           rQ   rI  UnslothCPOConfig.__init__   s   r 4)I-  YB  (C  "D1e&F}o  Vw  %x  y-7":zS?P7J M#1"3y{1}a#8"= R	:#R	:#7R	:  R	: 	R	:
 $R	: *R	: $8R	: +FR	: *DR	: (@R	: '>R	: +FR	: '>R	: $R	: '>R	:  *!R	:" (#R	:$ $%R	:& $'R	:( ()R	:* *+R	:,  0-R	:. "/R	:0 !21R	:2 (3R	:4 (5R	:6 "7R	:8 !29R	::  0;R	:< &=R	:>  0?R	:@ "4AR	:B *CR	:D &<ER	:F *GR	:H $IR	:J  0KR	:L  0MR	:N !2OR	:P .QR	:R 7^SR	:T UR	:V WR	:X ,YR	:Z [R	:\ "]R	:^ *_R	:`  aR	:b cR	:d eR	:f ,gR	:h &<iR	:j ,kR	:l ,mR	:n oR	:p $qR	:r &sR	:t *uR	:v !2wR	:x yR	:z $8{R	:| $}R	:~ &<R	:@ *DAR	:B $CR	:D  ER	:F (GR	:H %:IR	:J &KR	:L &<MR	:N %:OR	:P !2QR	:R  0SR	:T UR	:V #6WR	:X &YR	:Z 2T[R	:\ "4]R	:^ "4_R	:` "aR	:b &<cR	:d eR	:f $gR	:h "iR	:j .kR	:l "4mR	:n "oR	:p *DqR	:r !2sR	:t %:uR	:v %:wR	:x -JyR	:z #6{R	:| *D}R	:~ &R	:@ &<AR	:B (CR	:D (ER	:F "GR	:H  0IR	:J .KR	:L (MR	:N &<OR	:P -JQR	:R *DSR	:T &<UR	:V (WR	:X $8YR	:Z (@[R	:\ !2]R	:^ *_R	:` $8aR	:b  0cR	:d &eR	:f "gR	:h &iR	:j *kR	:l %:mR	:n "4oR	:p )BqR	:r -JsR	:t #6uR	:v $8wR	:x "4yR	:z *{R	:|  0}R	:~ #6R	:@ &<AR	:B -JCR	:D $ER	:F !2GR	:H %:IR	:J KR	:L .MR	:N "OR	:P .QR	:R "SR	:T &UR	:V WR	:X "4YR	:Z *[R	:\ .]R	:^ $8_R	:` "4aR	:b !2cR	:d  0&eR	:f %9!"4,rT   )r   r   r   )NNFFFr   Fr`   r`   NNr   r   r      g-C6
?g{Gz?g?g+?g:0yE>      ?g      @r_   linear皙?r   passivewarningTNr   Fre   Fr   r   NTFFFFFFO  rR  FFFFO1autoFFNr_   NNF FNr   Nr_   NNTNFNNFrU  r   NNNNN        
adamw_8bitNFFlengthNNNNTFTFFNN
every_saveNNFNTNFTrT  NNNrU  FFNlasti  FNNFFNNFFFNFTi      NrO  rV  sigmoidTrM  g      ?rV  Nkeep_endFNNNNr_   N)__name__
__module____qualname____firstlineno____doc__r@   r   r   r   __annotations__r   intr   rI  __static_attributes____classcell__rJ  s   @rQ   r   r      sC   CH +012+(3-  */VW*#  &+EF&NXc]  #$&'%&#'"&&'"#"%$%""!&!27!'!$!"%) $!& $  -1!!!$%%)  $ $(-"%*!%#!%(,%*!%##' $  $!$)(-"#" "!&(, $!$$! #mw- w-rT   r   c                     ^  \ rS rSrSrSS/r            S=S\\\\	R                  \4      S\\   S\\   S	\\   S
\\\\\\4   4      S\\\\\\4      S\\/ \4      S\\\      S\\R2                  R4                  \R2                  R6                  R8                  4   S\\\R:                  \R:                  /\R:                  4      S\\   S\\\/\4      4U 4S jjjrS r S>S\\\\	R                  4      S\4S jjr!\"    S?S\\\\\RF                  4   4   S\$S\%S\%S\\RL                     S\\\RF                  4   4S jj5       r'S\RP                  S\RP                  S\\RP                  \RP                  \RP                  4   4S jr)\"   S@S\RP                  S \RF                  S!\$S\%S\$S\RP                  4S" jj5       r*S\	R                  S\\\\\RF                  4   4   S\\RP                  \RP                  \RP                  \RP                  4   4S# jr+ SAS\\\\\RF                  4   4   S$\,S%   4S& jjr-  SBS\\\	R                  4   S'\\\\R:                  \.4   4   S\\R:                  \\R:                  \\\R:                  4   4   4   4S( jjr/S\\\RF                  4   S\4S) jr0 S>S\\\	R                  4   S'\\\\R:                  \.4   4   S*\$S+\\\      4S, jjr1SAS-\\\24   S$\,S%   SS4S. jjr3   SCS/\4S0\S*\\$   S+\\\      S1\S\4U 4S2 jjjr5S>S3\\\24   S4\\2   SS4U 4S5 jjjr6S6 r7U 4S7 jr8   SDS8\\   S9\\   S:\\\\   S4   4S; jjr9S<r:U =r;$ )E_UnslothCPOTraineri  a  
Initialize CPOTrainer.

Args:
    model (`transformers.PreTrainedModel`):
        The model to train, preferably an `AutoModelForSequenceClassification`.
    args (`CPOConfig`):
        The CPO config arguments to use for training.
    data_collator (`transformers.DataCollator`):
        The data collator to use for training. If None is specified, the default data collator
        (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
        sequences in the batch, given a dataset of paired sequences.
    train_dataset (`datasets.Dataset`):
        The dataset to use for training.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If provided, will be used to automatically process the inputs
        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
        reuse the fine-tuned model.
    model_init (`Callable[[], transformers.PreTrainedModel]`):
        The model initializer to use for training. If None is specified, the default model initializer will be
        used.
    callbacks (`list[transformers.TrainerCallback]`):
        The callbacks to use for training.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.
    peft_config (`dict`, defaults to `None`):
        The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
        a PEFT model.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.
trlcpoNrH   rM   data_collatortrain_dataseteval_datasetprocessing_class
model_init	callbacks
optimizerspreprocess_logits_for_metricspeft_configcompute_metricsc                 R  > UR                   c  0 nO[        U[        5      (       d  [        S5      eUR                   nUR	                  S5      nUbc  [        U[        5      (       a  US:w  a  [        [        U5      nUS:w  a.  [        U[        R                  5      (       d  [        SU S35      eXS'   [        U[        5      (       a  [        R                  " U40 UD6nSU l
        [        5       (       d  Ub  [        S5      e[        5       (       GaN  UGbJ  [        U[        5      (       a  UR                  5       n[        USS5      (       d  [        US	S5      (       at  [        US
5      =(       a0    S
[        [         R"                  " [$        5      R&                  5      ;   nSUR(                  0nU(       a  UR*                  US
'   [%        U40 UD6nOUUR(                  (       aD  [        US5      (       a  UR-                  5         O"S nUR/                  5       R1                  U5        UnUR2                  (       a$  [        US	S5      (       a  [5        U5        SU l
        OUUR(                  (       aD  [        US5      (       a  UR-                  5         O"S nUR/                  5       R1                  U5        UR6                  (       a)  [9        5       (       d  [;        5       (       d  [        S5      eUb  UR<                  R>                  U l        O)UR>                  c  [        S5      eUR>                  U l        U R>                  (       a6  UR<                  R@                  U l         UR<                  RB                  U l!        Uc  [        S5      eURD                  c  [F        RH                  " S5        SnOURD                  nURJ                  c  [F        RH                  " S5        SnOURJ                  nUU:  d  [        SU SU S35      eURL                  c*  U R>                  (       a  [F        RH                  " S5        SnOURL                  nUc_  [O        URB                  URP                  U R>                  S9nURR                  (       a  SUl)        [F        RH                  " S5        SU l*        OSU l*        URV                  (       a  [Y        U5        UU l"        UR6                  U l        URP                  U l(        URZ                  b  URZ                  OURB                  U l-        UU l%        UR\                  U l.        UU l&        X`l/        UR`                  S;   a4  URb                  S:  a$  [F        RH                  " SUR`                   S 35        UR`                  S!:X  a  [        S"5      eURd                  U l2        URb                  U l1        UR`                  U l0        URf                  U l3        [        UR<                  S#S5      U l4        [        UR<                  S$S%5      U l5        U Rh                  (       a&  U Rj                  S%:X  a  [F        RH                  " S&5        UR`                  S':X  a  URl                  U l6        URn                  U l7        [q        S( 5      U l9        SURt                  S)'   [w        5       Ry                  5          UR{                  [|        UR~                  S*9nUR{                  [        S+U0UR~                  S,9nUb?  UR{                  [|        UR~                  S*9nUR{                  [        S+U0UR~                  S,9nUR{                  U R                  UR~                  S*9nUb$  UR{                  U R                  UR~                  S*9nS S S 5        [        TU G]  UUUUUUUUUU	U
S-9  SU lD        [        U R                  S.5      (       a%  U R                  R                  U R                  5        [        U S/5      (       d  [        S05      eg ! , (       d  f       N= f)1NzRYou passed model_kwargs to the CPOTrainer. But your model is already instantiated.dtyperT  zhInvalid `dtype` passed to the CPOConfig. Expected a string with either `torch.dtype` or 'auto', but got .FzvPEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT modelsis_loaded_in_8bitis_loaded_in_4bitr  use_gradient_checkpointingenable_input_require_gradsc                 &    UR                  S5        g NTrequires_grad_moduleinputrO   s      rQ   make_inputs_require_grad=_UnslothCPOTrainer.__init__.<locals>.make_inputs_require_grad  s    --d3rT   Tc                 &    UR                  S5        g r  r  r  s      rQ   r  r    s    ))$/rT   z`generate_during_eval=True` requires Weights and Biases or Comet to be installed. Please install `wandb` or `comet-ml` to resolve.zMWhen no model is provided, you need to pass the parameter is_encoder_decoder.z=processing_class must be specified to tokenize a CPO dataset.z`max_length` is not set in the CPOConfig's init it will default to `512` by default, but you should do it yourself in the future.r[  z`max_prompt_length` is not set in the CPOConfig's init it will default to `128` by default, but you should do it yourself in the future.   zmax_prompt_length (z+) should be strictly less than max_length (z).zWhen using an encoder decoder architecture, you should set `max_completion_length` in the CPOConfig's init it will default to `128` by default, but you should do it yourself in the future.)r~   r<  r@  zWhen using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.)hingeipor   zYou are using the z loss type that does not support label smoothing. The `label_smoothing` parameter will be ignored. Set `label_smoothing` to `0.0` to remove this warning.kto_pairzKSupport for kto_pair has been removed in CPOTrainer. Please use KTOTrainer.output_router_logitsrouter_aux_loss_coefrV  a-  You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to `0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary loss.simpoc                       [        [        5      $ N)r$   listrC  rT   rQ   <lambda>-_UnslothCPOTrainer.__init__.<locals>.<lambda>  s	    ;t3DrT   estimate_tokens)num_proc	tokenizer)	fn_kwargsr  )rH   rM   rm  rn  ro  rp  rq  rv  rr  rs  rt  add_model_tagsacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`.)IrA  
isinstancestrr   getgetattrr=   rx  r   from_pretrained_peft_has_been_casted_to_bf16r*   r   merge_and_unloadrK   r  r(   	signaturer9   
parametersr  r  r}  get_input_embeddingsregister_forward_hookr   r8   r?  r,   r)   configr@  decoder_start_token_idr~   r2  r.   rQ  r3  r4  r   r<  r   use_dpo_data_collatorr8  r%   r=  r>  rp  r7  r6  r5  r9  aux_loss_enabledaux_loss_coefr:  r;  r$   _stored_metricswarnings_issuedr   main_process_firstmapr1   rB  r0   tokenize_rowrH  rI  model_accepts_loss_kwargsrH   r  
_tag_namesAttributeError)rL   rH   rM   rm  rn  ro  rp  rq  rr  rs  rt  ru  rv  rA  rx  _support_gc_kwargsprepare_model_kwargsr  r2  r3  r4  rJ  s                        rQ   rI  _UnslothCPOTrainer.__init__B  s   " !!) "E3''qrr $ 6 6%))'2E eS))evo#E51EF?:eU[[+I+I$ C  DI  CJ  JK  L  .3'*eS!!(88TBSTE .3* ""{'> I    [%<%++..0u1599WUL_af=g=g%,9& &5%%&EFQQ:  # )EdFaFa'b$%LPLnLn()HI7VAUV,,5">??4464 ..0FFG_` EyyWU,?GG+E2592
 ((u:;;0020 **,BBC[\$$.@.B.BFXFZFZD 
 &+ll&E&ED#$$,lmm&*&=&=D#""*/,,*M*MD' % 9 9D#\]]??"NNe JJ!!)NNe !$ $ 6 6 :-%&7%88cdncooqr  %%-$2I2INNe %(!$($>$>! 6-::#'#:#:#'#:#:M ))-2*\
 *.D&).D& $U+$$($=$=!"&"9"9373E3E3QT//WgWtWt!2#33%:" 0>>--$2F2F2JNN$T^^$4 5v v >>Z'jkkII	#33 '6Le T$U\\3I3O  T%7%73%>NN >>W$#//D ZZ
*+DE 48/0 ^..0)--.BTMbMb-cM)--)kCS5T_c_t_t . M '+//0DtOdOd/e+//-*,<=!22  0   *--d.?.?$J_J_-`M'+//0A0ADLaLa/b# 1& 	''%-!+!*G 	 	
" */& 4::/00JJ%%doo6t]++ j  ,U 10s   C`
`&c                 
   U R                  X-   SS9nU R                  USS9S   nUS   [        U5      S nUS   [        U5      S n[        R                  " XE/5      n[        R                  " US   5      n[        U5      [        U5      :w  a  [        S5      e[        U5      n	XCS   SU	 :w  a  U	S-  n	US   SU	 nUS   SU	 n
[        U5      [        U
5      :w  a  [        S5      eUS   U	S nUS   U	S n[        UU
UUS	9$ )
z
Llama tokenizer does satisfy `enc(a + b) = enc(a) + enc(b)`. It does ensure `enc(a + b) = enc(a) + enc(a +
b)[len(enc(a)):]`. Reference:
    https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257
Fadd_special_tokensr|   Nattention_maskzBPrompt input ids and answer input ids should have the same length.re   z@Prompt input ids and attention mask should have the same length.)prompt_input_idsprompt_attention_maskr|   r  )rp  lenr3   concatenatearrayr   dict)rL   promptanswerfull_tokenizedr  answer_input_idsanswer_attention_maskfull_concat_input_idsfull_input_idsresponse_token_ids_start_idxr  s              rQ   build_tokenized_answer)_UnslothCPOTrainer.build_tokenized_answerK  sn    ..vSX.Y00E0RS^_)+6s;K7L7NO ./? @EUAVAX Y !#0@/S T ."=>~#&;"<<abb (++;'<$ k:;X<XYY(A-()+67T8TU ./? @A^B^ _ C(=$>>_``)+67S7TU ./? @A]A^ _-"7&0	
 	
rT   r   c           	      	   0 nUS   nUS   nUS   nU R                   (       Gd  [        U[        5      (       d  [        S[	        U5       35      eU R                  USS9nUR                  5        VV	s0 s H  u  pSU 3U	_M     nnn	[        U[        5      (       d  [        S[	        U5       35      eU R                  XE5      n
[        U[        5      (       d  [        S	[	        U5       35      eU R                  XF5      n[        US
   5      n[        U
S
   5      n[        US
   5      n[        X5      nUR                  5        H  u  pU	SU Xx'   M     [        [        U
S
   US
   5       VVs/ s H  u  nnUU:g  PM     snn5      n[        X-
  5      nUS:  d  US:  a  [        S5      e[        U R
                  R                  UUUU
UU5      u  pzn[        U R
                  R                   X5      u  p[#        [        U
S   5      [        US   5      5      nXU4 H  n[        US
   5      U-   U R$                  :  d  M$  U R&                  S:X  a   S H  nUU   SU R(                   UU'   M     MT  U R&                  S:X  a!  S H  nUU   U R(                  * S UU'   M     M  [        SU R&                   35      e   X4 HO  n[        US
   5      U-   U R$                  :  d  M$  S H%  nUU   SU R$                  U R(                  -
   UU'   M'     MQ     S Vs0 s H  oU
SU 3   X   -   _M     nnS Vs0 s H  oUSU 3   X   -   _M     nnUS   SS US'   U R*                  /[        U
S
   5      -  US   S[        U
S
   5      & US   SS US'   U R*                  /[        US
   5      -  US   S[        US
   5      & UUUS.R                  5        H0  u  nnUR                  5        H  u  nnUS:X  a  M  UX8 U 3'   M     M2     U$ U R                  USU R,                  SS9n
U R                  USU R,                  SS9nU R                  USU R(                  SS9nU
S   US'   US   US'   US   US
'   US   US'   Ubc  [/        US5      (       aR  UR1                  [2        R4                  " US   5      S9US'   UR1                  [2        R4                  " US   5      S9US '   U$ s  sn	nf s  snnf s  snf s  snf )!a  Tokenize a single row from a CPO specific dataset.

At this stage, we don't convert to PyTorch tensors yet; we just handle the truncation in case the prompt +
chosen or prompt + rejected responses is/are too long. First we truncate the prompt; if we're still too long,
we truncate the chosen/rejected.

We also create the labels for the chosen/rejected responses, which are of length equal to the sum of the length
of the prompt and the chosen/rejected response, with label_pad_token_id for the prompt tokens.
r  chosenrejectedz prompt should be an str but got Fr  prompt_z chosen should be an str but got z"rejected should be an str but got r  Nre   zdChosen and rejected prompt_input_ids might only differ on the last token due to tokenizer merge ops.r|   
keep_start)r  r  r^  zUnknown truncation mode: )r|   r  labels)chosen_	rejected_rU  token_type_idsT)
truncationr2  r  chosen_labelsrejected_labelsr  r  %prepare_decoder_input_ids_from_labels)r  rejected_decoder_input_idschosen_decoder_input_ids)r@  r  r  r   typerp  itemsr  r  rF  r   ri   absr!   bos_token_idr"   eos_token_idrG  r2  r>  r3  r<  r4  rK   r  r=   r   )rL   featurerH   batchr  r  r  prompt_tokenskvchosen_tokensrejected_tokensprompt_len_input_idschosen_prompt_len_input_idsrejected_prompt_len_input_idsabnum_diff_tokensnum_diff_lenlonger_response_lengthanswer_tokenschosen_sequence_tokensrejected_sequence_tokenstokstype_keytokenss                             rQ   r  _UnslothCPOTrainer.tokenize_row|  s3    "":&&&& fc** #CDL>!RSS 11&U1SM:G:M:M:OP:O$!wqc]A-:OMPfc** #CDL>!RSS 77GMh,, #Ed8nEU!VWW"99&KO $'}5G'H#I *-m<N.O*P',/@R0S,T)#&'B#b %++-#$%:&:#;  .
 "$'6H(I?[mKn$op$oDAqa$opO :Z[L"lQ&6 =  =T%%22$+-=9M/ .E%%22M.*M &)];-G)H#o^iNjJk%l" #0-!P}%789<RRUYUdUdd++|;!NA/<Q/?@X$BXBX/YM!, "O--;!NA/<Q/?AWAW@W@Y/ZM!, "O )+DTEYEYDZ)[\\ "Q #0!A}%789<RRUYUdUdd<+8+;<fdooPTPfPf>f+ga( = "B Ml&Lkq=71#/-2BBBLk # & Qp(Po1?WQC=1O4FFFPo % ( 0Fk/RST/U"8,''ZM"456Z7"8,-Us=AS3T/UV 2J+1VWX1Y$X.''^O$678^9$X./Y_EW5X1YZ
 25! eg	4
 )-

$Hf#33 .4ECz*+ )5D / !114D4N4Ncg 2 M #33Td6P6Pei 4 O !114D4J4J_c 2 M &3;%?E/"'6{'CE#$(5k(BE$%-:;K-LE)* WU4[%\%\6;6a6a <<.?(@A 7b 723 5:4_4_ <<o(>? 5` 501 o Q0 q\&(s   3S(1S.
S47S9r  r@  r<  r=  r   c           
      L   0 nU(       a-  [        U S   R                  S   U S   R                  S   5      nO,[        U S   R                  S   U S   R                  S   5      nU  H  nUR                  S5      (       d  M  [        X   [        R
                  5      (       d  M>  SU;   d  U(       a  UnO1UR                  S5      (       a  UnOUR                  S	5      (       a  S
nUR                  SS5      n	[        X   UWS9XY'   M     U  H  nUR                  S5      (       d  M  [        X   [        R
                  5      (       d  M>  SU;   d  U(       a  UnO1UR                  S5      (       a  UnOUR                  S	5      (       a  S
nUR                  SS5      n	[        R                  " XY   [        X   UWS94S
S9R                  US9XY'   M     U(       aJ  U S   R                  SS5      R                  US9US'   U S   R                  SS5      R                  US9US'   U$ )ao  Concatenate the chosen and rejected inputs into a single tensor.

Args:
    batch:
        A batch of data. Must contain the keys 'chosen_input_ids' and 'rejected_input_ids', which are tensors
        of shape (batch_size, sequence_length).
    is_encoder_decoder:
        Whether the model is an encoder-decoder model.
    label_pad_token_id:
        The label pad token id.
    padding_value:
        The padding value to use for the concatenated inputs_ids.
    device:
        The device for the concatenated inputs.

Returns:
    A dictionary containing the concatenated inputs under the key 'concatenated_input_ids'.
r  re   r  chosen_input_idsrejected_input_idsr  r  
_input_ids_attention_maskr   concatenated)	pad_valuer  rd   r   r  r   concatenated_input_idsr  concatenated_attention_mask)rG  rh   
startswithr  r=   r   endswithreplacer6   catrj   repeat)
r  r@  r<  r=  r   concatenated_batchr2  r  r  concatenated_keys
             rQ   concatenated_inputs&_UnslothCPOTrainer.concatenated_inputs  s   4  U?399!<eDU>V>\>\]^>_`JU#56<<Q?G[A\AbAbcdAefJA||H%%*UXu||*L*Lq=$6 2IZZ-- -IZZ 122 !I#$99X~#F 7DUXzen7o"4  A||J''Jux,N,Nq=$6 2IZZ-- -IZZ 122 !I#$99Z#H 7<yy*<%eh
iP 8 "F"# #4 " ;@AS;T;[;[\]_`;a;d;dlr;d;s78-.55a;>>f>M <= "!rT   policy_chosen_logpspolicy_rejected_logpsc                    U R                   S:w  a  [        R                  " U5      n[        R                  " U5      nSUR                  U R                   * 5      -
  U R                   -  nSUR                  U R                   * 5      -
  U R                   -  nXV-
  R	                  U R
                  R                  5      nO'X-
  R	                  U R
                  R                  5      nU R                  S:X  a  U R                  U R                  -  nXx-
  n[        R                  " U R                  U-  5      * SU R                  -
  -  [        R                  " U R                  * U-  5      U R                  -  -
  n	OU R                  S:X  ag  [        R                  " U R                  U-  5      * SU R                  -
  -  [        R                  " U R                  * U-  5      U R                  -  -
  n	OyU R                  S:X  a'  [        R                  " SU R                  U-  -
  5      n	OBU R                  S:X  a  USSU R                  -  -  -
  S-  n	O[        SU R                   S	35      eU R                   S:w  a  U R                  WR	                  U R
                  R                  5      R                  5       -  n
U R                  WR	                  U R
                  R                  5      R                  5       -  nOU R                  UR	                  U R
                  R                  5      R                  5       -  n
U R                  UR	                  U R
                  R                  5      R                  5       -  nXU4$ )
ae  Compute the CPO loss for a batch of policy and reference model log probabilities.

Args:
    policy_chosen_logps:
        Log probabilities of the policy model for the chosen responses. Shape: (batch_size,)
    policy_rejected_logps:
        Log probabilities of the policy model for the rejected responses. Shape: (batch_size,)

Returns:
    A tuple of three tensors: (losses, chosen_rewards, rejected_rewards). The losses tensor contains the CPO
    loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for
    the chosen and rejected responses, respectively.
rV  re   r  r\  r  r  r   zUnknown loss type: z7. Should be one of ['sigmoid', 'hinge', 'ipo', 'simpo'])r;  r=   exppowrj   r  r   r7  r:  r5  r   
logsigmoidr6  relur   detach)rL   r  r  chosen_probsrejected_probspolicy_chosen_rewardspolicy_rejected_rewardsrr   gamma_logratioslosseschosen_rewardsrejected_rewardss               rQ   cpo_loss_UnslothCPOTrainer.cpo_lossQ  s   & :: 99%89L"YY'<=N &')9)94::+)F%F$**$T!'(>+=+=tzzk+J'Jdjj&X#+EII$JZJZJaJabF *AEEdFVFVF]F]^F >>W$"..:O-F dii&011Q9M9M5MN,,		zF23d6J6JJK  ^^y( dii&011Q9M9M5MN,,		zF23d6J6JJK  ^^w&ZZDII$6 67F^^u$qA		M22q8F%dnn%55lm 
 ::!YY)>)A)A$BRBRBYBY)Z)a)a)ccN#yy+B+E+EdFVFVF]F]+^+e+e+gg "YY*=*@*@AQAQAXAX*Y)a)a)ccN#yy,A,D,DTEUEUE\E\,]+e+e+gg'777rT   rr   r  average_log_probc                 X   U R                   SS UR                   :w  a  [        S5      eU(       d'  USS2SS24   R                  5       nU SS2SS2SS24   n X:g  nSXU:H  '   [        X5      nU(       a%  Xe-  R	                  S5      UR	                  S5      -  $ Xe-  R	                  S5      $ )a  Compute the log probabilities of the given labels under the given logits.

Args:
    logits: Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size)
    labels:
        Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are
        ignored. Shape: (batch_size, sequence_length)
    average_log_prob:
        If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the
        log probabilities of the (non-masked) tokens.
    label_pad_token_id: The label pad token id.
    is_encoder_decoder: Whether the model is an encoder-decoder model.

Returns:
    A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the
    given logits.
Nr_   zKLogits (batch and sequence length dim) and labels must have the same shape.re   r   )rh   r   cloner;   r   )rr   r  r  r<  r@  	loss_maskrz   s          rQ   get_batch_logps"_UnslothCPOTrainer.get_batch_logps  s    2 <<,jkk!AqrE]((*FAssAI&F0	 01++,/?#/44R89==;LLL#/44R88rT   c                 ,  ^  T R                  UT R                  T R                  T R                  T R                  R
                  S9nUS   R                  S   nT R                  (       a  ST R                  US   5      0O0 nT R                  (       a  SUS'   U" US   4US	   S
S.UD6nUR                  nU 4S jnUS   R                  5       n	T R                  S:X  a:  [        R                  " S5      R                  T R                  R
                  5      n
OU" USU U	SU 5      n
T R                  UUS   T R                   S;   T R                  T R                  S9nUSU nXS nUSU nXtS nT R                  (       a  XXXR"                  4$ XXU
4$ )zRun the given model on the given batch of inputs, concatenating the chosen and rejected inputs together.

We do this to avoid doing two forward passes, because it's faster for FSDP.
)r@  r<  r=  r   r  r   decoder_input_idsconcatenated_labelsTr  r  r  F)r  	use_cachec                 \  > TR                   (       d1  U SS S2S S 24   R                  5       n USSS 24   R                  5       n[        R                  " 5       nU R	                  SU R
                  S   5      n UR	                  S5      nUR                  U R                  5      nU" X5      nU$ )N.r_   re   )r@  
contiguousr2   CrossEntropyLossviewrh   rj   r   )rr   r  loss_fctlossrL   s       rQ   cross_entropy_lossC_UnslothCPOTrainer.concatenated_forward.<locals>.cross_entropy_loss  s    **SbS!,779QR335**,H[[V\\"%56F[[_FYYv}}-FF+DKrT   rV  N)r  r  )r  r@  r<  )r  r@  r<  r=  r  r   rh   _shift_rightr  rr   r  r9  r=   r   rj   r  r7  aux_loss)rL   rH   r  r  
len_chosenmodel_kwargsoutputs
all_logitsr'  r  nll_loss	all_logpschosen_logpsrejected_logpschosen_logitsrejected_logitss   `               rQ   concatenated_forward'_UnslothCPOTrainer.concatenated_forward  s    "55#66#66,,##** 6 
 ?+11!4
 && $T%6%67IJ_7`%a  	   37L/078
-.KL
 	
 ^^
	 $$9:@@B>>Q||C(++D,<,<,C,CDH)*[j*A6+:CVWH((45!^^/??#66#66 ) 
	 !*-";/";J/$[1   -RZ\l\lmmmhWWrT   
train_eval)trainevalc                 h   0 nU R                  X5      nUSS u  nnnn	n
U R                  (       a  US   nU R                  UU5      u  pnUR                  5       U R                  U
-  -   nX:  R                  5       nUS:X  a  SOSnU R                  R                  U5      R                  5       R                  5       UU S3'   U R                  R                  U5      R                  5       R                  5       UU S3'   U R                  R                  U5      R                  5       R                  5       UU S3'   U R                  R                  X-
  5      R                  5       R                  5       UU S	3'   U R                  R                  U5      R                  5       R                  5       R                  5       UU S
3'   U R                  R                  U5      R                  5       R                  5       R                  5       UU S3'   U R                  R                  U	R                  5       R                  5       5      R                  5       R                  5       UU S3'   U R                  R                  UR                  5       R                  5       5      R                  5       R                  5       UU S3'   U R                  R                  U
5      R                  5       R                  5       R                  5       UU S3'   U R                  (       a  XR                  W-  -  nX4$ )zWCompute the CPO loss and other metrics for the given batch of inputs for train or test.N   r9  eval_rU  zrewards/chosenzrewards/rejectedzrewards/accuracieszrewards/marginszlogps/rejectedzlogps/chosenzlogits/rejectedzlogits/chosenr/  )r5  r  r  meanr9  floatr  gather_for_metricsitemr  r  )rL   rH   r  r7  metricsforward_outputr  r  policy_chosen_logitspolicy_rejected_logitspolicy_nll_lossr*  r  r  r  r&  reward_accuraciesprefixs                     rQ   get_batch_loss_metrics)_UnslothCPOTrainer.get_batch_loss_metrics  s    225@ 2A	
! "  %a(H37==!4
0 0
 {{}t~~??+>EEG&&0b-1-=-=-P-PQ_-`-e-e-g-l-l-n6(.)*/3/?/?/R/RSc/d/i/i/k/p/p/r6(*+,151A1A1T1TUf1g1l1l1n1s1s1u6(,-.//0QRWWY^^` 	6(/*+ //0EFMMOTTV[[] 	6(.)* //0CDKKMRRTYY[ 	6(,'( //0F0M0M0O0T0T0VW\\^cce 	6(/*+ //0D0K0K0M0R0R0TUZZ\aac 	6(-() (,'7'7'J'J?'['b'b'd'i'i'k'p'p'r6((#$  &&11D}rT   inputsc                     U R                   (       a)  [        U R                  R                  R                  5      O	[        5       nU   U R                  XSS9u  pgS S S 5        U R                  WSS9  U(       a  WU4$ W$ ! , (       d  f       N+= f)Nr8  r7  )r  r#   r  r   r  r4   rH  store_metrics)rL   rH   rJ  return_outputsnum_items_in_batchcompute_loss_context_managerr&  rA  s           rQ   compute_loss_UnslothCPOTrainer.compute_lossF  s     7;6X6XHT%%,,112^i^k 	% * 77RY7ZMD * 	7w7'?" *)s   A??
Bc           	         U R                   (       a)  [        U R                  R                  R                  5      O	[        5       nU   UR                  US   US   U R                  SU R                  R                  S9nSSS5        [        WU R                  U R                  R                  5      nU R                  R                  USS9nU$ ! , (       d  f       NU= f)zRGenerate samples from the model and reference model for the given batch of inputs.r  r  T)r|   r  r2  	do_sampler~   N)skip_special_tokens)r  r#   r  r   r  r4   generater2  rp  r~   r6   batch_decode)rL   rH   r  generate_context_managerpolicy_outputpolicy_output_decodeds         rQ   generate_from_model&_UnslothCPOTrainer.generate_from_model[  s     7;6X6XHT%%,,112^i^k 	! &!NN 23$%<=??!22?? + M & &mT__dF[F[FhFhi $ 5 5 B B=fj B k$$ &%s   8C
Cr   ignore_keysc                 L   Uc+  [        US5      (       a  [        UR                  S/ 5      nO/ nU R                  (       a)  [	        U R
                  R                  R                  5      O	[        5       n[        R                  " 5          U   U R                  XSS9u  pgS S S 5        S S S 5        U R                  WSS9  U(       a  WR                  5       S S 4$ US   US   S.nUR                  5        V	V
s/ s H  u  pX;  d  M  U
PM     nn	n
[        R                  " XR
                  R                  S9n[        R                   " UR"                  S	   U R
                  R                  S9nWR                  5       X4$ ! , (       d  f       N= f! , (       d  f       N= fs  sn
n	f )
Nr  keys_to_ignore_at_inferencer9  rL  eval_logits/choseneval_logits/rejected)r`  ra  r   r   )rK   r  r  r  r#   r  r   r  r4   r=   no_gradrH  rM  r  r  r   zerosrh   )rL   rH   rJ  r   r]  prediction_context_managerr&  rA  logits_dictr  r  rr   r  s                rQ   prediction_step"_UnslothCPOTrainer.prediction_stepr  s\    uh''%ell4QSUV  7;6X6XHT%%,,112^i^k 	# ]]_8 77RX7YMD 9_ 	7v6KKM4.. #**>"?$+,B$C
 !, 1 1 3L 3q7K! 3Lf-=-=-D-DEV\\!_T5E5E5L5LMv..% 98__ Ms0   FE>F6F F >
F	F
FrA  c                 x    UR                  5        H&  u  p4U R                  U   U   R                  U5        M(     g r  )r  r  rp   )rL   rA  r7  keyvalues        rQ   rM   _UnslothCPOTrainer.store_metrics  s2    !--/JC  ,S188? *rT   
dataloaderdescriptionmetric_key_prefixc                   > U R                   (       GaL  [        UR                  5      n[        R                  " [        U5      U R                  R                  S9nUR                  R                  U5      nU R                  U5      n	U R                  U	5      n	U R                  U R                  U	5      n
[        R                  " SS/[        U	S   U
5       VVs/ s H  u  pX[        U5      S /PM     snnS9nSU R                  R                   ;   a(  ["        R%                  S["        R'                  US	905        S
U R                  R                   ;   a
  [)        SUS9  [*        TU ]Y  XX4U5      nU$ s  snnf )z
Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by
`Trainer.evaluate()` and `Trainer.predict()`.

Works both with or without labels.
)r  PromptPolicyr  N)columnsdatawandbgame_log)rs  comet_mlzgame_log.csv)nametable)r?  r  datasetr:   samplerangerM   eval_batch_sizeselectrm  _prepare_inputsr[  rH   r7   	DataFrameri   r  rt  logTabler-   rH  evaluation_loop)rL   rl  rm  r   r]  rn  num_samplesrandom_indicesrandom_batch_datasetrandom_batchrZ  r  polrx  initial_outputrJ  s                  rQ   r  "_UnslothCPOTrainer.evaluation_loop  sT     $$$j001K#]]5+=AZAZ[N $.#5#5#<#<^#L --.BCL//=L$($<$<TZZ$V!LL!8,DGU]H^`uDvDv[VVV/0DvE $))---		:u{{{'>?@TYY000-' 0%9HY
 %s   E3logs
start_timec                   > SU;   a  SOSnU R                   U   R                  5        H9  u  pE[        R                  " U5      R	                  5       R                  5       X'   M;     U R                   U	 [        TU ]  X5      $ )z
Log `logs` on the various objects watching training, including stored metrics.

Args:
    logs (`dict[str, float]`):
        The values to log.
    start_time (`float` or `None`, *optional*, defaults to `None`):
        Start time of the training.
r&  r8  r9  )r  r  r=   r   r=  r@  rH  r  )rL   r  r  r7  ri  rA  rJ  s         rQ   r  _UnslothCPOTrainer.log  sr     !'$WF
 00<BBDLCW-22499;DI E  ,w{4,,rT   c                    U R                   c  [        S5      e[        U5      (       aO  [        R                  " UR
                  S S S-   U R                   5      n[        R                  " X!SS S24   /SS9nOHUR                  UR
                  5      nUSS S24   R                  5       USSS 24'   U R                   US'   U R                  c  [        S5      eUR                  US	:H  U R                  5        U$ )
Nz]model.config.decoder_start_token_id has to be defined. It is usually set to the pad_token_id.r_   )re   .rd   re   ).r   z,model.config.pad_token_id has to be defined.r]  )r  r   r+   r=   fullrh   r  	new_zerosr  r~   masked_fill_)rL   r|   shifted_input_idss      rQ   r)  _UnslothCPOTrainer._shift_right  s    &&.o 
 Y'' %

9??3B+?$+FHcHc d %		+<SbS>Q*RXZ [ ) 3 3IOO D)238)<)B)B)Dc12g&(,(C(Cf%$KLL&&'8D'@$BSBST  rT   c                   > U R                   R                  c*  [        U R                   R                  5      R                  nO(U R                   R                  R                  S5      S   nU R                  US9  [        TU ]!  X5        g )N/r_   )
model_name)	rM   r  r   r   rw  splitcreate_model_cardrH  _save_checkpoint)rL   rH   trialr  rJ  s       rQ   r  #_UnslothCPOTrainer._save_checkpoint  sj    99!!)dii22388J//55c:2>J*5 .rT   r  dataset_nametagsc                    U R                  5       (       d  g[        U R                  R                  S5      (       ac  [        R
                  R                  U R                  R                  R                  5      (       d!  U R                  R                  R                  nOSnUc  [        5       nO$[        U[        5      (       a  U1nO[        U5      n[        U R                  R                  S5      (       a  UR                  S5        S[        R                  ;   a  UR                  S5        UR                  U R                  5        [        R                   " S5      n[#        UUU R$                  UU['        5       (       a+  [(        R*                  b  [(        R*                  R,                  OS[/        5       SUS	S
S9nUR1                  [        R
                  R3                  U R4                  R6                  S5      5        g)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    model_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the model.
    dataset_name (`str` or `None`, *optional*, defaults to `None`):
        Name of the dataset used for training.
    tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
        Tags to be associated with the model card.
N_name_or_pathunsloth_versionunslothJOB_IDhf_jobsay          @inproceedings{xu2024contrastive,
            title        = {{Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation}},
            author       = {Haoran Xu and Amr Sharaf and Yunmo Chen and Weiting Tan and Lingfeng Shen and Benjamin Van Durme and Kenton Murray and Young Jin Kim},
            year         = 2024,
            booktitle    = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
            publisher    = {OpenReview.net},
            url          = {https://openreview.net/forum?id=51iwkioZpn}
        }CPOzeContrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translationz
2401.08417)
base_modelr  r  r  r  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zerorK   rH   r  r5   pathisdirr  setr  r  addenvironupdater  r<   dedentr&   r  r,   rt  runurlr'   savejoinrM   r   )rL   r  r  r  r  citation
model_cards          rQ   r  $_UnslothCPOTrainer.create_model_card  sn   " ))++4::$$o66rww}}TZZM^M^MlMl?m?m**88JJ <5Dc""6Dt9D4::$$&788HHYrzz!HHYDOO$ ?? $  )!!**%'9';';		@Ueiimm[_.0%!

 	TYY%9%9;GHrT   )r  r  r;  r  r  r5  r9  r  r?  r@  r<  r6  r7  r4  r2  r3  r  r~   r=  rp  r:  r>  r  )NNNNNNNN)NNNNNr  )Fr]  r   N)Fr]  Fr8  )FN)NNr9  )NNN)<r_  r`  ra  rb  rc  r  r   r	   r   r2   Moduler  r   r   r   r  r   r   r   r   r   r  r    tupler=   r  	Optimizerlr_schedulerLambdaLRr   r   rI  r  r  staticmethod
LongTensorboolre  r   r  FloatTensorr  r  r5  r   rH  r   rQ  r[  rf  r>  rM  r   r  r  r)  r  r  rf  rg  rh  s   @rQ   rj  rj    s   #J J CG$(04+/EI >B59Vbhl&*FJGoryy#=>?G y!G  -	G
  (G uWd3<.@%@ABG #)+=?UWeef
G Xb/&9:;G D12G %++//1I1I1R1RRSG (0%,,9UW\WcWc9c0d'eG d^G "(N+;T+A"BCG GR/
bO8E/299:T4U+V Obf Ob  $)"&)-A"CtU%5%55667A" A"  A" 	A"
 &A" 
c5###	$A" A"FG8"..G8  %00G8 
u  %"3"3U5F5FF	G	G8R  "'"&#((9!!(9  (9 (9  	(9
 !(9 
		(9 (9TIXYYIX'+CtU=M=M7M1N,N'OIX	u  %"3"3U5F5FHYHYY	ZIX^ 07	4 CtU%5%556674 O,	4t _bii/0 S%c 1223 
u||U5<<c5<<6G1H#HII	J*%S%:J:J5J0K %PS %8 ,0#/_bii/0#/ S%c 1223#/ #	#/
 d3i(#/J@T#u*%5 @7?C[ @jn @ 04+/!'00 0 'tn	0
 d3i(0 0 
0 0d-S%Z( -huo -QU - -$!0/ %)&*,0	AISMAI smAI CcD()	AI AIrT   rj  c                   F   ^  \ rS rSrSr           SU 4S jjrSrU =r$ )UnslothCPOTraineriA  a  
    
Initialize CPOTrainer.

Args:
    model (`transformers.PreTrainedModel`):
        The model to train, preferably an `AutoModelForSequenceClassification`.
    args (`CPOConfig`):
        The CPO config arguments to use for training.
    data_collator (`transformers.DataCollator`):
        The data collator to use for training. If None is specified, the default data collator
        (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
        sequences in the batch, given a dataset of paired sequences.
    train_dataset (`datasets.Dataset`):
        The dataset to use for training.
    eval_dataset (`datasets.Dataset`):
        The dataset to use for evaluation.
    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
        Processing class used to process the data. If provided, will be used to automatically process the inputs
        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
        reuse the fine-tuned model.
    model_init (`Callable[[], transformers.PreTrainedModel]`):
        The model initializer to use for training. If None is specified, the default model initializer will be
        used.
    callbacks (`list[transformers.TrainerCallback]`):
        The callbacks to use for training.
    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
        The optimizer and scheduler to use for training.
    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
        The function to use to preprocess the logits before computing the metrics.
    peft_config (`dict`, defaults to `None`):
        The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
        a PEFT model.
    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
        The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
        metric values.

    c                 0  > Uc
  [        5       n[        USS5      n[        U5      [        La  Sn[        USS5      n[        U5      [        La  SnSn[        R
                  R                  SS5      S:H  nU(       d1  [        R
                  R                  SS5      S:X  a  [        S5        S	n[        R
                  R                  S
S5      n[        UR                  SS 5      =(       d    [        UR                  SS 5      nUc  UR                  5       R                  nSSKJn  U" U5      nU[        R                  :H  nU(       d  U(       a  U(       a  [        S5      eU(       d  U(       d  U(       a  [        S5      eU(       a"  SUl        SUl        S[        R
                  S'   OCU(       d<  U(       d5  US:X  a/  UUl        U(       + Ul        U(       a  SOS[        R
                  S'   [        USS 5      b-  [        USS5      S:X  a  SUl        [        USS 5      c  SUl        [        USS 5      nUb/  US:  a)  SSKJn  [-        U5      [-        S5      ::  a  [        S5        [        USS5      S:w  aL  [        USS5      nUS:X  a!  UR.                  U:  a  UR.                  Ul        [        US S 5      c
  Ub  UUl        [        US!S5      n[        U5      [        La  Sn[        US"S5      n[        U5      [        La  SnUR                   (       a  U(       a  SUl        S	Ul        UR"                  (       a  U(       a  S	Ul        SUl        U(       a  SUl        SUl        Oc[        R
                  R                  S
S5      S#:X  a  S	Ul        SUl        O0U(       d)  U(       d"  UR"                  Ul        UR                   Ul        Sn[9        5       R                  S$S 5      b  S	n[9        5       R                  S%S 5      b  S	nU(       a  S[        R
                  S&'   S'[9        5       ;  a  [;        US'5      (       d  OD[        US'S 5      n[        US'S 5      nUc'  Ub$  UR<                  n[;        US'5      (       a  UUl        Ub!  [;        US(5      (       a  UR?                  5         S)[9        5       ;   a   [;        [@        S*5      (       a  S+[@        l!        S,[9        5       ;   aU  [;        US*5      (       a  S+Ul!        [;        US)5      (       a,  [;        UR@                  S*5      (       a  S+UR@                  l!        S,[9        5       ;   a  UO[@        nSS-K"J#n  [I        UU5      (       dx  [I        U[J        5      (       a(  S.URL                  ;  a  [O        USS/[        US0S 5      S19nO[I        U[N        5      (       a%  S.URL                  ;   a  [K        U[        US0S 5      S29nOJ[;        US35      (       a  SUl(        [;        US45      (       a  S5Ul)        [;        US65      (       a	  S7S	0Ul*        [I        UU5      (       dx  [;        US85      (       dg  [;        US)5      (       aV  [I        U[J        5      (       a   [K        UR@                  [        US0S 5      S29nO![O        UR@                  SS/[        US0S 5      S19n/ n SS9K+J,n!  U!" S:U 5        [        US;S 5      [Z        R\                  :X  a(  UR^                  S:  a  [        US<S5      S:w  a  SUl0        S=[9        5       ;   a!  [;        US(5      (       a  UR?                  5         [b        T$U ]  " SDUUUUUUUUU	U
US>.UD6  S=[9        5       ;   a!  [;        US?5      (       a  URg                  5         [;        U S@5      (       a-  U Rh                  Rk                  5         [;        U S@5      (       a  U ?4[        USAS 5      b  U Rl                  UR                  5       l6         [;        U SB5      (       aV  U Rn                  Rp                  n"Un#[;        U#S=5      (       a&  U"U#l9        U#Rt                  n#[;        U#S=5      (       a  M&  U"U#l9         [;        U SC5      (       a.  [w        [y        U Rz                  R|                  5      U 5      U l>        g )ENr   Fr   UNSLOTH_ENABLE_FULL_FINETUNING01UNSLOTH_FORCE_FLOAT32zKUnsloth: Switching to float32 training since model cannot work with float16TUNSLOTH_MIXED_PRECISIONrk   rx  torch_dtyper   )
_get_dtypezuUnsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`zuUnsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`r   ACCELERATE_MIXED_PRECISIONro  r   r   r   rO  r   re   )__version__z4.45.2z**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`r      r   r   r   bfloat16rv  rt  UNSLOTH_RETURN_LOGITSr   rI   r  padding_siderightrp  )UnslothVisionDataCollatorr  rV  pad_to_multiple_of)mlmmlm_probabilityr  )r  r   dataset_text_fieldrU  dataset_kwargsskip_prepare_datasetpad)PatchRLStatisticscpo_trainerparallel_mode_n_gpurH   )rH   rM   rm  rn  ro  rp  rq  rr  rt  ru  rv  rJ   neftune_hook_handler*  r  r8  rC  )?r   r  r  r  r5   r  r  rD  r  r  rx  unsloth_zoo.utilsr  r=   float16	TypeErrorr   r   r   r   transformersr  rA   r   r   r   r   r   localsrK   r   rI   r  r  unsloth_zoo.vision_utilsr  r  rB   column_names+TransformersDataCollatorForLanguageModelingr   r  r  unsloth_zoo.logging_utilsr  rD   NOT_DISTRIBUTEDn_gpur  rH  rI  rJ   r  remover*  r  scaleraccelerator_scalerrH   rE   rW   rJ  r8  )%rL   rH   rM   rm  rn  ro  rp  rq  rr  rt  ru  rv  rN   use_bf16use_fp16force_float32full_finetuningmixed_precision_dtyperx  r  r  ga_stepstransformers_versioneval_bszr   r   _output_logitsmodel_max_seq_lengthargs_max_seq_lengthr   _UnslothCPOTrainer__tokenizerr  other_metricsr  r  current_modelrJ  s%                                       rQ   rI  UnslothCPOTrainer.__init__h  s    < 0 24/>%%x4/>%%x**..)I3OSVVBJJNN3JC$PTW$W_` M "

/H) Tgt4bm]a8b=%"<"<">"D"D%05!5==('hy  JA  @B  :Bg(9  NE  DF  >FDIDI7;BJJ3481F)1SDI#DIAHvfBJJ344.:wt_^b?cgk?k!(Dt\408C$/4!>EHqLH+,0AA @ A4$/47t%A1EH1}!A!AH!Lpt  qQ  qQdNmt6=E(J^  @H`d`| '7?t+e^ '7?t+e^99u)<\`dFY99t)<[`TEX"'D"'DZZ^^5yAZO"&D"'D"&))D"&))D8<<)40<tn8<<7>J]aN25BJJ./68+GDBR4S4S#*52BD#I #*42BD#I"*/C/O!&!5!54!122.D4G!?!? &("wy.'I'Idk9Ka)'88Za:J:W'55'BRB\B\^l:m:m  Zao  pJ  pJ  pW*<*H&iF-)BCC-)?@@XUbUoUoEo K&))07KT)R	! M+VWW\dhu  iC  iC  ]C 6)07KT)R!
 t455TYt7Qt122bD4Kt-..G]_cFd0C-)BCC;..7;3T3Tm-CDD$:#---4T;OQU-V%M
 %P#--#*--4T;OQU-V	%M ?-7 4$/<3O3OOTXT^T^abTbtXq)Q.fh75.#A#A  	8))'/#!,I%-	8 17	8 fh75/#B#B!4.//$$++-t2339Q4.5A?C?W?WE&&(<4''%%,,F!M-11390 - 3 3 -11 06M,4!!#$=dnn>R>R$SUYZDJrT   r  )NNNNNNNNNNN)r_  r`  ra  rb  rc  rI  rf  rg  rh  s   @rQ   r  r  A  s8    %P (,e erT   r  	addFilterc                        \ rS rSrS rS rSrg)HideLoggingMessagei  c                     Xl         g r  text)rL   r  s     rQ   rI  HideLoggingMessage.__init__  s    d)rT   c                 <    U R                   UR                  5       ;  $ r  )r  
getMessage)rL   xs     rQ   filterHideLoggingMessage.filter  s    alln)DErT   r  N)r_  r`  ra  rb  rI  r
  rf  rC  rT   rQ   r  r    s    2ErT   r  z`use_cache=True`)^rc  r=   r   torch.nnr2   r   r   typingr   r   r   r   r	   r
   r   r   trl.trainer.cpo_trainerr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   dataclassesr?   r@   packaging.versionrA   numpy
contextlibr  rB   rC   r  transformers.training_argsrD   rU   typesrE   rW   torch_compile_optionscompiler{   re  r   r   r   r   rj  r  rK   Filterr  r   rC  rT   rQ   <module>r     s  0    $ I I I h  h  h  h  h  h  h  h  h  h  h  h  h  h  h  h 
  ( %   " $  3      4;PR S"||  \\	&,, %  	
 \\6ell C ELL  H-y H- H-R fI fIN!L* L\  6;FW^^ F 	
'(:;<  rT   