
    h,                     P    S SK JrJr  S SKJrJr  S SKJr  \ " S S\5      5       rg)    )	dataclassfield)AnyOptional)TrainingArgumentsc                   *  ^  \ rS rSr% Sr\R                  SS/-   r\" SSS0S9r\	\
S	'   \" S
SS0S9r\	\
S'   \" SSS0S9r\\
S'   \" SSS0S9r\\   \
S'   \" SSS0S9r\\   \
S'   \" SSS0S9r\\   \
S'   \" SSS0S9r\\   \
S'   \" SSS0S9r\	\
S'   \" SSSS /S!.S9r\\
S"'   \" S#SS$0S9r\	\
S%'   \" S#SS&0S9r\	\
S''   \" S(SS)0S9r\\
S*'   \" SSS+0S9r\\   \
S,'   \" S-S.S-S//S!.S9r\\
S0'   \" S1SS20S9r\\
S3'   \" SSS40S9r\\   \
S5'   \" SSS60S9r\\
S7'   \" S1SS80S9r\\
S9'   \" SSS:0S9r \\!\\"4      \
S'   \" SSS;0S9r#\\!\\"4      \
S'   \" SSS<0S9r$\\   \
S='   \" S1SS>0S9r%\\
S?'   \" S@SSA0S9r&\\
SB'   U 4SC jr'SDr(U =r)$ )E	KTOConfig   u  
Configuration class for the [`KTOTrainer`].

This class includes only the parameters that are specific to KTO training. For a full list of training arguments,
please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
differ from those in [`~transformers.TrainingArguments`].

Using [`~transformers.HfArgumentParser`] we can turn this class into
[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
command line.

Parameters:
    max_length (`int` or `None`, *optional*, defaults to `1024`):
        Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
        to use the default data collator.
    max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
        Maximum length of the prompt. This argument is required if you want to use the default data collator.
    max_completion_length (`int` or `None`, *optional*, defaults to `None`):
        Maximum length of the completion. This argument is required if you want to use the default data collator
        and your model is an encoder-decoder.
    beta (`float`, *optional*, defaults to `0.1`):
        Parameter controlling the deviation from the reference model. Higher β means less deviation from the
        reference model.
    loss_type (`str`, *optional*, defaults to `"kto"`):
        Type of loss to use. Possible values are:

            - `"kto"`: KTO loss from the [KTO](https://huggingface.co/papers/2402.01306) paper.
            - `"apo_zero_unpaired"`: Unpaired variant of APO-zero loss from the
              [APO](https://huggingface.co/papers/2408.06266) paper.

    desirable_weight (`float`, *optional*, defaults to `1.0`):
        Desirable losses are weighed by this factor to counter unequal number of desirable and undesirable paris.
    undesirable_weight (`float`, *optional*, defaults to `1.0`):
        Undesirable losses are weighed by this factor to counter unequal number of desirable and undesirable pairs.
    label_pad_token_id (`int`, *optional*, defaults to `-100`):
        Label pad token id. This argument is required if you want to use the default data collator.
    padding_value (`int` or `None`, *optional*, defaults to `None`):
        Padding value to use. If `None`, the padding value of the tokenizer is used.
    truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
        Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
        This argument is required if you want to use the default data collator.
    generate_during_eval (`bool`, *optional*, defaults to `False`):
        If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
        during evaluation.
    is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
        When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
        you need to specify if the model returned by the callable is an encoder-decoder model.
    precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
        Whether to precompute reference model log probabilities for training and evaluation datasets. This is
        useful when training without the reference model to reduce the total GPU memory needed.
    model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
        string.
    ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
        from a string.
    dataset_num_proc: (`int` or `None`, *optional*, defaults to `None`):
        Number of processes to use for processing the dataset.
    disable_dropout (`bool`, *optional*, defaults to `True`):
        Whether to disable dropout in the model and reference model.
    use_liger_loss (`bool`, *optional*, defaults to `False`):
        Whether to use Liger loss. It requires liger-kernel to be installed.
    base_model_attribute_name (`str`, *optional*, defaults to `"model"`):
        Name of the attribute in the model that contains the base model. This is used to get the base model from
        the model when the model does not have a `get_decoder` method in the case when `use_liger_loss` is `True`.
model_init_kwargsref_model_init_kwargsgư>helpz$The initial learning rate for AdamW.)defaultmetadatalearning_rate
   zLog every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.logging_stepsTzZIf True, use gradient checkpointing to save memory at the expense of slower backward pass.gradient_checkpointingNzWhether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if `fp16` is not set.bf16i   zCMaximum length of the sequences (prompt + completion) in the batch.
max_lengthi   zMaximum length of the prompt. This argument is required if you want to use the default data collator and your model is an encoder-decoder.max_prompt_lengthzMaximum length of the completion. This argument is required if you want to use the default data collator and your model is an encoder-decoder.max_completion_lengthg?uv   Parameter controlling the deviation from the reference model. Higher β means less deviation from the reference model.betaktozType of loss to use.apo_zero_unpaired)r   choices	loss_typeg      ?ziDesirable losses are weighed by this factor to counter unequal number of desirable and undesirable pairs.desirable_weightzkUndesirable losses are weighed by this factor to counter unequal number of desirable and undesirable pairs.undesirable_weightiz[Label pad token id. This argument is required if you want to use the default data collator.label_pad_token_idzLPadding value to use. If `None`, the padding value of the tokenizer is used.padding_valuekeep_endz3Truncation mode to use when the prompt is too long.
keep_starttruncation_modeFzoIf `True`, generates and logs completions from both the model and the reference model to W&B during evaluation.generate_during_evalzWhen using the `model_init` argument (callable) to instantiate the model instead of the `model` argument, you need to specify if the model returned by the callable is an encoder-decoder model.is_encoder_decoderz(Whether to disable dropout in the model.disable_dropoutzWhether to precompute reference model log probabilities for training and evaluation datasets. This is useful when training without the reference model to reduce the total GPU memory needed.precompute_ref_log_probszoKeyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a string.zyKeyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model from a string.z6Number of processes to use for processing the dataset.dataset_num_proczDWhether to use Liger loss. It requires liger-kernel to be installed.use_liger_lossmodelzName of the attribute in the model that contains the base model. This is used to get the base model from the model when the model does not have a `get_decoder` method in the case when `use_liger_loss` is `True`.base_model_attribute_namec                    > U R                   c  U R                  (       + OU R                   U l         [        TU ]  5         g )N)r   fp16super__post_init__)self	__class__s    P/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/kto_config.pyr/   KTOConfig.__post_init__   s*    '+yy'8Odii	    )r   )*__name__
__module____qualname____firstlineno____doc__r   _VALID_DICT_FIELDSr   r   float__annotations__r   r   boolr   r   r   intr   r   r   r   strr   r   r   r    r#   r$   r%   r&   r'   r   dictr   r   r(   r)   r+   r/   __static_attributes____classcell__)r1   s   @r2   r	   r	      s   AF +==ATVm@nn !@AM5  ! D
M5  $)p
$D  ! !
D(4.  !&_`!J  (- =
(x}  ,1 =
,8C=   #
D%  *23
Is  $ !
e  !& !
!  $q
  $)hi$M8C=  !I"L1
OS  "' !
"$  */ o
*  "DEOT  &+ n
&d  38 
3xS#X/  7< -
78DcN3  ',RS'hsm  !`aND  &+ *
&s    r4   r	   N)	dataclassesr   r   typingr   r   transformersr   r	    r4   r2   <module>rG      s/    )   * [ ! [  [ r4   