
    hU                     \    S SK JrJr  S SKJrJr  S SKJr  SSKJ	r	  \ " S S\	5      5       r
g)	    )	dataclassfield)AnyOptional)TrainingArguments   )	SFTConfigc                   D  ^  \ rS rSr% Sr\R                  S/-   r\" SSS0S9r\	\
S'   \" S	SS
0S9r\	\
S'   \" S	SS0S9r\	\
S'   \" SSS0S9r\\
S'   \" SSS0S9r\\   \
S'   \" SSS0S9r\\\\4      \
S'   \" SSS0S9r\\
S'   \" SSS0S9r\\
S'   U 4S jrSrU =r$ )	GKDConfig   a  
Configuration class for [`GKDTrainer`].

This class includes only the parameters that are specific to GKD training. For a full list of training arguments,
please refer to the [`~transformers.TrainingArguments`] and [`SFTConfig`] documentation.

Args:
    temperature (`float`, *optional*, defaults to `0.9`):
        Temperature for sampling. The higher the temperature, the more random the completions.
    lmbda (`float`, *optional*, defaults to `0.5`):
        Lambda parameter that controls the student data fraction (i.e., the proportion of on-policy
        student-generated outputs).
    beta (`float`, *optional*, defaults to `0.5`):
        Interpolation coefficient between `0.0` and `1.0` of the Generalized Jensen-Shannon Divergence loss. When
        beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL Divergence.
    max_new_tokens (`int`, *optional*, defaults to `128`):
        Maximum number of tokens to generate per completion.
    teacher_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
        Model name or path of the teacher model. If `None`, the teacher model will be the same as the model being
        trained.
    teacher_model_init_kwargs (`dict[str, Any]]` or `None`, *optional*, defaults to `None`):
        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model
        from a string.
    disable_dropout (`bool`, *optional*, defaults to `True`):
        Whether to disable dropout in the model.
    seq_kd (`bool`, *optional*, defaults to `False`):
        Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on
        teacher-generated output).
teacher_model_init_kwargsg?helpzVTemperature for sampling. The higher the temperature, the more random the completions.)defaultmetadatatemperatureg      ?zwLambda parameter that controls the student data fraction (i.e., the proportion of on-policy student-generated outputs).lmbdazInterpolation coefficient between `0.0` and `1.0` of the Generalized Jensen-Shannon Divergence loss. When beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL Divergence.beta   z4Maximum number of tokens to generate per completion.max_new_tokensNzrModel name or path of the teacher model. If `None`, the teacher model will be the same as the model being trained.teacher_model_name_or_pathzwKeyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model from a string.Tz'Whether to disable dropouts in `model`.disable_dropoutFzSeq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on teacher-generated output).seq_kdc                    > [         TU ]  5         U R                  S:  d  U R                  S:  a  [        S5      eU R                  S:  d  U R                  S:  a  [        S5      eg )Ng        g      ?z&lmbda must be in the range [0.0, 1.0].z%beta must be in the range [0.0, 1.0].)super__post_init__r   
ValueErrorr   )self	__class__s    P/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/gkd_config.pyr   GKDConfig.__post_init__j   sW    ::tzzC/EFF99s?dii#oDEE .     )__name__
__module____qualname____firstlineno____doc__r   _VALID_DICT_FIELDSr   r   float__annotations__r   r   r   intr   r   strr   dictr   r   boolr   r   __static_attributes____classcell__)r   s   @r   r   r      sI   < +==A\@]]rsK   *
E5   
D%   PQNC  16 #
1  ;@ +
;xS#X7  "CDOT   /
FD F Fr!   r   N)dataclassesr   r   typingr   r   transformersr   
sft_configr	   r   r"   r!   r   <module>r5      s4    )   * ! XF	 XF XFr!   