
    h4                     @    S SK JrJr  S SKJr  \ " S S\5      5       rg)    )	dataclassfield)OnlineDPOConfigc                   R   ^  \ rS rSr% Sr\" S SS0S9r\\   \	S'   U 4S jr
S	rU =r$ )
NashMDConfig   a  
Configuration class for the [`NashMDTrainer`].

Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following:

Parameters:
    mixture_coef (`float` or `list[float]`, *optional*, defaults to `0.5`):
        Logit mixture coefficient for the model and reference model. If a list of floats is provided then the
        mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the
        epochs.
c                      S/$ )Ng      ? r
       T/home/james-whalen/.local/lib/python3.13/site-packages/trl/trainer/nash_md_config.py<lambda>NashMDConfig.<lambda>#   s    r   helpzLogit mixture coefficient for the model and reference model. If a list of floats is provided then the mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the epochs.)default_factorymetadatamixture_coefc                    > [         TU ]  5         [        U R                  S5      (       a/  [	        U R                  5      S:X  a  U R                  S   U l        g g g )N__len__   r   )super__post_init__hasattrr   len)self	__class__s    r   r   NashMDConfig.__post_init__+   sO    4$$i00S9J9J5Kq5P $ 1 1! 4D 6Q0r   )r   )__name__
__module____qualname____firstlineno____doc__r   r   listfloat__annotations__r   __static_attributes____classcell__)r   s   @r   r   r      s9    
 !&% "
!L$u+ 5 5r   r   N)dataclassesr   r   trl.trainer.online_dpo_configr   r   r
   r   r   <module>r)      s(    ) 9 5? 5 5r   