
    oi                     0   S S/r SSKrSSKJrJrJrJrJrJrJ	r	J
r
  SSKrSSKrSSKrSSKJr  SSKJr  SSKJr  SSKJr  S	S
KJrJrJrJrJrJr  SSSSSS.r SSKJr   \!" \ S5      (       d   " S S5      r"\"\ l"        SSK$J%r&  SSK'J(r)  SSK*J+r+   \+" \&5      r,S r-S r.\S   r/\S   r0\S   r1\S   r2\S   r3\S   r4\S   r5Sr6S r7S%S  jr8S! r9S" r:S# r;S&S$ jr<g! \# a     Nef = f! \# a'     \+" \)" S5      5      r, Ne! \# a    \+" S5      r,  Nvf = ff = f)'PatchFastRLvLLMSamplingParams    N)AnyCallableDictListLiteralOptionalTupleUnion)create_new_function)logger)PatchRLStatistics)RL_REPLACEMENTS   )RL_EXTRA_ARGSRL_FUNCTIONSRL_PRE_ITEMSRL_CONFIG_CHANGESRL_METRICS_CHANGESRL_ADDITIONAL_FUNCTIONSTF)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphsGuidedDecodingParamsc                       \ rS rSrS rSrg)r   4   c                     Xl         g Nkwargs)selfr!   s     K/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/models/rl.py__init__GuidedDecodingParams.__init__5   s    $    r    N)__name__
__module____qualname____firstlineno__r$   __static_attributes__ r&   r#   r   r   4   s    %r&   )__version__)version)Versiontrlz0.0.0c                  .    SSK Jn  U" S0 U D6nXl        U$ )Nr   )SamplingParamsr,   )vllmr2   _set_kwargs)r!   r2   sampling_paramss      r#   r   r   I   s    #$.v.O"(r&   c                   ^ ^^ SSK Jm  SSKJn  UU U4S j5       nSSKJn  SSKJm  [        R                  " 5       U4S j5       nSS K
n[        UR                  5      nU Vs/ s H  owR                  S5      (       d  M  UPM     nnS	nU H9  n	 [        UR                  U	5      n
[        X5      (       d  M,   [!        XU5        M;     XCl        g s  snf !    MN  = f!    MV  = f)
Nr   )unwrap_model_for_generation)contextmanagerc              ?   $  >^#    T" U /UQ70 UD6 nTR                  U 5        UR                  mU4S jnXCl         Uv   TUl        TR                  U 5         S S S 5        g ! TUl        TR                  U 5        f = f! , (       d  f       g = f7f)Nc                  v   > T" U 0 UD6n[        U[        R                  5      (       a  UR                  5       $ U$ r   )
isinstancetorchTensorclone)argsr!   outoriginal_generates      r#   generate_with_cloneQPatchRL.<locals>.unsloth_unwrap_model_for_generation.<locals>.generate_with_clone_   s4    '88c5<<0099;&
r&   )for_inferencegeneratefor_training)modelr?   r!   unwrapped_modelrB   rA   FastLanguageModelr7   s        @r#   #unsloth_unwrap_model_for_generation4PatchRL.<locals>.unsloth_unwrap_model_for_generationU   s     (@@@O++E2 !0 8 8 (;$6%% ,=(!..u5+ A@( ,=(!..u5+ A@s3   B*A?A" A?	B"A<<A??
B	B)Trainer)nested_detachc           	      B  >^^ [        U R                  5      S:X  a  SO[        U4S jU R                   5       5      nTR                  SS5      nUc  U R                  n[        U R                  5      S:X  a	  U(       a  SOSnU R                  T5      mTc?  [        U R                  S5      (       a"  [        U R                  R                  S/ 5      mO/ mU(       d  U(       a:  T" [        U4S	 jU R                   5       5      5      n[        U5      S
:X  a  US   nOSnS[        R                  S'   [        R                  " 5          U(       d  U(       a  U R                  5          U R!                  UTSS9u  pSSS5        W	R#                  5       R%                  5       n	['        W
[(        5      (       a$  [        U4S jU
R+                  5        5       5      nOU
S
S nOSn	U R                  5          U R-                  TS   SSSS9R/                  UR0                  5      nU" S0 UD6n
SSS5        ['        W
[(        5      (       a$  [        U4S jU
R+                  5        5       5      nOU
nU R2                  R4                  S:  a   XR2                  R4                  S
-
     U l        SSS5        S[        R                  S'   U(       a  W	SS4$ T" W5      n[        U5      S
:X  a  US   nW	X4$ ! , (       d  f       GNy= f! , (       d  f       N= f! , (       d  f       Np= f)ar  
Perform an evaluation step on `model` using `inputs`.
Subclass and override to inject custom behavior.
Args:
    model (`nn.Module`):
        The model to evaluate.
    inputs (`Dict[str, Union[torch.Tensor, Any]]`):
        The inputs and targets of the model.
        The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
        argument `labels`. Check your model's documentation for all accepted arguments.
    prediction_loss_only (`bool`):
        Whether or not to return the loss only.
    ignore_keys (`List[str]`, *optional*):
        A list of keys in the output of your model (if it is a dictionary) that should be ignored when
        gathering predictions.
Return:
    Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss,
    logits and labels (each being optional).
r   Fc              3   J   >#    U  H  nTR                  U5      S Lv   M     g 7fr   get).0kinputss     r#   	<genexpr>;PatchRL.<locals>.unsloth_prediction_step.<locals>.<genexpr>   s      I8H1VZZ]$.8Hs    #return_lossNTconfigkeys_to_ignore_at_inferencec              3   F   >#    U  H  nTR                  U5      v   M     g 7fr   rP   )rR   namerT   s     r#   rU   rV      s     (WFVdD)9)9FVs   !r   1UNSLOTH_RETURN_LOGITS)return_outputsc              3   D   >#    U  H  u  pUTS /-   ;  d  M  Uv   M     g7f)lossNr,   rR   rS   vignore_keyss      r#   rU   rV      s(      #&5da+QWPXBX9Xos    	 promptpt)padding
truncationreturn_tensorsc              3   <   >#    U  H  u  pUT;  d  M  Uv   M     g 7fr   r,   ra   s      r#   rU   rV      s      #&5da+9Mos   	0r,   )lenlabel_namesallrQ   can_return_loss_prepare_inputshasattrrG   getattrrX   tupleosenvironr<   no_gradcompute_loss_context_managercompute_lossmeandetachr;   dictitemsprocessing_classtodevicer?   
past_index_past)r"   rG   rT   prediction_loss_onlyrc   
has_labelsrW   loss_without_labelslabelsr`   outputslogitstokenized_outputrM   s     ` `        r#   unsloth_prediction_step(PatchRL.<locals>.unsloth_prediction_stepq   s   : 4##$) I8H8HII 	 jj5..K(()Q.;DE 	 %%f-tzz8,,%JJ%%'Db ! ,"5(WdFVFV(W#WXF6{aF.1

*+]]_0668$($5$5v %6 %MD 9 yy{))+gt,," #&-mmo# F %QR[F668'+'<'<x("&%))-	 (= (
 b& % $7&67G 9 gt,," #&-mmo# F %F99''1,!()=)=)A!BDJA B /2

*+$%%v&v;!AYFf%%O 98 98! _s>   L'K-;A7L27K?)A=L-
K<	7L?
L		L
L_trainerr7   )trl.models.utilsr7   
contextlibr8   transformersrL   transformers.trainer_pt_utilsrM   r<   ru   trl.trainerdirtrainerendswithrq   rp   setattrprediction_step)rI   r8   rJ   rL   r   r0   trainersxunwrapr   current_trainerrM   r7   s   `          @@r#   PatchRLr   Q   s    <)6 60 %;
]]_d& d&L 3;;H#>8azz*'=8H>*F	%ckk7;O ?++1TU  6 ?
	s$   #C CC9CCC"grpo_selective_log_softmaxselective_log_softmaxcalculate_pad_tokens_in_prompt create_completion_attention_maskleft_pack_paddingalign_logprobs_with_maskgrpo_autotune_batch_and_chunksa  
import os
from typing import *
from dataclasses import dataclass, field
from packaging.version import Version
import torch
import numpy as np
from contextlib import nullcontext
from torch.nn import functional as F
import inspect
from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
from transformers.training_args import ParallelMode

# Wrap trainer with padding to right and enable training mode
# Also patches W&B since multiple runs must use wandb.finish()
import functools
from types import MethodType
try:
    from unsloth_zoo.gradient_checkpointing import reset_unsloth_gradient_checkpointing_buffers
except:
    def reset_unsloth_gradient_checkpointing_buffers(): pass
def prepare_for_training_mode(f):
    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        # Enable training mode
        _was_training = None
        # Get gradient checkpointing setting from training arguments
        use_gc = getattr(self.args, 'gradient_checkpointing', True)
        if hasattr(self, 'model') and hasattr(self.model, "training"):
            _was_training = self.model.training
        if hasattr(self, 'model') and hasattr(self.model, "for_training"):
            self.model.for_training(use_gradient_checkpointing=use_gc)
        output = f(self, *args, **kwargs)
        # Restore previous mode when possible
        if hasattr(self, 'model') and hasattr(self.model, "for_inference"):
            if _was_training is False:
                self.model.for_inference()
            elif _was_training is True and hasattr(self.model, "for_training"):
                self.model.for_training(use_gradient_checkpointing=use_gc)
        # Reset gradient checkpointing buffers to free memory while staying ready for next run
        try:
            reset_unsloth_gradient_checkpointing_buffers()
        except:
            pass
        # Patch W&B to enable logging on future runs, otherwise it'll overwrite the first run
        try:
            import wandb
            wandb.finish()
        except:
            pass
        return output
    return wrapper
pass

torch_compile_options = {{
    "epilogue_fusion"   : True,
    "max_autotune"      : False,
    "shape_padding"     : True,
    "trace.enabled"     : False,
    "triton.cudagraphs" : False,
}}

{grpo_selective_log_softmax_code}
{selective_log_softmax_code}
{calculate_pad_tokens_in_prompt_code}
{create_completion_attention_mask_code}
{left_pack_padding_code}
{align_logprobs_with_mask_code}
{autotune_batch_and_chunks_code}

{RL_pre}

@dataclass
class Unsloth{RLConfig_name}({RLConfig_name}):
    """
    {__RLConfig_doc__}
    """
    vllm_sampling_params: Optional[Any] = field(
        default = None,
        metadata = {{'help': 'vLLM SamplingParams'}},
    )
    unsloth_num_chunks : Optional[int] = field(
        default = -1,
        metadata = {{'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}},
    )
    unsloth_logit_chunk_multiplier : Optional[int] = field(
            default = None,
            metadata = {{'help': 'Multiplier for chunked logit computations.'}},
        )
    unsloth_grpo_mini_batch : Optional[int] = field(
        default = None,
        metadata = {{'help': 'Mini batch size for GRPO hidden state accumulation. Default is None unless user defines it.'}},
    )
    {max_seq_length_pre}
    def __init__({RLConfig_arguments},
        vllm_sampling_params = None,
        unsloth_num_chunks = -1,
        unsloth_logit_chunk_multiplier = None, 
        unsloth_grpo_mini_batch = None, 
        {max_seq_length_call}
        **kwargs,
    ):
{RLConfig_extra_args}
        super().__init__({RLConfig_call_args}{RLConfig_kwargs})
        self.vllm_sampling_params = vllm_sampling_params
        self.unsloth_num_chunks = unsloth_num_chunks
        if unsloth_grpo_mini_batch is not None:
            if self.generation_batch_size >= unsloth_grpo_mini_batch:
                self.unsloth_grpo_mini_batch = unsloth_grpo_mini_batch
            else:
                raise ValueError(
                    f"Unsloth GRPO mini batch size needs to be less than or equal to the effective generation batch size, "
                    f"which is self.per_device_train_batch_size * gradient_accumulation_steps."
                )
        self.unsloth_logit_chunk_multiplier = unsloth_logit_chunk_multiplier
        {max_seq_length_post}
pass

{RLTrainer_extras}

class Unsloth{RLTrainer_name}(_Unsloth{RLTrainer_name}):
    """
    {__RLTrainer_doc__}
    """
    def __init__({RLTrainer_arguments},
        **kwargs
    ):
        if args is None: args = Unsloth{RLConfig_name}()
{RLTrainer_extra_args}
        # [TODO] Fix up DataParallel multiplying batch sizes
        # [TODO] DDP works, but DP seems to not work? [TODO]
        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
            if getattr(args, "_n_gpu", 1) != 1:
                args._n_gpu = 1
        if "model" in locals() and hasattr(model, "for_training"):
            model.for_training(use_gradient_checkpointing=getattr(args, 'gradient_checkpointing', True))
        super().__init__({RLTrainer_call_args}{RLTrainer_kwargs})
        if "model" in locals() and hasattr(model, "for_inference"):
            model.for_inference()
{RLTrainer_post}
pass
c                    ^ [        U S5      (       d  g U R                  m[        TSS5      (       a  g U4S jnSUl        Xl        g )N_generate_and_score_completions!_unsloth_restore_training_wrappedFc                   > [        [        U SS 5      SS 5      n T" U /UQ70 UD6USL aJ  [        U S5      (       a8  [        U R                  S5      (       a   U R                  R                  5         $ $ $ $ ! [         a     $ f = f! USL aZ  [        U S5      (       aH  [        U R                  S5      (       a,   U R                  R                  5         f ! [         a     f f = ff f f = f)NrG   trainingFrD   )rq   rp   rG   rD   	Exception)r"   r?   r!   was_trainingoriginals       r#   wrapped._wrap_grpo_generate_and_score.<locals>.wrapped  s    wtWd;ZN	D24262 %D'**DJJ88JJ,,. 9 + & !  %D'**DJJ88JJ,,.  	 9 + &sA   B A66
BB2C'9CC'
C!C' C!!C'T)rp   r   rq   r   )trainer_clsr   r   s     @r#   _wrap_grpo_generate_and_scorer     sG    ; ABB::Hx<eDD 15G-29/r&   c                 L   SS K nSS Kn [        SU  35      n[        U5       Vs/ s HM  nUR                  S5      (       d  M  US:w  d  M#  U R                  S5      S   UR                  5       ;   d  MK  UPMO     nn[        U5       Vs/ s HM  nUR                  S5      (       d  M  US:w  d  M#  U R                  S5      S   UR                  5       ;   d  MK  UPMO     nn[        U5      S:w  a  [        R                  " S	U  S
U 35        g [        U5      S:w  a  [        R                  " SU  S
U 35        g US   nUS   n [        SU  SU 35      n	 [        SU  SU 35      nU	R                  R                  S5      (       a  [	        SU	R                   S35        g UR                  R                  S5      (       a  [	        SUR                   S35        g [        R                  " U	5      n[        R                  " U5      n[        U5      nU Vs/ s H  oDR                  S5      (       a  M  UPM     nn[        R                   R"                  n/ nX4 GHc  n[        R$                  " UR&                  5      R(                  n[*        [-        S 5      [.        [0        [2        4nS/n/ nUR5                  5        H  u  nnUS:X  a  M  UR6                  nUS:X  a  [8        R:                  " S5      nUUL a  UR=                  U5        OR[-        U5      [2        L a  UR=                  U SU S35        O([-        U5      U;   a  UR=                  U SU 35        OM  UR=                  U SU 35        M     SS 3SS 3R?                  U5      -   nSS 3SS 3R?                  U5      -   nUR=                  UU45        GMf     US   u  nnSnSW;  a!  SU;   a  USS S3-  nURA                  SS 5      nSnS!U;   a  S"U;   a  S#nUU-  nS!U;   a  S$U;   a  S%nUU-  nS&nUU-  nS'nUU-  nS"U;   a  S(nUU-  nS"U;   a  S)n UU -  nU S*:X  a  S+n!UU!-  nS"U;   a  S,n"UU"-  nS-U;   a  S.U;   a  S/n#UU#-  nS0n$UU$-  nS"U;   a  S1n%UU%-  nS"U;   a  S2n&UU&-  nS"U;   a  S3n"UU"-  nS"U;   a  S4n'UU'-  nSn(U [B        ;   a  [B        U    n)U) H  n*U(U*" X5      -  n(M     US5U( S6U  S73-  nU [D        ;   a  [D        U    n)U) H  n*UU*" UU5      -  nM     UR                  S5      nSR?                  S8 U 5       5      nUR                  S5      nSR?                  S9 U 5       5      nUn+Un,Un-US   u  nnSn0 S:S _S;S<_S=S>_S?S@_SASB_SCSD_SESF_SGSH_SISJ_SKS>_SLS@_SMSN_SOS_SPS _SQSR_SSS _STSU_0 S<S<SVS<S<S<SWSX.En.U.R5                  5        HF  u  nnU SY3n[-        U5      [2        L a  SU S3OU n/U SU/ S3n/[8        RF                  " UU/U5      nMH     U SZ:X  aa  S[S\S<S<S].n.U.R5                  5        HF  u  nnU SY3n[-        U5      [2        L a  SU S3OU n/U SU/ S3n/[8        RF                  " UU/U5      nMH     SIU;   a  S^n0UU0-  nSPU;  a  S_U;   a  S`n1San2Sbn3OSn1Sn2Sn3S:U;   a  Scn4UU4-  nSdU;   a  Sen5UU5-  nSfU;   a  Sgn6UU6-  nShU;   a  SiU;   a  Sjn7UU7-  nS=U;   a  SQU;   a  SkU;   a  SlU;   a  Smn8UU8-  nO;S=U;   a5  SQU;   a/  SkU;  a  [	        SnU  35        SlU;  a  [	        SoU  35        Spn8UU8-  nSqU;   a  Srn9UU9-  nU [H        ;   a  [H        U    n)U) H  n*UU*" X5      -  nM     UR                  S5      nSR?                  Ss U 5       5      nUn:Un;Un<[K        XX~U5      n=U=c  StU SU 3n=[M        SuU SvU Sw35        [        SU 35      RN                  n>U>c  Sn>[        SU 35      RN                  n?U?c  Sn?U [P        ;   a  SR?                  [P        U    5      n@OSn@SxU;   a   W@S-   [        R                  " [R        5      -   n@[        R                  " [T        5      nA[        R                  " [V        5      nB[        R                  " [X        5      nC[        R                  " [Z        5      nD[        R                  " [\        5      nE[        R                  " [^        5      nF[        R                  " [`        5      nG[b        Re                  UU>U+U,U-SyU-R                  Sz5      (       a  SOSS  UU?U:U;U<SyU<R                  Sz5      (       a  SOSS  U=UW@U1U2U3WAWBWCWDWGWEWFS{9nHUS|:X  a,  S}nIS~nJWHRA                  UIUJ5      nHSnKSnLUHRA                  UKUL5      nHU?S:w  a(  WHRg                  U>5      S@:X  a  WHRA                  U>SS5      nH[8        RF                  " SSWH5      nH[i        SU 3UHSU  3US<S9nM[M        SU SU 3[k        5       [m        5       5        [M        SU SU 3[k        5       [m        5       5        [M        SU  SU SU 3[k        5       [m        5       5        [M        SU SU 3[k        5       [m        5       5        [M        SU SU 3[k        5       [m        5       5        [M        SU  SU SU 3[k        5       [m        5       5        U SZ:X  a   [o        [q        WMSU 35      5        g g ! [         a  n[	        SU  SU 35         S nAg S nAff = fs  snf s  snf ! [         a)  n
[        R                  " SU SU  SU
 35         S n
A
g S n
A
ff = f! [         a)  n
[        R                  " SU SU  SU
 35         S n
A
g S n
A
ff = fs  snf ! [         a&  n
[        R                  " SU SU
 35         S n
A
g S n
A
ff = f)Nr   ztrl.trainer.z&Unsloth: Could not import trl.trainer.z: rL   _Configr   z5Unsloth: Could not find Trainer class in trl.trainer.z	. Found: z4Unsloth: Could not find Config class in trl.trainer..zUnsloth: Could not load z from trl.trainer.Unslothz	Unsloth: z is already patched.__r"   
z = ''z =         ,
z             	tokenizerr|   ztokenizer = Nonez#processing_class = processing_classzKprocessing_class = tokenizer if tokenizer is not None else processing_classr?   rG   a  use_bf16 = getattr(args, 'bf16', False)
if type(use_bf16) is not bool: use_bf16 = False
use_fp16 = getattr(args, 'fp16', False)
if type(use_fp16) is not bool: use_fp16 = False
force_float32 = False
full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'
if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):
    print('Unsloth: Switching to float32 training since model cannot work with float16')
    force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().weight.dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
float16 = dtype == torch.float16
if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')
if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`')
if force_float32:
    # Forced float32 training
    args.fp16 = False
    args.bf16 = False
    os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'
    if hasattr(args, 'mixed_precision'): args.mixed_precision = 'no'
    # args.mixed_precision is a new argument which needs to be set now
elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32':
    # Mixed precision training
    args.fp16 = float16
    args.bf16 = not float16
    os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16'
    if hasattr(args, 'mixed_precision'): args.mixed_precision = 'fp16' if float16 else 'bf16'
    # args.mixed_precision is a new argument which needs to be set now
elif mixed_precision_dtype == 'bfloat16':
    # Both False since bfloat16 full finetuning doesn't do any autocasting.
    args.fp16 = False
    args.bf16 = False
    os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'
    if hasattr(args, 'mixed_precision'): args.mixed_precision = 'no'
    # args.mixed_precision is a new argument which needs to be set now

eval_datasetzif getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no':
    args.eval_strategy = 'steps'
    if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1
a  ga_steps = getattr(args, 'gradient_accumulation_steps', None)
if ga_steps is not None and ga_steps > 1:
    from transformers import __version__ as transformers_version
    if Version(transformers_version) <= Version('4.45.2'):
        print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n'
              '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`')
ad  if getattr(args, 'eval_strategy', 'no') != 'no':
    eval_bsz = getattr(args, 'per_device_eval_batch_size', 8)
    if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size
    if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps
fp16_full_eval = getattr(args, 'fp16_full_eval', False)
if type(fp16_full_eval) is not bool: fp16_full_eval = False
bf16_full_eval = getattr(args, 'bf16_full_eval', False)
if type(bf16_full_eval) is not bool: bf16_full_eval = False
if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True
if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False
if force_float32:
    args.bf16_full_eval = False
    args.fp16_full_eval = False
elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16':
    args.bf16_full_eval = True
    args.fp16_full_eval = False
elif not bf16_full_eval and not fp16_full_eval:
    args.bf16_full_eval = args.bf16
    args.fp16_full_eval = args.fp16
z_output_logits = False
if locals().get('compute_metrics', None) is not None: _output_logits = True
if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True
if _output_logits:
    os.environ['UNSLOTH_RETURN_LOGITS'] = '1'
a3  if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'):
    pass
else:
    model_max_seq_length = getattr(model, 'max_seq_length', None)
    args_max_seq_length  = getattr(args,  'max_seq_length', None)
    if args_max_seq_length is None and model_max_seq_length is not None:
        max_seq_length = model.max_seq_length
        if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length
    elif args_max_seq_length is not None and model_max_seq_length is not None:
        if args_max_seq_length > model_max_seq_length:
            print('Unsloth: You set `max_seq_length` as ' + str(args_max_seq_length) + ' but '
                   'the maximum the model supports is ' + str(model_max_seq_length) + '. We shall reduce it.')
            args.max_seq_length = model_max_seq_length
sft_trainera1  if 'max_length' not in locals() and not hasattr(args, 'max_length'):
    pass
else:
    if hasattr(args, 'max_seq_length') and args.max_seq_length is not None and args.max_seq_length > 0:
        if hasattr(args, 'max_length'):
            args.max_length = args.max_seq_length
            max_length = args.max_length
    else:
        model_max_length = getattr(model, 'max_seq_length', None)
        if model_max_length is None: model_max_length = getattr(model, 'max_length', None)
        if model_max_length is not None:
            args.max_length = model_max_length
            max_length = args.max_length
        elif hasattr(args, 'max_length') and args.max_length is not None:
            max_length = args.max_length
            # if we are here, then we are in a weird case where max_length is set but max_seq_length is not set
            setattr(model, 'max_seq_length', max_length)
        else:
            print('Unsloth: We did not find `max_seq_length` or `max_length` in the model or args. We will set it to 1024.')
            args.max_length = 1024
a  if model is not None and hasattr(model, 'for_training'):
    model.for_training(use_gradient_checkpointing=getattr(args, 'gradient_checkpointing', True))
if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right'
if 'processing_class' in locals():
    if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right'
    if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right'
data_collatortrain_dataseta>  __tokenizer = processing_class if 'processing_class' in locals() else tokenizer
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
    if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
        data_collator = TransformersDataCollatorForLanguageModeling(
            __tokenizer,
            mlm = False,
            mlm_probability = 0.0,
            pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
        )
    elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
        data_collator = DataCollatorForSeq2Seq(
            __tokenizer,
            pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
        )
else:
    if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
    if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
    if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True}
a  if not isinstance(data_collator, UnslothVisionDataCollator):
    if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
        if isinstance(data_collator, DataCollatorForSeq2Seq):
            data_collator = DataCollatorForSeq2Seq(
                __tokenizer.tokenizer,
                pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
            )
        else:
            data_collator = TransformersDataCollatorForLanguageModeling(
                __tokenizer.tokenizer,
                mlm = False,
                mlm_probability = 0.0,
                pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
            )
a)  if hasattr(self, 'neftune_hook_handle'):
    self.neftune_hook_handle.remove()
    if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle
if getattr(args, 'neftune_noise_alpha', None) is not None:
    model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha
pass
a  if hasattr(self, 'accelerator'):
    scaler = self.accelerator.scaler
    current_model = model
    while hasattr(current_model, 'model'):
        current_model.accelerator_scaler = scaler
        current_model = current_model.model
    current_model.accelerator_scaler = scaler
pass
zsif hasattr(self, 'train'):
    self.train = MethodType(prepare_for_training_mode(self.__class__.train), self)
pass
a  if hasattr(self, 'llm') and self.llm is not None and hasattr(self.llm, 'get_tokenizer'):
    _vllm_tok = self.llm.get_tokenizer()
    _pc = getattr(self, 'processing_class', None) or getattr(self, 'tokenizer', None)
    if _vllm_tok is not None and _pc is not None and getattr(_pc, 'chat_template', None) is not None and getattr(_vllm_tok, 'chat_template', None) is None:
        _vllm_tok.chat_template = _pc.chat_template
pass
zother_metrics = []
zL
from unsloth_zoo.logging_utils import PatchRLStatistics
PatchRLStatistics('z', other_metrics)
c              3   ,   #    U  H
  nS U-   v   M     g7fr   Nr,   rR   r   s     r#   rU   )_patch_trl_rl_trainers.<locals>.<genexpr>,       ;
17Q;
   c              3   ,   #    U  H
  nS U-   v   M     g7fr   r,   r   s     r#   rU   r   .  s     CNqw{Nr   
output_dirlogging_nan_inf_filterFper_device_train_batch_size   gradient_accumulation_steps   weight_decayg{Gz?warmup_ratiog?seediO  optim
adamw_8bitlearning_rateg-C6
?per_device_eval_batch_sizeeval_accumulation_stepstorch_empty_cache_steps   logging_stepsmax_seq_lengthnum_generations   top_k	vllm_modecolocatenoneT)generation_kwargsbf16fp16	report_toinclude_tokens_per_secondinclude_num_input_tokens_seenauto_find_batch_sizedataloader_pin_memoryz( = [^,
]{1,})?,
grpo_trainerbnpogMbP?)	loss_typebetar   #vllm_importance_sampling_correctionam  if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')

max_lengthzmax_seq_length : Optional[int] = field(
        default = None,
        metadata = {'help': 'Maximum sequence length to truncate to.'},
    )zmax_seq_length = None,z$self.max_seq_length = max_seq_lengthzif output_dir is None and save_strategy == 'steps' and save_steps == 500:
    output_dir = 'unsloth_training_checkpoints'
    save_strategy = 'no'
dataset_num_proca  if dataset_num_proc is None:
    import psutil
    dataset_num_proc = min(max((psutil.cpu_count() or 1)+4, 2), 64)
    memory_gb_left = psutil.virtual_memory().available / (1024**3)
    if   memory_gb_left <=  4: dataset_num_proc = 1 # Too risky, so set to 1
    elif memory_gb_left <=  6: dataset_num_proc = min(2, dataset_num_proc)
    elif memory_gb_left <= 10: dataset_num_proc = min(4, dataset_num_proc)
    elif memory_gb_left <= 14: dataset_num_proc = min(6, dataset_num_proc)
pad_to_multiple_ofa9  if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':
    from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION
    if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:
        from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE
        pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE

r   scale_rewardsa  if loss_type.lower() == 'dr_grpo':
    loss_type = 'dr_grpo'
elif loss_type.lower() == 'dapo':
    loss_type = 'dapo'
if loss_type.lower() == 'dr_grpo':
    if scale_rewards == None:
        scale_rewards = True
    elif scale_rewards == True:
        print('Unsloth: The Dr GRPO paper recommends setting `scale_rewards` to False! Will override. Set it to `None` to force False.')
        scale_rewards = False
elif loss_type.lower() == 'dapo':
    if mask_truncated_completions != True:
        print('Unsloth: The DAPO paper recommends `mask_truncated_completions = True` - we will set it.')
    if epsilon_high != 0.28:
        print('Unsloth: The DAPO paper recommends `epsilon_high = 0.28` - we will set it.')
    if beta != 0.0:
        print(f'[WARNING] Unsloth: The DAPO paper recommends setting `beta = 0.0` to remove the KL term - You have set it to {beta}.')
    mask_truncated_completions = True
    epsilon_high = 0.28

steps_per_generationgeneration_batch_sizeaA  if steps_per_generation is None and generation_batch_size is None:
    ga = gradient_accumulation_steps
    world_size = int(os.environ.get('WORLD_SIZE', '1'))
    if (ga * world_size * per_device_train_batch_size) % num_generations != 0:
        print('Unsloth: We now expect `per_device_train_batch_size` * `gradient_accumulation_steps` * `world_size` to be a multiple of `num_generations`.\nWe will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))
        per_device_train_batch_size = num_generations

z2Unsloth: Could not find `steps_per_generation` in z3Unsloth: Could not find `generation_batch_size` in a  if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:
    print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\nWe will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))
    per_device_train_batch_size = num_generations

temperaturea  if temperature <= 0:
    raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
    raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')

c              3   ,   #    U  H
  nS U-   v   M     g7fr   r,   r   s     r#   rU   r     r   r   _Unslothzfrom trl.trainer import (z, z,)r2   z	,**kwargs,)RLTrainer_name__RLTrainer_doc__RLTrainer_argumentsRLTrainer_extra_argsRLTrainer_call_argsRLTrainer_kwargsRLConfig_name__RLConfig_doc__RLConfig_argumentsRLConfig_extra_argsRLConfig_call_argsRLConfig_kwargsRLTrainer_extrasRLTrainer_postRL_premax_seq_length_premax_seq_length_callmax_seq_length_postselective_log_softmax_codegrpo_selective_log_softmax_code#calculate_pad_tokens_in_prompt_code%create_completion_attention_mask_codeautotune_batch_and_chunks_codeleft_pack_padding_codealign_logprobs_with_mask_code
SFTTrainerzLself._signature_columns = ["input_ids", "attention_mask", "completion_mask"]zUself._signature_columns = ["input_ids", "attention_mask", "completion_mask","labels"]zself._is_vlm = Truezself._is_vlm = Falsez[\n]{3,})	overwriteztrl.z = created_module.Unslothz<Unsloth: Could not wrap _generate_and_score_completions for )9r0   r   evalr   printr   r   splitlowerrk   r   infor'   
startswithinspect	getsource	Parameterempty	signaturer$   
parametersbooltypeintfloatstrr{   defaultreescapeappendjoinreplacer   r   subr   patch_functionsexec__doc__r   r   r   r   r   r   r   r   autotune_batch_and_chunksRLTrainer_replacementformatcountr   localsglobalsr   rq   )Ntrainer_filer0   r   errorr   r[   rX   r   r   	RLTrainereRLConfigold_RLTrainer_sourceold_RLConfig_sourceall_importsimportsEMPTY	processedRLobjectr  types	arguments	call_argsrS   rb   r   
extra_argsmixed_precisioncheck_eval_datasetcheck_gaeval_changeslogits_checklength_checkmax_length_checktraining_checkdata_collator_check	pad_checkneftune_checkaccelerator_checkvllm_chat_template_syncother_metrics_processorprocess_extra_argsprocess_extra_argr   r   r   replacementsylearning_rate_checkr  r  r  saving_checknum_proc_checkr   check_dr_grpocheck_num_generationscheck_temperaturer   r   r   r   r   r   r  r  r  r  r  r
  r  r	  RLTrainer_sourceoriginal_textnew_textoriginal_is_vlm_textnew_is_vlm_textcreated_modulesN                                                                                 r#   _patch_trl_rl_trainersr]    s   l^45 WA::i  	
 N 	
 s#A&!'')3	 	
 	  WA::h 	
 M 	
 s#A&!'')3	 	
   4yA~CL>QZ[_Z`a	
 	
6{aB<.PYZ`Yab	
 	 !WN1IM<~Q~6FGH	,|nAm_EF $$Y//	),,--ABC##I..	(++,,@AB #,,Y7!++H5g,K%@+Q\\$-?q+G@ ##EI)&&x'8'89DD
J
 H		$$&DAqF{		ADyIIdOEz  #aC  A3d1#Q0aE!  A3c!.s#aS\* ' LS=#5#5i#@@	Mc&N$7$7	$BB		
9 *H %Q<IyN *$);z)Is5'!122	%%1Y
	 Jw)3& 	R 	o%
 Y&\  ,,JI 	 	h
4 	* 	l"
 )> 	 	l"
 )G 	 	l"
 =(7 , **J )B 	 	n$
 )#9(Dm 	* 	))
 	  	i
 ) 	 	-' ) 	 	++ ) 	
 	.( ) 	  	11 !))/=!3#'8$( # "4 "
# $*^+>	@J }$*<8!3+IzBBJ "4 !!$'J;
;;J#))$/NYYCNCCN#%# %Q<IyJ d  %  	&q  	&q	 
 	  	  	  	  	  	%a  	"1  	"3  	  	$  	1 $ 	% & 	Z' (  %*). %!%7 LB ""$1c&'Q3as!HqccQCsOFF1a+		 % ~%$) 49
 !&&(DAq#*+A GsN!A3a1#A#S3Aq!Y/I	 ) )#[ 	 	))
 y(\Y-F	 7D   y ) 	
 	l"
 Y&[ 	 	n$
 y( 	 	((
 iOy$@ 	, 	m#
 	&2*"i/#y0 	 	++
	&)	38IY8V!2F|nUV")3G~VW 	 	++
 	! 	 	''
 ((.|<!3+,@VVJ "4 !!$'J;
;;J"$" 'g %n%5S8HI 	$^$4B}oR	HI|N+;<=EE l=/:;CC |#<56 //$!2!23E!FF ")!2!23H!I&-&7&78R&S#*1*;*;&+' -4,=,=(-) %../@A$+$5$56N$O!%,%6%67P%Q",33'-131&,?,H,H,M,MqST'VW%+/1/%+=+F+Fs+K+KaQR&TU+'/11%?*I.Q0U)G!7(E3 4 8 %fj+33M8L  50+33 /

 2"2"8"89J"Kq"P+334Er1M vvk41AB )
.!"
|n%N 	
~77GH	
 	
~&&??OP	
 	
|nAn%55N~N^_	 	
}o6}oF	
 	
}o%>}oN	
 	
|nAm_4Mm_]	 ~%	)'.1A(BC &C  6|nBugNO
.  &~&66HVXYZX[\	
 		  &}o5G~UWXYWZ[	
 		( AP  	KKN~N^^`ab`cd 	s   k k8k8$k83k8k=&k=.$k=k=l l8 m.0m.6m3 
k5k00k5
l5l00l58
m+m&&m+3
n#=nn#c           	         [         R                  " U R                  5      nUn[        R                  " SU5      nU Vs/ s H  nSU;   d  SU;   d  M  UPM     n	nU	 H5  n
UR                  U
U
R                  SS5      R                  SS5      5      nM7     UR                  SS5      nUR                  SS5      nUR                  S	S
5      nUR                  SS
5      nUR                  SS5      nUR                  SS
5      nUR                  SS5      nSnS n[        R                  " XU[        R                  S9nSU;   a  SU;   a  SU;   a  [        R                  " SU[        R                  [        R                  -  S9n[        U5      S:w  aQ  US   nSnSU;   a0  [        [        S5      :  a  US-  n[        [        S5      :  a  US-  nUR                  XU-   5      n[        R                  " SU[        R                  [        R                  -  S9n[        U5      S:X  Ga  US   S   US   S   nn[        R                  " S S!U[        R                  S9n[        R                  " S"S!U[        R                  S9n[        R                  " S#U[        R                  [        R                  -  S9n[        U5      S:X  a  US   nUR                  S$S%5      nS&U-   nUR                  S'5      n[        U5      S(:  a  US)   nUS*   n[        U5      [        UR                  5       5      -
  n[        U5      [        UR                  5       5      -
  nS+nS,S-U-  -   U-   S,-   S-U-  -   S-   nUR                  UR                  SS5      5      n[        R                  " S.S/U5      nS'S0 S1U S2U S'S0 S33	n[        [        S5      :  a)  S4nS5n[        R                  " UUU[        R                  S9nUR                  UU5      n[!        U 5      n[         R                  " U 5      nU Vs/ s H  nS6U 3U;   d  M  UPM     nnS7UU40n["        R$                  " U/ 5      nU GH  n ['        U U 5      (       d  M  [)        U U 5      n! [         R                  " U!5      n"U"n#U H  n$U$" U U"5      n"M      [        R                  " S8S9U"5      n"[        R                  " S:S;U"5      n"[        R                  " S<S;U"5      n"[        R                  " S=S!U"5      n"S>[*        R,                  ;   a	  US?-   S@-   n%OUSA-   n%[        R                  " SBSCU%-   SD-   U"5      n"[        R                  " SESFU"5      n"[        R                  " SGSFU"5      n"[        R                  " SHSFU"5      n"U"R                  SISJ5      n"U"U#:X  a  GMR  XC Vs/ s H$  oR/                  SK5      (       a  M  UU";   d  M"  UPM&     sn-  nU#U"4UU '   GM     [1        [3        U5      5      nU H  n UU    u  n&n'UR                  U&U'5      nM     UR                  SLU 3SMU 3S5      nU$ s  snf s  snf !    GM  = fs  snf )NNz\#[^\n]{1,}\n()[]zelif peft_config is None:zelif False:zelif peft_config is not None:zif peft_config is None:z	if False:zif peft_config is not None:z"get_peft_model(model, peft_config)rG   zUif peft_config is not None or (is_peft_available() and isinstance(model, PeftModel)):z;model = self._prepare_peft_model(model, peft_config, args)
zpass
z([ \t]*)if\s+is_peft_available\(\)\s+and\s+is_peft_model\(model\)\s+and\s+args\.beta\s*!=\s*0\.0\s*:(.*?)ref_param\.data\.copy_\(param\.data\)c                    U R                  S5      nU R                  S5      nUR                  S5      n/ nUR                  U S35        U Hk  nUR                  5       (       aB  UR	                  5       nUS[        U5      [        U5      -
   nUR                  U SU 35        MZ  UR                  U5        Mm     SR                  U5      $ )zCComment out each line in the matched block, preserving indentation.r   r   r   zJ# Unsloth: Commented out - use base model as reference, not SFT/LoRA modelNz# )groupr  r"  striplstriprk   r#  )match
full_matchindentlinescommented_lineslinestripped
leading_wss           r#   comment_out_block*patch_functions.<locals>.comment_out_block  s    [[^
Q  &h`a	
 Dzz||;;=!"=CIH$=>
&&*Rz'BC&&t,  yy))r&   )flagszargs.use_vllmr?   zdef __init__\(.*?\).*?\:\nr   z
        if hasattr(model, 'vllm_engine') and hasattr(args, 'use_vllm'):
            if (getattr(args, 'use_vllm', False) == False):
                args.use_vllm = True
grpoz0.18.0z&            args.vllm_mode='colocate'
z0.23.0zt            if os.environ.get('UNSLOTH_VLLM_STANDBY', '0') == '1':
                args.vllm_enable_sleep_mode=True
z7(\n[\s]{8}if (self|args)\.use_vllm\:.*?\n[\s]{8}else:
)r   z^\s*\#[^\n]*\n?r   z\s*\#.*$zB\n[\s]{4,}(self\.[^\s]{1,}[\s]{0,}\=[\s]{0,}SamplingParams\(.+?\))z guided_decoding=guided_decoding,zguided_decoding=GuidedDecodingParams(backend="outlines", regex=args.vllm_guided_decoding_regex) if getattr(args, "vllm_guided_decoding_regex", None) is not None else None,zF            self.llm = model.vllm_engine; self._last_loaded_step = 0; r   r   zY**getattr(getattr(args, 'vllm_sampling_params', vLLMSamplingParams()), '_set_kwargs', {})r    z[\,][\s]{0,}\,r   r   zif z.use_vllm:
zelse:
z(self\.llm\s*=\s*LLM\(.*?\)*\)\s*?\n(?!,)zself.llm = model.vllm_engine
zdef r$   zJ(\n[\s]{4,})generation_batch = shuffle_sequence_dict\(generation_batch\)\nzU\n\1try: generation_batch = shuffle_sequence_dict(generation_batch)\n\1except: pass\nz1(\n[\s]{4,}).+?model_executor\.driver_worker.+?\nz
\n\1pass\nz"(\n[\s]{4,}).+?load_weights\(.+?\nz\.state_dict\(\)CUDA_VISIBLE_DEVICESz_lora_model_' + z=(os.environ.get('CUDA_VISIBLE_DEVICES', '0').replace(',',''))z_lora_model'z+(self\.llm\.(?:generate|chat)\([^\)]{1,})\)z)\1, lora_request = self.model.load_lora('z, load_tensors = True))z \,[\s]{1,}\,[\s]{0,}lora_requestz, lora_requestz[\s]{1,}\,[\s]{0,}lora_requestz[\,]{1,}[\s]{0,}lora_requestz5sampling_params = SamplingParams(**generation_kwargs)zsampling_params = SamplingParams(**grpo_update_SamplingParams(SamplingParams, generation_kwargs, getattr(self.args, 'vllm_sampling_params', None)))r   zclass zclass _Unsloth)r  r  r$   r   findallr$  r%  DOTALL	MULTILINErk   trl_versionr/   r  rf  r#  rsplitr   r   rQ   rp   rq   rs   rt   r  listset)(r1  r/  r   r6  r7  initold_initcommentsr   bracketed_commentsbracketed_commentadd_adapter_block_patternro  replacervllm_setter	vllm_partr?   new_vllm_partr5   splitted_sampling_params	last_linelast_prev_linelast_prev_indentationlast_indentationextra
to_replacevllm_llm_init_patternvllm_llm_replacement	functionsrW  changededit_functionsfunctionfxsourceoriginal_sourceedit_function	lora_nameoldnews(                                           r#   r&  r&    s   Y//0DH zz*D1H%-FXSAX!XF/||%%c3/77SA
 0 <<3]CD<<7GD<<1;?D<<5{CD<<<gFD<<_D
 <<FD	1 *( 66+biiXD $7d?v~::)LL299,

 x=A{H+  %+9J*JGG'("33?K <<[*@AD 

Nryy(I 9~#A,q/9Q<?4	Ir||
 ]BLL

 **ULL299,
 1$-a0O-552^OO!"  (7'<'<T'B$+,14R8	!9"!=(+N(;c"))+? )% $'y>C	8H8H8J4K#K  t 112  ,,	-
   #-///2H2Ha2P"Q"$&&):C"Q s4&_4Ew( 
 '(++$O!#C FF%$			M ||I}5 II((3%HIq4s7G)GIIH 	
G "%%lB7Ny(++Y)	&&r*F ! ,M"8V4F ,	 Yd
 @
 1
 
 "RZZ/$%QR  %~5I:8() 
 ;=MvV9;KVT79I6R C
 _$ 	{T{!,,s2CAVA{TT 
} H 3w< G 8$S+33C=  (//
 !^N3C#Da ]	 Gl I 	j Us5   YY*Y;YY8YYYYc                      SS K n [        U R                  5      nU Vs/ s H<  nUR                  5       (       d  M  UR	                  S5      (       d  M2  US:w  d  M:  UPM>     nnU H  n[        U5        M     g s  snf )Nr   r   base_trainer)r   r   r   islowerr   r]  )r0   all_trainersr   r   s       r#   patch_trl_rl_trainersr    sx    s{{#L A99; 	
::j1 	
67>6I 	
  
  w'  
s   A8A8A8A8c                  x    [         S    H-  n [        R                  " SU R                   35        U " 5         M/     g )Nopenenvz-Unsloth: Patching trl openenv with function: )r   r   r  r'   )r  s    r#   patch_trl_openenvr    s6    +I6CHDUDUCVWX
 7 r&   c                     Ub  [        U5        [        5         [        5         [        U 5      [        L a"  U R                  5       (       a  [        U 5        g g g r   )r   r  r  r  r  r  r   )	algorithmrI   s     r#   r   r     sG    $!"I#)"3"3"5"5)$ #6r&   )r   )NN)=__all__r<   typingr   r   r   r   r	   r
   r   r   r  rs   r   unsloth_zoo.compilerr   unsloth_zoo.logr   unsloth_zoo.logging_utilsr   unsloth_zoo.rl_replacementsr   rl_replacementsr   r   r   r   r   r   torch_compile_optionsvllm.sampling_paramsr5   _unsloth_vllm_sprp   r   r   r0   r-   trl_version_rawimportlib.metadatar.   importlib_versionunsloth_zoo.utilsr/   rz  r   r   r   r   r   r   r   r   r)  r*  r   r]  r&  r  r  r   r,   r&   r#   <module>r     s    
  M M M  	 	 4 " 7 7   	3#%;<<	% 	% 1E- / ; %'/*KV6r --IJ '(?@ !01Q!R #23U#V  #$78 *+EF +,LM M `:4pftn	%G-  		  ''/67 'g&''s<   %C C( C%$C%(D/C??DDDD