
    hk                        S SK r S SKrS SKJrJr  S SKJrJrJrJ	r	J
r
  S SKrS SKrS SKJr   S SKJr  \R&                  " S\S9  S SKJr  S S	KJr  S S
KJrJrJr  \(       a  S SKJr   " S S\5      r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r# " S S\5      r$ " S S\5      r% " S S\%5      r& " S S\5      r' " S  S!\5      r( " S" S#\5      r)g! \ a    Sr Nf = f)$    N)ABCabstractmethod)TYPE_CHECKINGAnyCallableOptionalUnion)Logger)TqdmExperimentalWarningignore)category)tqdm)evaluate_policy)DummyVecEnvVecEnvsync_envs_normalization)
base_classc                   V  ^  \ rS rSr% SrS\S'   SS\4U 4S jjjr\S\	4S j5       r
\S\4S	 j5       rSS jrSS jrS\\\4   S\\\4   SS
4S jrSS jrSS jrSS jr\S\4S j5       rS\4S jrSS jrSS jrSS jrSS jrS\\\4   SS
4S jrS\\\4   SS
4S jrSr U =r!$ )BaseCallback   zw
Base class for callback.

:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
base_class.BaseAlgorithmmodelverbosec                 t   > [         TU ]  5         SU l        SU l        Xl        0 U l        0 U l        S U l        g Nr   )super__init__n_callsnum_timestepsr   localsglobalsparent)selfr   	__class__s     \/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/callbacks.pyr   BaseCallback.__init__)   s9    &(')     returnc                 N    U R                   R                  5       nUc   S5       eU$ )Nzc`model.get_env()` returned None, you must initialize the model with an environment to use callbacks)r   get_env)r#   training_envs     r%   r+   BaseCallback.training_env6   s2    zz))+$	qp	q$r'   c                 .    U R                   R                  $ N)r   loggerr#   s    r%   r/   BaseCallback.logger>   s    zz   r'   Nc                 0    Xl         U R                  5         g)zl
Initialize the callback by saving references to the
RL model and the training environment for convenience.
N)r   _init_callback)r#   r   s     r%   init_callbackBaseCallback.init_callbackC   s    
 
r'   c                     g r.    r0   s    r%   r3   BaseCallback._init_callbackK       r'   locals_globals_c                 r    Xl         X l        U R                  R                  U l        U R	                  5         g r.   )r    r!   r   r   _on_training_start)r#   r:   r;   s      r%   on_training_startBaseCallback.on_training_startN   s*    !ZZ55!r'   c                     g r.   r7   r0   s    r%   r=   BaseCallback._on_training_startV   r9   r'   c                 $    U R                  5         g r.   )_on_rollout_startr0   s    r%   on_rollout_startBaseCallback.on_rollout_startY   s     r'   c                     g r.   r7   r0   s    r%   rC   BaseCallback._on_rollout_start\   r9   r'   c                     g)zD
:return: If the callback returns False, training is aborted early.
Tr7   r0   s    r%   _on_stepBaseCallback._on_step_   s    
 r'   c                     U =R                   S-  sl         U R                  R                  U l        U R                  5       $ )z
This method will be called by the model after each call to ``env.step()``.

For child callback (of an ``EventCallback``), this will be called
when the event is triggered.

:return: If the callback returns False, training is aborted early.
   )r   r   r   rI   r0   s    r%   on_stepBaseCallback.on_stepf   s0     	!ZZ55}}r'   c                 $    U R                  5         g r.   )_on_training_endr0   s    r%   on_training_endBaseCallback.on_training_endt   s    r'   c                     g r.   r7   r0   s    r%   rP   BaseCallback._on_training_endw   r9   r'   c                 $    U R                  5         g r.   )_on_rollout_endr0   s    r%   on_rollout_endBaseCallback.on_rollout_endz   s    r'   c                     g r.   r7   r0   s    r%   rV   BaseCallback._on_rollout_end}   r9   r'   c                 \    U R                   R                  U5        U R                  U5        gzn
Update the references to the local variables.

:param locals_: the local variables during rollout collection
N)r    updateupdate_child_localsr#   r:   s     r%   update_localsBaseCallback.update_locals   s$     	7#  )r'   c                     g)z
Update the references to the local variables on sub callbacks.

:param locals_: the local variables during rollout collection
Nr7   r_   s     r%   r^    BaseCallback.update_child_locals   s     	r'   )r!   r    r   r   r   r"   r   r   r   r   r(   Nr(   N)"__name__
__module____qualname____firstlineno____doc____annotations__intr   propertyr   r+   r
   r/   r4   r3   dictstrr   r>   r=   rD   rC   r   boolrI   rM   rQ   rP   rW   rV   r`   r^   __static_attributes____classcell__r$   s   @r%   r   r      s    &%   f   ! ! !"c3h "4S> "VZ "! $    *T#s(^ * *4S> d  r'   r   c                      ^  \ rS rSrSrSS\\   S\4U 4S jjjrSU 4S jjr	SS	 jr
S\4S
 jrS\4S jrS\\\4   SS4S jrSrU =r$ )EventCallback   z
Base class for triggering callback on event.

:param callback: Callback that will be called
    when an event is triggered.
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
Ncallbackr   c                 r   > [         TU ]  US9  Xl        Ub   U R                  c   eX R                  l        g g Nr   )r   r   rx   r"   r#   rx   r   r$   s      r%   r   EventCallback.__init__   s=    ) ==,,,#'MM   r'   r(   c                    > [         TU ]  U5        U R                  b&  U R                  R                  U R                  5        g g r.   )r   r4   rx   r   )r#   r   r$   s     r%   r4   EventCallback.init_callback   s5    e$==$MM''

3 %r'   c                     U R                   b1  U R                   R                  U R                  U R                  5        g g r.   )rx   r>   r    r!   r0   s    r%   r=    EventCallback._on_training_start   s-    ==$MM++DKKF %r'   c                 R    U R                   b  U R                   R                  5       $ gNT)rx   rM   r0   s    r%   	_on_eventEventCallback._on_event   s"    ==$==((**r'   c                     gr   r7   r0   s    r%   rI   EventCallback._on_step   s    r'   r:   c                 V    U R                   b  U R                   R                  U5        ggr\   rx   r`   r_   s     r%   r^   !EventCallback.update_child_locals   s%     ==$MM''0 %r'   rx   r   re   rf   )rg   rh   ri   rj   rk   r   r   rm   r   r4   r=   rq   r   rI   ro   rp   r   r^   rr   rs   rt   s   @r%   rv   rv      sh    (,!7 ( ( (4
G4 
$ 14S> 1d 1 1r'   rv   c                      ^  \ rS rSrSrS\\   4U 4S jjrSS jrSS jr	SS	 jr
S\4S
 jrSS jrSS jrS\\\4   SS4S jrSrU =r$ )CallbackList   zl
Class for chaining callbacks.

:param callbacks: A list of callbacks that will be called
    sequentially.
	callbacksc                 \   > [         TU ]  5         [        U[        5      (       d   eXl        g r.   )r   r   
isinstancelistr   )r#   r   r$   s     r%   r   CallbackList.__init__   s&    )T****"r'   r(   Nc                     U R                    H/  nUR                  U R                  5        U R                  Ul        M1     g r.   )r   r4   r   r"   r#   rx   s     r%   r3   CallbackList._init_callback   s/    H""4::. #kkHO 'r'   c                 v    U R                    H)  nUR                  U R                  U R                  5        M+     g r.   )r   r>   r    r!   r   s     r%   r=   CallbackList._on_training_start   s(    H&&t{{DLLA 'r'   c                 J    U R                    H  nUR                  5         M     g r.   )r   rD   r   s     r%   rC   CallbackList._on_rollout_start   s    H%%' 'r'   c                 b    SnU R                    H  nUR                  5       =(       a    UnM     U$ r   )r   rM   )r#   continue_trainingrx   s      r%   rI   CallbackList._on_step   s3     H ( 0 0 2 H7H ' ! r'   c                 J    U R                    H  nUR                  5         M     g r.   )r   rW   r   s     r%   rV   CallbackList._on_rollout_end   s    H##% 'r'   c                 J    U R                    H  nUR                  5         M     g r.   )r   rQ   r   s     r%   rP   CallbackList._on_training_end   s    H$$& 'r'   r:   c                 L    U R                    H  nUR                  U5        M     gr\   )r   r`   )r#   r:   rx   s      r%   r^    CallbackList.update_child_locals   s      H""7+ 'r'   )r   rf   )rg   rh   ri   rj   rk   r   r   r   r3   r=   rC   rq   rI   rV   rP   ro   rp   r   r^   rr   rs   rt   s   @r%   r   r      s_    #$|"4 #
*B(!$ !&',4S> ,d , ,r'   r   c                      ^  \ rS rSrSr    SS\S\S\S\S\S\4U 4S	 jjjrSS jr	SS\S\S
\4S jjr
S
\4S jrSrU =r$ )CheckpointCallback   a  
Callback for saving a model every ``save_freq`` calls
to ``env.step()``.
By default, it only saves model checkpoints,
you need to pass ``save_replay_buffer=True``,
and ``save_vecnormalize=True`` to also save replay buffer checkpoints
and normalization statistics checkpoints.

.. warning::

  When using multiple environments, each call to  ``env.step()``
  will effectively correspond to ``n_envs`` steps.
  To account for that, you can use ``save_freq = max(save_freq // n_envs, 1)``

:param save_freq: Save checkpoints every ``save_freq`` call of the callback.
:param save_path: Path to the folder where the model will be saved.
:param name_prefix: Common prefix to the saved models
:param save_replay_buffer: Save the model replay buffer
:param save_vecnormalize: Save the ``VecNormalize`` statistics
:param verbose: Verbosity level: 0 for no output, 2 for indicating when saving model checkpoint
	save_freq	save_pathname_prefixsave_replay_buffersave_vecnormalizer   c                 `   > [         TU ]  U5        Xl        X l        X0l        X@l        XPl        g r.   )r   r   r   r   r   r   r   )r#   r   r   r   r   r   r   r$   s          r%   r   CheckpointCallback.__init__  s.     	!""&"4!2r'   r(   c                 ^    U R                   b   [        R                  " U R                   SS9  g g )NTexist_ok)r   osmakedirsr0   s    r%   r3   !CheckpointCallback._init_callback  s#    >>%KK6 &r'   checkpoint_type	extensionc           	          [         R                  R                  U R                  U R                   SU U R
                   SU 35      $ )a  
Helper to get checkpoint path for each type of checkpoint.

:param checkpoint_type: empty for the model, "replay_buffer_"
    or "vecnormalize_" for the other checkpoints.
:param extension: Checkpoint file extension (zip for model, pkl for others)
:return: Path to the checkpoint
_z_steps.)r   pathjoinr   r   r   )r#   r   r   s      r%   _checkpoint_path#CheckpointCallback._checkpoint_path   sE     ww||DNNt/?/?.@/ARSWSeSeRffmnwmx,yzzr'   c                    U R                   U R                  -  S:X  GaW  U R                  SS9nU R                  R	                  U5        U R
                  S:  a  [        SU 35        U R                  (       a{  [        U R                  S5      (       a`  U R                  R                  bI  U R                  SSS9nU R                  R                  U5        U R
                  S	:  a  [        S
U 35        U R                  (       ar  U R                  R                  5       bW  U R                  SSS9nU R                  R                  5       R	                  U5        U R
                  S:  a  [        SU 35        g)Nr   zip)r      zSaving model checkpoint to replay_bufferreplay_buffer_pklrL   z)Saving model replay buffer checkpoint to vecnormalize_zSaving model VecNormalize to T)r   r   r   r   saver   printr   hasattrr   r   get_vec_normalize_env)r#   
model_pathreplay_buffer_pathvec_normalize_paths       r%   rI   CheckpointCallback._on_step+  s9   <<$..(A-...?JJJOOJ'||q 3J<@A&&74::+O+OTXT^T^TlTlTx%)%:%:;KW\%:%]"

--.@A<<!#EFXEYZ[%%$***J*J*L*X%)%:%:?V[%:%\"

002778JK<<1$9:L9MNOr'   )r   r   r   r   r   )rl_modelFFr   rf   ) r   )rg   rh   ri   rj   rk   rm   rp   rq   r   r3   r   rI   rr   rs   rt   s   @r%   r   r      s    4 &#("'33 3 	3
 !3  3 3 3 7
	{ 	{S 	{RU 	{$  r'   r   c                   r   ^  \ rS rSrSrS	S\\\\\	4   \\\	4   /\
4      S\4U 4S jjjrS\
4S jrSrU =r$ )
ConvertCallbackiC  z
Convert functional callback (old-style) to object.

:param callback:
:param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
rx   r   c                 0   > [         TU ]  U5        Xl        g r.   )r   r   rx   r|   s      r%   r   ConvertCallback.__init__K  s    ! r'   r(   c                 j    U R                   b&  U R                  U R                  U R                  5      $ gr   )rx   r    r!   r0   s    r%   rI   ConvertCallback._on_stepO  s(    ==$==dll;;r'   r   rd   )rg   rh   ri   rj   rk   r   r   ro   rp   r   rq   rm   r   rI   rr   rs   rt   s   @r%   r   r   C  sW    !(DcNDcN3SUY3Y*Z![ !fi ! !$  r'   r   c                     ^  \ rS rSrSr          SS\\R                  \4   S\	\
   S\	\
   S\S\S	\	\   S
\	\   S\S\S\S\4U 4S jjjrSS jrS\\\4   S\\\4   SS4S jrS\4S jrS\\\4   SS4S jrSrU =r$ )EvalCallbackiU  a
  
Callback for evaluating an agent.

.. warning::

  When using multiple environments, each call to  ``env.step()``
  will effectively correspond to ``n_envs`` steps.
  To account for that, you can use ``eval_freq = max(eval_freq // n_envs, 1)``

:param eval_env: The environment used for initialization
:param callback_on_new_best: Callback to trigger
    when there is a new best model according to the ``mean_reward``
:param callback_after_eval: Callback to trigger after every evaluation
:param n_eval_episodes: The number of episodes to test the agent
:param eval_freq: Evaluate the agent every ``eval_freq`` call of the callback.
:param log_path: Path to a folder where the evaluations (``evaluations.npz``)
    will be saved. It will be updated at each evaluation.
:param best_model_save_path: Path to a folder where the best model
    according to performance on the eval env will be saved.
:param deterministic: Whether the evaluation should
    use a stochastic or deterministic actions.
:param render: Whether to render or not the environment during evaluation
:param verbose: Verbosity level: 0 for no output, 1 for indicating information about evaluation results
:param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been
    wrapped with a Monitor wrapper)
Neval_envcallback_on_new_bestcallback_after_evaln_eval_episodes	eval_freqlog_pathbest_model_save_pathdeterministicrenderr   warnc                   >^ [         TU ]  X:S9  X l        U R                  b  X R                  l        X@l        XPl        [        R                  * U l        [        R                  * U l	        Xl
        Xl        Xl        [        T[        5      (       d  [        U4S j/5      mTU l        Xpl        Ub   [$        R&                  R)                  US5      nX`l        / U l        / U l        / U l        / U l        / U l        g )Nr{   c                     > T $ r.   r7   )r   s   r%   <lambda>'EvalCallback.__init__.<locals>.<lambda>  s    Hr'   evaluations)r   r   r   r"   r   r   npinfbest_mean_rewardlast_mean_rewardr   r   r   r   r   r   r   r   r   r   r   r   evaluations_resultsevaluations_timestepsevaluations_length_is_success_bufferevaluations_successes)r#   r   r   r   r   r   r   r   r   r   r   r   r$   s    `          r%   r   EvalCallback.__init__q  s     	,>$8!$$0/3%%,."!#!#*	 (F++"$4#56H $8!ww||Hm<H 68 02"35.079"r'   r(   c                    [        U R                  [        U R                  5      5      (       d0  [        R
                  " SU R                   SU R                   35        U R                  b  [        R                  " U R                  SS9  U R                  b<  [        R                  " [        R                  R                  U R                  5      SS9  U R                  b&  U R                  R                  U R                  5        g g )Nz.Training and eval env are not of the same typez != Tr   )r   r+   typer   warningsr   r   r   r   r   r   dirnamer   r4   r   r0   s    r%   r3   EvalCallback._init_callback  s    $++T$---@AAMMJdN_N_M``deiererdstu $$0KK11DA==$KK6F $$0%%33DJJ? 1r'   r:   r;   c                     US   nUS   (       a1  UR                  S5      nUb  U R                  R                  U5        ggg)z
Callback passed to the  ``evaluate_policy`` function
in order to log the success rate (when applicable),
for instance when using HER.

:param locals_:
:param globals_:
infodone
is_successN)getr   append)r#   r:   r;   r   maybe_is_successs        r%   _log_success_callback"EvalCallback._log_success_callback  sH     v6?#xx5+''../?@ , r'   c                 :	   SnU R                   S:  Gai  U R                  U R                   -  S:X  GaK  U R                  R                  5       b!   [	        U R
                  U R                  5        / U l	        [        U R                  U R                  U R                  U R                  U R                  SU R                  U R                  S9u  p4U R                   Gb  [#        U[$        5      (       d   e[#        U[$        5      (       d   eU R&                  R)                  U R*                  5        U R,                  R)                  U5        U R.                  R)                  U5        0 n[1        U R                  5      S:  a8  U R2                  R)                  U R                  5        [5        U R2                  S9n[6        R8                  " U R                   4U R&                  U R,                  U R.                  S.UD6  [6        R:                  " U5      [6        R<                  " U5      pv[6        R:                  " U5      [6        R<                  " U5      p[?        U5      U l         U RB                  S:  a3  [E        SU R*                   S	US
 SUS
 35        [E        SUS
 SU	S
 35        U RF                  RI                  S[?        U5      5        U RF                  RI                  SU5        [1        U R                  5      S:  a_  [6        R:                  " U R                  5      n
U RB                  S:  a  [E        SSU
-  S
 S35        U RF                  RI                  SU
5        U RF                  RI                  SU R*                  SS9  U RF                  RK                  U R*                  5        X`RL                  :  a  U RB                  S:  a  [E        S5        U RN                  bC  U R                  RQ                  [R        RT                  RW                  U RN                  S5      5        [?        U5      U l&        U RX                  b  U RX                  R[                  5       nU R\                  b  U=(       a    U R_                  5       nU$ ! [         a  n[        S5      UeS nAff = f)NTr   zTraining and eval env are not wrapped the same way, see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback and warning above.)r   r   r   return_episode_rewardsr   rx   )	successes)	timestepsresults
ep_lengthsrL   zEval num_timesteps=z, episode_reward=.2fz +/- zEpisode length: zeval/mean_rewardzeval/mean_ep_lengthzSuccess rate: d   %zeval/success_rateztime/total_timestepstensorboard)excludezNew best mean reward!
best_model)0r   r   r   r   r   r+   r   AttributeErrorAssertionErrorr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   lenr   ro   r   savezmeanstdfloatr   r   r   r/   recorddumpr   r   r   r   r   r   r   rM   rx   r   )r#   r   eepisode_rewardsepisode_lengthskwargsmean_reward
std_rewardmean_ep_lengthstd_ep_lengthsuccess_rates              r%   rI   EvalCallback._on_step  s    >>A$,,"?1"Dzz//1=+D,=,=t}}M ')D#/>

 $ 4 4{{"00'+YY33	0,O }}(!/48888!/48888**11$2D2DE((//@''..?t../!3..55d6M6MN!D,F,FGFMM"88 44#66	
  ')ggo&>@W,.GGO,Dbff_F]M$)+$6D!||q +D,>,>+??TU`adTeejkuvyjz{|((<E-PSATUVKK153EFKK4nE4**+a/!wwt'>'>?<<1$N3+=c*B!DE""#6E KK5t7I7IS`aKKT//0222<<1$12,,8JJOOBGGLL1J1JL$YZ(-k(:%,,8(,(A(A(I(I(K% }}($5$J$..:J!  [ & (- 	s    Q? ?
R	RRc                 ^    U R                   (       a  U R                   R                  U5        ggr\   r   r_   s     r%   r^    EvalCallback.update_child_locals  s"     ==MM''0 r'   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
NN   i'  NNTFrL   Trf   )rg   rh   ri   rj   rk   r	   gymEnvr   r   r   rm   rp   rq   r   r3   ro   r   r   rI   r^   rr   rs   rt   s   @r%   r   r   U  s#   < 8<6: "&.2",:(,: '|4,: &l3	,:
 ,: ,: 3-,: 'sm,: ,: ,: ,: ,: ,:\@AT#s(^ AtCQTH~ AZ^ A U!$ U!n14S> 1d 1 1r'   r   c                   V   ^  \ rS rSr% Sr\\S'   S
S\S\4U 4S jjjr	S\
4S jrS	rU =r$ )StopTrainingOnRewardThresholdi  at  
Stop the training once a threshold in episodic reward
has been reached (i.e. when the model is good enough).

It must be used with the ``EvalCallback``.

:param reward_threshold:  Minimum expected reward per episode
    to stop training.
:param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because episodic reward
    threshold reached
r"   reward_thresholdr   c                 ,   > [         TU ]  US9  Xl        g rz   )r   r   r"  )r#   r"  r   r$   s      r%   r   &StopTrainingOnRewardThreshold.__init__.  s    ) 0r'   r(   c                    U R                   c   S5       e[        U R                   R                  U R                  :  5      nU R                  S:  a7  U(       d0  [        SU R                   R                  S SU R                   35        U$ )NzN``StopTrainingOnMinimumReward`` callback must be used with an ``EvalCallback``rL   z*Stopping training because the mean reward r  z  is above the threshold )r"   rq   r   r"  r   r   r#   r   s     r%   rI   &StopTrainingOnRewardThreshold._on_step2  s    {{&x(xx& !=!=@U@U!UV<<1%6<T[[=Y=YZ]<^ _++/+@+@*AC ! r'   )r"  rd   )rg   rh   ri   rj   rk   r   rl   r  rm   r   rq   rI   rr   rs   rt   s   @r%   r!  r!    s9    
 1 1 1 1!$ ! !r'   r!  c                   F   ^  \ rS rSrSrS\S\4U 4S jjrS\4S jr	Sr
U =r$ )	EveryNTimestepsi=  z
Trigger a callback every ``n_steps`` timesteps

:param n_steps: Number of timesteps between two trigger.
:param callback: Callback that will be called
    when the event is triggered.
n_stepsrx   c                 >   > [         TU ]  U5        Xl        SU l        g r   )r   r   r*  last_time_trigger)r#   r*  rx   r$   s      r%   r   EveryNTimesteps.__init__F  s    "!"r'   r(   c                     U R                   U R                  -
  U R                  :  a!  U R                   U l        U R                  5       $ gr   )r   r,  r*  r   r0   s    r%   rI   EveryNTimesteps._on_stepK  s=    !7!77DLLH%)%7%7D">>##r'   )r,  r*  )rg   rh   ri   rj   rk   rm   r   r   rq   rI   rr   rs   rt   s   @r%   r)  r)  =  s,    # #| #
$  r'   r)  c                   ^   ^  \ rS rSrSrS\4U 4S jjrS\\\	4   S\\\	4   S\
4S jrS	rU =r$ )
LogEveryNTimestepsiR  z`
Log data every ``n_steps`` timesteps

:param n_steps: Number of timesteps between two trigger.
r*  c                 H   > [         TU ]  U[        U R                  5      S9  g )Nr   )r   r   r   	_log_data)r#   r*  r$   s     r%   r   LogEveryNTimesteps.__init__Y  s    ?4>>+JKr'   _locals_globalsr(   c                 8    U R                   R                  5         gr   )r   	dump_logs)r#   r5  r6  s      r%   r3  LogEveryNTimesteps._log_data\  s    

r'   r7   )rg   rh   ri   rj   rk   rm   r   ro   rp   r   rq   r3  rr   rs   rt   s   @r%   r1  r1  R  sE    L Lc3h 4S> d  r'   r1  c                   T   ^  \ rS rSrSrS
S\S\4U 4S jjjrSS jrS\4S jr	S	r
U =r$ )StopTrainingOnMaxEpisodesia  a  
Stop the training once a maximum number of episodes are played.

For multiple environments presumes that, the desired behavior is that the agent trains on each env for ``max_episodes``
and in total for ``max_episodes * n_envs`` episodes.

:param max_episodes: Maximum number of episodes to stop training.
:param verbose: Verbosity level: 0 for no output, 1 for indicating information about when training ended by
    reaching ``max_episodes``
max_episodesr   c                 F   > [         TU ]  US9  Xl        Xl        SU l        g Nr{   r   )r   r   r<  _total_max_episodes
n_episodes)r#   r<  r   r$   s      r%   r   "StopTrainingOnMaxEpisodes.__init__m  s&    )(#/ r'   r(   c                 T    U R                   U R                  R                  -  U l        g r.   )r<  r+   num_envsr?  r0   s    r%   r3   (StopTrainingOnMaxEpisodes._init_callbacks  s!    #'#4#4t7H7H7Q7Q#Q r'   c                 J   SU R                   ;   d   S5       eU =R                  [        R                  " U R                   S   5      R	                  5       -  sl        U R                  U R
                  :  nU R                  S:  a  U(       d  U R                  U R                  R                  -  nU R                  R                  S:  a  SUS S3OSn[        SU R                   S	U R                   R                  S
5       SU R                   SU R                   SU 3
5        U$ )NdoneszT`dones` variable is not defined, please check your code next to `callback.on_step()`rL   zwith an average of r  z episodes per envr   z"Stopping training with a total of z steps because the tb_log_namez model reached max_episodes=z, by playing for z
 episodes )r    r@  r   sumitemr?  r   r+   rC  r   r   r   r<  )r#   r   mean_episodes_per_envmean_ep_strs       r%   rI   "StopTrainingOnMaxEpisodes._on_stepw  s   $++%}'}}%266$++g"67<<>> OOd.F.FF<<1%6$(OOd6G6G6P6P$P!VZVgVgVpVpstVt%&;C%@@QRz|  4T5G5G4HH[;;??=122NtO`O`Na b""&//!2*-! ! r'   )r?  r<  r@  rd   rf   )rg   rh   ri   rj   rk   rm   r   r3   rq   rI   rr   rs   rt   s   @r%   r;  r;  a  s7    	S 3  R!$ ! !r'   r;  c                   Z   ^  \ rS rSr% Sr\\S'   SS\S\S\4U 4S jjjrS\	4S	 jr
S
rU =r$ ) StopTrainingOnNoModelImprovementi  aM  
Stop the training early if there is no new best model (new best mean reward) after more than N consecutive evaluations.

It is possible to define a minimum number of evaluations before start to count evaluations without improvement.

It must be used with the ``EvalCallback``.

:param max_no_improvement_evals: Maximum number of consecutive evaluations without a new best model.
:param min_evals: Number of evaluations before start to count evaluations without improvements.
:param verbose: Verbosity level: 0 for no output, 1 for indicating when training ended because no new best model
r"   max_no_improvement_evals	min_evalsr   c                 r   > [         TU ]  US9  Xl        X l        [        R
                  * U l        SU l        g r>  )r   r   rO  rP  r   r   last_best_mean_rewardno_improvement_evals)r#   rO  rP  r   r$   s       r%   r   )StopTrainingOnNoModelImprovement.__init__  s4    )(@%"&(ffW"$%!r'   r(   c                    U R                   c   S5       eSnU R                  U R                  :  a]  U R                   R                  U R                  :  a  SU l        O1U =R
                  S-  sl        U R
                  U R                  :  a  SnU R                   R                  U l        U R                  S:  a!  U(       d  [        SU R
                  S S35        U$ )	NzS``StopTrainingOnNoModelImprovement`` callback must be used with an ``EvalCallback``Tr   rL   FzBStopping training because there was no new best model in the last dz evaluations)	r"   r   rP  r   rR  rS  rO  r   r   r&  s     r%   rI   )StopTrainingOnNoModelImprovement._on_step  s    {{&}(}}& <<$..({{++d.H.HH,-)))Q.),,t/L/LL(-%%)[[%A%A"<<1%6TUYUnUnopTqq}~ ! r'   )rR  rO  rP  rS  )r   r   )rg   rh   ri   rj   rk   r   rl   rm   r   rq   rI   rr   rs   rt   s   @r%   rN  rN    sA    
 & & &SV & &!$ ! !r'   rN  c                   ^   ^  \ rS rSr% Sr\\S'   S
U 4S jjrS
S jrS\	4S jr
S
S jrS	rU =r$ )ProgressBarCallbacki  zN
Display a progress bar when training SB3 agent
using tqdm and rich packages.
pbarr(   c                 F   > [         TU ]  5         [        c  [        S5      eg )NzYou must install tqdm and rich in order to use the progress bar callback. It is included if you install stable-baselines with the extra packages: `pip install stable-baselines3[extra]`)r   r   r   ImportError)r#   r$   s    r%   r   ProgressBarCallback.__init__  s*    <9  r'   c                 h    [        U R                  S   U R                  R                  -
  S9U l        g )Ntotal_timesteps)total)r   r    r   r   rZ  r0   s    r%   r=   &ProgressBarCallback._on_training_start  s)     t{{+<=

@X@XXY	r'   c                 b    U R                   R                  U R                  R                  5        gr   )rZ  r]   r+   rC  r0   s    r%   rI   ProgressBarCallback._on_step  s#    		**334r'   c                 l    U R                   R                  5         U R                   R                  5         g r.   )rZ  refreshcloser0   s    r%   rP   $ProgressBarCallback._on_training_end  s     				r'   )rZ  rf   )rg   rh   ri   rj   rk   r   rl   r   r=   rq   rI   rP   rr   rs   rt   s   @r%   rY  rY    s0    
 JZ
$ 
 r'   rY  )*r   r   abcr   r   typingr   r   r   r   r	   	gymnasiumr  numpyr   stable_baselines3.common.loggerr
   r   r   filterwarnings	tqdm.richr\  #stable_baselines3.common.evaluationr    stable_baselines3.common.vec_envr   r   r   stable_baselines3.commonr   r   rv   r   r   r   r   r!  r)  r1  r;  rN  rY  r7   r'   r%   <module>rr     s   	  # @ @   2	, H/FG @ Y Y3q3 qh)1L )1X3,< 3,lL L^l $G1= G1T!L !<m * )! )!X*!| *!Z, S   Ds   C- -C87C8