
    hl                        S r SSKrSSKrSSKrSSKrSSKJrJr  SSKJ	r	  SSK
Jr  SSKJrJrJrJrJr  SSKrSSKrSSKrSSKJr  SSKJr  SS	KJrJrJrJr  SS
K J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)  SSK*J+r+J,r,J-r-  SSK.J/r/J0r0J1r1J2r2  SSK3J4r4J5r5J6r6J7r7  SSK8J9r9J:r:J;r;J<r<J=r=J>r>  SSK?J@r@JArAJBrBJCrCJDrDJErE  SSKFJGrGJHrH  \" SSS9rIS\\4\J4   S\KS\44S jrL " S S\5      rMg)z(Abstract base classes for RL algorithms.    N)ABCabstractmethod)deque)Iterable)AnyClassVarOptionalTypeVarUnion)spaces)utils)BaseCallbackCallbackListConvertCallbackProgressBarCallback)
is_wrapped)Logger)Monitor)ActionNoise)
BasePolicy)check_for_nested_spacesis_image_spaceis_image_space_channels_first)load_from_zip_filerecursive_getattrrecursive_setattrsave_to_zip_file)GymEnvMaybeCallbackSchedule
TensorDict)FloatSchedulecheck_for_correct_spaces
get_deviceget_system_infoset_random_seedupdate_learning_rate)DummyVecEnvVecEnvVecNormalizeVecTransposeImageis_vecenv_wrappedunwrap_vec_normalize)_convert_space
_patch_envSelfBaseAlgorithmBaseAlgorithm)boundenvverbosereturnc                     [        U [        5      (       a/  U nUS:  a  [        SU S35         [        R                  " USS9n U $ U $ ! [
         a    [        R                  " U5      n  U $ f = f)zIf env is a string, make the environment; otherwise, return env.

:param env: The environment to learn from.
:param verbose: Verbosity level: 0 for no output, 1 for indicating if environment is created
:return A Gym (vector) environment.
   z*Creating environment from the given name ''	rgb_array)render_mode)
isinstancestrprintgymmake	TypeError)r3   r4   env_ids      ]/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/base_class.pymaybe_make_envrC   0   ss     #sa<>vhaHI	#((6{;C J3J  	#((6"CJ	#s   A  A+*A+c                       \ rS rSr% Sr0 r\\\\	\
   4      \S'   \
\S'   \R                  \S'   \R                  \S'   \\S'   \\S'   \\S	'              SOS\\\	\
   4   S\\\S
4   S\\\4   S\\\\4      S\S\\   S\S\\R.                  \4   S\S\S\\   S\S\S\\\	\R                     S4      SS
4S jjr\SPS\S\S\S\4S jj5       r\SQS j5       rS\SS
4S jr \!S\4S j5       r"SQS  jr#S!\S"\SS
4S# jr$S$\\%\RL                  RN                     \RL                  RN                  4   SS
4S% jr(S\%\   4S& jr)S'\S\	\
   4S( jr*S\\%\   \%\   4   4S) jr+ SRS*\,S+\S\-4S, jjr.    SSS"\S*\,S-\S.\S+\S\\\-4   4S/ jjr/STS0\%\\\4      S1\\0Rb                     SS
4S2 jjr2S\\   4S3 jr3S\\4   4S4 jr5SUS\S5\SS
4S6 jjr6\     SVS7\7S"\S*\,S8\S.\S-\S+\S\74S9 jj5       r8   SWS:\\0Rb                  \\\0Rb                  4   4   S;\\\0Rb                  S4      S<\\0Rb                     S=\S\\0Rb                  \\\0Rb                  S4      4   4
S> jjr9STS\\   SS
4S? jjr:  SXS@\\\;4   SA\S\\R.                  \4   SS
4SB jjr<\=     SYSC\	\7   SD\\\>R~                  \@R                  4   S\\   S\\R.                  \4   SE\\\\4      SF\S5\S\74SG jj5       rBS\\\4   4SH jrC  SZSD\\\>R~                  \@R                  4   SI\\D\      SJ\\D\      SS
4SK jjrESQSL jrFSQSM jrGSNrHg
)[r1   C   aD  
The base of RL algorithms

:param policy: The policy model to use (MlpPolicy, CnnPolicy, ...)
:param env: The environment to learn from
            (if registered in Gym, can be str. Can be None for loading trained models)
:param learning_rate: learning rate for the optimizer,
    it can be a function of the current progress remaining (from 1 to 0)
:param policy_kwargs: Additional arguments to be passed to the policy on creation
:param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average
    the reported success rate, mean episode length, and mean reward over
:param tensorboard_log: the log location for tensorboard (if None, no logging)
:param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for
    debug messages
:param device: Device on which the code should run.
    By default, it will try to use a Cuda compatible device and fallback to cpu
    if it is not possible.
:param support_multi_env: Whether the algorithm supports training
    with multiple environments (as in A2C)
:param monitor_wrapper: When creating an environment, whether to wrap it
    or not in a Monitor wrapper.
:param seed: Seed for the pseudo random generators
:param use_sde: Whether to use generalized State Dependent Exploration (gSDE)
    instead of action noise exploration (default: False)
:param sde_sample_freq: Sample a new noise matrix every n steps when using gSDE
    Default: -1 (only sample at the beginning of the rollout)
:param supported_action_spaces: The action spaces supported by the algorithm.
policy_aliasespolicyobservation_spaceaction_spacen_envslr_schedule_loggerNr3   learning_ratepolicy_kwargsstats_window_sizetensorboard_logr4   devicesupport_multi_envmonitor_wrapperseeduse_sdesde_sample_freqsupported_action_spaces.r5   c                     [        U[        5      (       a  U R                  U5      U l        OXl        [	        U5      U l        US:  a  [        SU R
                   S35        Xpl        Uc  0 OUU l        SU l	        SU l
        SU l        Xl        S U l        SU l        X0l        X`l        S U l        S U l        S U l        SU l        Xl        Xl        SU l        XPl        S U l        S U l        SU l        SU l        S U l        S U l        UGb  [?        X R                  5      nU RA                  X R                  U
5      nURB                  U l!        URD                  U l"        URF                  U l$        X l        [K        U5      U l        Ub3  [        U RD                  U5      (       d   SU S	U RD                   S
35       eU	(       d  U RH                  S:  a  [M        S5      eUS;   a7  [        U RB                  [N        RP                  5      (       a  [M        SU 35      eU R*                  (       a4  [        U RD                  [N        RR                  5      (       d  [M        S5      e[        U RD                  [N        RR                  5      (       au  [T        RV                  " [T        RX                  " [T        RZ                  " U RD                  R\                  U RD                  R^                  /5      5      5      (       d   S5       eg g g )Nr7   zUsing z devicer   g              ?FzThe algorithm only supports z as action spaces but z was providedz]Error: the model does not support multiple envs; it requires a single vectorized environment.)	MlpPolicy	CnnPolicyzNYou must use `MultiInputPolicy` when working with dict observation space, not zXgeneralized State-Dependent Exploration (gSDE) can only be used with continuous actions.z@Continuous action space must have a finite lower and upper bound)0r;   r<   _get_policy_from_namepolicy_classr$   rQ   r=   r4   rN   num_timesteps_total_timesteps_num_timesteps_at_startrT   action_noise
start_timerM   rP   	_last_obs_last_episode_starts_last_original_obs_episode_numrU   rV   _current_progress_remaining_stats_window_sizeep_info_bufferep_success_buffer
_n_updates_custom_loggerr3   _vec_normalize_envrC   	_wrap_envrH   rI   num_envsrJ   r-   
ValueErrorr   DictBoxnpallisfinitearraylowhigh)selfrG   r3   rM   rN   rO   rP   r4   rQ   rR   rS   rT   rU   rV   rW   s                  rB   __init__BaseAlgorithm.__init__j   s   " fc"" $ : :6 BD & (a<F4;;-w/0#0#8Rm !'($	37*.$(!"&. ,/("3"!%#%):> ? ll3C..llODC%(%:%:D" # 0 0D,,DKH ';3&?D#&2!$"3"35LMM 23J2K L,,-]<M
 %q v 
 33
4CYCY[a[f[f8g8g #qrxqy!z{{||Jt/@/@&**$M$M !{||$++VZZ88vvKK$*;*;*?*?ARARAWAW)X YZ  VUV  9=     c                   ^  [        T [        5      (       dd  [        T 5      m [        T [        5      (       d#  U(       a  US:  a  [        S5        [	        T 5      m US:  a  [        S5        [        U 4S j/5      m [        T R                  5        [        T [        5      (       d  Sn[        T R                  [        R                  5      (       aW  T R                  R                  R                  5        H.  nU=(       d"    [        U5      =(       a    [        U5      (       + nM0     O6[        T R                  5      =(       a    [        T R                  5      (       + nU(       a  US:  a  [        S5        [        T 5      m T $ )a^  "
Wrap environment with the appropriate wrappers if needed.
For instance, to have a vectorized environment
or to re-order the image channels.

:param env:
:param verbose: Verbosity level: 0 for no output, 1 for indicating wrappers used
:param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible.
:return: The wrapped environment.
r7   z)Wrapping the env with a `Monitor` wrapperz"Wrapping the env in a DummyVecEnv.c                     > T $ N r3   s   rB   <lambda>)BaseAlgorithm._wrap_env.<locals>.<lambda>   s    sr|   Fz(Wrapping the env in a VecTransposeImage.)r;   r)   r/   r   r   r=   r(   r   rH   r,   r+   r   rq   valuesr   r   )r3   r4   rS   wrap_with_vectransposespaces   `    rB   rn   BaseAlgorithm._wrap_env   s4    #v&&S/Cc7++a<EFcl!|:;{m,C 	  5 56 &788%*"#//== !2299@@BE-C .&u-Z6STY6Z2Z + C
 *88M8M)N *Wt))X T& &a<DE',
r|   c                     g)z'Create networks, buffer and optimizers.Nr   ry   s    rB   _setup_modelBaseAlgorithm._setup_model       r|   loggerc                     Xl         SU l        g)z
Setter for for logger object.

.. warning::

  When passing a custom logger object,
  this will overwrite ``tensorboard_log`` and ``verbose`` settings
  passed to the constructor.
TN)rL   rl   )ry   r   s     rB   
set_loggerBaseAlgorithm.set_logger   s     "r|   c                     U R                   $ )zGetter for the logger object.)rL   r   s    rB   r   BaseAlgorithm.logger  s     ||r|   c                 8    [        U R                  5      U l        g)z Transform to callable if needed.N)r"   rM   rK   r   s    rB   _setup_lr_schedule BaseAlgorithm._setup_lr_schedule  s    (););<r|   r^   total_timestepsc                 B    S[        U5      [        U5      -  -
  U l        g)z
Compute current progress remaining (starts from 1 and ends to 0)

:param num_timesteps: current number of timesteps
:param total_timesteps:
rY   N)floatrg   )ry   r^   r   s      rB   "_update_current_progress_remaining0BaseAlgorithm._update_current_progress_remaining  s      ,/}1EoH^1^+^(r|   
optimizersc                     U R                   R                  SU R                  U R                  5      5        [	        U[
        5      (       d  U/nU H'  n[        X R                  U R                  5      5        M)     g)z
Update the optimizers learning rate using the current learning rate schedule
and the current progress remaining (from 1 to 0).

:param optimizers:
    An optimizer or a list of optimizers.
ztrain/learning_rateN)r   recordrK   rg   r;   listr'   )ry   r   	optimizers      rB   _update_learning_rate#BaseAlgorithm._update_learning_rate  sa     	0$2B2B4CcCc2de*d++$J#I ,<,<T=]=],^_ $r|   c                 
    / SQ$ )aP  
Returns the names of the parameters that should be excluded from being
saved by pickling. E.g. replay buffers are skipped by default
as they take up a lot of space. PyTorch variables should be excluded
with this so they can be stored with ``th.save``.

:return: List of parameters that should be excluded from being saved with pickle.
)rG   rQ   r3   replay_bufferrollout_bufferrm   rL   rl   r   r   s    rB   _excluded_save_params#BaseAlgorithm._excluded_save_params/  s    	
 		
r|   policy_namec                 \    XR                   ;   a  U R                   U   $ [        SU S35      e)a!  
Get a policy class from its name representation.

The goal here is to standardize policy naming, e.g.
all algorithms can call upon "MlpPolicy" or "CnnPolicy",
and they receive respective policies that work for them.

:param policy_name: Alias of the policy
:return: A policy class (type)
zPolicy z unknown)rF   rp   )ry   r   s     rB   r\   #BaseAlgorithm._get_policy_from_nameC  s5     ---&&{33w{m8<==r|   c                     S/nU/ 4$ )a  
Get the name of the torch variables that will be saved with
PyTorch ``th.save``, ``th.load`` and ``state_dicts`` instead of the default
pickling strategy. This is to handle device placement correctly.

Names can point to specific variables under classes, e.g.
"policy.optimizer" would point to ``optimizer`` object of ``self.policy``
if this object.

:return:
    List of Torch variables whose state dicts to save (e.g. th.nn.Modules),
    and list of other Torch variables to store with ``th.save``.
rG   r   )ry   state_dictss     rB   _get_torch_save_params$BaseAlgorithm._get_torch_save_paramsT  s      jBr|   callbackprogress_barc                     [        U[        5      (       a  [        U5      n[        U[        5      (       d  [	        U5      nU(       a  [        U[        5       /5      nUR                  U 5        U$ )z
:param callback: Callback(s) called at every step with state of the algorithm.
:param progress_bar: Display a progress bar using tqdm and rich.
:return: A hybrid callback calling `callback` and performing evaluation.
)r;   r   r   r   r   r   init_callback)ry   r   r   s      rB   _init_callbackBaseAlgorithm._init_callbackf  s_     h%%#H-H (L11&x0H #X/B/D$EFHt$r|   reset_num_timestepstb_log_namec                 p   [         R                  " 5       U l        U R                  b  U(       a0  [	        U R
                  S9U l        [	        U R
                  S9U l        U R                  b  U R                  R                  5         U(       a  SU l	        SU l
        OXR                  -  nXl        U R                  U l        U(       d  U R                  c  U R                  c   eU R                  R                  5       U l        [        R                   " U R                  R"                  4[$        S9U l        U R(                  b  U R(                  R+                  5       U l        U R.                  (       d1  [0        R2                  " U R4                  U R6                  XC5      U l        U R;                  X%5      nX4$ )a  
Initialize different variables needed for training.

:param total_timesteps: The total number of samples (env steps) to train on
:param callback: Callback(s) called at every step with state of the algorithm.
:param reset_num_timesteps: Whether to reset or not the ``num_timesteps`` attribute
:param tb_log_name: the name of the run for tensorboard log
:param progress_bar: Display a progress bar using tqdm and rich.
:return: Total timesteps and callback(s)
)maxlenr   )dtype)timetime_nsrb   ri   r   rh   rj   ra   resetr^   rf   r_   r`   rc   r3   rs   onesro   boolrd   rm   get_original_obsre   rl   r   configure_loggerr4   rP   rL   r   )ry   r   r   r   r   r   s         rB   _setup_learnBaseAlgorithm._setup_learn  sU   $ ,,.&*="'t/F/F"GD%*$2I2I%JD"(##%!"D !D 111O /'+'9'9$ $.."888'''!XX^^-DN(*1B1B0DD(QD%&&2*.*A*A*R*R*T' "" 11$,,@T@TVawDL &&x>((r|   infosdonesc                    U R                   c   eU R                  c   eUc#  [        R                  " S/[	        U5      -  5      n[        U5       Hq  u  p4UR                  S5      nUR                  S5      nUb  U R                   R                  U/5        Uc  MK  X#   (       d  MV  U R                  R                  U5        Ms     g)z
Retrieve reward, episode length, episode success and update the buffer
if using Monitor wrapper or a GoalEnv.

:param infos: List of additional information about the transition.
:param dones: Termination signals
NFepisode
is_success)	ri   rj   rs   rv   len	enumerategetextendappend)ry   r   r   idxinfomaybe_ep_infomaybe_is_successs          rB   _update_info_buffer!BaseAlgorithm._update_info_buffer  s     ""...%%111=HHeWs5z12E"5)IC HHY/M#xx5(##**M?;+

&&--.>? *r|   c                     U R                   $ )za
Returns the current environment (can be None if not defined).

:return: The current environment
r   r   s    rB   get_envBaseAlgorithm.get_env  s     xxr|   c                     U R                   $ )zk
Return the ``VecNormalize`` wrapper of the training env
if it exists.

:return: The ``VecNormalize`` env.
)rm   r   s    rB   get_vec_normalize_env#BaseAlgorithm.get_vec_normalize_env  s     &&&r|   force_resetc                    U R                  XR                  5      nUR                  U R                  :X  d9   SUR                   SU R                   SU R                  R
                   S35       e[        XR                  U R                  5        [        U5      U l
        U(       a  SU l        UR                  U l        Xl        g)a  
Checks the validity of the environment, and if it is coherent, set it as the current environment.
Furthermore wrap any non vectorized env into a vectorized
checked parameters:
- observation_space
- action_space

:param env: The environment for learning a policy
:param force_reset: Force call to ``reset()`` before training
    to avoid unexpected behavior.
    See issue https://github.com/DLR-RM/stable-baselines3/issues/597
zaThe number of environments to be set is different from the number of environments in the model: (z != zz), whereas `set_env` requires them to be the same. To load a model with a different number of environments, you must use `z.load(path, env)` insteadN)rn   r4   ro   rJ   	__class____name__r#   rH   rI   r-   rm   rc   r3   )ry   r3   r   s      rB   set_envBaseAlgorithm.set_env  s     nnS,,/||t{{* 	
~T$++ /AAEAXAX@YYrt	
* 	!&<&<d>O>OP #7s"; !DNllr|   ry   log_intervalc                     g)a0  
Return a trained model.

:param total_timesteps: The total number of samples (env steps) to train on
    Note: it is a lower bound, see `issue #1150 <https://github.com/DLR-RM/stable-baselines3/issues/1150>`_
:param callback: callback(s) called at every step with state of the algorithm.
:param log_interval: for on-policy algos (e.g., PPO, A2C, ...) this is the number of
    training iterations (i.e., log_interval * n_steps * n_envs timesteps) before logging;
    for off-policy algos (e.g., TD3, SAC, ...) this is the number of episodes before
    logging.
:param tb_log_name: the name of the run for TensorBoard logging
:param reset_num_timesteps: whether or not to reset the current timestep number (used in logging)
:param progress_bar: Display a progress bar using tqdm and rich.
:return: the trained model
Nr   )ry   r   r   r   r   r   r   s          rB   learnBaseAlgorithm.learn  r   r|   observationstateepisode_startdeterministicc                 :    U R                   R                  XX45      $ )ae  
Get the policy action from an observation (and optional hidden state).
Includes sugar-coating to handle different observations (e.g. normalizing images).

:param observation: the input observation
:param state: The last hidden states (can be None, used in recurrent policies)
:param episode_start: The last masks (can be None, used in recurrent policies)
    this correspond to beginning of episodes,
    where the hidden states of the RNN must be reset.
:param deterministic: Whether or not to return deterministic actions.
:return: the model's action and the next hidden state
    (used in recurrent policies)
)rG   predict)ry   r   r   r   r   s        rB   r   BaseAlgorithm.predict  s    ( {{"";}TTr|   c                    Uc  g[        XR                  R                  [        R                  " S5      R                  :H  S9  U R                  R                  U5        U R                  b  U R                  R                  U5        gg)zh
Set the seed of the pseudo-random generators
(python, numpy, pytorch, gym, action_space)

:param seed:
Ncuda)
using_cuda)r&   rQ   typethrI   rT   r3   )ry   rT   s     rB   r&   BaseAlgorithm.set_random_seed/  se     <)9)9RYYv=N=S=S)STt$88HHMM$  r|   load_path_or_dictexact_matchc                    0 n[        U[        5      (       a  UnO[        XSS9u  pTn[        U R	                  5       S   5      n[        5       nU Hq  nSn	 [        X5      n	[        U	[        R                  R                  5      (       a  U	R                  XH   5        OU	R                  XH   US9  UR                  U5        Ms     U(       a  Xv:w  a  [        SU S	U 35      egg! [         a  n
[        SU S35      U
eSn
A
ff = f)
a  
Load parameters from a given zip-file or a nested dictionary containing parameters for
different modules (see ``get_parameters``).

:param load_path_or_iter: Location of the saved data (path or file-like, see ``save``), or a nested
    dictionary containing nn.Module parameters used by the policy. The dictionary maps
    object names to a state-dictionary returned by ``torch.nn.Module.state_dict()``.
:param exact_match: If True, the given parameters should include parameters for each
    module and each of their parameters, otherwise raises an Exception. If set to False, this
    can be used to update only specific parameters.
:param device: Device on which the code should run.
F)rQ   	load_datar   NzKey z is an invalid object name.)strictz>Names of parameters do not match agents' parameters: expected z, got )r;   dictr   setr   r   	Exceptionrp   r   optim	Optimizerload_state_dictadd)ry   r   r   rQ   params_objects_needing_updateupdated_objectsnameattres              rB   set_parametersBaseAlgorithm.set_parameters>  s%   $ '..&F-.?Z_`LAq
 "%T%@%@%B1%E!F%DDR(4 $ 2 233  $$V\2 $$V\+$F%= @ ?D236/9JL  E;9  R !4v-H!IJPQQ	Rs   C$$
D.C>>Dclspathcustom_objectsprint_system_infoc                 8   U(       a  [        S5        [        5         [        UUUUS9u  pn
Uc   S5       eU	c   S5       eSU;   ab  SUS   ;   a  US   S	 US   R                  S5      nU(       a8  [	        U[
        5      (       a#  [	        US	   [        5      (       a  US	   US   S'   SU;   a#  US   US   :w  a  [        S
US    SUS    35      eSU;  d  SU;  a  [        S5      eS H  n[        X   5      X'   M     UbH  U R                  X(S   5      n[        X(S   US   5        U(       a  Ub  SUS'   Ub  UR                  US'   OSU;   a  US   nU " US   UUSS9nUR                  R                  U5        UR                  R                  U5        UR                  5          UR!                  U	SUS9  U
b.  U
 H(  nU
U   c  M  [3        UU S"3U
U   R4                  5        M*     UR6                  (       a  UR,                  R9                  5         U$ ! ["         aU  nS[%        U5      ;   a?  S[%        U5      ;   a0  UR!                  U	SUS9  [&        R(                  " SU S35         SnANUeSnAf[         a  nU	S   S   S	   S   n[+        U5      n[+        UR,                  R.                  R0                  S	   S   5      nUS U-  :X  aB  USU U	S   S   S	   S'   UR!                  U	SUS9  [&        R(                  " S!U S35         SnAGNXUeSnAff = f)#a  
Load the model from a zip-file.
Warning: ``load`` re-creates the model from scratch, it does not update it in-place!
For an in-place load use ``set_parameters`` instead.

:param path: path to the file (or a file-like) where to
    load the agent from
:param env: the new environment to run the loaded model on
    (can be None if you only need prediction from a trained model) has priority over any saved environment
:param device: Device on which the code should run.
:param custom_objects: Dictionary of objects to replace
    upon loading. If a variable is present in this dictionary as a
    key, it will not be deserialized and the corresponding item
    will be used instead. Similar to custom_objects in
    ``keras.models.load_model``. Useful when you have an object in
    file that can not be deserialized.
:param print_system_info: Whether to print system info from the saved model
    and the current system info (useful to debug loading issues)
:param force_reset: Force call to ``reset()`` before training
    to avoid unexpected behavior.
    See https://github.com/DLR-RM/stable-baselines3/issues/597
:param kwargs: extra arguments to change the model when loading
:return: new model instance with loaded parameters
z== CURRENT SYSTEM INFO ==)rQ   r  r  NzNo data found in the saved filez!No params found in the saved filerN   rQ   net_archr   zQThe specified policy kwargs do not equal the stored policy kwargs.Stored kwargs: z, specified kwargs: rH   rI   zTThe observation_space and action_space were not given, can't verify new environments>   rI   rH   r4   rc   rJ   r3   r]   F)rG   r3   rQ   _init_setup_modelT)r   rQ   pi_features_extractorzMissing key(s) in state_dictzYou are probably loading a A2C/PPO model saved with SB3 < 1.7.0, we deactivated exact_match so you can save the model again to avoid issues in the future (see https://github.com/DLR-RM/stable-baselines3/issues/1233 for more info). Original error: z> 
Note: the model should still work fine, this only a warning.zpolicy.optimizerparam_groupsr      zYou are probably loading a DQN model saved with SB3 < 2.4.0, we truncated the optimizer state so you can save the model again to avoid issues in the future (see https://github.com/DLR-RM/stable-baselines3/pull/1963 for more info). Original error: z.data)r=   r%   r   r   r;   r   r   rp   KeyErrorr.   rn   r#   ro   __dict__updater   r  RuntimeErrorr<   warningswarnr   rG   r   r  r   datarU   reset_noise)r  r  r3   rQ   r  r  r   kwargsr  r   pytorch_variablessaved_net_archkeymodelr  saved_optim_paramsn_params_savedn_paramsr   s                      rB   loadBaseAlgorithm.load  s   F -.*<)/	+
'' B!BB!F#FF! d"400)(3!/266zBN*^T"B"BzR`abRceiGjGj4B14E_%j1f$)@DDY)Y""&"7!88LVTcMdLeg 
 d*nD.Hqrr 9C&ty1DI 9 ?--)_5C$S/B*CT.EYZ t/$([!!$X }5k'#	
 	d#f%(	  T& IR () %T*2 "%D69J49P9U9UV * ==LL$$&s  	 '#a&05SWZ[\W]5]$$Vv$N' ()c *S	S   	 "((:!;N!KA!Nx!X !34N5<<11>>qA(KLHX-J\]f^fJg)*>:1=hG$$Vf$M' ()c *S	S  +	s2   )H 
L"A	I20I22L?BLLLc                 z    U R                  5       u  p0 nU H   n[        X5      nUR                  5       X4'   M"     U$ )z
Return the parameters of the agent. This includes parameters from different networks, e.g.
critics (value functions) and policies (pi functions).

:return: Mapping of from names of the objects to PyTorch state-dicts.
)r   r   
state_dict)ry   state_dicts_namesr   r   r   r   s         rB   get_parametersBaseAlgorithm.get_parameters$  sD      $::<%D$T0D??,FL & r|   excludeincludec                    U R                   R                  5       nUc  / n[        U5      R                  U R	                  5       5      nUb  UR                  U5      nU R                  5       u  pVXV-   nU H(  nUR                  S5      S   n	UR                  U	5        M*     U H  n
UR                  U
S5        M     SnUb  0 nU H  n[        X5      nXU'   M     U R                  5       n[        XXS9  g)aD  
Save all the attributes of the object and the model parameters in a zip-file.

:param path: path to the file where the rl agent should be saved
:param exclude: name of parameters that should be excluded in addition to the default ones
:param include: name of parameters that might be excluded but should be included anyway
N.r   )r  r   r  )r  copyr   unionr   
differencer   splitr   popr   r#  r   )ry   r  r%  r&  r  r"  torch_variable_namesall_pytorch_variables	torch_varvar_name
param_namer  r   r   params_to_saves                  rB   saveBaseAlgorithm.save3  s     }}!!# ?Gg,$$T%?%?%AB ((1G262M2M2O/ 1 H.I s+A.HKK!	 / "JHHZ& " !+ ",(4*.$' -
 ,,.er|   c                     [        5       e)zK
Write log data. (Implemented by OffPolicyAlgorithm and OnPolicyAlgorithm)
)NotImplementedErrorr   s    rB   	dump_logsBaseAlgorithm.dump_logse  s     "##r|   c                 N    [         R                  " S5        U R                  " U6   g )Nz^algo._dump_logs() is deprecated in favor of algo.dump_logs(). It will be removed in SB3 v2.7.0)r  r  r8  )ry   argss     rB   
_dump_logsBaseAlgorithm._dump_logsk  s    vwr|   )rg   rl   rf   rd   rc   re   rL   rk   r`   rh   r_   rm   ra   rI   rQ   r3   ri   rj   rM   rK   rJ   r^   rH   r]   rN   rV   rT   rb   rP   rU   r4   )Nd   Nr   autoFTNFN)r   T)r5   N)F)NTrunFr   )T)Nr>  rA  TF)NNF)Tr?  )Nr?  NFT)NN)Ir   
__module____qualname____firstlineno____doc__rF   r   r   r<   r   r   __annotations__r   Spaceintr    r   r   r   r   r	   r   r   rQ   r   tuplerz   staticmethodr)   rn   r   r   r   propertyr   r   r   r   r   r   r   r   r\   r   r   r   r   r   rs   ndarrayr   r   r*   r   r   r0   r   r   r&   r!   r  classmethodpathlibPathioBufferedIOBaser  r#  r   r4  r8  r<  __static_attributes__r   r|   rB   r1   r1   C   sA   < =?NHT#tJ'7"789>||#,,KO 37!$)-(."' $"!LP_Vc4
++,_V 63$%_V UH_-	_V
  S#X/_V _V "#_V _V biin%_V  _V _V sm_V _V _V "*%V\\0BC0G*H!I_V  
!_VB -v - -$ -RX - -^ 6 6# #D #   =_ _VY _^b _`d288;M;M6NPRPXPXPbPb6b0c `hl ` 
tCy 
(> >j9I >"d3ic.B(C * #  
	8 #'$( "5)5)  5) "	5)
 5) 5) 
sL 	!5)n@d38n)= @hrzzFZ @fj @*&) 'x'= '!6 ! ! !F  #' $("   	
  "  
 8 37.2#U2::tCO'<<=U bjj#o./U  

+	U
 U 
rzz8E"**c/$:;;	<U, HSM  T  $ !(.	B j1B B biin%	B
 
BH  !%(.37"' _#$_Cr'8'889_ f_ biin%	_
 !c3h0_  _ _ 
_ _BS$Y $ ,0+/	0fCr'8'8890f (3-(0f (3-(	0f
 
0fd$r|   )NrE  rP  rN  r   r  abcr   r   collectionsr   collections.abcr   typingr   r   r	   r
   r   	gymnasiumr>   numpyrs   torchr   r   stable_baselines3.commonr   "stable_baselines3.common.callbacksr   r   r   r   !stable_baselines3.common.env_utilr   stable_baselines3.common.loggerr    stable_baselines3.common.monitorr   stable_baselines3.common.noiser   !stable_baselines3.common.policiesr   &stable_baselines3.common.preprocessingr   r   r   "stable_baselines3.common.save_utilr   r   r   r   %stable_baselines3.common.type_aliasesr   r   r    r!   stable_baselines3.common.utilsr"   r#   r$   r%   r&   r'    stable_baselines3.common.vec_envr(   r)   r*   r+   r,   r-   *stable_baselines3.common.vec_env.patch_gymr.   r/   r0   r<   rH  rC   r1   r   r|   rB   <module>rg     s    . 	    #  $ : :     * o o 8 2 4 6 8 y y y y ] ]   R/G fck* S V &jC jr|   