
    h'S              	         S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	J
r
Jr  SSKrSSKJr  SSKJrJrJrJr  SS	KJr  SS
KJrJrJrJrJr  \	(       a  SSKJr  / SQr " S S\R>                  \\\\4   \R@                  RB                  5      r" " S S\R>                  \\\\4   \R@                  RB                  5      r# " S S\R>                  \\\\4   \R@                  RB                  5      r$ " S S\R>                  \\\\4   \R@                  RB                  5      r% " S S\R>                  \\\\4   \R@                  RB                  5      r&g)a  A collection of common wrappers.

* ``TimeLimit`` - Provides a time limit on the number of steps for an environment before it truncates
* ``Autoreset`` - Auto-resets the environment
* ``PassiveEnvChecker`` - Passive environment checker that does not modify any environment data
* ``OrderEnforcing`` - Enforces the order of function calls to environments
* ``RecordEpisodeStatistics`` - Records the episode statistics
    )annotationsN)deque)deepcopy)TYPE_CHECKINGAnySupportsFloat)logger)ActTypeObsTypeRenderFrameWrapperObsType)ResetNeeded)check_action_spacecheck_observation_spaceenv_render_passive_checkerenv_reset_passive_checkerenv_step_passive_checker)EnvSpec)	TimeLimit	AutoresetPassiveEnvCheckerOrderEnforcingRecordEpisodeStatisticsc                  |   ^  \ rS rSrSr    S
S jr    SS jrSSS.     SU 4S jjjr\SS j5       r	S	r
U =r$ )r   +   ab  Limits the number of steps for an environment through truncating the environment if a maximum number of timesteps is exceeded.

If a truncation is not defined inside the environment itself, this is the only place that the truncation signal is issued.
Critically, this is different from the `terminated` signal that originates from the underlying environment as part of the MDP.
No vector wrapper exists.

Example using the TimeLimit wrapper:
    >>> from gymnasium.wrappers import TimeLimit
    >>> from gymnasium.envs.classic_control import CartPoleEnv

    >>> spec = gym.spec("CartPole-v1")
    >>> spec.max_episode_steps
    500
    >>> env = gym.make("CartPole-v1")
    >>> env  # TimeLimit is included within the environment stack
    <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
    >>> env.spec  # doctest: +ELLIPSIS
    EnvSpec(id='CartPole-v1', ..., max_episode_steps=500, ...)
    >>> env = gym.make("CartPole-v1", max_episode_steps=3)
    >>> env.spec  # doctest: +ELLIPSIS
    EnvSpec(id='CartPole-v1', ..., max_episode_steps=3, ...)
    >>> env = TimeLimit(CartPoleEnv(), max_episode_steps=10)
    >>> env
    <TimeLimit<CartPoleEnv instance>>

Example of `TimeLimit` determining the episode step
    >>> env = gym.make("CartPole-v1", max_episode_steps=3)
    >>> _ = env.reset(seed=123)
    >>> _ = env.action_space.seed(123)
    >>> _, _, terminated, truncated, _ = env.step(env.action_space.sample())
    >>> terminated, truncated
    (False, False)
    >>> _, _, terminated, truncated, _ = env.step(env.action_space.sample())
    >>> terminated, truncated
    (False, False)
    >>> _, _, terminated, truncated, _ = env.step(env.action_space.sample())
    >>> terminated, truncated
    (False, True)

Change logs:
 * v0.10.6 - Initially added
 * v0.25.0 - With the step API update, the termination and truncation signal is returned separately.
c                    [        U[        5      (       a  US:  d
   SU 35       e[        R                  R                  R                  XS9  [        R                  R                  X5        X l        SU l        g)a#  Initializes the :class:`TimeLimit` wrapper with an environment and the number of steps after which truncation will occur.

Args:
    env: The environment to apply the wrapper
    max_episode_steps: the environment step after which the episode is truncated (``elapsed >= max_episode_steps``)
r   z9Expect the `max_episode_steps` to be positive, actually: )max_episode_stepsN)	
isinstanceintgymutilsRecordConstructorArgs__init__Wrapper_max_episode_steps_elapsed_steps)selfenvr   s      S/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/wrappers/common.pyr#   TimeLimit.__init__Z   sw     (#..3Dq3H	[FGXFYZ	[H		''00 	1 	
 	T'"3"    c                    U R                   R                  U5      u  p#pEnU =R                  S-  sl        U R                  U R                  :  a  SnX#XEU4$ )aF  Steps through the environment and if the number of steps elapsed exceeds ``max_episode_steps`` then truncate.

Args:
    action: The environment step action

Returns:
    The environment step ``(observation, reward, terminated, truncated, info)`` with `truncated=True`
    if the number of steps elapsed >= max episode steps

   T)r(   stepr&   r%   )r'   actionobservationreward
terminated	truncatedinfos          r)   r.   TimeLimit.stepp   sU     <@88==;P8ZDq $"9"99IJ4??r+   Nseedoptionsc               ,   > SU l         [        TU ]	  XS9$ )zResets the environment with :param:`**kwargs` and sets the number of steps elapsed to zero.

Args:
    seed: Seed for the environment
    options: Options for the environment

Returns:
    The reset environment
r   r6   )r&   superresetr'   r7   r8   	__class__s      r)   r;   TimeLimit.reset   s      w}$}88r+   c                (   U R                   b  U R                   $ U R                  R                  nUb   [        U5      nU R                  Ul        Xl         U$ ! [         a/  n[        R                  R                  SU SU 35         SnAgSnAff = f)zYModifies the environment spec to include the `max_episode_steps=self._max_episode_steps`.NAn exception occurred (%) while copying the environment spec=)
_cached_specr(   specr   r%   r   	Exceptionr    r	   warnr'   env_speces      r)   rC   TimeLimit.spec   s     ($$$88==#H--1-D-D* %  

-aS0UV^U_` 	s   A 
B"%BB)rB   r&   r%   )r(   gym.Envr   r   r/   r
   returnz9tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]r7   
int | Noner8   dict[str, Any] | NonerL   ztuple[ObsType, dict[str, Any]]rL   zEnvSpec | None)__name__
__module____qualname____firstlineno____doc__r#   r.   r;   propertyrC   __static_attributes____classcell__r=   s   @r)   r   r   +   su    *X## #,@@	B@, %)49!93H9	'9 9  r+   r   c                  `   ^  \ rS rSrSrS	S jrSSS.     S
U 4S jjjr    SS jrSrU =r	$ )r      a  The wrapped environment is automatically reset when a terminated or truncated state is reached.

This follows the vector autoreset api where on the step after an episode terminates or truncated then the environment is reset.

Change logs:
 * v0.24.0 - Initially added as `AutoResetWrapper`
 * v1.0.0 - renamed to `Autoreset` and autoreset order was changed to reset on the step after the environment terminates or truncates. As a result, `"final_observation"` and `"final_info"` is removed.
c                    [         R                  R                  R                  U 5        [         R                  R                  X5        SU l        g)zA class for providing an automatic reset functionality for gymnasium environments when calling :meth:`self.step`.

Args:
    env (gym.Env): The environment to apply the wrapper
FN)r    r!   r"   r#   r$   	autoresetr'   r(   s     r)   r#   Autoreset.__init__   s5     			''006T'r+   Nr6   c               ,   > SU l         [        TU ]	  XS9$ )z>Resets the environment and sets autoreset to False preventing.Fr6   )r]   r:   r;   r<   s      r)   r;   Autoreset.reset   s     w}$}88r+   c                    U R                   (       a"  U R                  R                  5       u  p#Su  pEnOU R                  R                  U5      u  p$pVnU=(       d    UU l         X$XVU4$ )zSteps through the environment with action and resets the environment if a terminated or truncated signal is encountered.

Args:
    action: The action to take

Returns:
    The autoreset environment :meth:`step`
)        FF)r]   r(   r;   r.   )r'   r/   obsr4   r1   r2   r3   s          r)   r.   Autoreset.step   sY     >>(IC,=)F	7;xx}}V7L4C#0yJ477r+   )r]   )r(   rJ   )r7   rN   r8   rO   rL   z%tuple[WrapperObsType, dict[str, Any]]rK   )
rQ   rR   rS   rT   rU   r#   r;   r.   rW   rX   rY   s   @r)   r   r      sP    	 %)49!93H9	.9 988	B8 8r+   r   c                  v    \ rS rSrSrSS jr    SS jrSSS.     SS jjrSS jr\	SS	 j5       r
S
 rSrg)r      a  A passive wrapper that surrounds the ``step``, ``reset`` and ``render`` functions to check they follow Gymnasium's API.

This wrapper is automatically applied during make and can be disabled with `disable_env_checker`.
No vector version of the wrapper exists.

Example:
    >>> import gymnasium as gym
    >>> env = gym.make("CartPole-v1")
    >>> env
    <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
    >>> env = gym.make("CartPole-v1", disable_env_checker=True)
    >>> env
    <TimeLimit<OrderEnforcing<CartPoleEnv<CartPole-v1>>>>

Change logs:
 * v0.24.1 - Initially added however broken in several ways
 * v0.25.0 - Bugs was all fixed
 * v0.29.0 - Removed warnings for infinite bounds for Box observation and action spaces and inregular bound shapes
c                Z   [         R                  R                  R                  U 5        [         R                  R                  X5        [        U[         R                  5      (       dF  [        UR                  R                  5      S:X  a  [        S5      e[        S[        U5       S35      e[        US5      (       d  [        S5      e[        UR                  5        [        US5      (       d  [        S5      e[!        UR"                  5        S	U l        S	U l        S	U l        S	U l        g
)zZInitialises the wrapper with the environments, run the observation and action space tests.z<class 'gym.core.Env'>zGym is incompatible with Gymnasium, please update the environment class to `gymnasium.Env`. See https://gymnasium.farama.org/introduction/create_custom_env/ for more info.zIThe environment must inherit from the gymnasium.Env class, actual class: zQ. See https://gymnasium.farama.org/introduction/create_custom_env/ for more info.action_spacezjThe environment must specify an action space. https://gymnasium.farama.org/introduction/create_custom_env/observation_spacezoThe environment must specify an observation space. https://gymnasium.farama.org/introduction/create_custom_env/FN)r    r!   r"   r#   r$   r   Envstrr=   __base__	TypeErrortypehasattrAttributeErrorr   ri   r   rj   checked_resetchecked_stepchecked_renderclose_calledr^   s     r)   r#   PassiveEnvChecker.__init__   s   		''006T'#sww''3==))*.FFf 
  _`deh`i_j kf f 
 sN++ |  	3++,s/00  B  	  5 56#("'$)"'r+   c                    U R                   SL a  SU l         [        U R                  U5      $ U R                  R                  U5      $ )z[Steps through the environment that on the first call will run the `passive_env_step_check`.FT)rs   r   r(   r.   )r'   r/   s     r)   r.   PassiveEnvChecker.step  s>     % $D+DHHf==88==((r+   Nr6   c                   U R                   SL a  SU l         [        U R                  XS9$ U R                  R                  XS9$ )zUResets the environment that on the first call will run the `passive_env_reset_check`.FTr6   )rr   r   r(   r;   )r'   r7   r8   s      r)   r;   PassiveEnvChecker.reset  s@     &!%D,TXXDRR88>>t>==r+   c                    U R                   SL a  SU l         [        U R                  5      $ U R                  R                  5       $ )zWRenders the environment that on the first call will run the `passive_env_render_check`.FT)rt   r   r(   renderr'   s    r)   r|   PassiveEnvChecker.render)  s8    %'"&D-dhh7788??$$r+   c                   U R                   b  U R                   $ U R                  R                  nUb   [        U5      nSUl        Xl         U$ ! [
         a/  n[        R                  R                  SU SU 35         SnAgSnAff = f)zGModifies the environment spec to such that `disable_env_checker=False`.NFr@   rA   )	rB   r(   rC   r   disable_env_checkerrD   r    r	   rE   rF   s      r)   rC   PassiveEnvChecker.spec1  s     ($$$88==#H-/4, %  

-aS0UV^U_` 	   A 
B%BBc                    U R                   (       d!  SU l         U R                  R                  5       $  U R                  R                  5       $ ! [         a  n[        R
                  " S5        UeSnAff = f)z5Warns if calling close on a closed environment fails.TziCalling `env.close()` on the closed environment should be allowed, but it raised the following exception.N)ru   r(   closerD   r	   rE   )r'   rH   s     r)   r   PassiveEnvChecker.closeE  s`       $D88>>##xx~~''  	s   A 
A5A00A5)rB   rt   rr   rs   ru   )r(   gym.Env[ObsType, ActType]rK   rM   rL   z&RenderFrame | list[RenderFrame] | NonerP   )rQ   rR   rS   rT   rU   r#   r.   r;   r|   rV   rC   r   rW    r+   r)   r   r      sd    ( (D))	B) %)4>!>3H>	'>%  &r+   r   c                     ^  \ rS rSrSr S   SS jjrSU 4S jjrSSS.     SU 4S jjjrSU 4S jjr\	S	 5       r
\	SS
 j5       rSrU =r$ )r   iT  a  Will produce an error if ``step`` or ``render`` is called before ``reset``.

No vector version of the wrapper exists.

Example:
    >>> import gymnasium as gym
    >>> from gymnasium.wrappers import OrderEnforcing
    >>> env = gym.make("CartPole-v1", render_mode="human")
    >>> env = OrderEnforcing(env)
    >>> env.step(0)
    Traceback (most recent call last):
        ...
    gymnasium.error.ResetNeeded: Cannot call env.step() before calling env.reset()
    >>> env.render()
    Traceback (most recent call last):
        ...
    gymnasium.error.ResetNeeded: Cannot call `env.render()` before calling `env.reset()`, if this is an intended action, set `disable_render_order_enforcing=True` on the OrderEnforcer wrapper.
    >>> _ = env.reset()
    >>> env.render()
    >>> _ = env.step(0)
    >>> env.close()

Change logs:
 * v0.22.0 - Initially added
 * v0.24.0 - Added order enforcing for the render function
c                    [         R                  R                  R                  XS9  [         R                  R                  X5        SU l        X l        g)zA wrapper that will produce an error if :meth:`step` is called before an initial :meth:`reset`.

Args:
    env: The environment to wrap
    disable_render_order_enforcing: If to disable render order enforcing
)disable_render_order_enforcingFN)r    r!   r"   r#   r$   
_has_reset_disable_render_order_enforcing)r'   r(   r   s      r)   r#   OrderEnforcing.__init__r  sE     			''00 	1 	
 	T' %5S,r+   c                Z   > U R                   (       d  [        S5      e[        TU ]  U5      $ )zSteps through the environment.z1Cannot call env.step() before calling env.reset())r   r   r:   r.   )r'   r/   r=   s     r)   r.   OrderEnforcing.step  s%    QRRw|F##r+   Nr6   c               ,   > SU l         [        TU ]	  XS9$ )z%Resets the environment with `kwargs`.Tr6   )r   r:   r;   r<   s      r)   r;   OrderEnforcing.reset  s     w}$}88r+   c                z   > U R                   (       d  U R                  (       d  [        S5      e[        TU ]  5       $ )z&Renders the environment with `kwargs`.zCannot call `env.render()` before calling `env.reset()`, if this is an intended action, set `disable_render_order_enforcing=True` on the OrderEnforcer wrapper.)r   r   r   r:   r|   )r'   r=   s    r)   r|   OrderEnforcing.render  s4    33DOOZ  w~r+   c                    U R                   $ )z1Returns if the environment has been reset before.)r   r}   s    r)   	has_resetOrderEnforcing.has_reset  s     r+   c                   U R                   b  U R                   $ U R                  R                  nUb   [        U5      nSUl        Xl         U$ ! [
         a/  n[        R                  R                  SU SU 35         SnAgSnAff = f)z>Modifies the environment spec to add the `order_enforce=True`.NTr@   rA   )	rB   r(   rC   r   order_enforcerD   r    r	   rE   rF   s      r)   rC   OrderEnforcing.spec  s     ($$$88==#H-)-& %  

-aS0UV^U_` 	r   )rB   r   r   )F)r(   r   r   bool)r/   r
   rL   z/tuple[ObsType, SupportsFloat, bool, bool, dict]rM   r   rP   )rQ   rR   rS   rT   rU   r#   r.   r;   r|   rV   r   rC   rW   rX   rY   s   @r)   r   r   T  s    < 05T&T )-T&$ %)49!93H9	'9 9     r+   r   c                  x   ^  \ rS rSrSr  S	     S
S jjr    SU 4S jjrSSS.     SU 4S jjjrSrU =r	$ )r   i  a  This wrapper will keep track of cumulative rewards and episode lengths.

At the end of an episode, the statistics of the episode will be added to ``info``
using the key ``episode``. If using a vectorized environment also the key
``_episode`` is used which indicates whether the env at the respective index has
the episode statistics.
A vector version of the wrapper exists, :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`.

After the completion of an episode, ``info`` will look like this::

    >>> info = {
    ...     "episode": {
    ...         "r": "<cumulative reward>",
    ...         "l": "<episode length>",
    ...         "t": "<elapsed time since beginning of episode>"
    ...     },
    ... }

For a vectorized environments the output will be in the form of::

    >>> infos = {
    ...     "episode": {
    ...         "r": "<array of cumulative reward>",
    ...         "l": "<array of episode length>",
    ...         "t": "<array of elapsed time since beginning of episode>"
    ...     },
    ...     "_episode": "<boolean array of length num-envs>"
    ... }

Moreover, the most recent rewards and episode lengths are stored in buffers that can be accessed via
:attr:`wrapped_env.return_queue` and :attr:`wrapped_env.length_queue` respectively.

Attributes:
 * time_queue: The time length of the last ``deque_size``-many episodes
 * return_queue: The cumulative rewards of the last ``deque_size``-many episodes
 * length_queue: The lengths of the last ``deque_size``-many episodes

Change logs:
 * v0.15.4 - Initially added
 * v1.0.0 - Removed vector environment support (see :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`) and add attribute ``time_queue``
c                ,   [         R                  R                  R                  U 5        [         R                  R                  X5        X0l        SU l        SU l        SU l        SU l	        [        US9U l        [        US9U l        [        US9U l        g)a'  This wrapper will keep track of cumulative rewards and episode lengths.

Args:
    env (Env): The environment to apply the wrapper
    buffer_length: The size of the buffers :attr:`return_queue`, :attr:`length_queue` and :attr:`time_queue`
    stats_key: The info key for the episode statistics
r   rc   )maxlenN)r    r!   r"   r#   r$   
_stats_keyepisode_countepisode_start_timeepisode_returnsepisode_lengthsr   
time_queuereturn_queuelength_queue)r'   r(   buffer_length	stats_keys       r)   r#    RecordEpisodeStatistics.__init__  sy     			''006T'#)+&)$%(-](C*/}*E(-](Cr+   c                  > [         TU ]  U5      u  p#pEnU =R                  U-  sl        U =R                  S-  sl        U(       d  U(       a  U R                  U;  d   e[        [        R                  " 5       U R                  -
  S5      nU R                  U R                  US.X`R                  '   U R                  R                  U5        U R                  R                  U R                  5        U R                  R                  U R                  5        U =R                  S-  sl        [        R                  " 5       U l        X#XEU4$ )z@Steps through the environment, recording the episode statistics.r-      )rlt)r:   r.   r   r   r   roundtimeperf_counterr   r   appendr   r   r   )	r'   r/   rd   r1   r2   r3   r4   episode_time_lengthr=   s	           r)   r.   RecordEpisodeStatistics.step  s    497<3G0ZD&!??$..."'!!#d&=&==q# ))))(%D! OO""#67$$T%9%9:$$T%9%9:!#&*&7&7&9D#J477r+   Nr6   c               x   > [         TU ]  XS9u  p4[        R                  " 5       U l        SU l        SU l        X44$ )zYResets the environment using seed and options and resets the episode rewards and lengths.r6   rc   r   )r:   r;   r   r   r   r   r   )r'   r7   r8   rd   r4   r=   s        r)   r;   RecordEpisodeStatistics.reset  s@     GMtM=	"&"3"3"5" yr+   )r   r   r   r   r   r   r   r   )d   episode)r(   r   r   r   r   rl   rK   rM   )
rQ   rR   rS   rT   rU   r#   r.   r;   rW   rX   rY   s   @r)   r   r     st    (Z !"	D&D D 	D688	B8> %)4
!
3H
	'
 
r+   r   )'rU   
__future__r   r   collectionsr   copyr   typingr   r   r   	gymnasiumr    r	   gymnasium.corer
   r   r   r   gymnasium.errorr   #gymnasium.utils.passive_env_checkerr   r   r   r   r   gymnasium.envs.registrationr   __all__r$   r!   r"   r   r   r   r   r   r   r+   r)   <module>r      s)   #    4 4   H H '  3{KK'723SYY5T5T{|08KK'723SYY5T5T08fuKK'723SYY5T5Tup^KK'723SYY5T5T^BpKK'723SYY5T5Tpr+   