
    he              	         S r SSKJr  SSKJr  SSKJr  SSKJrJ	r	J
r
  SSKrSSKrSSKJr  SSKJrJrJrJr  SSKJrJrJr  SS	KJrJrJr  SS
KJrJr  / SQr  " S S\RB                  \\\4   \RD                  RF                  5      r$ " S S\RB                  \\\4   \RD                  RF                  5      r% " S S\RL                  \\\\4   \RD                  RF                  5      r' " S S\RB                  \\\4   \RD                  RF                  5      r( " S S\RL                  \\\\4   \RD                  RF                  5      r)g)a  A collection of stateful observation wrappers.

* ``DelayObservation`` - A wrapper for delaying the returned observation
* ``TimeAwareObservation`` - A wrapper for adding time aware observations to environment observation
* ``FrameStackObservation`` - Frame stack the observations
* ``NormalizeObservation`` - Normalized the observations to have unit variance with a moving mean
* ``MaxAndSkipObservation`` - Return only every ``skip``-th frame (frameskipping) and return the max between the two last frames.
    )annotations)deque)deepcopy)AnyFinalSupportsFloatN)ActTypeObsTypeWrapperActTypeWrapperObsType)BoxDictTuple)batch_spaceconcatenatecreate_empty_array)RunningMeanStdcreate_zero_array)DelayObservationTimeAwareObservationFrameStackObservationNormalizeObservationMaxAndSkipObservationc                  X   ^  \ rS rSrSrS	S jrSSS.     S
U 4S jjjrSS jrSrU =r	$ )r   #   a  Adds a delay to the returned observation from the environment.

Before reaching the :attr:`delay` number of timesteps, returned observations is an array of zeros with
the same shape as the observation space.

No vector version of the wrapper exists.

Note:
    This does not support random delay values, if users are interested, please raise an issue or pull request to add this feature.

Example:
    >>> import gymnasium as gym
    >>> env = gym.make("CartPole-v1")
    >>> env.reset(seed=123)
    (array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32), {})

    >>> env = DelayObservation(env, delay=2)
    >>> env.reset(seed=123)
    (array([0., 0., 0., 0.], dtype=float32), {})
    >>> env.step(env.action_space.sample())
    (array([0., 0., 0., 0.], dtype=float32), 1.0, False, False, {})
    >>> env.step(env.action_space.sample())
    (array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32), 1.0, False, False, {})

Change logs:
 * v1.0.0 - Initially added
c                   [         R                  " [        U5      [         R                  5      (       d  [	        S[        U5       35      eSU::  d  [        SU 35      e[        R                  R                  R                  XS9  [        R                  R                  X5        [        U5      U l        [        5       U l        g)zInitialises the DelayObservation wrapper with an integer.

Args:
    env: The environment to wrap
    delay: The number of timesteps to delay observations
z5The delay is expected to be an integer, actual type: r   z7The delay needs to be greater than zero, actual value: )delayN)np
issubdtypetypeinteger	TypeError
ValueErrorgymutilsRecordConstructorArgs__init__ObservationWrapperintr   r   observation_queue)selfenvr   s      a/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/wrappers/stateful_observation.pyr'   DelayObservation.__init__B   s     }}T%["**55GU}U  EzI%Q  			''000C''2!$U
/4w    Nseedoptionsc               R   > U R                   R                  5         [        TU ]  XS9$ )z7Resets the environment, clearing the observation queue.r0   )r*   clearsuperresetr+   r1   r2   	__class__s      r-   r6   DelayObservation.resetX   s(     	$$&w}$}88r/   c                    U R                   R                  U5        [        U R                   5      U R                  :  a  U R                   R	                  5       $ [        U R                  5      $ )zReturn the delayed observation.)r*   appendlenr   popleftr   observation_spacer+   observations     r-   r@   DelayObservation.observation`   sS    %%k2t%%&3))1133$T%;%;<<r/   )r   r*   )r,   gym.Env[ObsType, ActType]r   r)   )r1   
int | Noner2   dict[str, Any] | Nonereturnztuple[ObsType, dict[str, Any]])r@   r
   rE   r
   )
__name__
__module____qualname____firstlineno____doc__r'   r6   r@   __static_attributes____classcell__r8   s   @r-   r   r   #   s@    87. %)49!93H9	'9 9= =r/   r   c                     ^  \ rS rSrSr  SSS.       SS jjjrSS jr    SU 4S jjrSSS	.     SU 4S
 jjjrSr	U =r
$ )r   j   a  Augment the observation with the number of time steps taken within an episode.

The :attr:`normalize_time` if ``True`` represents time as a normalized value between [0,1]
otherwise if ``False``, the current timestep is an integer.

For environments with ``Dict`` observation spaces, the time information is automatically
added in the key `"time"` (can be changed through :attr:`dict_time_key`) and for environments with ``Tuple``
observation space, the time information is added as the final element in the tuple.
Otherwise, the observation space is transformed into a ``Dict`` observation space with two keys,
`"obs"` for the base environment's observation and `"time"` for the time information.

To flatten the observation, use the :attr:`flatten` parameter which will use the
:func:`gymnasium.spaces.utils.flatten` function.

No vector version of the wrapper exists.

Example:
    >>> import gymnasium as gym
    >>> from gymnasium.wrappers import TimeAwareObservation
    >>> env = gym.make("CartPole-v1")
    >>> env = TimeAwareObservation(env)
    >>> env.observation_space
    Box([-4.80000019        -inf -0.41887903        -inf  0.        ], [4.80000019e+00            inf 4.18879032e-01            inf
     5.00000000e+02], (5,), float64)
    >>> env.reset(seed=42)[0]
    array([ 0.0273956 , -0.00611216,  0.03585979,  0.0197368 ,  0.        ])
    >>> _ = env.action_space.seed(42)
    >>> env.step(env.action_space.sample())[0]
    array([ 0.02727336, -0.20172954,  0.03625453,  0.32351476,  1.        ])

Normalize time observation space example:
    >>> env = gym.make('CartPole-v1')
    >>> env = TimeAwareObservation(env, normalize_time=True)
    >>> env.observation_space
    Box([-4.8               -inf -0.41887903        -inf  0.        ], [4.8               inf 0.41887903        inf 1.        ], (5,), float32)
    >>> env.reset(seed=42)[0]
    array([ 0.0273956 , -0.00611216,  0.03585979,  0.0197368 ,  0.        ],
          dtype=float32)
    >>> _ = env.action_space.seed(42)
    >>> env.step(env.action_space.sample())[0]
    array([ 0.02727336, -0.20172954,  0.03625453,  0.32351476,  0.002     ],
          dtype=float32)

Flatten observation space example:
    >>> env = gym.make("CartPole-v1")
    >>> env = TimeAwareObservation(env, flatten=False)
    >>> env.observation_space
    Dict('obs': Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32), 'time': Box(0, 500, (1,), int32))
    >>> env.reset(seed=42)[0]
    {'obs': array([ 0.0273956 , -0.00611216,  0.03585979,  0.0197368 ], dtype=float32), 'time': array([0], dtype=int32)}
    >>> _ = env.action_space.seed(42)
    >>> env.step(env.action_space.sample())[0]
    {'obs': array([ 0.02727336, -0.20172954,  0.03625453,  0.32351476], dtype=float32), 'time': array([1], dtype=int32)}

Change logs:
 * v0.18.0 - Initially added
 * v1.0.0 - Remove vector environment support, add ``flatten`` and ``normalize_time`` parameters
time)dict_time_keyc                 ^ ^^ [         R                  R                  R                  T UUTS9  [         R                  R                  T U5        UT l        UT l        UR                  b3  UR                  R                  b  UR                  R                  T l	        OUn[        U[         R                  5      (       ah  [        U[         R                  R                  5      (       a  UR                  T l	        O-UR                  n[        U[         R                  5      (       a  Mh  [        U[         R                  R                  5      (       d  [!        S5      eST l        T R                  (       a  U 4S jT l        ['        SS5      nO+S T l        ['        ST R                  [(        R*                  S	9n[        UR,                  [.        5      (       aO  TUR,                  R1                  5       ;  d   e[/        TU0UR,                  R2                  E5      mU4S
 jT l        Og[        UR,                  [6        5      (       a,  [7        UR,                  R2                  U4-   5      mS T l        O[/        UR,                  US9mS T l        T R
                  (       a'  [2        R8                  " T5      T l        U4S jT l        gTT l        S T l        g)a  Initialize :class:`TimeAwareObservation`.

Args:
    env: The environment to apply the wrapper
    flatten: Flatten the observation to a `Box` of a single dimension
    normalize_time: if `True` return time in the range [0,1]
        otherwise return time as remaining timesteps before truncation
    dict_time_key: For environment with a ``Dict`` observation space, the key for the time space. By default, `"time"`.
)flattennormalize_timerQ   NzaThe environment must be wrapped by a TimeLimit wrapper or the spec specify a `max_episode_steps`.r   c                f   > [         R                  " U TR                  -  /[         R                  S9$ Ndtype)r   arraymax_timestepsfloat32)rP   r+   s    r-   <lambda>/TimeAwareObservation.__init__.<locals>.<lambda>   s%    bhh***+2::7r/           g      ?c                J    [         R                  " U /[         R                  S9$ rV   )r   rY   int32)rP   s    r-   r\   r]      s    bhhvRXX6Vr/   rW   c                   > TU0U E$ N )obsrP   rQ   s     r-   r\   r]      s    t7Ss7Sr/   c                    X4-   $ rb   rc   rd   rP   s     r-   r\   r]      s    sW}r/   rf   c                
    XS.$ )Nrf   rc   rf   s     r-   r\   r]      s    s7Qr/   c                2   > [         R                  " TU 5      $ rb   )spacesrS   )rd   r>   s    r-   r\   r]      s    V^^!36r/   c                    U $ rb   rc   )rd   s    r-   r\   r]      s    Sr/   )r$   r%   r&   r'   r(   rS   rT   specmax_episode_stepsrZ   
isinstanceWrapperwrappers	TimeLimit_max_episode_stepsr,   r#   	timesteps_time_preprocess_funcr   r   r`   r>   r   keysri   _append_data_funcr   flatten_space_obs_postprocess_func)r+   r,   rS   rT   rQ   wrapped_env
time_spacer>   s   `   `  @r-   r'   TimeAwareObservation.__init__   sE   " 			''00)'	 	1 	
 	''c2$++9 88CHH$>$>$J!$!;!;D K[#++66k3<<+A+ABB)4)G)GD&)oo	 [#++66 k3<<+A+ABB w    *D& S#J)VD&Q 2 2"((CJ c++T22 (=(=(B(B(DDDD $
Kc.C.C.J.JK! &TD"--u55 %c&;&;&B&Bj]&R S%DD" $)>)>Z P%QD" <<@F@T@T!AD"*D& ARD")8D&r/   c                t    U R                  U R                  XR                  U R                  5      5      5      $ )zAdds to the observation with the current time information.

Args:
    observation: The observation to add the time step to

Returns:
    The observation with the time information appended to it
)rw   ru   rs   rr   r?   s     r-   r@    TimeAwareObservation.observation   s7     ))""77G
 	
r/   c                L   > U =R                   S-  sl         [        TU ]	  U5      $ )zSteps through the environment, incrementing the time step.

Args:
    action: The action to take

Returns:
    The environment's step using the action with the next observation containing the timestep info
   )rr   r5   step)r+   actionr8   s     r-   r   TimeAwareObservation.step  s"     	!w|F##r/   Nr0   c               ,   > SU l         [        TU ]	  XS9$ )zReset the environment setting the time to zero.

Args:
    seed: The seed to reset the environment
    options: The options used to reset the environment

Returns:
    Resets the environment with the initial timestep info added the observation
r   r0   )rr   r5   r6   r7   s      r-   r6   TimeAwareObservation.reset  s     w}$}88r/   )ru   rw   rs   rS   rZ   rT   r>   rr   )TF)r,   rB   rS   boolrT   r   rQ   strr@   r
   rE   r   )r   r	   rE   @tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]r1   rC   r2   rD   rE   z%tuple[WrapperObsType, dict[str, Any]])rF   rG   rH   rI   rJ   r'   r@   r   r6   rK   rL   rM   s   @r-   r   r   j   s    9| $	Q9 $Q9&Q9 Q9 	Q9 Q9f
$$	I$  %)49!93H9	.9 9r/   r   c                  d    \ rS rSrSrSS.     SS jjr    SS jrSSS.     SS	 jjrS
rg)r   i+  a~	  Stacks the observations from the last ``N`` time steps in a rolling manner.

For example, if the number of stacks is 4, then the returned observation contains
the most recent 4 observations. For environment 'Pendulum-v1', the original observation
is an array with shape [3], so if we stack 4 observations, the processed observation
has shape [4, 3].

Users have options for the padded observation used:

 * "reset" (default) - The reset value is repeated
 * "zero" - A "zero"-like instance of the observation space
 * custom - An instance of the observation space

No vector version of the wrapper exists.

Example:
    >>> import gymnasium as gym
    >>> from gymnasium.wrappers import FrameStackObservation
    >>> env = gym.make("CarRacing-v3")
    >>> env = FrameStackObservation(env, stack_size=4)
    >>> env.observation_space
    Box(0, 255, (4, 96, 96, 3), uint8)
    >>> obs, _ = env.reset()
    >>> obs.shape
    (4, 96, 96, 3)

Example with different padding observations:
    >>> env = gym.make("CartPole-v1")
    >>> env.reset(seed=123)
    (array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32), {})
    >>> stacked_env = FrameStackObservation(env, 3)   # the default is padding_type="reset"
    >>> stacked_env.reset(seed=123)
    (array([[ 0.01823519, -0.0446179 , -0.02796401, -0.03156282],
           [ 0.01823519, -0.0446179 , -0.02796401, -0.03156282],
           [ 0.01823519, -0.0446179 , -0.02796401, -0.03156282]],
          dtype=float32), {})


    >>> stacked_env = FrameStackObservation(env, 3, padding_type="zero")
    >>> stacked_env.reset(seed=123)
    (array([[ 0.        ,  0.        ,  0.        ,  0.        ],
           [ 0.        ,  0.        ,  0.        ,  0.        ],
           [ 0.01823519, -0.0446179 , -0.02796401, -0.03156282]],
          dtype=float32), {})
    >>> stacked_env = FrameStackObservation(env, 3, padding_type=np.array([1, -1, 0, 2], dtype=np.float32))
    >>> stacked_env.reset(seed=123)
    (array([[ 1.        , -1.        ,  0.        ,  2.        ],
           [ 1.        , -1.        ,  0.        ,  2.        ],
           [ 0.01823519, -0.0446179 , -0.02796401, -0.03156282]],
          dtype=float32), {})

Change logs:
 * v0.15.0 - Initially add as ``FrameStack`` with support for lz4
 * v1.0.0 - Rename to ``FrameStackObservation`` and remove lz4 and ``LazyFrame`` support
            along with adding the ``padding_type`` parameter

r6   )padding_typec                  [         R                  R                  R                  XUS9  [         R                  R                  X5        [
        R                  " [        U5      [
        R                  5      (       d  [        S[        U5       35      eSU:  d  [        SU 35      e[        U[        5      (       a'  US:X  d  US:X  a  [        UR                  5      U l        OYX1R                  ;   a	  X0l        SnOA[        U[        5      (       a  [        SU< 35      e[        SU< S	UR                   S
35      e[!        UR                  US9U l        X l        X0l        ['        [)        U R"                  5       Vs/ s H  o@R                  PM     snU R"                  S9U l        [-        UR                  U R"                  S9U l        gs  snf )a  Observation wrapper that stacks the observations in a rolling manner.

Args:
    env: The environment to apply the wrapper
    stack_size: The number of frames to stack.
    padding_type: The padding type to use when stacking the observations, options: "reset", "zero", custom obs
)
stack_sizer   z:The stack_size is expected to be an integer, actual type: r   z<The stack_size needs to be greater than zero, actual value: r6   zero_customzaUnexpected `padding_type`, expected 'reset', 'zero' or a custom observation space, actual value: z% not an instance of env observation ())n)maxlenN)r$   r%   r&   r'   rn   r   r   r    r!   r"   r#   rm   r   r   r>   padding_valuer   r   r   r   range	obs_queuer   stacked_obs)r+   r,   r   r   _s        r-   r'   FrameStackObservation.__init__i  s    			''00l 	1 	
 	T'}}T*-rzz::LTR\M]L^_  :~Nzl[  lC((G#|v'=*;C<Q<Q*RD222!-$L,,, w  yE  xH  I  !w  yE  xH  Hm  nq  nC  nC  mD  DE  F  "-S-B-Bj!Q&0(4).t)?@)?A)?@
 .c.C.CtW As   <G
c                    U R                   R                  U5      u  p#pEnU R                  R                  U5        [	        [        U R                   R                  U R                  U R                  5      5      nXsXEU4$ )zSteps through the environment, appending the observation to the frame buffer.

Args:
    action: The action to step through the environment with

Returns:
    Stacked observations, reward, terminated, truncated, and info from the environment
)r,   r   r   r;   r   r   r>   r   )r+   r   rd   reward
terminated	truncatedinfoupdated_obss           r-   r   FrameStackObservation.step  sh     4888==3H0ZDc"22DNNDDTDTU
 J4??r/   Nr0   c                  U R                   R                  XS9u  p4U R                  S:X  a  X0l        [	        U R
                  S-
  5       H(  nU R                  R                  U R                  5        M*     U R                  R                  U5        [        [        U R                   R                  U R                  U R                  5      5      nXd4$ )zReset the environment, returning the stacked observation and info.

Args:
    seed: The environment seed
    options: The reset options

Returns:
    The stacked observations and info
r0   r6   r~   )r,   r6   r   r   r   r   r   r;   r   r   r>   r   )r+   r1   r2   rd   r   r   r   s          r-   r6   FrameStackObservation.reset  s     HHNNN>	'!$t*+ANN!!$"4"45 ,c"22DNNDDTDTU
   r/   )r   r>   r   r   r   r   )r,   rB   r   r)   r   zstr | ObsTyper   r   rE   r   r   )	rF   rG   rH   rI   rJ   r'   r   r6   rK   rc   r/   r-   r   r   +  st    8~ '.3X&3X 3X
 $3Xj@$@	I@( %)4!!!3H!	.! !r/   r   c                  l    \ rS rSrSrS	S
S jjr\SS j5       r\R                  SS j5       rSS jr	Sr
g)r   i  a  Normalizes observations to be centered at the mean with unit variance.

The property :attr:`update_running_mean` allows to freeze/continue the running mean calculation of the observation
statistics. If ``True`` (default), the ``RunningMeanStd`` will get updated every time ``step`` or ``reset`` is called.
If ``False``, the calculated statistics are used but not updated anymore; this may be used during evaluation.

A vector version of the wrapper exists :class:`gymnasium.wrappers.vector.NormalizeObservation`.

Note:
    The normalization depends on past trajectories and observations will not be normalized correctly if the wrapper was
    newly instantiated or the policy was changed recently.

Example:
    >>> import numpy as np
    >>> import gymnasium as gym
    >>> env = gym.make("CartPole-v1")
    >>> obs, info = env.reset(seed=123)
    >>> term, trunc = False, False
    >>> while not (term or trunc):
    ...     obs, _, term, trunc, _ = env.step(1)
    ...
    >>> obs
    array([ 0.1511158 ,  1.7183299 , -0.25533703, -2.8914354 ], dtype=float32)
    >>> env = gym.make("CartPole-v1")
    >>> env = NormalizeObservation(env)
    >>> obs, info = env.reset(seed=123)
    >>> term, trunc = False, False
    >>> while not (term or trunc):
    ...     obs, _, term, trunc, _ = env.step(1)
    >>> obs
    array([ 2.0059888,  1.5676788, -1.9944268, -1.6120394], dtype=float32)

Change logs:
 * v0.21.0 - Initially add
 * v1.0.0 - Add `update_running_mean` attribute to allow disabling of updating the running mean / standard, particularly useful for evaluation time.
    Casts all observations to `np.float32` and sets the observation space with low/high of `-np.inf` and `np.inf` and dtype as `np.float32`
c                   [         R                  R                  R                  XS9  [         R                  R                  X5        UR
                  R                  c   e[         R                  R                  [        R                  * [        R                  UR
                  R                  [        R                  S9U l        [        U R
                  R                  U R
                  R                  S9U l        X l        SU l        g)zThis wrapper will normalize observations such that each observation is centered with unit variance.

Args:
    env (Env): The environment to apply the wrapper
    epsilon: A stability parameter that is used when scaling the observations.
)epsilonN)lowhighshaperX   )r   rX   T)r$   r%   r&   r'   r(   r>   r   ri   r   r   infr[   r   rX   obs_rmsr   _update_running_mean)r+   r,   r   s      r-   r'   NormalizeObservation.__init__  s     			''000G''2$$**666!$''--**	 "0 "
 &((..d6L6L6R6R
 $(!r/   c                    U R                   $ )zWProperty to freeze/continue the running mean calculation of the observation statistics.r   )r+   s    r-   update_running_mean(NormalizeObservation.update_running_mean  s     (((r/   c                    Xl         g)z`Sets the property to freeze/continue the running mean calculation of the observation statistics.Nr   )r+   settings     r-   r   r     s
     %,!r/   c                L   U R                   (       a0  U R                  R                  [        R                  " U/5      5        [        R
                  " XR                  R                  -
  [        R                  " U R                  R                  U R                  -   5      -  5      $ )zSNormalises the observation using the running mean and variance of the observations.)
r   r   updater   rY   r[   meansqrtvarr   r?   s     r-   r@    NormalizeObservation.observation  sj    $$LL+ 78zz<<,,,8H8H4<<8W0XX
 	
r/   )r   r   r   r>   N)g:0yE>)r,   rB   r   float)rE   r   )r   r   r   )rF   rG   rH   rI   rJ   r'   propertyr   setterr@   rK   rc   r/   r-   r   r     s@    $L)0 ) ) ,  ,
r/   r   c                  8    \ rS rSrSrSSS jjr    S	S jrSrg)
r   i   a  Skips the N-th frame (observation) and return the max values between the two last observations.

No vector version of the wrapper exists.

Note:
    This wrapper is based on the wrapper from [stable-baselines3](https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html#MaxAndSkipEnv)

Example:
    >>> import gymnasium as gym
    >>> env = gym.make("CartPole-v1")
    >>> obs0, *_ = env.reset(seed=123)
    >>> obs1, *_ = env.step(1)
    >>> obs2, *_ = env.step(1)
    >>> obs3, *_ = env.step(1)
    >>> obs4, *_ = env.step(1)
    >>> skip_and_max_obs = np.max(np.stack([obs3, obs4], axis=0), axis=0)
    >>> env = gym.make("CartPole-v1")
    >>> wrapped_env = MaxAndSkipObservation(env)
    >>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
    >>> wrapped_obs1, *_ = wrapped_env.step(1)
    >>> np.all(obs0 == wrapped_obs0)
    np.True_
    >>> np.all(wrapped_obs1 == skip_and_max_obs)
    np.True_

Change logs:
 * v1.0.0 - Initially add
c                (   [         R                  R                  R                  XS9  [         R                  R                  X5        [
        R                  " [        U5      [
        R                  5      (       d  [        S[        U5       35      eUS:  a  [        SU 35      eUR                  R                  c  [        S5      eX l        [
        R                  " S/UR                  R                  Q7UR                  R                  S9U l        g)zThis wrapper will return only every ``skip``-th frame (frameskipping) and return the max between the two last frames.

Args:
    env (Env): The environment to apply the wrapper
    skip: The number of frames to skip
)skipz4The skip is expected to be an integer, actual type:    zDThe skip value needs to be equal or greater than two, actual value: Nz4The observation space must have the shape attribute.rW   )r$   r%   r&   r'   rn   r   r   r    r!   r"   r#   r>   r   _skipzerosrX   _obs_buffer)r+   r,   r   s      r-   r'   MaxAndSkipObservation.__init__A  s     			''000AT'}}T$Z44FtDzlS  !8VW[V\]    &&.STT
88-&&,,-S5J5J5P5P
r/   c                   SnS=p40 n[        U R                  5       H  nU R                  R                  U5      u  pxp4nX`R                  S-
  :X  a  XpR                  S'   X`R                  S-
  :X  a  XpR                  S'   U[        U5      -  nU(       d	  U(       d  M    O   [        R                  " U R                  SS9n	XX4U4$ )a*  Step the environment with the given action for ``skip`` steps.

Repeat action, sum reward, and max over last observations.

Args:
    action: The action to step through the environment with
Returns:
    Max of the last two observations, reward, terminated, truncated, and info from the environment
r^   Fr   r   r~   )axis)r   r   r,   r   r   r   r   max)
r+   r   total_rewardr   r   r   ird   r   	max_frames
             r-   r   MaxAndSkipObservation.step[  s     !&&
tzz"A7;xx}}V7L4CJJN"&)  #JJN"&)  #E&M)LYY # FF4++!4	
tCCr/   )r   r   N)   )r,   rB   r   r)   r   )rF   rG   rH   rI   rJ   r'   r   rK   rc   r/   r-   r   r      s$    :
4D$D	IDr/   r   )*rJ   
__future__r   collectionsr   copyr   typingr   r   r   numpyr   	gymnasiumr$   gymnasium.spacesri   gymnasium.corer	   r
   r   r   r   r   r   gymnasium.vector.utilsr   r   r   gymnasium.wrappers.utilsr   r   __all__r(   r%   r&   r   r   rn   r   r   r   rc   r/   r-   <module>r      s+   #   , ,   ! K K - - O O FD=7GW45syy7V7VD=N~9>7G;<II##~9B]!KK'9:II##]!@R
>7G;<II##R
jUDKK'9:II##UDr/   