
    h                        S r SSKJr  SSKJr  SSKrSSKrSSK	J
r
  SSKJr  SSKJrJrJr  SSKJr  S	/r " S
 S	\\R(                  R*                  5      rg)zhA collection of stateful observation wrappers.

* ``NormalizeObservation`` - Normalize the observations
    )annotations)AnyN)ObsType)warn)AutoresetMode	VectorEnvVectorObservationWrapper)RunningMeanStdNormalizeObservationc                     ^  \ rS rSrSrSSS jjr\SS j5       r\R                  SS j5       rSSS.     SU 4S jjjr	SS	 jr
S
rU =r$ )r      a  This wrapper will normalize observations s.t. each coordinate is centered with unit variance.

The property `_update_running_mean` allows to freeze/continue the running mean calculation of the observation
statistics. If `True` (default), the `RunningMeanStd` will get updated every step and reset call.
If `False`, the calculated statistics are used but not updated anymore; this may be used during evaluation.

Note:
    The normalization depends on past trajectories and observations will not be normalized correctly if the wrapper was
    newly instantiated or the policy was changed recently.

Example without the normalize reward wrapper:
    >>> import gymnasium as gym
    >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
    >>> obs, info = envs.reset(seed=123)
    >>> _ = envs.action_space.seed(123)
    >>> for _ in range(100):
    ...     obs, *_ = envs.step(envs.action_space.sample())
    >>> np.mean(obs)
    np.float32(0.024251968)
    >>> np.std(obs)
    np.float32(0.62259156)
    >>> envs.close()

Example with the normalize reward wrapper:
    >>> import gymnasium as gym
    >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
    >>> envs = NormalizeObservation(envs)
    >>> obs, info = envs.reset(seed=123)
    >>> _ = envs.action_space.seed(123)
    >>> for _ in range(100):
    ...     obs, *_ = envs.step(envs.action_space.sample())
    >>> np.mean(obs)
    np.float32(-0.2359734)
    >>> np.std(obs)
    np.float32(1.1938739)
    >>> envs.close()
c                   [         R                  R                  R                  XS9  [        R                  " X5        SU R
                  R                  ;  a  [        U  S35        O.U R
                  R                  S   [        R                  1;   d   e[        U R                  R                  U R                  R                  S9U l        X l        SU l        g)zThis wrapper will normalize observations s.t. each coordinate is centered with unit variance.

Args:
    env (Env): The environment to apply the wrapper
    epsilon: A stability parameter that is used when scaling the observations.
)epsilonautoreset_modez is missing `autoreset_mode` data. Assuming that the vector environment it follows the `NextStep` autoreset api or autoreset is disabled. Read https://farama.org/Vector-Autoreset-Mode for more details.)shapedtypeTN)gymutilsRecordConstructorArgs__init__r	   envmetadatar   r   	NEXT_STEPr
   single_observation_spacer   r   obs_rmsr   _update_running_mean)selfr   r   s      h/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/wrappers/vector/stateful_observation.pyr   NormalizeObservation.__init__A   s     			''000G ))$4488#4#44&  b  c 88$$%56=;R;R:SSSS%//55//55
 $(!    c                    U R                   $ )zWProperty to freeze/continue the running mean calculation of the observation statistics.r   )r   s    r   update_running_mean(NormalizeObservation.update_running_meanY   s     (((r    c                    Xl         g)z`Sets the property to freeze/continue the running mean calculation of the observation statistics.Nr"   )r   settings     r   r#   r$   ^   s
     %,!r    Nseedoptionsc               p   > Ub&  SU;  d   [         R                  " US   5      (       d   e[        TU ]  XS9$ )zVReset function for `NormalizeObservationWrapper` which is disabled for partial resets.
reset_maskr'   )npallsuperreset)r   r(   r)   	__class__s      r   r/   NormalizeObservation.resetc   sC     O7*vvgl+,,	
- w}$}88r    c                    U R                   (       a  U R                  R                  U5        XR                  R                  -
  [        R
                  " U R                  R                  U R                  -   5      -  $ )zDefines the vector observation normalization function.

Args:
    observations: A vector observation from the environment

Returns:
    the normalized observation
)r   r   updatemeanr,   sqrtvarr   )r   observationss     r   r7   !NormalizeObservation.observationsq   sZ     $$LL-||000BGGLLt||+5
 
 	
r    )r   r   r   )g:0yE>)r   r   r   float)returnbool)r&   r;   )r(   zint | list[int] | Noner)   zdict[str, Any] | Noner:   ztuple[ObsType, dict[str, Any]])r7   r   r:   r   )__name__
__module____qualname____firstlineno____doc__r   propertyr#   setterr/   r7   __static_attributes____classcell__)r0   s   @r   r   r      sv    $L)0 ) ) ,  , (,)-	9 %9 '	9
 
(9 9
 
r    )r@   
__future__r   typingr   numpyr,   	gymnasiumr   gymnasium.corer   gymnasium.loggerr   gymnasium.vector.vector_envr   r   r	   gymnasium.wrappers.utilsr
   __all__r   r   r    r    r   <module>rO      sR   
 #    " ! 
 4 "
"d
3SYY5T5T d
r    