
    h                         S r SSKJr  SSKrSSKJr  SSKrSSKJ	r	J
r
  SSKJr  SSKJrJrJrJr  S/r " S	 S\5      rg)
z?Wrapper that tracks the cumulative rewards and episode lengths.    )annotationsN)deque)ActTypeObsType)warn)	ArrayTypeAutoresetMode	VectorEnvVectorWrapperRecordEpisodeStatisticsc                  r   ^  \ rS rSrSr  S     SU 4S jjjr  S	   S
U 4S jjjr    SS jrSrU =r	$ )r      a`  This wrapper will keep track of cumulative rewards and episode lengths.

At the end of any episode within the vectorized env, the statistics of the episode
will be added to ``info`` using the key ``episode``, and the ``_episode`` key
is used to indicate the environment index which has a terminated or truncated episode.

    >>> infos = {  # doctest: +SKIP
    ...     ...
    ...     "episode": {
    ...         "r": "<array of cumulative reward for each done sub-environment>",
    ...         "l": "<array of episode length for each done sub-environment>",
    ...         "t": "<array of elapsed time since beginning of episode for each done sub-environment>"
    ...     },
    ...     "_episode": "<boolean array of length num-envs>"
    ... }

Moreover, the most recent rewards and episode lengths are stored in buffers that can be accessed via
:attr:`wrapped_env.return_queue` and :attr:`wrapped_env.length_queue` respectively.

Attributes:
    return_queue: The cumulative rewards of the last ``deque_size``-many episodes
    length_queue: The lengths of the last ``deque_size``-many episodes

Example:
    >>> from pprint import pprint
    >>> import gymnasium as gym
    >>> envs = gym.make_vec("CartPole-v1", num_envs=3)
    >>> envs = RecordEpisodeStatistics(envs)
    >>> obs, info = envs.reset(123)
    >>> _ = envs.action_space.seed(123)
    >>> end = False
    >>> while not end:
    ...     obs, rew, term, trunc, info = envs.step(envs.action_space.sample())
    ...     end = term.any() or trunc.any()
    ...
    >>> envs.close()
    >>> pprint(info) # doctest: +SKIP
    {'_episode': array([ True, False, False]),
     '_final_info': array([ True, False, False]),
     '_final_observation': array([ True, False, False]),
     'episode': {'l': array([11,  0,  0], dtype=int32),
                 'r': array([11.,  0.,  0.], dtype=float32),
                 't': array([0.007812, 0.      , 0.      ], dtype=float32)},
     'final_info': array([{}, None, None], dtype=object),
     'final_observation': array([array([ 0.11448676,  0.9416149 , -0.20946532, -1.7619033 ], dtype=float32),
           None, None], dtype=object)}
c                  > [         TU ]  U5        X0l        SU R                  R                  ;  a$  [        U  S35        [        R                  U l        OL[        U R                  R                  S   [        5      (       d   eU R                  R                  S   U l        SU l
        [        R                  " U R                  45      U l        [        R                  " U R                  45      U l        [        R                  " U R                  4[         S9U l        [        R                  " U R                  4[$        S9U l        [)        US9U l        [)        US9U l        [)        US9U l        g)a  This wrapper will keep track of cumulative rewards and episode lengths.

Args:
    env (Env): The environment to apply the wrapper
    buffer_length: The size of the buffers :attr:`return_queue`, :attr:`length_queue` and :attr:`time_queue`
    stats_key: The info key to save the data
autoreset_modez is missing `autoreset_mode` tag in its metadata, therefore, `RecordEpisodeStatistics` is assuming that the environment uses `AutoresetMode.NEXT_STEP`. See `https://farama.org/Vector-Autoreset-Mode` for more information on autoreset modes.r   dtype)maxlenN)super__init__
_stats_keyenvmetadatar   r	   	NEXT_STEP_autoreset_mode
isinstanceepisode_countnpzerosnum_envsepisode_start_timesepisode_returnsintepisode_lengthsbool
prev_donesr   
time_queuereturn_queuelength_queue)selfr   buffer_length	stats_key	__class__s       Z/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/wrappers/vector/common.pyr    RecordEpisodeStatistics.__init__H   s    	#488#4#44&  H  I $1#:#:D dhh//0@A=QQQQ#'88#4#45E#FD /1xx8H/I +-88T]]4D+E+-88T]]4DC+P&(hh/?t&L}5!7!7    c                  > [         TU ]  XS9u  p4UGb/  SU;   Ga(  UR                  S5      n[        U[        R
                  5      (       d   S[        U5       35       eUR                  U R                  4:X  d!   SU R                   SUR                   35       eUR                  [        R                  :X  d   SUR                   35       e[        R                  " U5      (       d
   SU 35       e[        R                  " 5       U R                  U'   SU R                  U'   SU R                   U'   S	U R"                  U'   X44$ [        R$                  " U R                  [        R                  " 5       5      U l        [        R&                  " U R                  5      U l        [        R&                  " U R                  [(        S
9U l        [        R&                  " U R                  [*        S
9U l        X44$ )zOResets the environment using kwargs and resets the episode returns and lengths.)seedoptions
reset_maskz9`options['reset_mask': mask]` must be a numpy array, got z0`options['reset_mask': mask]` must have shape `(z	,)`, got z>`options['reset_mask': mask]` must have `dtype=np.bool_`, got zK`options['reset_mask': mask]` must contain a boolean array, got reset_mask=r   Fr   )r   resetpopr   r   ndarraytypeshaper   r   bool_anytimeperf_counterr    r!   r#   r%   fullr   r"   r$   )r)   r1   r2   obsinfor3   r,   s         r-   r4   RecordEpisodeStatistics.resetk   s    GMtM=	<7#: \2JBJJ  ^J4PZK[J\]^  ##(  mA$--PYZdZjZjYklm    BHH,cOPZP`P`Oabc,66  j\]g\hij  483D3D3FD$$Z0/0D  ,/0D  ,*/DOOJ' y (*wwt}}d>O>O>Q'RD$#%88DMM#:D #%88DMM#ED  hht}}DADOyr/   c           	        U R                   R                  U5      u  nnnnn[        U[        5      (       d   S[	        U5       S35       eSU R
                  U R                  '   U R
                  [        R                  " U R                  5      ==   U[        R                  " U R                  5         -  ss'   SU R                  U R                  '   U R                  U R                  ) ==   S-  ss'   [        R                  " 5       U R                  U R                  '   [        R                  " XE5      =U l        n[        R                  " U5      nU(       Ga  U R                  U;   d  SU R                   3U;   a2  [!        SU R                   S[#        UR%                  5       5       35      e[        R&                  " [        R                  " 5       U R                  -
  S5      n	[        R(                  " XpR
                  S	5      [        R(                  " XpR                  S5      [        R(                  " XyS	5      S
.X`R                  '   XvSU R                   3'   U =R*                  U-  sl        [        R(                  " U5       Hp  n
U R,                  R/                  X   5        U R0                  R/                  U R
                  U
   5        U R2                  R/                  U R                  U
   5        Mr     UUUUU4$ )z@Steps through the environment, recording the episode statistics.zW`vector.RecordEpisodeStatistics` requires `info` type to be `dict`, its actual type is z@. This may be due to usage of other wrappers in the wrong order.r      _z)Attempted to add episode stats with key 'z'' but this key already exists in info:    g        )rlt)r   stepr   dictr7   r!   r%   r   logical_notr#   r;   r<   r    
logical_orsumr   
ValueErrorlistkeysroundwherer   r&   extendr'   r(   )r)   actionsobservationsrewardsterminationstruncationsinfosdones	num_donesepisode_time_lengthis              r-   rH   RecordEpisodeStatistics.step   s    HHMM'"	
 4
 
 	sdeijoepdq  rr  s	s 
 12T__-R^^DOO<=NN4??+B
 	
= 12T__-doo-.!3.484E4E4G  1"$--"JJ%FF5M	%'Qt.?+@E+I ??PPwx|  ~C  ~H  ~H  ~J  yK  xL  M  ')hh%%'$*B*BBA'# %)=)=sC%)=)=qA%cB*oo&
 05$//*+,)+XXe_&&':'=>!!(()=)=a)@A!!(()=)=a)@A % 
 	
r/   )
r   r   r   r#   r!   r    r(   r%   r'   r&   )d   episode)r   r
   r*   r"   r+   str)NN)r1   zint | list[int] | Noner2   zdict | None)rS   r   returnz5tuple[ObsType, ArrayType, ArrayType, ArrayType, dict])
__name__
__module____qualname____firstlineno____doc__r   r4   rH   __static_attributes____classcell__)r,   s   @r-   r   r      sw    .f !"	!8!8 !8 	!8 !8J (,#!$! ! !F:
:
	>:
 :
r/   )rf   
__future__r   r;   collectionsr   numpyr   gymnasium.corer   r   gymnasium.loggerr   gymnasium.vector.vector_envr   r	   r
   r   __all__r    r/   r-   <module>rq      s<    E "    + !  %
%q
m q
r/   