
    h,              	       b   S SK Jr  S SKrS SKrS SKJr  S SKJrJ	r	   S SK
r
\
R                  R                  S5         " S S\R                  \R                  \\R                  \4   5      r " S S	\R                  \R                  \\R                  \4   5      r " S
 S\R                  \R                  \\R                  \4   5      r " S S\R                  \R                  \\R                  \4   5      r " S S\R                  \R                  \\R                  \4   5      r " S S\R,                  5      r " S S\R0                  \R                  \\R                  4   5      r " S S\R                  \R                  \\R                  \4   5      rg! \ a    Sr
 GNpf = f)    )SupportsFloatN)spaces)AtariResetReturnAtariStepReturnFc                   p   ^  \ rS rSrSrS\R                  S\SS4U 4S jjrS\	4S jr
S	\S\4S
 jrSrU =r$ )StickyActionEnv   z
Sticky action.

Paper: https://arxiv.org/abs/1709.06009
Official implementation: https://github.com/mgbellemare/Arcade-Learning-Environment

:param env: Environment to wrap
:param action_repeat_probability: Probability of repeating the last action
envaction_repeat_probabilityreturnNc                 v   > [         TU ]  U5        X l        UR                  R	                  5       S   S:X  d   eg Nr   NOOP)super__init__r   	unwrappedget_action_meanings)selfr
   r   	__class__s      a/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/atari_wrappers.pyr   StickyActionEnv.__init__   s6    )B&}}00215???    c                 H    SU l         U R                  R                  " S0 UD6$ )Nr    )_sticky_actionr
   reset)r   kwargss     r   r   StickyActionEnv.reset!   s     xx~~'''r   actionc                     U R                   R                  5       U R                  :  a  Xl        U R                  R                  U R                  5      $ N)	np_randomrandomr   r   r
   step)r   r   s     r   r$   StickyActionEnv.step%   s<    >>  "d&D&DD"(xx}}T0011r   )r   r   )__name__
__module____qualname____firstlineno____doc__gymEnvfloatr   r   r   intr   r$   __static_attributes____classcell__r   s   @r   r   r      sO    @CGG @ @$ @
(!1 (23 2? 2 2r   r   c                   b   ^  \ rS rSrSrS
S\R                  S\SS4U 4S jjjrS\	4S jr
S	rU =r$ )NoopResetEnv+   z
Sample initial states by taking random number of no-ops on reset.
No-op is assumed to be action 0.

:param env: Environment to wrap
:param noop_max: Maximum value of no-ops to run
r
   noop_maxr   Nc                    > [         TU ]  U5        X l        S U l        SU l        UR
                  R                  5       S   S:X  d   eg r   )r   r   r5   override_num_noopsnoop_actionr   r   )r   r
   r5   r   s      r   r   NoopResetEnv.__init__4   sE     "&}}00215???r   c                    U R                   R                  " S0 UD6  U R                  b  U R                  nO3U R                  R                  R                  SU R                  S-   5      nUS:  d   e[        R                  " S5      n0 n[        U5       HZ  nU R                   R                  U R                  5      u  p5pgnU(       d	  U(       d  M<  U R                   R                  " S0 UD6u  p4M\     X44$ )N   r   r   )r
   r   r7   r   r"   integersr5   npzerosranger$   r8   )r   r   noopsobsinfo_
terminated	truncateds           r   r   NoopResetEnv.reset;   s      "".++ENN,,55a9JKEqyyhhqkuA26((--@P@P2Q/CJ4YY HHNN4V4	T  yr   )r8   r5   r7   )   )r&   r'   r(   r)   r*   r+   r,   r.   r   r   r   r/   r0   r1   s   @r   r3   r3   +   sB    @CGG @s @D @ @!1  r   r3   c                   Z   ^  \ rS rSrSrS\R                  SS4U 4S jjrS\4S jr	Sr
U =r$ )	FireResetEnvK   ze
Take action on reset for environments that are fixed until firing.

:param env: Environment to wrap
r
   r   Nc                    > [         TU ]  U5        UR                  R                  5       S   S:X  d   e[	        UR                  R                  5       5      S:  d   eg )Nr;   FIRE   )r   r   r   r   lenr   r
   r   s     r   r   FireResetEnv.__init__R   sP    }}00215???3==44671<<<r   c                 f   U R                   R                  " S0 UD6  U R                   R                  S5      u  p#pEnU(       d  U(       a  U R                   R                  " S0 UD6  U R                   R                  S5      u  p#pEnU(       d  U(       a  U R                   R                  " S0 UD6  U0 4$ )Nr;      r   )r
   r   r$   )r   r   rA   rC   rD   rE   s         r   r   FireResetEnv.resetW   s      +/88==+;(
qHHNN$V$+/88==+;(
qHHNN$V$Bwr   r   )r&   r'   r(   r)   r*   r+   r,   r   r   r   r/   r0   r1   s   @r   rI   rI   K   s1    =CGG = =
!1  r   rI   c                   l   ^  \ rS rSrSrS\R                  SS4U 4S jjrS\S\	4S jr
S\4S	 jrS
rU =r$ )EpisodicLifeEnvb   z
Make end-of-life == end-of-episode, but only reset on true game over.
Done by DeepMind for the DQN and co. since it helps value estimation.

:param env: Environment to wrap
r
   r   Nc                 @   > [         TU ]  U5        SU l        SU l        g Nr   T)r   r   liveswas_real_donerO   s     r   r   EpisodicLifeEnv.__init__j   s    
!r   r   c                 
   U R                   R                  U5      u  p#pEnU=(       d    UU l        U R                   R                  R                  R                  5       nSUs=:  a  U R
                  :  a  O  OSnXpl        X#XEU4$ rX   )r
   r$   rZ   r   alerY   )r   r   rA   rewardrD   rE   rB   rY   s           r   r$   EpisodicLifeEnv.stepo   sr    3788==3H0ZD'49 ""&&,,.u!tzz! J
J477r   c                 d   U R                   (       a  U R                  R                  " S0 UD6u  p#OKU R                  R                  S5      u  p$pVnU(       d  U(       a  U R                  R                  " S0 UD6u  p#U R                  R                  R
                  R                  5       U l        X#4$ )a8  
Calls the Gym environment reset, only when lives are exhausted.
This way all states are still reachable even though lives are episodic,
and the learner need not know about any of this behind-the-scenes.

:param kwargs: Extra keywords passed to env.reset() call
:return: the first observation of the environment
r   r   )rZ   r
   r   r$   r   r]   rY   )r   r   rA   rB   rC   rD   rE   s          r   r   EpisodicLifeEnv.reset}   s     00IC 37((--2B/CJ4
 Y HHNN4V4	XX''++113
yr   )rY   rZ   )r&   r'   r(   r)   r*   r+   r,   r   r.   r   r$   r   r   r/   r0   r1   s   @r   rU   rU   b   sD    "CGG " "
83 8? 8!1  r   rU   c                   f   ^  \ rS rSrSrSS\R                  S\SS4U 4S jjjrS\S\	4S	 jr
S
rU =r$ )MaxAndSkipEnv   z
Return only every ``skip``-th frame (frameskipping)
and return the max between the two last frames.

:param env: Environment to wrap
:param skip: Number of ``skip``-th frame
    The same action will be taken ``skip`` times.
r
   skipr   Nc                 4  > [         TU ]  U5        UR                  R                  c   S5       eUR                  R                  c   S5       e[
        R                  " S/UR                  R                  Q7UR                  R                  S9U l        X l        g )Nz,No dtype specified for the observation spacez*No shape defined for the observation spacerR   )dtype)	r   r   observation_spacerg   shaper=   r>   _obs_buffer_skip)r   r
   re   r   s      r   r   MaxAndSkipEnv.__init__   s    $$**6f8ff6$$**6d8dd688Q$E)>)>)D)D$ESMbMbMhMhi
r   r   c                    SnS=p4[        U R                  5       H  nU R                  R                  U5      u  pgp4nU=(       d    Un	XPR                  S-
  :X  a  X`R                  S'   XPR                  S-
  :X  a  X`R                  S'   U[        U5      -  nU	(       d  M    O   U R                  R                  SS9n
XX4W4$ )z
Step the environment with the given action
Repeat action, sum reward, and max over last observations.

:param action: the action
:return: observation, reward, terminated, truncated, information
        FrR   r   r;   )axis)r?   rk   r
   r$   rj   r-   max)r   r   total_rewardrD   rE   irA   r^   rB   done	max_frames              r   r$   MaxAndSkipEnv.step   s     !&&
tzz"A7;xx}}V7L4C*DJJN"&)  #JJN"&)  #E&M)Lt # $$((a(0	
tCCr   )rj   rk   )   )r&   r'   r(   r)   r*   r+   r,   r.   r   r   r$   r/   r0   r1   s   @r   rc   rc      sG    CGG 3 t  D3 D? D Dr   rc   c                   ^   ^  \ rS rSrSrS\R                  SS4U 4S jjrS\S\	4S jr
S	rU =r$ )
ClipRewardEnv   zN
Clip the reward to {+1, 0, -1} by its sign.

:param env: Environment to wrap
r
   r   Nc                 $   > [         TU ]  U5        g r!   )r   r   rO   s     r   r   ClipRewardEnv.__init__   s    r   r^   c                 @    [         R                  " [        U5      5      $ )zA
Bin reward to {+1, 0, -1} by its sign.

:param reward:
:return:
)r=   signr-   )r   r^   s     r   r^   ClipRewardEnv.reward   s     wwuV}%%r   r   )r&   r'   r(   r)   r*   r+   r,   r   r   r-   r^   r/   r0   r1   s   @r   rx   rx      s7    CGG  &] &u & &r   rx   c            	          ^  \ rS rSrSrSS\R                  S\S\SS4U 4S jjjrS	\	R                  S\	R                  4S
 jrSrU =r$ )	WarpFrame   z
Convert to grayscale and warp frames to 84x84 (default)
as done in the Nature paper and later work.

:param env: Environment to wrap
:param width: New frame width
:param height: New frame height
r
   widthheightr   Nc                 D  > [         TU ]  U5        X l        X0l        [	        UR
                  [        R                  5      (       d   SUR
                   35       e[        R                  " SSU R                  U R                  S4UR
                  R                  S9U l        g )NzExpected Box space, got r      r;   )lowhighri   rg   )	r   r   r   r   
isinstancerh   r   Boxrg   )r   r
   r   r   r   s       r   r   WarpFrame.__init__   s    
#//<<p@XY\YnYnXo>pp<!';;

A.''--	"
r   framec                     [         c   S5       e[         R                  " U[         R                  5      n[         R                  " XR                  U R
                  4[         R                  S9nUSS2SS2S4   $ )zh
returns the current observation from a frame

:param frame: environment frame
:return: the observation
Nz?OpenCV is not installed, you can do `pip install opencv-python`)interpolation)cv2cvtColorCOLOR_RGB2GRAYresizer   r   
INTER_AREA)r   r   s     r   observationWarpFrame.observation   s\     a aaUC$6$67

5::t{{";3>>ZQ4Z  r   )r   rh   r   )T   r   )r&   r'   r(   r)   r*   r+   r,   r.   r   r=   ndarrayr   r/   r0   r1   s   @r   r   r      sR    
CGG 
C 
c 
4 
 

! 
!

 
! 
!r   r   c                   t   ^  \ rS rSrSr      SS\R                  S\S\S\S\S\S	\	S
S4U 4S jjjr
SrU =r$ )AtariWrapper   ar  
Atari 2600 preprocessings

Specifically:

* Noop reset: obtain initial state by taking random number of no-ops on reset.
* Frame skipping: 4 by default
* Max-pooling: most recent two observations
* Termination signal when a life is lost.
* Resize to a square image: 84x84 by default
* Grayscale observation
* Clip reward to {-1, 0, 1}
* Sticky actions: disabled by default

See https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/
for a visual explanation.

.. warning::
    Use this wrapper only with Atari v4 without frame skip: ``env_id = "*NoFrameskip-v4"``.

:param env: Environment to wrap
:param noop_max: Max number of no-ops
:param frame_skip: Frequency at which the agent experiences the game.
    This correspond to repeating the action ``frame_skip`` times.
:param screen_size: Resize Atari frame
:param terminal_on_life_loss: If True, then step() returns done=True whenever a life is lost.
:param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
:param action_repeat_probability: Probability of repeating the last action
r
   r5   
frame_skipscreen_sizeterminal_on_life_lossclip_rewardr   r   Nc                 0  > US:  a  [        X5      nUS:  a	  [        XS9nUS:  a	  [        XS9nU(       a  [        U5      nSUR                  R                  5       ;   a  [        U5      n[        XUS9nU(       a  [        U5      n[        TU ])  U5        g )Nrn   r   )r5   r;   )re   rL   )r   r   )r   r3   rc   rU   r   r   rI   r   rx   r   r   )	r   r
   r5   r   r   r   r   r   r   s	           r   r   AtariWrapper.__init__  s     %s*!#ACa<s6C>5C !#&CS]]6688s#C{C$Cr   r   )rG   rv   r   TTrn   )r&   r'   r(   r)   r*   r+   r,   r.   boolr-   r   r/   r0   r1   s   @r   r   r      sy    B &* +.WW  	
   $  $) 
 r   r   )typingr   	gymnasiumr+   numpyr=   r   %stable_baselines3.common.type_aliasesr   r   r   oclsetUseOpenCLImportErrorWrapperr   r.   r   r3   rI   rU   rc   RewardWrapperrx   ObservationWrapperr   r   r   r   r   <module>r      sg        SGG
2ckk"**c2::s"BC 243;;rzz3

C?@ @3;;rzz3

C?@ .0ckk"**c2::s"BC 0f*DCKK

CS @A *DZ&C%% &(!!&&rzz3

'BC !!H83;;rzz3

C?@ 8[  
Cs   F" "F.-F.