
    hT5                    ^   % S r SSKJr  SSKJr  SSKJrJrJr  SSK	r	SSK
Jr  SSKrSSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  \(       a  SSKr " S S\5      r " S S\5      r\	R@                  r!S\"S'   S r# " S S\\\	R@                  \$\%\&\S4   5      r' " S S\\5      r(\)S:X  at   \" \(" SS95      r*\*RW                  5       u  r,r-\." \,\-5        Sr/\/(       d:  \$" \0" S5      5      r1\*Re                  \15      u  r,r3r/r4r-\." \,\3\/\4\-5        \/(       d  M:  \5" 5         gg)zpThis module provides a CliffWalking functional environment and Gymnasium environment wrapper CliffWalkingJaxEnv.    )annotations)path)TYPE_CHECKING
NamedTuple	TypeAliasN)spaces)FunctionalJaxEnv)DependencyNotInstalled)ActTypeFuncEnv)EzPickle)AutoresetMode)HumanRenderingc                      \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   Srg)RenderStateType   znA named tuple which contains the full render state of the Cliffwalking Env. This is static during the episode.zpygame.Surfacescreenztuple[int, int]shapeintnS	cell_sizez
np.ndarraycliffzEtuple[pygame.Surface, pygame.Surface, pygame.Surface, pygame.Surface]
elf_images	start_imggoal_imgztuple[str, str]bg_imgsz%tuple[pygame.Surface, pygame.Surface]mountain_bg_imgnear_cliff_imgsnear_cliff_img	cliff_img N__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__r!       ]/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/envs/tabular/cliffwalking.pyr   r      sR    xGUU::$$99r*   r   c                  8    \ rS rSr% SrS\S'   S\S'   S\S'   S	rg
)EnvState.   zEA named tuple which contains the full state of the Cliffwalking game.	jax.Arrayplayer_positionr   last_actionboolfallenr!   Nr"   r!   r*   r+   r-   r-   .   s    OLr*   r-   r   PRNGKeyTypec                6    U S   S:H  U S   S:  -  U S   S:*  -  $ )zNChecks to see if the player_position means the player has fallen of the cliff.r         
   r!   )r0   s    r+   fell_offr9   9   s:     
	q	 1"	$1#	%r*   c                  \   \ rS rSrSr\R                  " SS\R                  S9r	\R                  " SSS\R                  S9r
S	/S
\R                  S.r S         SS jjrSSS jjrSSS jjrSSS jjr S         SS jjr S     SS jjr S       SS jjrSS jrSrg)CliffWalkingFunctionalB   u  Cliff walking involves crossing a gridworld from start to goal while avoiding falling off a cliff.

## Description
The game starts with the player at location [3, 0] of the 4x12 grid world with the
goal located at [3, 11]. If the player reaches the goal the episode ends.

A cliff runs along [3, 1..10]. If the player moves to a cliff location it
returns to the start location.

The player makes moves until they reach the goal.

Adapted from Example 6.6 (page 132) from Reinforcement Learning: An Introduction
by Sutton and Barto [<a href="#cliffwalk_ref">1</a>].

With inspiration from:
[https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py](https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py)

## Action Space
The action shape is `(1,)` in the range `{0, 3}` indicating
which direction to move the player.

- 0: Move up
- 1: Move right
- 2: Move down
- 3: Move left

## Observation Space
There are 3 x 12 + 1 possible states. The player cannot be at the cliff, nor at
the goal as the latter results in the end of the episode. What remains are all
the positions of the first 3 rows plus the bottom-left cell.

The observation is a value representing the player's current position as
current_row * ncols + current_col (where both the row and col start at 0).

For example, the starting position can be calculated as follows: 3 * 12 + 0 = 36.

The observation is returned as an `numpy.ndarray` with shape `(1,)` and dtype `numpy.int32` .

## Starting State
The episode starts with the player in state `[36]` (location [3, 0]).

## Reward
Each time step incurs -1 reward, unless the player stepped into the cliff,
which incurs -100 reward.

## Episode End
The episode terminates when the player enters state `[47]` (location [3, 11]).


## Arguments

```python
import gymnasium as gym
gym.make('tablular/CliffWalking-v0')
```

## References
<a id="cliffwalk_ref"></a>[1] R. Sutton and A. Barto, “Reinforcement Learning:
An Introduction” 2020. [Online]. Available: [http://www.incompleteideas.net/book/RLbook2020.pdf](http://www.incompleteideas.net/book/RLbook2020.pdf)

## Version History
- v0: Initial version release

r   r6   )lowhighdtype/   r7   )r=   r>   r   r?   	rgb_array   )render_modes
render_fpsautoreset_modeNc           	     6   UR                   n[        R                  " US   SUS:H  -  -   SUS:H  -  -   US   SUS:H  -  -   SUS:H  -  -   /5      n[        R                  " [        R                  " [        R                  " US   S5      S5      [        R                  " [        R                  " US   S5      S5      /5      n[        U5      n[        R                  " US   SU-
  -  SU-  -   US   SU-
  -  /5      n[        UR                  S5      US   US9nU$ )	z9The Cliffwalking environment's state transition function.r   r7      r6      )rH   r0   r1   r3   )r0   jnparraymaximumminimumr9   r-   reshape)selfstateactionkeyparamsnew_positionr3   	new_states           r+   
transition!CliffWalkingFunctional.transition   s-    ,, yyQ1!#45v{9KLQ1!#45v{9KL
 yyCKKQ;Q?CKKQ<a@
 ,'yyQ1v:.V;Q1v:.
 (006q	
	 r*   c                L    [         R                  " SS/5      n[        USSS9nU$ )z*Cliffwalking initial observation function.r6   r   rI   FrK   )rL   rM   r-   )rQ   rngrU   r0   rR   s        r+   initialCliffWalkingFunctional.initial   s(    ))QF+bQVWr*   c                    [         R                  " UR                  S   S-  UR                  S   -   5      R                  S5      $ )zCliffwalking observation.r      r7   rA   )rL   rM   r0   rP   rQ   rR   rU   s      r+   observation"CliffWalkingFunctional.observation   s?    yy!!!$r)E,A,A!,DD

'$-	r*   c                p    [         R                  " UR                  [         R                  " SS/5      5      $ )z@Determines if a particular Cliffwalking observation is terminal.r6   rJ   )rL   array_equalr0   rM   r`   s      r+   terminalCliffWalkingFunctional.terminal   s&    u44ciiB6HIIr*   c                    UnSSUR                   S   -  -   n[        R                  R                  U[        R
                  5      $ )zCalculates reward from a state.rI   ir   )r3   jaxlaxconvert_element_typerL   float32)rQ   rR   rS   
next_staterU   rewards         r+   rm   CliffWalkingFunctional.reward   s;     sU\\!_,-ww++FCKK@@r*   c                    SSK nSnSUS   -  SUS   -  4nUR                  " 5         UR                  " US   US   45      nSnS	n[
        R                  " U[        S
9n	SU	SSS24'   [        R                  " [        R                  " [        5      S5      [        R                  " [        R                  " [        5      S5      [        R                  " [        R                  " [        5      S5      [        R                  " [        R                  " [        5      S5      /n
SnU
 Vs/ s H8  nUR                  R                  UR                  R                  U5      U5      PM:     nn[        R                  " [        R                  " [        5      S5      nUR                  R                  UR                  R                  U5      U5      n[        R                  " [        R                  " [        5      S5      nUR                  R                  UR                  R                  U5      U5      n[        R                  " [        R                  " [        5      S5      [        R                  " [        R                  " [        5      S5      /nU Vs/ s H8  nUR                  R                  UR                  R                  U5      U5      PM:     nn[        R                  " [        R                  " [        5      S5      [        R                  " [        R                  " [        5      S5      /nU Vs/ s H8  nUR                  R                  UR                  R                  U5      U5      PM:     nn[        R                  " [        R                  " [        5      S5      nUR                  R                  UR                  R                  U5      U5      n[!        UUUUU	[#        U5      UU[#        U5      [#        U5      [#        U5      [#        U5      US9$ ! [         a    [        S5      ef = fs  snf s  snf s  snf )z Returns an initial render state.r   NzGpygame is not installed, run `pip install "gymnasium[classic_control]"`)<   rp   rC   r_   r7   )rC   r_   0   )r?   Tr6   rI   z../toy_text/img/elf_up.pngz../toy_text/img/elf_right.pngz../toy_text/img/elf_down.pngz../toy_text/img/elf_left.pngz../toy_text/img/stool.pngz../toy_text/img/cookie.pngz ../toy_text/img/mountain_bg1.pngz ../toy_text/img/mountain_bg2.pngz(../toy_text/img/mountain_near-cliff1.pngz(../toy_text/img/mountain_near-cliff2.pngz"../toy_text/img/mountain_cliff.png)r   r   r   r   r   r   r   r   r   r   r   r   r    )pygameImportErrorr
   initSurfacenpzerosr2   r   joindirname__file__	transformscaleimageloadr   tuple)rQ   screen_widthscreen_heightrr   r   window_sizer   r   r   r   hikersf_namer   	file_namer   r   r   r   r   r   r    s                        r+   render_init"CliffWalkingFunctional.render_init   s}   	 		!1

 	QQ @Ad+a2g IIdll8,.JKIIdll8,.MNIIdll8,.LMIIdll8,.LM	
 	 !
  ""6<<#4#4V#<iH  	 
 IIdll846QR	$$**6<<+<+<Y+GS	IIdll846RS	##))&,,*;*;I*F	RIIdll8,.PQIIdll8,.PQ
 "
! ""6<<#4#4V#<iH! 	 

 IIX&(R IIX&(R	
 *
) ""6<<#4#4V#<iH) 	 
 IILL"$H
	 $$**6<<+<+<Y+GS	Z('N!/2!/2 0
 	
}  	(Y 	:


s   P )?P7?P<7?QP4c                    SSK nUu  nnnnn	n
nnnnnnn[        U5       GH=  n[        R
                  " UU5      u  nnUUS   -  UUS   -  4nUS-  US-  -  nUR                  UU   U5        U	UU4   (       a  UR                  UU5        UUS   S-
  :  a$  U	US-   U4   (       a  UR                  UU   U5        US:X  a  UR                  UU5        UUS-
  :X  a  UR                  UU5        UUR                  S   S-  UR                  S   -   :X  d  M  US   US   SUS   -  -
  4nUR                  S	:w  a  UR                  OSnUR                  U
U   U5        GM@     U[        R                  " [        R                  " UR                  R                  U5      5      S
S94$ ! [         a    [        S5      ef = f)zRenders an image from a state.r   Nz@pygame is not installed, run `pip install "gymnasium[toy_text]"`r7   rH   $   r_   g?rI   )r7   r   rH   )axes)rr   rs   r
   rangerv   unravel_indexblitr0   r1   	transposerM   	surfarraypixels3d)rQ   rR   render_staterU   rr   window_surfacer   r   r   r   r   r   r   r   r   r   r   r    srowcolposcheck_board_maskelf_posr1   s                            r+   render_image#CliffWalkingFunctional.render_image,  s   	& 	
 rA''51HC1%sYq\'9:C"Qwq00@ A3GS#X##Is3U1X\!eC!GSL&9##N3C$DcJBw##Is3BF{##Hc2E))!,r1E4I4I!4LLLq63q6C)A,,>#>?383D3D3Je//PQ##J{$;WE# & R\\HHV%%..~>?i
 
 	
O  	(R 	s   F' 'F=c                     SSK nUR                  R	                  5         UR                  " 5         g! [         a  n[        S5      UeSnAff = f)zCloses the render state.r   Nz@pygame is not installed, run `pip install "gymnasium[toy-text]"`)rr   rs   r
   displayquit)rQ   r   rr   es       r+   render_close#CliffWalkingFunctional.render_close]  sJ    	
 	  	(R	s   2 
AAAr!   N)
rR   r-   rS   zint | jax.ArrayrT   r4   rU   Nonereturnr-   )r[   r4   rU   r   r   r-   )rR   r-   rU   r   r   r/   )
rR   r-   rS   r   rl   r-   rU   r   r   r/   )iX  i  )r   r   r   r   r   r   )rR   r-   r   r   rU   r   r   z"tuple[RenderStateType, np.ndarray])r   r   r   r   )r#   r$   r%   r&   r'   r   Boxrv   int32action_spaceobservation_spacer   	NEXT_STEPmetadatarX   r\   ra   re   rm   r   r   r   r)   r!   r*   r+   r;   r;   B   s;   ?B ::!!288<L

LBHH
 %'11H ((  ( 	(
 ( 
(TJ 
A
A 
A 	
A
 
A 

A =@R
R
69R
	R
j NR/
/
-</
FJ/
	+/
b	r*   r;   c                  B   ^  \ rS rSrSrS/SSS.rS	S
U 4S jjjrSrU =r$ )CliffWalkingJaxEnvii  z<A Gymnasium Env wrapper for the functional cliffwalking env.rB   2   T)rD   rE   rh   c                   > [         R                  " U 4SU0UD6  [        S0 UD6nUR                  [        R
                  5        [        TU ]  UU R                  US9  g)z8Initializes Gym wrapper for cliffwalking functional env.render_mode)r   r   Nr!   )r   __init__r;   r{   rh   jitsuperr   )rQ   r   kwargsenv	__class__s       r+   r   CliffWalkingJaxEnv.__init__n  sW    $BKB6B$.v.cgg]]# 	 	
r*   r!   r   )r   z
str | None)	r#   r$   r%   r&   r'   r   r   r)   __classcell__)r   s   @r+   r   r   i  s    F!,RMH

 

r*   r   __main__rB   )r   FzPlease input an action
)6r'   
__future__r   osr   typingr   r   r   rh   	jax.numpynumpyrL   rv   	gymnasiumr   !gymnasium.envs.functional_jax_envr	   gymnasium.errorr
   !gymnasium.experimental.functionalr   r   gymnasium.utilsr   gymnasium.vectorr   gymnasium.wrappersr   rr   r   r-   Arrayr4   r(   r9   r   floatr2   r;   r   r#   r   resetobsinfoprintre   inputrS   steprm   	truncatedexitr!   r*   r+   <module>r      s,   v "  7 7 
    > 2 > $ * - j *z  Y "dHciieT?DHIdN	
)8 
$ z +D
EC		IC	#tHU56714&1A.VXy$c68Y5 h
 	F! r*   