
    h#4              	           S SK Jr  S SKJr  S SKJr  S SKJr  S SKr	S SK
rS SK
JrJr  S SKJr  S SKJr  S rS	rS
rSr\SS /\S S	/\S	S /\S S/0r " S S\5      rg)    )closing)StringIO)path)AnyN)Envspaces)categorical_sample)DependencyNotInstalled         c                     ^  \ rS rSrSr/ SQSS.rSS\S-  S\4S	 jjrS
\	R                  S\	R                  4S jrS\\   \	R                  -  S\S\\\\\\4      4S jrS rSSS.S\S-  S\S-  4U 4S jjjrS rS rS rS rSrU =r$ )CliffWalkingEnv   u
  
Cliff walking involves crossing a gridworld from start to goal while avoiding falling off a cliff.

## Description
The game starts with the player at location [3, 0] of the 4x12 grid world with the
goal located at [3, 11]. If the player reaches the goal the episode ends.

A cliff runs along [3, 1..10]. If the player moves to a cliff location it
returns to the start location.

The player makes moves until they reach the goal.

Adapted from Example 6.6 (page 132) from Reinforcement Learning: An Introduction
by Sutton and Barto [<a href="#cliffwalk_ref">1</a>].

The cliff can be chosen to be slippery (disabled by default) so the player may move perpendicular
to the intended direction sometimes (see <a href="#is_slippy">`is_slippery`</a>).

With inspiration from:
[https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py](https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py)

## Action Space
The action shape is `(1,)` in the range `{0, 3}` indicating
which direction to move the player.

- 0: Move up
- 1: Move right
- 2: Move down
- 3: Move left

## Observation Space
There are 3 x 12 + 1 possible states. The player cannot be at the cliff, nor at
the goal as the latter results in the end of the episode. What remains are all
the positions of the first 3 rows plus the bottom-left cell.

The observation is a value representing the player's current position as
current_row * ncols + current_col (where both the row and col start at 0).

For example, the starting position can be calculated as follows: 3 * 12 + 0 = 36.

The observation is returned as an `int()`.

## Starting State
The episode starts with the player in state `[36]` (location [3, 0]).

## Reward
Each time step incurs -1 reward, unless the player stepped into the cliff,
which incurs -100 reward.

## Episode End
The episode terminates when the player enters state `[47]` (location [3, 11]).

## Information

`step()` and `reset()` return a dict with the following keys:
- "p" - transition proability for the state.

As cliff walking is not stochastic, the transition probability returned always 1.0.

## Arguments

```python
import gymnasium as gym
gym.make('CliffWalking-v1')
```

## References
<a id="cliffwalk_ref"></a>[1] R. Sutton and A. Barto, “Reinforcement Learning:
An Introduction” 2020. [Online]. Available: [http://www.incompleteideas.net/book/RLbook2020.pdf](http://www.incompleteideas.net/book/RLbook2020.pdf)

## Version History
- v1: Add slippery version of cliffwalking
- v0: Initial version release

)human	rgb_arrayansi   )render_modes
render_fpsNrender_modeis_slipperyc                    SU l         [        R                  " SU R                   5      U l        [        R                  " U R                   5      U l        SU l        X l        [        R                  " U R                   [        S9U l
        SU R                  SSS24'   0 U l        [        U R
                  5       H  n[        R                  " X0R                   5      n[        U R                  5       Vs0 s H  oU/ _M     snU R                  U'   U R                  U[        5      U R                  U   [        '   U R                  U[         5      U R                  U   [         '   U R                  U["        5      U R                  U   ["        '   U R                  U[$        5      U R                  U   [$        '   GM     [        R                  " U R
                  5      U l        S	U R&                  U R                  '   [(        R*                  " U R
                  5      U l        [(        R*                  " U R                  5      U l        Xl        S
U l        U R                   S   U R2                  S   -  U R                   S   U R2                  S   -  4U l        S U l        S U l        S U l        S U l        S U l        S U l         S U l!        S U l"        S U l#        g s  snf )N)r      )r   r   r   )dtypeTr   r   r   g      ?)<   r   r   )$shapenpravel_multi_indexstart_state_indexprodnSnAr   zerosbool_cliffPrangeunravel_index_calculate_transition_probUPRIGHTDOWNLEFTinitial_state_distribr   Discreteobservation_spaceaction_spacer   	cell_sizewindow_sizewindow_surfaceclock
elf_images	start_imggoal_img	cliff_imgmountain_bg_imgnear_cliff_imgtree_img)selfr   r   spositionas         ^/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/envs/toy_text/cliffwalking.py__init__CliffWalkingEnv.__init__h   s*   
!#!5!5fdjj!I''$**%& hhtzz6#AqtG twwA''::6H(-dgg71B7DFF1I ;;HbIDFF1IbM#>>xODFF1Ie"==hMDFF1IdO"==hMDFF1IdO   &(XXdgg%6"=@""4#9#9:!'!9"OODGG4& "JJqMDNN1--JJqMDNN1--
 #
#"= 8s   1Kcoordreturnc                     [        US   U R                  S   S-
  5      US'   [        US   S5      US'   [        US   U R                  S   S-
  5      US'   [        US   S5      US'   U$ )z5Prevent the agent from falling out of the grid world.r   r   )minr   max)r?   rF   s     rC   _limit_coordinates"CliffWalkingEnv._limit_coordinates   sq    uQxA!23auQx#auQxA!23auQx#a    currentmovec                    U R                   (       d  [        U   /nO'US-
  S-  X"S-   S-  4 Vs/ s H  n[        U   PM     nn/ nU GH  n[        R                  " U5      [        R                  " U5      -   nU R	                  U5      R                  [        5      n[        R                  " [        U5      U R                  5      nU R                  [        U5         (       a-  UR                  S[        U5      -  U R                  SS45        M  U R                  S   S-
  U R                  S   S-
  4n	[        U5      U	:H  n
UR                  S[        U5      -  USU
45        GM     U$ s  snf )a  Determine the outcome for an action. Transition Prob is always 1.0.

Args:
    current: Current position on the grid as (row, col)
    delta: Change in position for transition

Returns:
    Tuple of ``(transition_probability, new_state, reward, terminated)``
    where `transition_probability` is 1 if the environment is not slippery, otherwise 1/3 for `move`
    and the perpendicular moves.
r   r   iFr   r   )r   POSITION_MAPPINGr   arrayrK   astypeintr    tupler   r'   appendlenr!   )r?   rN   rO   deltasactoutcomesdeltanew_position	new_stateterminal_stateis_terminateds              rC   r+   *CliffWalkingEnv._calculate_transition_prob   sI    &t,-F 48!8q.$PQUV1W1W# %1W   E88G,rxx>L22<@GGLL,,U<-@$**MI{{5./S[$2H2H$PU VW"&**Q-!"3TZZ]Q5F!G %l 3~ ES[)R OP  s   E+c                    U R                   U R                     U   n[        U Vs/ s H  o3S   PM	     snU R                  5      nX$   u  pVpsX`l        Xl        U R
                  S:X  a  U R                  5         [        U5      XsSSU04$ s  snf )Nr   r   Fprob)r(   r@   r	   	np_random
lastactionr   renderrT   )r?   rB   transitionstipr@   rs           rC   stepCliffWalkingEnv.step   s    ffTVVnQ'k:k!k:DNNK ^
aw&KKM1vqUVQK//  ;s   B
)seedoptionsrm   rn   c                   > [         TU ]  US9  [        U R                  U R                  5      U l        S U l        U R                  S:X  a  U R                  5         [        U R
                  5      SS04$ )N)rm   r   rb   r   )
superresetr	   r0   rc   r@   rd   r   re   rT   )r?   rm   rn   	__class__s      rC   rq   CliffWalkingEnv.reset   s^    4 #D$>$>Ow&KKM466{VQK''rM   c                     U R                   cG  U R                  c   e[        R                  R	                  SU R                  R
                   S35        g U R                   S:X  a  U R                  5       $ U R                  U R                   5      $ )NzYou are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym.make("z", render_mode="rgb_array")r   )r   specgymloggerwarnid_render_text_render_gui)r?   s    rC   re   CliffWalkingEnv.render   s    #99(((JJOO""&)),,/JL
 v%$$&&##D$4$455rM   c                     SS K nU R                  c  UR	                  5         US:X  a`  UR
                  R	                  5         UR
                  R                  S5        UR
                  R                  U R                  5      U l        O UR                  U R                  5      U l        U R                  c  UR                  R                  5       U l
        U R                  Gc  [        R                  " [        R                   " ["        5      S5      [        R                  " [        R                   " ["        5      S5      [        R                  " [        R                   " ["        5      S5      [        R                  " [        R                   " ["        5      S5      /nU Vs/ s HB  nUR$                  R'                  UR(                  R+                  U5      U R,                  5      PMD     snU l        U R.                  cs  [        R                  " [        R                   " ["        5      S	5      nUR$                  R'                  UR(                  R+                  U5      U R,                  5      U l        U R0                  cs  [        R                  " [        R                   " ["        5      S
5      nUR$                  R'                  UR(                  R+                  U5      U R,                  5      U l        U R2                  c  [        R                  " [        R                   " ["        5      S5      [        R                  " [        R                   " ["        5      S5      /nU Vs/ s HB  nUR$                  R'                  UR(                  R+                  U5      U R,                  5      PMD     snU l        U R4                  c  [        R                  " [        R                   " ["        5      S5      [        R                  " [        R                   " ["        5      S5      /nU Vs/ s HB  nUR$                  R'                  UR(                  R+                  U5      U R,                  5      PMD     snU l        U R6                  cs  [        R                  " [        R                   " ["        5      S5      nUR$                  R'                  UR(                  R+                  U5      U R,                  5      U l        [9        U R:                  5       GH  n	[<        R>                  " XR@                  5      u  pXR,                  S   -  XR,                  S   -  4nU
S-  US-  -  nU R                  RC                  U R2                  U   U5        U RD                  X4   (       a&  U R                  RC                  U R6                  U5        XR@                  S   S-
  :  aB  U RD                  U
S-   U4   (       a)  U R                  RC                  U R4                  U   U5        XRF                  :X  a&  U R                  RC                  U R.                  U5        XR:                  S-
  :X  a&  U R                  RC                  U R0                  U5        XRH                  :X  d  GM  US   US   SU R,                  S   -  -
  4nU RJ                  b  U RJ                  OSnU R                  RC                  U R                  U   U5        GM     US:X  a]  URL                  RO                  5         UR
                  RQ                  5         U R                  RS                  U RT                  S   5        g [<        RV                  " [<        RX                  " URZ                  R]                  U R                  5      5      SS9$ ! [         a  n[        S5      UeS nAff = fs  snf s  snf s  snf )Nr   z@pygame is not installed, run `pip install "gymnasium[toy-text]"`r   CliffWalkingzimg/elf_up.pngzimg/elf_right.pngzimg/elf_down.pngzimg/elf_left.pngzimg/stool.pngzimg/cookie.pngzimg/mountain_bg1.pngzimg/mountain_bg2.pngzimg/mountain_near-cliff1.pngzimg/mountain_near-cliff2.pngzimg/mountain_cliff.pngr   r   g?r   )r   r   r   )axes)/pygameImportErrorr
   r6   initdisplayset_captionset_moder5   Surfacer7   timeClockr8   r   joindirname__file__	transformscaleimageloadr4   r9   r:   r<   r=   r;   r)   r#   r   r*   r   blitr'   r!   r@   rd   eventpumpupdatetickmetadata	transposerR   	surfarraypixels3d)r?   moder   ehikersf_name	file_namebg_imgsnear_cliff_imgsr@   rowcolposcheck_board_maskelf_poslast_actions                   rC   r{   CliffWalkingEnv._render_gui   s   	
 &KKMw##%**>:&,nn&=&=d>N>N&O#&,nnT5E5E&F#::**,DJ??"		$,,x02BC		$,,x02EF		$,,x02DE		$,,x02DE	F %$F   &&v||'8'8'@$..Q$DO >>!		$,,x"8/JI#--33!!),dnnDN == 		$,,x"8:JKI",,22!!),dnnDM '		$,,x02HI		$,,x02HIG &$%F   &&v||'8'8'@$..Q%$D  &		$,,x02PQ		$,,x02PQO .#-F   &&v||'8'8'@$..Q-#D >>!		$,,x"8:RSI#--33!!),dnnDN twwA''::6HC**C..2C,CDC"Qwq0$$T%9%9:J%KSQ{{38$##((=ZZ]Q&&4;;sQw|+D##(()<)<=M)NPST***##((=GGaK##((<FF{q63q6C$..2C,C#CD151LdooRS##(()EwO#  & 7?LLNN!!#JJOODMM,78<<))2243F3FGHy i  	(R	,&$#s,   ^) !A	_!A	_!A	_)
_3^??_c                 6   [        5       n[        U R                  5       H  n[        R                  " X R
                  5      nU R                  U:X  a  SnO"US:X  a  SnOU R                  U   (       a  SnOSnUS   S:X  a  UR                  5       nUS   U R
                  S   S-
  :X  a  UR                  5       nUS-  nUR                  U5        M     UR                  S5        [        U5         UR                  5       sS S S 5        $ ! , (       d  f       g = f)	Nz x )r      z T z C z o r   r   
)r   r)   r#   r   r*   r   r@   r'   lstriprstripwriter   getvalue)r?   outfiler@   rA   outputs        rC   rz   CliffWalkingEnv._render_textA  s    *twwA''::6Hvv{W$X&{a{djjma//$MM&!%  & 	dW##% s   0D


Dc                 |    U R                   b/  SS KnUR                  R                  5         UR                  5         g g )Nr   )r6   r   r   quit)r?   r   s     rC   closeCliffWalkingEnv.close\  s/    *NN!KKM	 +rM   )r(   r'   r3   r4   r;   r7   r8   r:   r0   r   rd   r<   r$   r#   r=   r2   r   r@   r   r9   r!   r>   r5   r6   )NF)__name__
__module____qualname____firstlineno____doc__r   strr&   rD   r   ndarrayrK   listrT   rU   floatr   r+   rk   dictrq   re   r{   rz   r   __static_attributes____classcell__)rr   s   @rC   r   r      s    JZ 7H
/C$J /D /b

 rzz Cy2::-58	eE3T)*	+B
0 +/t (S4Z ( ( (6Yv&6 rM   r   )
contextlibr   ior   osr   typingr   numpyr   	gymnasiumrv   r   r   gymnasium.envs.toy_text.utilsr	   gymnasium.errorr
   r,   r-   r.   r/   rQ   r    rM   rC   <module>r      sk          ! < 2 	QAq!fdQGL Kc KrM   