
    hY              	           S r SSKrSSKJr  SSKrSSKrSSKJrJ	r	  SSK
Jr  SSKJr  SSKJrJr  SSKJr   " S	 S
\R&                  \R(                  \\\R(                  4   4   5      r " S S\5      rg)z
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
    N)Union)loggerspaces)utils)DependencyNotInstalled)AutoresetMode	VectorEnv)batch_spacec                      ^  \ rS rSrSrSS/SS.r SS\S	\S-  4S
 jjrS r	SSS.S\
S-  S\S-  4U 4S jjjrS rS rSrU =r$ )CartPoleEnv   u9  
## Description

This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in
["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
 in the left and right direction on the cart.

## Action Space

The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
 of the fixed force the cart is pushed with.

- 0: Push cart to the left
- 1: Push cart to the right

**Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle
 the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it

## Observation Space

The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:

| Num | Observation           | Min                 | Max               |
|-----|-----------------------|---------------------|-------------------|
| 0   | Cart Position         | -4.8                | 4.8               |
| 1   | Cart Velocity         | -Inf                | Inf               |
| 2   | Pole Angle            | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3   | Pole Angular Velocity | -Inf                | Inf               |

**Note:** While the ranges above denote the possible values for observation space of each element,
    it is not reflective of the allowed values of the state space in an unterminated episode. Particularly:
-  The cart x-position (index 0) can be take values between `(-4.8, 4.8)`, but the episode terminates
   if the cart leaves the `(-2.4, 2.4)` range.
-  The pole angle can be observed between  `(-.418, .418)` radians (or **±24°**), but the episode terminates
   if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)

## Rewards
Since the goal is to keep the pole upright for as long as possible, by default, a reward of `+1` is given for every step taken, including the termination step. The default reward threshold is 500 for v1 and 200 for v0 due to the time limit on the environment.

If `sutton_barto_reward=True`, then a reward of `0` is awarded for every non-terminating step and `-1` for the terminating step. As a result, the reward threshold is 0 for v0 and v1.

## Starting State
All observations are assigned a uniformly random value in `(-0.05, 0.05)`

## Episode End
The episode ends if any one of the following occurs:

1. Termination: Pole Angle is greater than ±12°
2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display)
3. Truncation: Episode length is greater than 500 (200 for v0)

## Arguments

Cartpole only has `render_mode` as a keyword for `gymnasium.make`.
On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.

```python
>>> import gymnasium as gym
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
>>> env.reset(seed=123, options={"low": -0.1, "high": 0.1})  # default low=-0.05, high=0.05
(array([ 0.03647037, -0.0892358 , -0.05592803, -0.06312564], dtype=float32), {})

```

| Parameter               | Type       | Default                 | Description                                                                                   |
|-------------------------|------------|-------------------------|-----------------------------------------------------------------------------------------------|
| `sutton_barto_reward`   | **bool**   | `False`                 | If `True` the reward function matches the original sutton barto implementation                |

## Vectorized environment

To increase steps per seconds, users can use a custom vector environment or with an environment vectorizor.

```python
>>> import gymnasium as gym
>>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point")
>>> envs
CartPoleVectorEnv(CartPole-v1, num_envs=3)
>>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
>>> envs
SyncVectorEnv(CartPole-v1, num_envs=3)

```

## Version History
* v1: `max_time_steps` raised to 500.
    - In Gymnasium `1.0.0a2` the `sutton_barto_reward` argument was added (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/790))
* v0: Initial versions release.
human	rgb_array2   )render_modes
render_fpsNsutton_barto_rewardrender_modec                    Xl         SU l        SU l        SU l        U R                  U R                  -   U l        SU l        U R                  U R
                  -  U l        SU l        SU l        SU l	        S[        R                  -  S	-  U l        S
U l        [        R                  " U R                  S-  [        R                   U R                  S-  [        R                   /[        R"                  S9n[$        R&                  " S5      U l        [$        R*                  " U* U[        R"                  S9U l        X l        SU l        SU l        S U l        S U l        SU l        S U l        S U l        g )N皙#@      ?皙?      ?      $@{Gz?euler   h  333333@   dtypeX    T)_sutton_barto_rewardgravitymasscartmasspole
total_masslengthpolemass_length	force_magtaukinematics_integratormathpitheta_threshold_radiansx_thresholdnparrayinffloat32r   Discreteaction_spaceBoxobservation_spacer   screen_widthscreen_heightscreenclockisopenstatesteps_beyond_terminated)selfr   r   highs       a/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/envs/classic_control/cartpole.py__init__CartPoleEnv.__init__w   s5    %8!--$--7#}}t{{:%," (.'7#'=$ xx  1$,,q0	 **
 #OOA.!'TE4rzz!J& 
(,
'+$    c                    U R                   R                  U5      (       d   U< S[        U5       S35       eU R                  c   S5       eU R                  u  p#pEUS:X  a  U R                  OU R                  * n[
        R                  " U5      n[
        R                  " U5      nX`R                  [
        R                  " U5      -  U-  -   U R                  -  n	U R                  U-  Xy-  -
  U R                  SU R                  [
        R                  " U5      -  U R                  -  -
  -  -  n
XR                  U
-  U-  U R                  -  -
  nU R                  S:X  aE  X R                  U-  -   nX0R                  U-  -   nX@R                  U-  -   nXPR                  U
-  -   nODX0R                  U-  -   nX R                  U-  -   nXPR                  U
-  -   nX@R                  U-  -   n[
        R                   " X#XE4[
        R"                  S9U l        [%        X R&                  * :  =(       d9    X R&                  :  =(       d$    X@R(                  * :  =(       d    X@R(                  :  5      nU(       d  U R*                  (       a  SOS	nOzU R,                  c  S
U l        U R*                  (       a  SOS	nOPU R,                  S
:X  a  [.        R0                  " S5        U =R,                  S-  sl        U R*                  (       a  SOSnU R2                  S:X  a  U R5                  5         [
        R                   " U R                  [
        R6                  S9XS0 4$ )N (	) invalid$Call reset before using step method.   UUUUUU?r   r!           r   r   g      zYou are calling 'step()' even though this environment has already returned terminated = True. You should always call 'reset()' once you receive 'terminated = True' -- any further steps are undefined behavior.r   F)r8   containstyper@   r,   r3   cossinr+   squarer)   r&   r*   r(   r.   r-   r4   float64boolr2   r1   r%   rA   r   warnr   renderr6   )rB   actionxx_dottheta	theta_dotforcecosthetasinthetatempthetaaccxacc
terminatedrewards                 rD   stepCartPoleEnv.step   s     ))
 
 	2Zr$v,y1	2 
 zz%M'MM%%)ZZ"%"(A+DNN?66%=66%=
 ((299Y+??(JJOO LL8+ho=KK4==299X+>>PPR
 **X5@4??RR%%0HHu$$AHHtO+EHHy00E!HHx$77IHHtO+EHHu$$A!HHx$77IHHy00EXXq:"**M
!!!! 4###444444 333	

  55S3F))1+,D(!66TCF++q0I ((A-(!66TCFw&KKM xx

"**5v5RTTTrG   seedoptionsrh   ri   c                <  > [         TU ]  US9  [        R                  " USS5      u  p4U R                  R                  X4SS9U l        S U l        U R                  S:X  a  U R                  5         [        R                  " U R                  [        R                  S90 4$ )Nrh   皙皙?   lowrC   sizer   r!   )superresetr   maybe_parse_reset_bounds	np_randomuniformr@   rA   r   rW   r3   r4   r6   )rB   rh   ri   rq   rC   	__class__s        rD   rt   CartPoleEnv.reset   s     	4  22UD
	 ^^++T+J
'+$w&KKMxx

"**5r99rG   c           	      
   U R                   cG  U R                  c   e[        R                  R	                  SU R                  R
                   S35        g  SS KnSSKJn  U R                  c  UR                  5         U R                   S:X  aQ  UR                  R                  5         UR                  R                  U R                  U R                  45      U l
        O,UR!                  U R                  U R                  45      U l
        U R"                  c  UR$                  R'                  5       U l        U R(                  S-  nU R                  U-  nSnUSU R*                  -  -  nS	nS
n	U R,                  c  g U R,                  n
UR!                  U R                  U R                  45      U l        U R.                  R1                  S5        U* S-  US-  U	S-  U	* S-  4u  ppU	S-  nU
S   U-  U R                  S-  -   nSnX4X4X4X4/nU Vs/ s H  nUS   U-   US   U-   4PM     nnUR3                  U R.                  US5        UR5                  U R.                  US5        U* S-  US-  XvS-  -
  U* S-  4u  pp/ nX4X4X4X44 HU  nUR6                  R9                  U5      R;                  U
S   * 5      nUS   U-   US   U-   U-   4nUR=                  U5        MW     UR3                  U R.                  US5        UR5                  U R.                  US5        UR?                  U R.                  [A        U5      [A        UU-   5      [A        US-  5      S5        URC                  U R.                  [A        U5      [A        UU-   5      [A        US-  5      S5        URE                  U R.                  SU R                  US5        URF                  RI                  U R.                  SS5      U l        U R                  RK                  U R.                  S5        U R                   S:X  a]  URL                  RO                  5         U R"                  RQ                  U RR                  S   5        UR                  RI                  5         g U R                   S:X  aL  [T        RV                  " [T        RX                  " URZ                  R]                  U R                  5      5      SS9$ g ! [         a  n[        S5      UeS nAff = fs  snf )NzYou are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym.make("", render_mode="rgb_array")r   gfxdrawzGpygame is not installed, run `pip install "gymnasium[classic-control]"`r   r    r         I@      >@   r   r         @       @d   rL   r   r   r         e            FTr   r   r   r   rL   r   r    axes)/r   specgymr   rV   idpygamer}   ImportErrorr   r=   initdisplayset_moder;   r<   Surfacer>   timeClockr2   r*   r@   surffill	aapolygonfilled_polygonr/   Vector2
rotate_radappendaacircleintfilled_circlehline	transformflipbliteventpumptickmetadatar3   	transposer4   	surfarraypixels3d)rB   r   r}   eworld_widthscale	polewidthpolelen	cartwidth
cartheightrY   lrtb
axleoffsetcartxcartycart_coordscpole_coordscoords                         rD   rW   CartPoleEnv.render   s   #99(((JJOO""&)),,/JL
 	& ;;KKM7*##%$nn55&&(:(:; %nnd.?.?ASAS-TU::**,DJ&&*!!K/	1t{{?+	
::JJNND$5$5t7I7I#JK			'Z!^Y]JNZKRSOS
a#%
!ut00366vvvv6=HI[!uadUl3[I$))[)<tyy+yA JNM!m#JN	

a fqfqfqf5EKK''.991Q4%@E1X%uQx%'7*'DEEu% 6 	$))[/Btyy+GIIJ
"#	A	
 	IIJ
"#	A	
 	diiD$5$5uiH$$))$))UDA	F+w&LLJJOODMM,78NN!,<<))224;;?@y  -a  	(Y	J Js   
S$ T$
S?.S::S?c                     U R                   b6  SS KnUR                  R                  5         UR                  5         SU l        g g )Nr   F)r=   r   r   quitr?   rB   r   s     rD   closeCartPoleEnv.closeY  s4    ;;"NN!KKMDK #rG   )r%   r8   r>   r,   r&   r?   r.   r*   r'   r(   r:   r+   r   r=   r<   r;   r@   rA   r   r-   r1   r)   r2   )FN)__name__
__module____qualname____firstlineno____doc__r   rU   strrE   re   r   dictrt   rW   r   __static_attributes____classcell__rx   s   @rD   r   r      s    [| !+.H LP+,#'+,>ADj+,Z>UF  #	: Dj: 	: :&`D   rG   r   c            	       *  ^  \ rS rSrS/S\R
                  S.r    SS\S\S\S-  S	\	4S
 jjr
S\R                  S\\R                  \R                  \R                  \R                  \4   4S jrSSS.S\S-  S\S-  4U 4S jjjrS rS rSrU =r$ )CartPoleVectorEnvib  r   r   )r   r   autoreset_modeNnum_envsmax_episode_stepsr   r   c                    X@l         Xl        X l        X0l        SU l        SU l        SU l        U R                  U R
                  -   U l        SU l        U R                  U R                  -  U l	        SU l
        SU l        SU l        S U l        [        R                  " U[        R                   S9U l        [        R                  " U[        R$                  S9U l        S	[(        R*                  -  S
-  U l        SU l        [        R0                  " U R.                  S-  [        R2                  U R,                  S-  [        R2                  /[        R4                  S9nSU l        SU l        [:        R<                  " S5      U l        [A        U R>                  U5      U l!        [:        RD                  " U* U[        R4                  S9U l#        [A        U RF                  U5      U l$        SU l%        SU l&        S U l'        S U l(        S U l)        g )Nr   r   r   r   r   r   r   r!   r   r   r   r    rl   rm   r#   r$   )*r%   r   r   r   r&   r'   r(   r)   r*   r+   r,   r-   r.   r@   r3   zerosint32stepsbool_	prev_doner/   r0   r1   r2   r4   r5   r6   rq   rC   r   r7   single_action_spacer
   r8   r9   single_observation_spacer:   r;   r<   screensr   rA   )rB   r   r   r   r   rC   s         rD   rE   CartPoleVectorEnv.__init__i  s    %8! !2&--$--7#}}t{{:%,"
XXhbhh7
("((; (.'7#'=$ xx  1$,,q0	 **
 	#)??1#5 '(@(@(K(.

D5$bjj(Q%!,T-J-JH!U 	'+$rG   rX   returnc                    U R                   R                  U5      (       d   U< S[        U5       S35       eU R                  c   S5       eU R                  u  p#pE[        R
                  " US-
  5      U R                  -  n[        R                  " U5      n[        R                  " U5      nX`R                  [        R                  " U5      -  U-  -   U R                  -  n	U R                  U-  Xy-  -
  U R                  SU R                  [        R                  " U5      -  U R                  -  -
  -  -  n
XR                  U
-  U-  U R                  -  -
  nU R                  S:X  aE  X R                   U-  -   nX0R                   U-  -   nX@R                   U-  -   nXPR                   U
-  -   nODX0R                   U-  -   nX R                   U-  -   nXPR                   U
-  -   nX@R                   U-  -   n[        R"                  " X#XE45      U l        X R$                  * :  X R$                  :  -  X@R&                  * :  -  X@R&                  :  -  nU =R(                  S-  sl        U R(                  U R*                  :  nU R,                  (       a%  [        R.                  " U[        R0                  S9* nO#[        R2                  " U[        R0                  S9nU R4                  R7                  U R8                  U R:                  S	U R<                  R?                  5       4S
9U R                  S S 2U R<                  4'   SU R(                  U R<                  '   SXR<                  '   SXR<                  '   SXR<                  '   [        R@                  " X5      U l        U R                  RB                  RE                  [        R0                  5      XU0 4$ )NrI   rJ   rK   r   rM   r   rL   r!   ro   rp   r   rN   F)#r8   rO   rP   r@   r3   signr,   rQ   rR   r+   rS   r)   r&   r*   r(   r.   r-   stackr2   r1   r   r   r%   r4   r6   	ones_likerv   rw   rq   rC   r   sum
logical_orTastype)rB   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   	truncatedrd   s                  rD   re   CartPoleVectorEnv.step  sF      ))
 
 	2Zr$v,y1	2 
 zz%M'MM%%)ZZ"%%666%=66%=
 ((299Y+??(JJOO LL8+ho=KK4==299X+>>PPR
 **X5@4??RR%%0HHu$$AHHtO+EHHy00E!HHx$77IHHtO+EHHu$$A!HHx$77IHHy00EXXq:;
 """"###%44446 3335 	 	

a
JJ$"8"88	$$hhz<<F\\*BJJ?F )-(>(>tyy4>>3E3E3G/H )? )


1dnn$% &'

4>>"!$~~%*
>>"$)	..!z=zz||""2::.IrQQrG   rg   rh   ri   c                (  > [         TU ]  US9  [        R                  " USS5      u  U l        U l        U R                  R                  U R                  U R
                  SU R                  4S9U l	        S U l
        [        R                  " U R                  [        R                  S9U l        [        R                  " U R                  [        R                  S9U l        U R                  R"                  R%                  [        R&                  5      0 4$ )Nrk   rl   rm   ro   rp   r!   )rs   rt   r   ru   rq   rC   rv   rw   r   r@   rA   r3   r   r   r   r   r   r   r   r6   )rB   rh   ri   rx   s      rD   rt   CartPoleVectorEnv.reset  s     	4  $<<WeTR$)^^++tyy4==/A , 

 (,$XXdmm288<
$--rxx@zz||""2::.22rG   c           
       	   U R                   cG  U R                  c   e[        R                  R	                  SU R                  R
                   S35        g  SS KnSSKJn  U R                  c`  UR                  5         [        U R                  5       Vs/ s H*  nUR                  U R                  U R                   45      PM,     snU l
        U R"                  S-  nU R                  U-  nSnUSU R$                  -  -  nSnS	n	U R&                  c  [)        S
5      e[+        U R&                  R,                  U R                  5       GH  u  p[/        U
[0        R2                  5      (       a  U
R4                  S:X  d   eUR                  U R                  U R                   45      U l        U R6                  R9                  S5        U* S-  US-  U	S-  U	* S-  4u  ppU	S-  nU
S   U-  U R                  S-  -   nSnX4X4X4X4/nU Vs/ s H  nUS   U-   US   U-   4PM     nnUR;                  U R6                  US5        UR=                  U R6                  US5        U* S-  US-  XvS-  -
  U* S-  4u  pp/ nX4X4X4X44 HU  nUR>                  RA                  U5      RC                  U
S   * 5      nUS   U-   US   U-   U-   4nURE                  U5        MW     UR;                  U R6                  US5        UR=                  U R6                  US5        URG                  U R6                  [I        U5      [I        UU-   5      [I        US-  5      S5        URK                  U R6                  [I        U5      [I        UU-   5      [I        US-  5      S5        URM                  U R6                  SU R                  US5        URN                  RQ                  U R6                  SS5      U l        URS                  U R6                  S5        GM     U R                   Vs/ s HE  n[0        RT                  " [0        RV                  " URX                  R[                  U5      5      SS9PMG     sn$ ! [         a    [        S5      ef = fs  snf s  snf s  snf )NzYou are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym.make_vec("r{   r   r|   zGpygame is not installed, run `pip install "gymnasium[classic_control]"`r    r   r~   r   z:Cartpole's state is None, it probably hasn't be reset yet.rn   r   r   r   r   rL   r   r   r   FTr   r   r   ).r   r   r   r   rV   r   r   r}   r   r   r   r   ranger   r   r;   r<   r2   r*   r@   
ValueErrorzipr   
isinstancer3   ndarrayshaper   r   r   r   r/   r   r   r   r   r   r   r   r   r   r   r   r4   r   r   )rB   r   r}   _r   r   r   r   r   r   rY   r=   r   r   r   r   r   r   r   r   r   r   r   s                          rD   rW   CartPoleVectorEnv.render  sW   #99(((JJOO&&*iill^3NP
 	& <<KKM t}}--A  1 143E3EFG-DL
 &&*!!K/	1t{{?+	
::L  TZZ\\4<<8IAa,,D@@(9(94;M;M'NODIIINN?+#aQ
QVWWJA!#c)JaD5L4#4#4s#::EE6A6A6A6:KALMAAaD5L!A$,7KMdiii@""499k9E 
QAa-'
Q	JA! K&1&1&1&9++E2==qteDqE)58e+;j+HI""5) : diioF""499k?K		E
EJ&'IM" !!		E
EJ&'IM" MM$))Q(9(95)L((--diiEDIKK		6*a 9h ,,
& LL&"2"2";";F"CD9U&
 	
W  	(Y 	8 NN
s   
Q( 1RRAR(Q>c                 H    U R                   b  SS KnUR                  5         g g )Nr   )r   r   r   r   s     rD   r   CartPoleVectorEnv.closeW  s    <<#KKM $rG   )r%   r8   r,   r&   rC   r.   r*   rq   r'   r(   r   r   r:   r+   r   r   r<   r;   r   r   r   r@   r   rA   r   r-   r1   r)   r2   )rL   i  NF)r   r   r   r   r   	NEXT_STEPr   r   r   rU   rE   r3   r   tupler   re   rt   rW   r   r   r   r   s   @rD   r   r   b  s    $'11H !$"&$)9,9, 9, 4Z	9,
 "9,v@Rjj@R	rzz2::rzz2::tC	D@RJ  #	3 Dj3 	3 3([
z rG   r   )r   r/   typingr   numpyr3   	gymnasiumr   r   r   gymnasium.envs.classic_controlr   gymnasium.errorr   gymnasium.vectorr   r	   gymnasium.vector.utilsr
   Envr   r   r   r    rG   rD   <module>r     sc        $ 0 2 5 .K #''"**eCO&<<= K \
y	 yrG   