ó
    ûëhú  ã                  ó¶   • S r SSKJr  SSKJr  SSKrSSKrSSK	J
r
Jr  SSKJrJr  S/r " S S\R                   \\
\
4   \R"                  R$                  5      rg)	zQ``StickyAction`` wrapper - There is a probability that the action is taken again.é    )Úannotations)ÚAnyN)ÚActTypeÚObsType)ÚInvalidBoundÚInvalidProbabilityÚStickyActionc                  óh   ^ • \ rS rSrSr S	     S
S jjrSSS.     SU 4S jjjrSS jrSrU =r	$ )r	   é   a¿  Adds a probability that the action is repeated for the same ``step`` function.

This wrapper follows the implementation proposed by `Machado et al., 2018 <https://arxiv.org/pdf/1709.06009.pdf>`_
in Section 5.2 on page 12, and adds the possibility to repeat the action for
more than one step.

No vector version of the wrapper exists.

Example:
    >>> import gymnasium as gym
    >>> env = gym.make("CartPole-v1")
    >>> env = StickyAction(env, repeat_action_probability=0.9)
    >>> env.reset(seed=123)
    (array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32), {})
    >>> env.step(1)
    (array([ 0.01734283,  0.15089367, -0.02859527, -0.33293587], dtype=float32), 1.0, False, False, {})
    >>> env.step(0)
    (array([ 0.0203607 ,  0.34641072, -0.03525399, -0.6344974 ], dtype=float32), 1.0, False, False, {})
    >>> env.step(1)
    (array([ 0.02728892,  0.5420062 , -0.04794393, -0.9380709 ], dtype=float32), 1.0, False, False, {})
    >>> env.step(0)
    (array([ 0.03812904,  0.34756234, -0.06670535, -0.6608303 ], dtype=float32), 1.0, False, False, {})

Change logs:
 * v1.0.0 - Initially added
 * v1.1.0 - Add `repeat_action_duration` argument for dynamic number of sticky actions
c                ó~  • SUs=::  a  S:  d  O  [        SU 35      e[        U[        5      (       a  X34n[        U[        5      (       d  [	        SU 35      e[        U5      S:w  a  [	        SU 35      eUS   US   :”  a  [        SU 35      e[        R                  " [        R                  " U5      S:  5      (       a  [	        SU 35      e[        R                  R                  R                  XS	9  [        R                  R                  X5        X l        X0l        S
U l        SU l        SU l        SU l        g
)aŽ  Initialize StickyAction wrapper.

Args:
    env (Env): the wrapped environment,
    repeat_action_probability (int | float): a probability of repeating the old action,
    repeat_action_duration (int | tuple[int, int]): the number of steps
        the action is repeated. It can be either an int (for deterministic
        repeats) or a tuple[int, int] for a range of stochastic number of repeats.
r   é   zF`repeat_action_probability` should be in the interval [0,1). Received zJ`repeat_action_duration` should be either an integer or a tuple. Received é   zO`repeat_action_duration` should be a tuple or a list of two integers. Received z8`repeat_action_duration` is not a valid bound. Received zD`repeat_action_duration` should be larger or equal than 1. Received )Úrepeat_action_probabilityNF)r   Ú
isinstanceÚintÚtupleÚ
ValueErrorÚlenr   ÚnpÚanyÚarrayÚgymÚutilsÚRecordConstructorArgsÚ__init__ÚActionWrapperr   Úrepeat_action_duration_rangeÚlast_actionÚis_sticky_actionsÚnum_repeatsÚrepeats_taken)ÚselfÚenvr   Úrepeat_action_durations       Ú\/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/wrappers/stateful_action.pyr   ÚStickyAction.__init__0   s_  € ð Ð-Õ1°Õ1Ü$ØXÐYrÐXsÐtóð ô Ð,¬c×2Ñ2Ø&<Ð%UÐ"äÐ0´%×8Ñ8ÜØ\Ð]sÐ\tÐuóð ô Ð'Ó(¨AÓ-ÜØaÐbxÐayÐzóð ð $ AÑ&Ð)?ÀÑ)BÓBÜØJÐKaÐJbÐcóð ô VŠV”B—H’HÐ3Ó4°qÑ8×9Ñ9ÜØVÐWmÐVnÐoóð ô 		‰	×'Ñ'×0Ñ0Øð 	1ñ 	
ô 	×Ñ×"Ñ" 4Ô-à)BÔ&Ø,BÔ)à+/ˆÔØ',ˆÔØ !ˆÔØ"#ˆÕó    N©ÚseedÚoptionsc               óV   >• SU l         SU l        SU l        SU l        [        TU ]  XS9$ )zReset the environment.NFr   r(   )r   r   r    r!   ÚsuperÚreset)r"   r)   r*   Ú	__class__s      €r%   r-   ÚStickyAction.resete   s6   ø€ ð  ˆÔØ!&ˆÔØˆÔØˆÔä‰w‰} $ˆ}Ð8Ð8r'   c                ó
  • U R                   (       d5  U R                  bž  U R                  R                  5       U R                  :  av  U R
                  S:X  a>  U R                  R                  U R                  S   U R                  S   S-   5      U l        U R                  nSU l         U =R                  S-  sl        U R                   (       a/  U R
                  U R                  :X  a  SU l         SU l        SU l        Xl        U$ )zExecute the action.r   r   TF)	r   r   Ú	np_randomÚuniformr   r    Úintegersr   r!   )r"   Úactions     r%   r4   ÚStickyAction.actionp   sã   € ð ×!×!Ø×ÑÑ(Ø—‘×&Ñ&Ó(¨4×+IÑ+IÓIð ×Ñ 1Ó$Ø#'§>¡>×#:Ñ#:Ø×5Ñ5°aÑ8Ø×5Ñ5°aÑ8¸1Ñ<ó$Ô ð ×%Ñ%ˆFØ%)ˆDÔ"Ø×Ò !Ñ#Õð ×!×! d×&6Ñ&6¸$×:LÑ:LÓ&LØ%*ˆDÔ"Ø ˆDÔØ!"ˆDÔà!ÔØˆr'   )r   r   r    r   r   r!   )r   )r#   zgym.Env[ObsType, ActType]r   Úfloatr$   zint | tuple[int, int])r)   z
int | Noner*   zdict[str, Any] | NoneÚreturnztuple[ObsType, dict[str, Any]])r4   r   r7   r   )
Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__r   r-   r4   Ú__static_attributes__Ú__classcell__)r.   s   @r%   r	   r	      sc   ø† ñð@ 9:ð	3$à&ð3$ð $)ð3$ð !6õ	3$ðl %)È4ñ	9Ø!ð	9Ø3Hð	9à	'÷	9ð 	9÷ò r'   )r<   Ú
__future__r   Útypingr   Únumpyr   Ú	gymnasiumr   Úgymnasium.corer   r   Úgymnasium.errorr   r   Ú__all__r   r   r   r	   © r'   r%   Ú<module>rG      sQ   ðÙ Wå "å ã ã ß +ß <ð Ð
€ôwØ×Ñg˜w¨Ð/Ñ0°#·)±)×2QÑ2Qõwr'   