
    nim?                     Z   S r SSKJr  SSKJr  SSKJr  SSKrSSKrSSKrSSKrSSK	r	SSK
r
SSKr
SSKrSSKJs  Jr   " S S\5      r " S S	\5      r " S
 S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      rg)z%Wrappers for OpenAI Gym environments.    )absolute_import)division)print_functionNc                   0    \ rS rSrSrS rS rS rS rSr	g)		AutoReset    z9Automatically reset environment when the episode is done.c                     Xl         SU l        g )NT)_env_doneselfenvs     f/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/minitaur/agents/tools/wrappers.py__init__AutoReset.__init__#       IDJ    c                 .    [        U R                  U5      $ Ngetattrr
   r   names     r   __getattr__AutoReset.__getattr__'       499d##r   c                     U R                   (       a"  U R                  R                  5       SS0 4u  p#pEOU R                  R                  U5      u  p#pEX@l         X#XE4$ )Ng        F)r   r
   resetstepr   actionobservrewarddoneinfos         r   r   AutoReset.step*   sP    zz#'99??#4c5"#D fdD#'99>>&#9 fdJ4%%r   c                 D    SU l         U R                  R                  5       $ )NF)r   r
   r   r   s    r   r   AutoReset.reset2   s    DJ99??r   )r   r
   N
__name__
__module____qualname____firstlineno____doc__r   r   r   r   __static_attributes__ r   r   r   r       s    A$&r   r   c                   *    \ rS rSrSrS rS rS rSrg)ActionRepeat7   z'Repeat the agent action multiple steps.c                     Xl         X l        g r   )r
   _amount)r   r   amounts      r   r   ActionRepeat.__init__:   s    ILr   c                 .    [        U R                  U5      $ r   r   r   s     r   r   ActionRepeat.__getattr__>   r   r   c                     SnSnSnX@R                   :  aF  U(       d?  U R                  R                  U5      u  pVp'X6-  nUS-  nX@R                   :  a	  U(       d  M?  WX2W4$ )NFr      )r6   r
   r   )r   r!   r$   total_rewardcurrent_stepr"   r#   r%   s           r   r   ActionRepeat.stepA   se    DLL

%d#'99>>&#9 fdlal 
%dd <t++r   )r6   r
   N)	r+   r,   r-   r.   r/   r   r   r   r0   r1   r   r   r3   r3   7   s    /$,r   r3   c                   *    \ rS rSrSrS rS rS rSrg)RandomStartL   zDPerform random number of random actions at the start of the episode.c                     Xl         X l        g r   r
   
_max_steps)r   r   	max_stepss      r   r   RandomStart.__init__O   s    IOr   c                 .    [        U R                  U5      $ r   r   r   s     r   r   RandomStart.__getattr__S   r   r   c                    U R                   R                  5       n[        R                  R	                  SU R
                  5      n[        U5       H}  nU R                   R                  R                  5       nU R                   R                  U5      u  ppgU(       d  MN  [        R                  R                  S5        U R                  5       s  $    U$ )Nr   z"Episode ended during random start.)r
   r   nprandomrandintrE   rangeaction_spacesampler   tfloggingwarning)r   r"   random_steps_r!   unused_rewardr$   unused_infos           r   r   RandomStart.resetV   s    YY__F99$$Q8L< yy%%,,.f151G.fT	


?@zz| ! Mr   rD   N)	r+   r,   r-   r.   r/   r   r   r   r0   r1   r   r   rA   rA   L   s    L $	r   rA   c                   F    \ rS rSrSrS rS r\S 5       rS r	S r
S rS	rg
)FrameHistoryb   z/Augment the observation with past observations.c                     SU;  a  [        S5      eXl        X l        SU l        SU l        [        U5      U l        X0l        g)a  Augment the observation with past observations.

Implemented as a Numpy ring buffer holding the necessary past observations.

Args:
  env: OpenAI Gym environment to wrap.
  past_indices: List of non-negative integers indicating the time offsets
    from the current time step of observations to include.
  flatten: Concatenate the past observations rather than stacking them.

Raises:
  KeyError: The current observation is not included in the indices.
r   z4Past indices should include 0 for the current frame.N)KeyErrorr
   _past_indices_step_buffermax	_capacity_flatten)r   r   past_indicesflattens       r   r   FrameHistory.__init__e   sB     	KLLI%DJDL&DNMr   c                 .    [        U R                  U5      $ r   r   r   s     r   r   FrameHistory.__getattr__|   r   r   c                 6   U R                   R                  R                  nU R                   R                  R                  n[        R
                  " US   [        U R                  5      S5      n[        R
                  " US   [        U R                  5      S5      nU R                  (       aN  [        R                  " USUR                  SS  -   5      n[        R                  " USUR                  SS  -   5      n[        R                  R                  X5      $ )NN.r      )r
   observation_spacelowhighrK   repeatlenr^   rc   reshapeshapegymspacesBoxr   ro   rp   s      r   rn   FrameHistory.observation_space   s    
))
%
%
)
)C99&&++D
))C	NC(:(:$;Q
?C99T)_c$*<*<&=qAD}}JJsECIIabM12cZZedjjn45d::>>#$$r   c                     U R                   R                  U5      u  p#pEU =R                  S-  sl        X R                  U R                  U R                  -  '   U R                  5       nX#XE4$ )Nr<   )r
   r   r_   r`   rb   _select_framesr    s         r   r   FrameHistory.step   sX    !%!7FDJJ!OJ06LLdnn,-  "F4%%r   c                     U R                   R                  5       n[        R                  " US   U R                  S5      U l        SU l        U R                  5       $ )Nrj   r   )r
   r   rK   rq   rb   r`   r_   r{   r   r"   s     r   r   FrameHistory.reset   sE    YY__F99VI.BDLDJ  r   c                    U R                    Vs/ s H  oR                  U-
  U R                  -  PM      nnU R                  U   nU R                  (       a'  [
        R                  " USUR                  SS  -   5      nU$ s  snf )Nrk   rm   )r^   r_   rb   r`   rc   rK   rs   rt   )r   indexindicesr"   s       r   r{   FrameHistory._select_frames   sn    BFBTBTUBT

U"dnn4BTGU\\'"F}}zz&%&,,qr*:":;fM	 Vs   %A?)r`   rb   r
   rc   r^   r_   N)r+   r,   r-   r.   r/   r   r   propertyrn   r   r   r{   r0   r1   r   r   rZ   rZ   b   s2    7.$ % %&!r   rZ   c                   @    \ rS rSrSrS rS r\S 5       rS r	S r
Srg	)

FrameDelta   zFConvert the observation to a difference from the previous observation.c                     Xl         S U l        g r   r
   _lastr   s     r   r   FrameDelta.__init__   r   r   c                 .    [        U R                  U5      $ r   r   r   s     r   r   FrameDelta.__getattr__   r   r   c                     U R                   R                  R                  nU R                   R                  R                  nX-
  X!-
  p![        R
                  R                  X5      $ r   )r
   rn   ro   rp   ru   rv   rw   rx   s      r   rn   FrameDelta.observation_space   sJ    
))
%
%
)
)C99&&++D
DJ::>>#$$r   c                 n    U R                   R                  U5      u  p#pEX R                  -
  nX l        XcXE4$ r   )r
   r   r   )r   r!   r"   r#   r$   r%   deltas          r   r   FrameDelta.step   s5    !%!7FDZZEJ$$$r   c                 F    U R                   R                  5       nXl        U$ r   )r
   r   r   r~   s     r   r   FrameDelta.reset   s    YY__FJMr   r   N)r+   r,   r-   r.   r/   r   r   r   rn   r   r   r0   r1   r   r   r   r      s-    N$ % %%r   r   c                   f    \ rS rSrSrSS jrS r\S 5       r\S 5       r	S r
S	 rS
 rS rS rSrg)RangeNormalize   zCNormalize the specialized observation and action ranges to [-1, 1].Nc                 8   Xl         USL=(       a%    U R                  U R                   R                  5      U l        USL a  U R                  (       d  [	        S5      eUc0  U R                  (       d  [
        R                  R                  S5        USL=(       a%    U R                  U R                   R                  5      U l	        USL a  U R                  (       d  [	        S5      eUc2  U R                  (       d   [
        R                  R                  S5        g g g )NFTz,Cannot normalize infinite observation range.z+Not normalizing infinite observation range.z'Cannot normalize infinite action range.z&Not normalizing infinite action range.)
r
   
_is_finitern   _should_normalize_observ
ValueErrorrQ   rR   r%   rO   _should_normalize_action)r   r   r"   r!   s       r   r   RangeNormalize.__init__   s    I%+5%8 &R%)__TYY5P5P%Q 	!~d;;EFF~d;;jjooCD%+5%8 &M%)__TYY5K5K%L 	!~d;;@AA~d;;jjoo>? <~r   c                 .    [        U R                  U5      $ r   r   r   s     r   r   RangeNormalize.__getattr__   r   r   c                    U R                   R                  nU R                  (       d  U$ [        R                  R                  [        R                  " UR                  5      * [        R                  " UR                  5      5      $ r   )	r
   rn   r   ru   rv   rw   rK   onesrt   r   spaces     r   rn    RangeNormalize.observation_space   sQ    II''E((l::>>2775;;//1EFFr   c                    U R                   R                  nU R                  (       d  U$ [        R                  R                  [        R                  " UR                  5      * [        R                  " UR                  5      5      $ r   )	r
   rO   r   ru   rv   rw   rK   r   rt   r   s     r   rO   RangeNormalize.action_space   sQ    II""E((l::>>2775;;//1EFFr   c                     U R                   (       a  U R                  U5      nU R                  R                  U5      u  p#pEU R                  (       a  U R                  U5      nX#XE4$ r   )r   _denormalize_actionr
   r   r   _normalize_observr    s         r   r   RangeNormalize.step   sV    $$''/f!%!7FD$$%%f-f4%%r   c                 ~    U R                   R                  5       nU R                  (       a  U R                  U5      nU$ r   )r
   r   r   r   r~   s     r   r   RangeNormalize.reset   s0    YY__F$$%%f-fMr   c                     U R                   R                  R                  nU R                   R                  R                  nUS-   S-  X2-
  -  U-   nU$ )Nr<   rm   )r
   rO   ro   rp   )r   r!   min_max_s       r   r   "RangeNormalize._denormalize_action   sK    99!!%%D99!!&&DqjA-4FMr   c                     U R                   R                  R                  nU R                   R                  R                  nSX-
  -  X2-
  -  S-
  nU$ )Nrm   r<   )r
   rn   ro   rp   )r   r"   r   r   s       r   r    RangeNormalize._normalize_observ   sI    99&&**D99&&++D&- DK014FMr   c                     [         R                  " UR                  5      R                  5       =(       a.    [         R                  " UR                  5      R                  5       $ r   )rK   isfinitero   allrp   r   s     r   r   RangeNormalize._is_finite   s9    ;;uyy!%%'IBKK

,C,G,G,IIr   )r
   r   r   )NN)r+   r,   r-   r.   r/   r   r   r   rn   rO   r   r   r   r   r   r0   r1   r   r   r   r      sV    K@$ G G G G&Jr   r   c                   :    \ rS rSrSrS rS r\S 5       rS r	Sr
g)	
ClipAction   zAClip out of range actions to the action space of the environment.c                     Xl         g r   r
   r   s     r   r   ClipAction.__init__   s    Ir   c                 .    [        U R                  U5      $ r   r   r   s     r   r   ClipAction.__getattr__  r   r   c                    U R                   R                  R                  n[        R                  R                  [        R                  * [        R                  " U5      -  [        R                  [        R                  " U5      -  5      $ r   )	r
   rO   rt   ru   rv   rw   rK   infr   )r   rt   s     r   rO   ClipAction.action_space  sP    II""((E::>>266'BGGEN2BFFRWWU^4KLLr   c                     U R                   R                  n[        R                  " XR                  UR
                  5      nU R                   R                  U5      $ r   )r
   rO   rK   clipro   rp   r   )r   r!   rO   s      r   r   ClipAction.step  s@    99))LWWV--|/@/@AF99>>&!!r   r   N)r+   r,   r-   r.   r/   r   r   r   rO   r   r0   r1   r   r   r   r      s*    I$ M M"r   r   c                   0    \ rS rSrSrS rS rS rS rSr	g)	LimitDurationi  z-End episodes after specified number of steps.c                 *    Xl         X l        S U l        g r   )r
   	_durationr_   )r   r   durations      r   r   LimitDuration.__init__  s    INDJr   c                 .    [        U R                  U5      $ r   r   r   s     r   r   LimitDuration.__getattr__  r   r   c                     U R                   c  [        S5      eU R                  R                  U5      u  p#pEU =R                   S-  sl         U R                   U R                  :  a	  SnS U l         X#XE4$ )NzMust reset environment.r<   T)r_   RuntimeErrorr
   r   r   r    s         r   r   LimitDuration.step  sc    zz233!%!7FDJJ!OJzzT^^#ddj4%%r   c                 D    SU l         U R                  R                  5       $ )Nr   )r_   r
   r   r(   s    r   r   LimitDuration.reset&  s    DJ99??r   )r   r
   r_   Nr*   r1   r   r   r   r     s    5
$&r   r   c                       \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rS r\S 5       r\S 5       rS rSS jrSS jrS rS rS rSrg)ExternalProcessi+  zAStep environment in a separate process for lock free paralellism.r<   rm                     c                    [         R                  " 5       u  U l        n[         R                  " U R                  X4S9U l        [        R                  " U R                  5        U R
                  R                  5         SU l
        SU l        g)a#  Step environment in a separate process for lock free paralellism.

The environment will be created in the external process by calling the
specified callable. This can be an environment class, or a function
creating the environment and potentially wrapping it. The returned
environment should not access global variables.

Args:
  constructor: Callable that creates and returns an OpenAI gym environment.

Attributes:
  observation_space: The cached observation space of the environment.
  action_space: The cached action space of the environment.
)targetargsN)multiprocessingPipe_connProcess_worker_processatexitregisterclosestart_observ_space_action_space)r   constructorconns      r   r   ExternalProcess.__init__8  sd     '++-DJ#++4<<{FYZDM
OODJJMMDDr   c                 h    U R                   (       d  U R                  S5      U l         U R                   $ )Nrn   )r   r   r(   s    r   rn   !ExternalProcess.observation_spaceN  s+    ++,?@dr   c                 h    U R                   (       d  U R                  S5      U l         U R                   $ )NrO   )r   r   r(   s    r   rO   ExternalProcess.action_spaceT  s*    ++N;dr   c                     U R                   R                  U R                  U45        U R                  U R                  5      $ )zRequest an attribute from the environment.

Note that this involves communication with the external process, so it can
be slow.

Args:
  name: Attribute to access.

Returns:
  Value of the attribute.
)r   send
_ATTRIBUTE_receive_VALUEr   s     r   r   ExternalProcess.__getattr__Z  s0     	JJOOT__d+,==%%r   c                     U R                   R                  U R                  U45        U(       a  U R                  U R                  5      $ [
        R                  " U R                  U R                  5      $ )zStep the environment.

Args:
  action: The action to apply to the environment.
  blocking: Whether to wait for the result.

Returns:
  Transition tuple when blocking, otherwise callable that returns the
  transition tuple.
)r   r   _ACTIONr   _TRANSITION	functoolspartial)r   r!   blockings      r   r   ExternalProcess.stepi  sR     	JJOOT\\6*+]]4++,,t}}d.>.>??r   c                     U R                   R                  U R                  S45        U(       a  U R                  U R                  5      $ [
        R                  " U R                  U R                  5      $ )zReset the environment.

Args:
  blocking: Whether to wait for the result.

Returns:
  New observation when blocking, otherwise callable that returns the new
  observation.
N)r   r   _RESETr   _OBSERVr   r   )r   r   s     r   r   ExternalProcess.resetz  sN     	JJOOT[[$'(]]4<<((t}}dll;;r   c                      U R                   R                  U R                  S45        U R                   R                  5         U R
                  R                  5         g! [         a     N'f = f)z9Send a close message to the external process and join it.N)r   r   _CLOSEr   IOErrorr   joinr(   s    r   r   ExternalProcess.close  sV    
jjoot{{D)*
jj 	MM  
s   AA 
A+*A+c                     U R                   R                  5       u  p#X R                  :X  a  Un[        U5      eX!:X  a  U$ [	        SR                  U5      5      e)a.  Wait for a message from the worker process and return its payload.

Args:
  expected_message: Type of the expected message.

Raises:
  Exception: An exception was raised inside the worker process.
  KeyError: The reveived message is not of the expected type.

Returns:
  Payload object of the message.
z&Received message of unexpected type {})r   recv
_EXCEPTION	Exceptionr]   format)r   expected_messagemessagepayload
stacktraces        r   r   ExternalProcess._receive  sS     zz(G//!jj!!"n
;BB7K
LLr   c                     U" 5       n  UR                  S5      (       d  M  UR                  5       u  pEX@R                  :X  a0  UnUR                  U R                  UR                  U5      45        Mk  X@R                  :X  a2  Ub   eUR                  U R                  UR                  5       45        M  X@R                  :X  a*  UnUR                  U R                  [        X75      45        M  X@R                  :X  a  Ub   eO[        SR!                  U5      5      eUR7                  5         g! [        [        4 a     M$  f = f! ["         a    SR%                  [&        R(                  " [*        R,                  " 5       6 5      nUR                  U R.                  U45        [0        R2                  R5                  SR!                  U5      5         Nf = f)zThe process waits for actions and sends back environment results.

Args:
  constructor: Constructor for the OpenAI Gym environment.
  conn: Connection for communication to the main process.
g?Nz#Received message of unknown type {} z Error in environment process: {})pollr  EOFErrorKeyboardInterruptr   r   r   r   r   r   r   r   r   r   r  r]   r
  r	  r  	tracebackformat_exceptionsysexc_infor  rQ   rR   errorr   )	r   r   r   r   r  r  r!   r   r  s	            r   r   ExternalProcess._worker  s~   NMc	3!YY[
' ll"&
))T%%sxx'78
9
kk!
 
))T\\399;/
0
oo%$
))T[['#"45
6
kk!
 
<CCGLMM
 	JJL- +, 	
	$  N77955s||~FGj
ii*-.jj9@@LMNsE   E D. E D. CE E .E>E EE B
GG)r   r   r   r   N)T)r+   r,   r-   r.   r/   r   r   r  r   r   r   r  r   r   r   rn   rO   r   r   r   r   r   r   r0   r1   r   r   r   r   +  s|    I '&&*+'*&,  
  
&@"< M,%r   r   c                   <    \ rS rSrSrS rS rS rS rS r	S r
S	rg
)ConvertTo32Biti  z:Convert data types of an OpenAI Gym environment to 32 bit.c                     Xl         g)zaConvert data types of an OpenAI Gym environment to 32 bit.

Args:
  env: OpenAI Gym environment.
Nr   r   s     r   r   ConvertTo32Bit.__init__  s	     Ir   c                 .    [        U R                  U5      $ )zForward unimplemented attributes to the original environment.

Args:
  name: Attribute that was accessed.

Returns:
  Value behind the attribute name in the wrapped environment.
r   r   s     r   r   ConvertTo32Bit.__getattr__  s     499d##r   c                     U R                   R                  U5      u  p#pEU R                  U5      nU R                  U5      nX#XE4$ )zForward action to the wrapped environment.

Args:
  action: Action to apply to the environment.

Raises:
  ValueError: Invalid action.

Returns:
  Converted observation, converted reward, done flag, and info object.
)r
   r   _convert_observ_convert_rewardr    s         r   r   ConvertTo32Bit.step  sF     "&!7FD!!&)F!!&)F4%%r   c                 \    U R                   R                  5       nU R                  U5      nU$ )z`Reset the environment and convert the resulting observation.

Returns:
  Converted observation.
)r
   r   r"  r~   s     r   r   ConvertTo32Bit.reset  s)     YY__F!!&)FMr   c                 b   [         R                  " U5      R                  5       (       d  [        S5      eUR                  [         R
                  :X  a  UR                  [         R                  5      $ UR                  [         R                  :X  a  UR                  [         R                  5      $ U$ )zConvert the observation to 32 bits.

Args:
  observ: Numpy observation.

Raises:
  ValueError: Observation contains infinite values.

Returns:
  Numpy observation with 32-bit data type.
z!Infinite observation encountered.)
rK   r   r   r   dtypefloat64astypefloat32int64int32r~   s     r   r"  ConvertTo32Bit._convert_observ  ss     ;;v""$$:;;||rzz!]]2::&&||rxx]]288$$Mr   c                     [         R                  " U5      R                  5       (       d  [        S5      e[         R                  " U[         R
                  S9$ )zConvert the reward to 32 bits.

Args:
  reward: Numpy reward.

Raises:
  ValueError: Rewards contain infinite values.

Returns:
  Numpy reward with 32-bit data type.
zInfinite reward encountered.)r(  )rK   r   r   r   arrayr+  )r   r#   s     r   r#  ConvertTo32Bit._convert_reward  s=     ;;v""$$56688F"**--r   r   N)r+   r,   r-   r.   r/   r   r   r   r   r"  r#  r0   r1   r   r   r  r    s#    B	$&"(.r   r  )r/   
__future__r   r   r   r   r   r   r  r  ru   
gym.spacesnumpyrK   tf.compat.v1compatv1rQ   objectr   r3   rA   rZ   r   r   r   r   r   r  r1   r   r   <module>r9     s    , &  %    
  
     .,6 ,*& ,:6 :z :>JV >JB" "(F 4df dNS.V S.r   