
    ni("                     >    S r SSKJr  SSKrSSKJr   " S S\5      rg)zExtends the environment by adding observation and action history.

The implementation is a bit dirty import of the implementation in
the experimental branch.

    )spacesN)MinitaurReactiveEnvc                      ^  \ rS rSrSrSrSrSrSrSr	SS	/S
S.r
         SU 4S jjrU 4S jrU 4S jrU 4S jrU 4S jrS rS r\S 5       rS rS rSrU =r$ )MinitaurExtendedEnv   a<  The 'extended' environment for Markovian property.

This class implements to include prior actions and observations to the
observation vector, thus making the environment "more" Markovian. This is
especially useful for systems with latencies.

Args:
  history_length: the length of the historic data
  history_include_actions: a flag for including actions as history
  history_include_states: a flag for including states as history
  include_state_difference: a flag for including the first-order differences
    as history
  include_second_state_difference: a flag for including the second-order state
    differences as history.
  include_base_position: a flag for including the base as observation,
  never_terminate: if this is on, the environment unconditionally never
    terminates.
  action_scale: the scale of actions,
i        g       @        human	rgb_array2   )zrender.modeszvideo.frames_per_secondc
                   > Xl         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl	        [        R                  " U R                  S-   U R                  45      U l        [        R                  " U R                  S-   S45      U l        [        R                  " U R                  U R                   45      U l        SU l        [&        [(        U ]V  " S0 U
D6  [,        R.                  " SSU R0                  R2                  5      U l        [,        R.                  " [        R4                  * [        R4                  U R7                  5       R2                  5      U l        [;        U R0                  R<                  5      U R0                  l        [;        U R8                  R<                  5      U R8                  l        g )N   r   r   g            ? ) _kwargs_history_length_history_include_actions_history_include_states_include_state_difference _include_second_state_difference_include_base_position_include_leg_model_never_terminate_action_scalenpzerosMAX_BUFFER_SIZEPARENT_OBSERVATION_DIM_past_parent_observations_past_motor_angles
ACTION_DIM_past_actions_countersuperr   __init__r   Boxaction_spaceshapeinf_get_observationobservation_spacelenlowflat_dim)selfhistory_lengthhistory_include_actionshistory_include_statesinclude_state_differenceinclude_second_state_differenceinclude_base_positioninclude_leg_modelnever_terminateaction_scalekwargs	__class__s              k/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/minitaur/envs/minitaur_extended_env.pyr&   MinitaurExtendedEnv.__init__-   s\    L)$;!#9 %=",K)"7/+%%'XXt/C/Ca/G/3/J/J/L &MD" hh(<(<q(@!'DED4#7#7"IJDDM	
t-77

4d.?.?.E.EFD#ZZ(,(=(=(?(E(EGD "%T%6%6%:%:!;D&)$*@*@*D*D&ED#    c                   > [         [        U ]  5       n[        R                  " U5      nXl        XR                  U R                  '   U R                  R                  nX* S U R                  U R                  '   / n/ n[        U R                  5       H  n[        U R                  U-
  S-
  S5      nU R                  (       a  UR                  U R                  U   5        U R                   (       d  Ma  UR                  U R"                  U   5        M     U R                  n[        SU R                  S-
  5      [        SU R                  S-
  5      p/ / pU R$                  (       a   U R                  U   U R                  U   -
  /n	U R&                  (       a3  U R                  U   SU R                  U   -  -
  U R                  U   -   /n
/ nU R(                  (       a.  [        R                  " U R                  R+                  5       5      n/ nU R,                  (       a+  U R                  R/                  5       nU R1                  U5      nU/U-   U-   U	-   U
-   U/-   U/-   n[        R2                  " U5      nU$ )z>Maybe concatenate motor velocity and torque into observations.Nr   r      )r%   r   r+   r   array_observationr    r$   minitaur
num_motorsr!   ranger   maxr   appendr   r#   r   r   r   GetBasePositionr   GetMotorAnglesconvert_to_leg_modelconcatenate)r0   parent_observationrD   history_stateshistory_actionsittmtmmstate_differencesecond_state_differencebase_position	leg_modelraw_motor_anglesobservation_listfull_observationr;   s                   r<   r+   $MinitaurExtendedEnv._get_observationS   sI   2DJL"45*4F""4==1))J-?-MDDMM*NO4''(
dmma!#Q
'a		%	%d<<Q?@		&	&	&t11!45 ) 	A!T]]Q&'Q0A)B02B-%%

!
!!
$t'>'>r'B
B ,,

!
!!
$q4+B+B2+F'F
F

!
!#
&'!
 M""hh = = ?AmI557++,<=i 
~-?	2	36C_	E		 
 ~~&67r>   c                   > SU l         U R                  U R                   5        [        R                  " U R                  S-   U R
                  45      U l        [        R                  " U R                  S-   S45      U l        [        R                  " U R                  U R                  45      U l	        SU l         [        R                  " [        [        U ]7  5       5      $ )z#Resets the time and history buffer.r   r   r   )r$   _signalr   r   r   r   r    r!   r"   r#   rA   r%   r   resetr0   r;   s    r<   r]   MinitaurExtendedEnv.reset   s    DMLL%'XXt/C/Ca/G/3/J/J/L &MD" hh(<(<q(@!'DED4#7#7"IJDDM88E-t:<==r>   c                    > XR                   -  nXR                  U R                  '   U =R                  S-  sl        [        [        U ]  U5      u  p#pEU R                  5       nUR                  US9  X&XE4$ )a
  Step function wrapper can be used to add shaping terms to the reward.

Args:
  action: an array of the given action

Returns:
  next_obs: the next observation
  reward: the reward for this experience tuple
  done: the terminal flag
  info: an additional information
r   )base_reward)r   r#   r$   r%   r   steprewardupdate)r0   actionnext_obs_doneinforc   r;   s          r<   rb   MinitaurExtendedEnv.step   sn        F(.t}}%MMQM#$7CFKH[[]FKKFK#T''r>   c                 *   > [         [        U ]  5         g)z1The helper function to terminate the environment.N)r%   r   _closer^   s    r<   	terminateMinitaurExtendedEnv.terminate   s    	
t+-r>   c                     U R                   (       a  gU R                  U R                  R                  5       5      nUS   nUS   nSnX$:  d  X4:  a  gU R                  S:  a  gg)zDetermines whether the env is terminated or not.

checks whether 1) the front leg is bent too much or 2) the time exceeds
the manually set weights.

Returns:
  terminal: the terminal flag whether the env is terminated or not
Fr   r@   g?Ti  )r   rJ   rC   rI   r$   )r0   rV   swing0swing1maximum_swing_angles        r<   _termination MinitaurExtendedEnv._termination   se     ))$--*F*F*HIIq\Fq\F#v'C}}r>   c                    U R                   R                  5       nU R                  nUS   U R                  S   -
  U-  n[        R
                  " USS5      nU R                  U R                  S-
     nU R                  [        U R                  S-
  S5         nU R                  [        U R                  S-
  S5         nUSU-  -
  U-   n[        R                  " [        R                  " U5      5      n	Sn
U
SU-  -  n
U
S	U	-  -  n
U
$ )
zCompute rewards for the given time step.

It considers two terms: 1) forward velocity reward and 2) action
acceleration penalty.

Returns:
  reward: the computed reward.
r   g            ?r   r@      r
   r   g?)rC   rH   control_time_step_last_base_positionr   clipr#   r$   rF   meanabs)r0   current_base_positiondtvelocityvelocity_rewardre   prev_actionprev_prev_actionaccaction_acceleration_penaltyrc   s              r<   rc   MinitaurExtendedEnv.reward   s    !MM99;			B%a(4+C+CA+FF"LHgghc2O 12F$$S):A%>?K))#dmma.?*CD
1{?
"%5
5C"$''"&&+"6F
cO##F
c///FMr>   c                     Sn[         R                  " US-  5      n[        U5       H4  nU SU-  SU-  S-    u  pESUS-  -  S-  XT-
  -  nSXE-   -  nXbU'   XrX1-   '   M6     U$ )zA helper function to convert motor angles to leg model.

Args:
  motor_angles: raw motor angles:

Returns:
  leg_angles: the leg pose model represented in swing and extension.
   r@   rv   )r   r   rE   )motor_anglesnum_legs
leg_anglesrO   motor1motor2swing	extensions           r<   rJ   (MinitaurExtendedEnv.convert_to_leg_model   s     H(Q,'J8_#AE!a%!)4nfQ!Vns"fo6e)im!*  r>   c                     SU R                   0$ )z)A helper get state function for pickling.r:   )r   )r0   s    r<   __getstate__ MinitaurExtendedEnv.__getstate__   s    dll##r>   c                 .    U R                   " S0 US   D6  g)z)A helper set state function for pickling.r:   Nr   )r&   )r0   states     r<   __setstate__ MinitaurExtendedEnv.__setstate__   s    MM$E(O$r>   )r   r$   r   r   r   r   r   r   r   r   r   rB   r#   r!   r    r(   r,   )	r   TFFFFFFrv   )__name__
__module____qualname____firstlineno____doc__r   r"   r   INIT_EXTENSION_POSINIT_SWING_POSmetadatar&   r+   r]   rb   rm   rs   rc   staticmethodrJ   r   r   __static_attributes____classcell__)r;   s   @r<   r   r      s    & /*. ,!#(  '+&+(-/4%*!&$$FL2h
>(2.04  .$% %r>   r   )r   gymr   numpyr   1pybullet_envs.minitaur.envs.minitaur_reactive_envr   r   r   r>   r<   <module>r      s$      Qr%- r%r>   