
    h>                     l    S S/r SSKrSSKJr  SSKJr  SSKJr  SS0r	 " S	 S
\\R                  5      rg)zKallinteris-AndreaszRushiv Arora    N)utils)	MujocoEnv)Boxdistanceg      @c                       \ rS rSrSrS/ SQ0rSS\SSSS	4S
\S\S\	\\
\-  4   S\
S\
S\
S\4S jjrS rS rS\
4S jrS rS rS rSrg)HalfCheetahEnv   u8+  
## Description
This environment is based on the work of P. Wawrzyński in ["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf).
The HalfCheetah is a 2-dimensional robot consisting of 9 body parts and 8 joints connecting them (including two paws).
The goal is to apply torque to the joints to make the cheetah run forward (right) as fast as possible, with a positive reward based on the distance moved forward and a negative reward for moving backward.
The cheetah's torso and head are fixed, and torque can only be applied to the other 6 joints over the front and back thighs (which connect to the torso), the shins (which connect to the thighs), and the feet (which connect to the shins).


## Action Space
```{figure} action_space_figures/half_cheetah.png
:name: half_cheetah
```

The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.

| Num | Action                                  | Control Min | Control Max | Name (in corresponding XML file) | Joint | Type (Unit)  |
| --- | --------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
| 0   | Torque applied on the back thigh rotor  | -1          | 1           | bthigh                           | hinge | torque (N m) |
| 1   | Torque applied on the back shin rotor   | -1          | 1           | bshin                            | hinge | torque (N m) |
| 2   | Torque applied on the back foot rotor   | -1          | 1           | bfoot                            | hinge | torque (N m) |
| 3   | Torque applied on the front thigh rotor | -1          | 1           | fthigh                           | hinge | torque (N m) |
| 4   | Torque applied on the front shin rotor  | -1          | 1           | fshin                            | hinge | torque (N m) |
| 5   | Torque applied on the front foot rotor  | -1          | 1           | ffoot                            | hinge | torque (N m) |


## Observation Space
The observation space consists of the following parts (in order):

- *qpos (8 elements by default):* Position values of the robot's body parts.
- *qvel (9 elements):* The velocities of these individual body parts (their derivatives).

By default, the observation does not include the robot's x-coordinate (`rootx`).
This can be included by passing `exclude_current_positions_from_observation=False` during construction.
In this case, the observation space will be a `Box(-Inf, Inf, (18,), float64)`, where the first observation element is the x-coordinate of the robot.
Regardless of whether `exclude_current_positions_from_observation` is set to `True` or `False`, the x- and y-coordinates are returned in `info` with the keys `"x_position"` and `"y_position"`, respectively.

By default, however, the observation space is a `Box(-Inf, Inf, (17,), float64)` where the elements are as follows:


| Num | Observation                                 | Min  | Max | Name (in corresponding XML file) | Joint | Type (Unit)              |
| --- | ------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
| 0   | z-coordinate of the front tip               | -Inf | Inf | rootz                            | slide | position (m)             |
| 1   | angle of the front tip                      | -Inf | Inf | rooty                            | hinge | angle (rad)              |
| 2   | angle of the back thigh                     | -Inf | Inf | bthigh                           | hinge | angle (rad)              |
| 3   | angle of the back shin                      | -Inf | Inf | bshin                            | hinge | angle (rad)              |
| 4   | angle of the back foot                      | -Inf | Inf | bfoot                            | hinge | angle (rad)              |
| 5   | angle of the front thigh                    | -Inf | Inf | fthigh                           | hinge | angle (rad)              |
| 6   | angle of the front shin                     | -Inf | Inf | fshin                            | hinge | angle (rad)              |
| 7   | angle of the front foot                     | -Inf | Inf | ffoot                            | hinge | angle (rad)              |
| 8   | velocity of the x-coordinate of front tip   | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
| 9   | velocity of the z-coordinate of front tip   | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
| 10  | angular velocity of the front tip           | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
| 11  | angular velocity of the back thigh          | -Inf | Inf | bthigh                           | hinge | angular velocity (rad/s) |
| 12  | angular velocity of the back shin           | -Inf | Inf | bshin                            | hinge | angular velocity (rad/s) |
| 13  | angular velocity of the back foot           | -Inf | Inf | bfoot                            | hinge | angular velocity (rad/s) |
| 14  | angular velocity of the front thigh         | -Inf | Inf | fthigh                           | hinge | angular velocity (rad/s) |
| 15  | angular velocity of the front shin          | -Inf | Inf | fshin                            | hinge | angular velocity (rad/s) |
| 16  | angular velocity of the front foot          | -Inf | Inf | ffoot                            | hinge | angular velocity (rad/s) |
| excluded | x-coordinate of the front tip          | -Inf | Inf | rootx                            | slide | position (m)             |


## Rewards
The total reward is: ***reward*** *=* *forward_reward - ctrl_cost*.

- *forward_reward*:
A reward for moving forward,
this reward would be positive if the Half Cheetah moves forward (in the positive $x$ direction / in the right direction).
$w_{forward} \times \frac{dx}{dt}$, where
$dx$ is the displacement of the "tip" ($x_{after-action} - x_{before-action}$),
$dt$ is the time between actions, which depends on the `frame_skip` parameter (default is $5$),
and `frametime` which is $0.01$ - so the default is $dt = 5 \times 0.01 = 0.05$,
$w_{forward}$ is the `forward_reward_weight` (default is $1$).
- *ctrl_cost*:
A negative reward to penalize the Half Cheetah for taking actions that are too large.
$w_{control} \times \|action\|_2^2$,
where $w_{control}$ is `ctrl_cost_weight` (default is $0.1$).

`info` contains the individual reward terms.


## Starting State
The initial position state is $\mathcal{U}_{[-reset\_noise\_scale \times I_{9}, reset\_noise\_scale \times I_{9}]}$.
The initial velocity state is $\mathcal{N}(0_{9}, reset\_noise\_scale^2 \times I_{9})$.

where $\mathcal{N}$ is the multivariate normal distribution and $\mathcal{U}$ is the multivariate uniform continuous distribution.


## Episode End
### Termination
The Half Cheetah never terminates.

### Truncation
The default duration of an episode is 1000 timesteps.


## Arguments
HalfCheetah provides a range of parameters to modify the observation space, reward function, initial state, and termination condition.
These parameters can be applied during `gymnasium.make` in the following way:

```python
import gymnasium as gym
env = gym.make('HalfCheetah-v5', ctrl_cost_weight=0.1, ....)
```

| Parameter                                    | Type      | Default              | Description                                                                                                                                                                                         |
| -------------------------------------------- | --------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `xml_file`                                   | **str**   | `"half_cheetah.xml"` | Path to a MuJoCo model                                                                                                                                                                              |
| `forward_reward_weight`                      | **float** | `1`                  | Weight for _forward_reward_ term (see `Rewards` section)                                                                                                                                            |
| `ctrl_cost_weight`                           | **float** | `0.1`                | Weight for _ctrl_cost_ weight (see `Rewards` section)                                                                                                                                               |
| `reset_noise_scale`                          | **float** | `0.1`                | Scale of random perturbations of initial position and velocity (see `Starting State` section)                                                                                                       |
| `exclude_current_positions_from_observation` | **bool**  | `True`               | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies (see `Observation State` section) |

## Version History
* v5:
    - Minimum `mujoco` version is now 2.3.3.
    - Added support for fully custom/third party `mujoco` models using the `xml_file` argument (previously only a few changes could be made to the existing models).
    - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
    - Added `env.observation_structure`, a dictionary for specifying the observation space compose (e.g. `qpos`, `qvel`), useful for building tooling and wrappers for the MuJoCo environments.
    - Return a non-empty `info` with `reset()`, previously an empty dictionary was returned, the new keys are the same state information as `step()`.
    - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
    - Restored the `xml_file` argument (was removed in `v4`).
    - Renamed `info["reward_run"]` to `info["reward_forward"]` to be consistent with the other environments.
* v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3.
* v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics).
* v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics).
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
* v0: Initial versions release.
render_modeshuman	rgb_arraydepth_array
rgbd_tuplezhalf_cheetah.xml         ?g?Txml_file
frame_skipdefault_camera_configforward_reward_weightctrl_cost_weightreset_noise_scale*exclude_current_positions_from_observationc           
         [         R                  R                  " U UUUUUUU40 UD6  X@l        XPl        X`l        UU l        [        R                  " U UU4S US.UD6  / SQ[        [        R                  " SU R                  -  5      5      S.U l        U R                  R                  R                  U R                  R                   R                  -   U-
  n	[#        [        R$                  * [        R$                  U	4[        R&                  S9U l        SU-  U R                  R                  R                  SU-  -
  U R                  R                   R                  S.U l        g )N)observation_spacer   r   r   )r
   
render_fps)lowhighshapedtype   )skipped_qposqposqvel)r   EzPickle__init___forward_reward_weight_ctrl_cost_weight_reset_noise_scale+_exclude_current_positions_from_observationr   intnprounddtmetadatadatar"   sizer#   r   inffloat64r   observation_structure)
selfr   r   r   r   r   r   r   kwargsobs_sizes
             _/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/envs/mujoco/half_cheetah_v5.pyr%   HalfCheetahEnv.__init__   sU    	!!6
	
 
	
 '<#!1"3 7 	8 		
 #"7	
 	
 bhhsTWW}56
 IINNiinn!!"89 	
 "%bffXKrzz"

  JJIINN''<<=IINN''	&
"    c                 t    U R                   [        R                  " [        R                  " U5      5      -  nU$ )N)r'   r+   sumsquare)r4   actioncontrol_costs      r7   r>   HalfCheetahEnv.control_cost   s*    --ryy7H0IIr9   c                 \   U R                   R                  S   nU R                  XR                  5        U R                   R                  S   nX2-
  U R                  -  nU R                  5       nU R                  XA5      u  pgX4S.UEnU R                  S:X  a  U R                  5         XVSSU4$ )Nr   )
x_position
x_velocityr   F)	r/   r"   do_simulationr   r-   _get_obs_get_rewrender_moderender)	r4   r=   x_position_beforex_position_afterrB   observationrewardreward_infoinfos	            r7   stepHalfCheetahEnv.step   s     IINN1-6??399>>!,&:dggE
mmo"mmJ?.XKXw&KKME5$66r9   rB   c                 \    U R                   U-  nU R                  U5      nX4-
  nUU* S.nXV4$ )N)reward_forwardreward_ctrl)r&   r>   )r4   rB   r=   forward_reward	ctrl_costrK   rL   s          r7   rE   HalfCheetahEnv._get_rew   sF    44zA%%f-	+ -%:
 ""r9   c                    U R                   R                  R                  5       nU R                   R                  R                  5       nU R                  (       a  USS  n[
        R                  " X45      R                  5       nU$ )Nr    )r/   r"   flattenr#   r)   r+   concatenateravel)r4   positionvelocityrJ   s       r7   rD   HalfCheetahEnv._get_obs   sb    99>>))+99>>))+;;|Hnnh%9:@@Br9   c                    U R                   * nU R                   nU R                  U R                  R                  XU R                  R
                  S9-   nU R                  U R                   U R                  R                  U R                  R                  5      -  -   nU R                  X45        U R                  5       nU$ )N)r   r   r0   )r(   	init_qpos	np_randomuniformmodelnq	init_qvelstandard_normalnv	set_staterD   )r4   	noise_low
noise_highr"   r#   rJ   s         r7   reset_modelHalfCheetahEnv.reset_model  s    ,,,	,,
~~ 6 6 !7 !
 
 NN%%(F(Ftzz}}(UUV 	
 	t"mmor9   c                 8    SU R                   R                  S   0$ )NrA   r   )r/   r"   )r4   s    r7   _get_reset_infoHalfCheetahEnv._get_reset_info  s    $))..+
 	
r9   )r'   r)   r&   r(   r.   r   r3   N)__name__
__module____qualname____firstlineno____doc__r.   DEFAULT_CAMERA_CONFIGstrr*   dictfloatboolr%   r>   rN   rE   rD   ri   rl   __static_attributes__ r9   r7   r   r      s    D 	 
H +8M'*"%#&;?A
A
 A
  $C$45	A

  %A
  A
 !A
 59A
F7
#5 
#"
r9   r   )__credits__numpyr+   	gymnasiumr   gymnasium.envs.mujocor   gymnasium.spacesr   rs   r$   r   ry   r9   r7   <module>r      s>   $n5   +    
K
Y K
r9   