
    nia                        S SK r S SKrS SKrS SKrS SKJrJr  S SKJ	r	  S SK
JrJr  S SKJr  \S:X  Ga  \ R                   " S5      r\R%                  SS	S
\SS
S/S9  \R%                  S\SSS9  \R%                  SSS\" S5      \S9  \R%                  SSS\S9  \R+                  5       r\R.                  r\R2                  r\R4                   S\ 3r\R8                  " \5      r\" \R8                  " \5      5      r\" \\S9/r\R>                  S :  a!  \RA                  \" \R>                  \SS95        \\S.\R4                     r\RB                  RD                  S    r#\$" SS \$" SS/S!9S"\" S#5      S$S%9\$" S&\$" S'S(/S!9S)S"\" S5      S*S*\	" \RJ                  " \#5      S+\RL                  " \#5      -  S,9S-9S.\R4                     r'\" S.\4S/S*0\'D6r( \(RS                  \\S09  \+" S1\ S235        \(RY                  \5        gg! \* a     N'f = f)3    N)SACTD3)NormalActionNoise)EvalCallbackCheckpointCallback)Monitor__main__z)Train an RL agent using Stable Baselines3z--algoz+RL Algorithm (Soft Actor-Critic by default)sacFtd3)helpdefaulttyperequiredchoicesz--envzHalfCheetahBulletEnv-v0zenvironment ID)r   r   r   z-nz--n-timestepszNumber of training timestepsg    .A)r   r   r   z--save-freqz9Save the model every n steps (if negative, no checkpoint)_)best_model_save_pathrl_model)	save_freq	save_pathname_prefix)r
   r      g\(\?)net_archi'  g    OAg{Gz?)
batch_sizegammapolicy_kwargslearning_startsbuffer_sizetaud   i  i,  gMbP?   g?)meansigma)r   r   learning_rater   r   
train_freqgradient_stepsaction_noise	MlpPolicyverbose)callbackz
Saving to z.zip)-argparsepybullet_envsgymnumpynpstable_baselines3r   r   stable_baselines3.common.noiser   "stable_baselines3.common.callbacksr   r    stable_baselines3.common.monitorr   __name__ArgumentParserparseradd_argumentstrint
parse_argsargsenvenv_idn_timestepsalgor   makeeval_env	callbacksr   appendaction_spaceshape	n_actionsdictzerosoneshyperparamsmodellearnKeyboardInterruptprintsave     ^/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/stable_baselines/train.py<module>rS      s     
  & < O 4 z$$%PQF
:   c#<CS   +C   H	   DXXF""K99+Qvh'I ((6
C sxx'(HhYGHI ~~..I:	
  iiD
   &&q)I c
3!C
 c
3!C*XXi(bggi6H0H
* ii+K. c<1<<EK)4 
Jyk
&'	JJym d  s   H H
H