
    ni=                         S SK rS SKr S SKJs  Jr  S SK	J
r
  S SKJr  S SKJs  Js  Jr  S SKJs  Js  Jr  S SKJr  S SKJr  S SKJr  S SKJr    " S S\
5      rg! \ a    S SKr NUf = f)    N)PGAgent)	MPISolver)Logger)Envc                      ^  \ rS rSrSrSrSrSrSrSr	Sr
S	rU 4S
 jrU 4S jrS rS rS rS rS rS rS rS rS rS rS rS rS rS rSrU =r$ )PPOAgent   PPOEpochs	BatchSize	RatioClipNormAdvClipTDLambdaTarClipFracActorStepsizeDecayc                 &   > [         TU ]  XU5        g N)super__init__)selfworldid	json_data	__class__s       e/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/deep_mimic/learning/ppo_agent.pyr   PPOAgent.__init__   s    	GU	*
    c                   > [         TU ]  U5        U R                  U;  a  SOXR                     U l        U R                  U;  a  SOXR                     U l        U R                  U;  a  SOXR                     U l        U R                  U;  a  SOXR                     U l	        U R                  U;  a  SOXR                     U l        U R                  U;  a  SOXR                     U l        U R                  U;  a  SOXR                     U l        [         R"                  " 5       n[%        U R
                  U-  5      nSU-  nU R&                  U:  d   e[(        R*                  " X@R&                  5      U l        g )	N   i   g?   gffffff?      ?   )r   _load_params
EPOCHS_KEYepochsBATCH_SIZE_KEY
batch_sizeRATIO_CLIP_KEY
ratio_clipNORM_ADV_CLIP_KEYnorm_adv_clipTD_LAMBDA_KEY	td_lambdaTAR_CLIP_FRACtar_clip_fracACTOR_STEPSIZE_DECAYactor_stepsize_decayMPIUtilget_num_procsintreplay_buffer_sizenpmaximum)r   r   	num_procslocal_batch_sizemin_replay_sizer   s        r   r$   PPOAgent._load_params#   sy   	G#y8!y?YDK9, 3<=P=P3Q 	O 	9, 3<=P=P3Q 	O 	i/ 6?@V@V6W 	 	)+ 2;<N<N2O 	N 	)+ 2;<N<N2O 	 	!!2 !$9BC\C\9] 	 %%'I4??Y67**O##o565 jj:Q:QRD
r   c                    U R                   U;   d   eU R                  U;   d   eXR                      nXR                     nU R                  U;  a  SOXR                     nU R                  5       nU R	                  5       nU R                  5       n[        R                  " [        R                  S U/SS9U l	        [        R                  " [        R                  S U/SS9U l
        [        R                  " [        R                  S /SS9U l        [        R                  " [        R                  S /SS9U l        [        R                  " [        R                  U R                  5       (       a  S U/OS SS9U l        [        R                  " [        R                  S /SS9U l        [        R                  " [        R                  S /S	S9U l        [        R"                  " S
5         [        R"                  " S5         U R%                  X$5      U l        S S S 5        [        R"                  " S5         U R)                  U5      U l        S S S 5        S S S 5        U R&                  S :w  a  [,        R.                  " SU-   5        U R*                  S :w  a  [,        R.                  " SU-   5        U R0                  R2                  [        R4                  " U5      -  U l        U R6                  [        R8                  " [        R:                  " U R&                  5      S9-  nU[        R<                  " U R                   SS9-  nU R&                  XR>                  R@                  -  -   U l!        [D        RF                  " US U R6                  S9U l$        g ! , (       d  f       GN= f! , (       d  f       GN^= f! , (       d  f       GNh= f)Nr   s)shapenameatar_valadvgold_logpexp_maskmainactorcriticzBuilt actor net: zBuilt critic net: )r?   r!   )axis)x_tfmean_tfstd_tf)%ACTOR_NET_KEYCRITIC_NET_KEYACTOR_INIT_OUTPUT_SCALE_KEYget_state_sizeget_goal_sizeget_action_sizetfplaceholderfloat32s_tfa_tf
tar_val_tfadv_tfhas_goalg_tfold_logp_tfexp_mask_tfvariable_scope_build_net_actor	a_mean_tf_build_net_critic	critic_tfr   print2exp_params_currnoiseonesnorm_a_std_tfrandom_normalr?   expand_dimsa_normrM   sample_a_tfTFUtilcalc_logp_gaussiansample_a_logp_tf)	r   r   actor_net_namecritic_net_nameactor_init_output_scales_sizeg_sizea_sizenorm_a_noise_tfs	            r   _build_netsPPOAgent._build_nets=   s   ***)+++112N 3 34O$($D$DI$Ua*34T4T*U    "F!F!!#F rzz$cJDIrzz$cJDInnRZZvINDO..D6FDKrzz8<tVnT$')DI ~~bjjZPD~~bjjZPD			6	"W%..~W &X&//@ ' 
# 	$mm'.89$mm(?:;--33bggfoED((2+;+;"((4>>BZ+[[Or~~d&6&6R@@O~~++:L:L(LLD"55?>B=A=O=OQD ' &%&& 
#	"s<   'N3>NN33N!
N3
N	N3!
N0	+N33
Oc           
      L   U R                   U;  a  SOXR                      nU R                  U;  a  SOXR                     nU R                  R                  U R                  5      U R                  R                  U R
                  5      -
  nS[        R                  " [        R                  " U5      5      -  U l	        US:w  a&  U =R                  X0R                  S5      -  -  sl	        U R                  R                  U R                  5      nU R                  R                  U R                  5      U l        [        R                   " XPR                  U R"                  5      U l        [        R&                  " U R$                  U R(                  -
  5      nU R*                  U-  nU R*                  [        R,                  " USU R.                  -
  SU R.                  -   5      -  n[        R                  " [        R0                  " Xx5      5      * U l        U R                  R5                  U R6                  5      n	U R                  R5                  U R8                  5      n
[        R:                  " U R                  X5      nU =R2                  U-  sl        US:w  a&  U =R2                  X R                  S5      -  -  sl        [        R                  " [        R<                  " [        R>                  " [        R@                  " US-
  5      U R.                  5      5      5      U l!        g )Nr   r"   main/criticg      ?r   
main/actor)"ACTOR_WEIGHT_DECAY_KEYCRITIC_WEIGHT_DECAY_KEYval_normnormalize_tfrY   rc   rT   reduce_meansquarecritic_loss_tf_weight_decay_lossrk   rX   ra   _norm_a_mean_tfrm   rn   rh   logp_tfexpr]   rZ   clip_by_valuer*   minimumactor_loss_tf	normalizea_bound_mina_bound_maxcalc_bound_lossto_floatgreaterabsclip_frac_tf)r   r   actor_weight_decaycritic_weight_decaynorm_val_diffnorm_tar_a_tfratio_tfactor_loss0actor_loss1norm_a_bound_minnorm_a_bound_maxa_bound_losss               r   _build_lossesPPOAgent._build_lossesk   si   ##94 ;DE`E`;a  	$$I5 <EFbFb<c  MM..t?$--B\B\C Mryy/G HHDq 
03J3J=3YYYKK,,TYY7M;;33DNNCD,,]<P<P-1-?-?ADLvvdllT%5%556H++(K++ 0 03;P12T__1D!F FK..K)MNND{{,,T-=-=>{{,,T-=-=>))$*>*>@PcL,&a
.1H1H1VVV 
BJJrvvhn5tGHJD r   c                    U R                   U;  a  SOXR                      nU R                  U;  a  SOXR                     nU R                  U;  a  SOXR                     nU R                  U;  a  SOXR                     nU R	                  S5      n[
        R                  R                  UUS9n[
        R                  " U R                  U5      U l
        [        U R                  Xv5      U l        [
        R                  " [
        R                  SUSS9U l        [
        R                  " [
        R                  S	/ S
9U l        U R                   R%                  U R"                  5      U l        U R	                  S5      n[
        R                  R                  U R                   US9n	[
        R                  " U R(                  U5      U l        [        U R                  X5      U l        g )NgMbP?g?{Gz?rz   )learning_ratemomentumactor_stepsizeF)dtyper@   initializer	trainableactor_stepsize_ph)r   r@   r?   r{   )ACTOR_STEPSIZE_KEYACTOR_MOMENTUM_KEYCRITIC_STEPSIZE_KEYCRITIC_MOMENTUM_KEY_tf_varsrT   trainMomentumOptimizer	gradientsr   critic_grad_tfr   sesscritic_solverget_variablerV   _actor_stepsize_tf_actor_stepsize_phassign_actor_stepsize_update_opr   actor_grad_tfactor_solver)
r   r   r   actor_momentumcritic_stepsizecritic_momentumcritic_vars
critic_opt
actor_vars	actor_opts
             r   _build_solversPPOAgent._build_solvers   s   y0 7@AXAX7Y  	y0 7@AXAX7Y  	  	1 8ABZBZ8[  	  	1 8ABZBZ8[  --.K++/5D , FJ,,t':':KHD"499jFD ooBJJ3C:H8=?D !ooBJJEX`bcD%)%<%<%C%CDD[D[%\D"|,J**9P9P4B + DId&8&8*ED!$))YCD
r   c                    U R                   R                  5          U R                  R                  5          U R                  5       =(       a*    [        R
                  " U R                  R                  5      U l        U R                  XU R                  5      u  p4S S S 5        S S S 5        WS   WS   4$ ! , (       d  f       N = f! , (       d  f       N)= fNr   )
r   
as_defaultgraph_enable_stoch_policyMathUtil	flip_coinre   rate_exp_action_eval_actor)r   r>   rD   rA   logps        r   _decide_actionPPOAgent._decide_action   s    				!6!6!8224 %9K9K



#
#:%d   t'7'78ga	 "9	
 Q4a= "9!8		s#   CA%B5C5
C	?C
Cc                    [         R                  " USU R                  5       /5      nU R                  5       (       a'  [         R                  " USU R	                  5       /5      OS nU R
                  XR                  X R                  [         R                  " U(       a  SOS/5      0nU R                  R                  U R                  U R                  /US9u  pVXV4$ )Nr!   r   r   )	feed_dict)r7   reshaperQ   r[   rR   rW   r\   r^   arrayr   runrl   ro   )r   r>   rD   
enable_expfeedrA   r   s          r   r   PPOAgent._eval_actor   s    


1r4..012A59]]__

1r4--/01$AIIq))Q(8(8"((AYZC[:\]DiimmT--t/D/DEQUmVGA7Nr   c                 &   SnU R                   R                  nU R                   R                  nUS:X  d   eU R                   R                  5       U R                   R                  ::  d   eX#:  d   e[
        R                  " [        [        X#5      5      5      nU R                   R                  U5      n[
        R                  " U5      nU R                  X#5      nU R                  X#U5      nXE   nU R                   R                  U R                  5      R                  5       n	UR                   S   n
U	R                   S   n[
        R"                  " U	[
        R                  " [        [        SU5      5      [
        R$                  S9/5      n	UR&                  n[)        [*        R,                  " U5      5      n[)        [
        R.                  " XR0                  -  5      5      nXyS S 2S4      XiS S 2S4      -
  n[
        R2                  " XpR4                  U R6                  5      n[
        R8                  " U5      n[
        R:                  " U5      nUU-
  UU-   -  n[
        R2                  " XR<                  * U R<                  5      nSnSnSn[        U R>                  5       GH  n[
        R@                  RC                  U5        [
        R@                  RC                  U	5        [        U5       GH.  nUU RD                  -  nUU RD                  -   n[
        R                  " [        UU5      [
        R$                  S9nUR                  5       n[
        RF                  " UU
5      n[
        RF                  " UU5      nUS   US   :  =(       d    US   US-
  :H  nUU   nU	U   nUU   nUUS S 2S4      nU R                   RI                  SU5      nU RK                  5       (       a  U R                   RI                  SU5      OS nU RM                  UUU5      n U R                   RI                  SUS S 2S4   5      n!U RK                  5       (       a#  U R                   RI                  SUS S 2S4   5      OS n"U R                   RI                  SUS S 2S4   5      n#U R                   RI                  S	US S 2S4   5      n$U RO                  U!U"U#U$U5      u  n%n&UU -  nU[
        RP                  " U%5      -  nUU&-  nU(       d  GM  [
        R@                  RC                  U	5        GM1     GM     XR>                  -  n'UU'-  nUU'-  nUU'-  n[*        RR                  " U5      n[*        RR                  " U5      n[*        RR                  " U5      nU RT                  RW                  5       n(U RY                  U5      n)U RZ                  R]                  S
U5        U RZ                  R]                  SU(5        U RZ                  R]                  SU5        U RZ                  R]                  SU)5        U RZ                  R]                  SU5        U RZ                  R]                  SU5        U RZ                  R]                  SU5        U R                   R_                  5         g )Ngh㈵>r   )r   r!   r   statesgoalsactionslogpsCritic_LossCritic_Stepsize
Actor_LossActor_Stepsize	Clip_FracAdv_MeanAdv_Std)0replay_bufferbuffer_tailbuffer_headget_current_sizebuffer_sizer7   r   listrangeis_path_endlogical_not_compute_batch_vals_compute_batch_new_valsget_idx_filteredEXP_ACTION_FLAGcopyr?   column_stackint32sizer5   r3   
reduce_sumceilmini_batch_sizeclipval_minval_maxmeanstdr,   r&   randomshuffle_local_mini_batch_sizemodgetr[   _update_critic_update_actorr   
reduce_avgr   get_stepsizeupdate_actor_stepsizeloggerlog_tabularclear)*r   adv_eps	start_idxend_idxidxend_maskvalsnew_vals	valid_idxexp_idxnum_valid_idxnum_exp_idxlocal_sample_countglobal_sample_countmini_batchesrC   adv_meanadv_stdcritic_loss
actor_lossactor_clip_fracebbatch_idx_begbatch_idx_endcritic_batchactor_batchshuffle_actorcritic_batch_valsactor_batch_advcritic_scritic_gcurr_critic_lossactor_sactor_gactor_a
actor_logpcurr_actor_losscurr_actor_clip_fractotal_batchesr   r   s*                                             r   _train_stepPPOAgent._train_step   s   G""..I  ,,GNN//1T5G5G5S5SS S 
((4i12
3C!!--c2H~~h'H##I7D++IEHI  11$2F2FGLLNGOOA&M--"KooweA{6K1LTVT\T\(]^_G"g001CDErww25I5IIJKL
1a4=
!DA$7
7Cwwxt||<Hwws|HffSkG>g/
0C
''#***D,>,>
?CKJO4;;ii	"ii \"!D777%(C(CCxxm] C288T"'')vvlM:ff[+6$R;q>9b{2R]`aRa?a .k*$\2k!Q$/0%%))(LADHMMOO4%%))'<@Y]..xCTU$$((;q!t3DEHL$$$((+ad2CD]a$$((K14EF''++G[A5FG
040B0B7GU\CM1`-- 	''bff_--
//M
))

G
$? #	  J !;;.M= K-J}$O$$[1K##J/J((9O((557O//@NKKM;7KK-?KKL*5KK,n=KKK9KKJ1KKIw/
r   c                     g)Nr    )r   s    r   _get_iters_per_updatePPOAgent._get_iters_per_update%  s    r   c                 0   U R                   R                  5       nU R                   R                  U R                  5      n[	        [
        R                  " U5      5      n[	        [
        R                  " U5      5      nX0R                  :  =(       a    US:  $ r   )	r   r   count_filteredr   r5   r3   r   
reduce_minr(   )r   samplesexp_samplesr  global_exp_mins        r   _valid_train_stepPPOAgent._valid_train_step(  st      113G$$33D4H4HIKg009:++K89N//1K8JKr   c                    U R                   R                  S5      X nU R                  5       (       a  U R                   R                  S5      X OS n[        R                  " [        [        X5      5      5      nU R                   R                  U5      nU R                   R                  U[        R                  R                  5      nU R                   R                  U[        R                  R                  5      n[        R                  " Xg5      n[        R                  " Xh5      nU R                  X45      n	U R                  X'   U R                   X'   U	$ )Nr   r   )r   get_allr[   r7   r   r   r   r   check_terminal_flagr   	TerminateFailSucclogical_and_eval_criticval_failval_succ)
r   r
  r  r   r   r  is_endis_failis_succr  s
             r   r   PPOAgent._compute_batch_vals/  s    ''1)DFFJmmooD&&w/	B[_E
((4i12
3C++C0F  44S#--:L:LMG  44S#--:L:LMGnnV-GnnV-GV+DMMDMMMDMKr   c                    U R                   R                  S5      X nU R                  S:X  a  UR                  5       nU$ [        R
                  " U5      nUnXb:  ai  Xa-
  nU R                   R                  U5      U-
  nXGU n	X7US-    n
[        R                  " XR                  U R                  U
5      XWU& X-   S-   nXb:  a  Mi  U$ )Nrewardsr   r   )
r   r>  discountr   r7   
zeros_likeget_path_endRLUtilcompute_returnr.   )r   r
  r  
val_bufferrL  r  curr_idxidx0idx1rvs              r   r    PPOAgent._compute_batch_new_vals@  s      ((3IFG}}h O z*hh#!!..x89DTAX'$33A}}dnnVWXd#a'  Or   c                     U R                   XR                  X R                  U0nU R                  R	                  U R
                  U R                  /U5      u  pVU R                  R                  U5        U$ r   )	rW   r\   rY   r   r   r   r   r   update)r   r>   rD   tar_valsr   lossgradss          r   r  PPOAgent._update_criticT  s\    IIq))QBD))--!4!4d6I6I JDQKDe$Kr   c           
      2   U R                   XR                  X R                  X0R                  XPR                  U0nU R
                  R                  U R                  U R                  U R                  /U5      u  pxn	U R                  R                  U5        Xy4$ r   )rW   r\   rX   rZ   r]   r   r   r   r   r   r   rZ  )
r   r>   rD   rA   r   rC   r   r\  r]  	clip_fracs
             r   r  PPOAgent._update_actor[  s}    IIq))Q		1kk3HXHXZ^_D!YY]]			T//1B1BCTKDU#?r   c                    SnSnSnSnSnU R                   R                  5       nU R                  S:  a  U R                  U:  a  U R                  U-  nU R                  U-  n	X:  n
X:  nU(       d  U
(       aL  U(       a  XpR                  -  nOXpR                  -  n[
        R                  " XuU5      nU R                  U5        U$ )Ng      ?r#   r   g:0yE>r    r   )r   r  r0   iterr2   r7   r   set_actor_stepsize)r   r`  clip_tol
step_scalemax_stepsizemin_stepsizewarmup_itersr   min_clipmax_clip	under_tolover_tols               r   r  PPOAgent.update_actor_stepsized  s    HJLLL&&335NaDII$<##h.h##h.h&i%h
i
55
5.
55
5.|L/r   c                 l    U R                   U0nU R                  R                  U R                  U5        g r   )r   r   r   r   )r   stepsizer   s      r   rd  PPOAgent.set_actor_stepsize}  s1    D 	IIMM$00$7
r   )!r   r   r   r   r   ra   rX   r   r   r   r2   rZ   r(   r   r   r   r   rc   r&   r^   r\   r   rh   r,   r]   r*   r6   rW   ro   rl   r0   rY   r.   )__name__
__module____qualname____firstlineno__NAMEr%   r'   r)   r+   r-   r/   r1   r   r$   rw   r   r   r   r   r/  r3  r;  r   r   r  r  r  rd  __static_attributes____classcell__)r   s   @r   r   r      s    	$*..#---4,\$L>bHL"(2 r   r   ) numpyr7   r   tensorflow.compat.v1compatv1rT   	Exception
tensorflow*pybullet_envs.deep_mimic.learning.pg_agentr   4pybullet_envs.deep_mimic.learning.solvers.mpi_solverr   )pybullet_envs.deep_mimic.learning.tf_util
deep_mimiclearningtf_utilrm   )pybullet_envs.deep_mimic.learning.rl_utilrl_utilrP  pybullet_utils.loggerr   pybullet_utils.mpi_utilmpi_utilr3   pybullet_utils.math_util	math_utilr    pybullet_envs.deep_mimic.env.envr   r   r2  r   r   <module>r     s]     ## ? J : : : : ( ) + 0
mw m!  s   	A 
A)(A)