
    niY2                        S SK r S SKJs  Jr  S SKrS SK	J
r
  S SKJr  S SKJs  Js  Jr  S SKJs  Js  Js  Jr  S SKJr  S SKJs  Js  Jr  S SKJr  S SKJr  S SK J!r"  S SK#J$r$  S SK%J&r&    " S S	\
5      r'g! \ a    S SKr Ntf = f)
    N)TFAgent)	MPISolver)TFNormalizer)Logger)ActionSpace)Envc                     ^  \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrSrU 4S jrU 4S jrS rU 4S jrS rU 4S jrU 4S jrU 4S jrS rS rS rS rU 4S jrS rS rS rS rS r S r!U 4S  jr"S! r#S" r$S# r%S$ r&S% r'U 4S& jr(S'r)U =r*$ )(PGAgent   PGActorNetActorStepsizeActorMomentumActorWeightDecayActorInitOutputScale	CriticNetCriticStepsizeCriticMomentumCriticWeightDecay   c                 4   > SU l         [        TU ]	  XU5        g NF)_exp_actionsuper__init__)selfworldid	json_data	__class__s       d/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/deep_mimic/learning/pg_agent.pyr   PGAgent.__init__)   s    D	GU	*
    c                 0   > [         TU ]  5         SU l        g r   )r   resetr   r   r    s    r!   r%   PGAgent.reset.   s    	GMOD
r#   c                 H    U R                  5       nU[        R                  :H  $ N)get_action_spacer   
Continuous)r   action_spaces     r!   _check_action_spacePGAgent._check_action_space3   s!    ((*L;1111r#   c                    > [         TU ]  U5        U R                  U R                  5      u  U l        U l        U R                  U R                  5      u  U l        U l        g r)   )	r   _load_params_calc_val_boundsdiscountval_minval_max_calc_term_valsval_failval_succ)r   r   r    s     r!   r0   PGAgent._load_params7   sM    	G#!%!6!6t}}!EDL$,#'#7#7#F DM4=
r#   c                    U R                   U;   d   eU R                  U;   d   eXR                      nXR                     nU R                  U;  a  SOXR                     nU R                  5       nU R	                  5       nU R                  5       n[        R                  " [        R                  S U/SS9U l	        [        R                  " [        R                  S /SS9U l
        [        R                  " [        R                  S /SS9U l        [        R                  " [        R                  S U/SS9U l        [        R                  " [        R                  U R                  5       (       a  S U/OS SS9U l        [        R                  " S5         [        R                  " S	5         U R!                  X$5      U l        S S S 5        [        R                  " S
5         U R%                  U5      U l        S S S 5        S S S 5        U R"                  S :w  a  [(        R*                  " SU-   5        U R&                  S :w  a  [(        R*                  " SU-   5        g ! , (       d  f       N= f! , (       d  f       Nz= f! , (       d  f       N= f)Nr   s)shapenametar_valadvagmainactorcriticzBuilt actor net: zBuilt critic net: )ACTOR_NET_KEYCRITIC_NET_KEYACTOR_INIT_OUTPUT_SCALE_KEYget_state_sizeget_goal_sizeget_action_sizetfplaceholderfloat32s_tf
tar_val_tfadv_tfa_tfhas_goalg_tfvariable_scope_build_net_actoractor_tf_build_net_critic	critic_tfr   print2)r   r   actor_net_namecritic_net_nameactor_init_output_scales_sizeg_sizea_sizes           r!   _build_netsPGAgent._build_nets=   s   ***)+++112N 3 34O$($D$DI$Ua*34T4T*U    "F!F!!#F rzz$cJDInnRZZvINDO..D6FDKrzz$cJDIrzz8<tVnT$')DI 
		6	"W%--nV &X&//@ ' 
# 	mm'.89$mm(?:;
 &%&& 
#	"s<   I;*II;I*6I;
I'	#I;*
I8	4I;;
J	c           	        > [         TU ]  5         U R                  R                  5          U R                  R                  5          [
        R                  " U R                  5         [
        R                  " U R                  5         U R                  U R                  5      u  p[        U R                  SS5      U l        U R                  R                  U* SU-  5        S S S 5        S S S 5        S S S 5        S S S 5        g ! , (       d  f       N'= f! , (       d  f       N0= f! , (       d  f       N9= f! , (       d  f       g = f)Nval_normr   g      ?)r   _build_normalizerssess
as_defaultgraphrJ   rS   tf_scopeRESOURCE_SCOPE_calc_val_offset_scaler2   r   rb   set_mean_std)r   
val_offset	val_scaler    s      r!   rc   PGAgent._build_normalizersa   s    	G 				!6!6!8":K:KDMM:ZT001 $ ; ;DMM J
$TYY
A>""J;i@ 2 ;[!8	
 	 21 ;[:Z!8!8		
 sT   D5!D$&!DAD	!D)D$1D5
DD
D!D$$
D2	.D55
Ec                 &  > [         TU ]  5         U R                  R                  5          U R                  R                  5          U R
                  R                  5         S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = fr)   )r   _init_normalizersrd   re   rf   rb   updater&   s    r!   ro   PGAgent._init_normalizersj   s[    	G				!6!6!8
mm "9	
 "9!8		
s#   BA1 B1
A?	;B
Bc                 V   > [         TU ]  5         U R                  R                  5         g r)   )r   _load_normalizersrb   loadr&   s    r!   rs   PGAgent._load_normalizersp   s!    	GMM
r#   c                    U R                   U;  a  SOXR                      nU R                  U;  a  SOXR                     nU R                  R                  U R                  5      U R                  R                  U R
                  5      -
  nS[        R                  " [        R                  " U5      5      -  U l	        US:w  a&  U =R                  X0R                  S5      -  -  sl	        U R                  R                  U R                  5      nU R                  R                  U R                  5      U-
  n[        R                  " [        R                  " U5      SS9U l        U =R                  U R                   -  sl        S[        R                  " U R                  5      -  U l        U R                  R#                  U R$                  5      nU R                  R#                  U R&                  5      n[(        R*                  " XWU5      n	XR,                  R.                  -  n	U =R                  U	-  sl        US:w  a&  U =R                  X R                  S5      -  -  sl        g )Nr   g      ?main/criticaxis
main/actor)ACTOR_WEIGHT_DECAY_KEYCRITIC_WEIGHT_DECAY_KEYrb   normalize_tfrN   rW   rJ   reduce_meansquarecritic_loss_tf_weight_decay_lossa_normrU   rP   
reduce_sumactor_loss_tfrO   	normalizea_bound_mina_bound_maxTFUtilcalc_bound_lossexp_params_currnoise)
r   r   actor_weight_decaycritic_weight_decaynorm_val_diffnorm_a_mean_tfnorm_a_diffnorm_a_bound_minnorm_a_bound_maxa_bound_losss
             r!   _build_lossesPGAgent._build_lossesu   s   ##94 ;DE`E`;a  	$$I5 <EFbFb<c  MM..t?$--B\B\C Mryy/G HHDq 
03J3J=3YYY[[--dmm<N++**4995FKryy'=BGD$++%r~~d.@.@AAD{{,,T-=-=>{{,,T-=-=>)).L\]L((...L,&a
.1H1H1VVV
r#   c                    U R                   U;  a  SOXR                      nU R                  U;  a  SOXR                     nU R                  U;  a  SOXR                     nU R                  U;  a  SOXR                     nU R	                  S5      n[
        R                  R                  UUS9n[
        R                  " U R                  U5      U l
        [        U R                  Xv5      U l        U R	                  S5      n[
        R                  R                  X#S9n	[
        R                  " U R                  U5      U l        [        U R                  X5      U l        g )NgMbP?g?g{Gz?rw   )learning_ratemomentumr{   )ACTOR_STEPSIZE_KEYACTOR_MOMENTUM_KEYCRITIC_STEPSIZE_KEYCRITIC_MOMENTUM_KEY_tf_varsrJ   trainMomentumOptimizer	gradientsr   critic_grad_tfr   rd   critic_solverr   actor_grad_tfactor_solver)
r   r   actor_stepsizeactor_momentumcritic_stepsizecritic_momentumcritic_vars
critic_opt
actor_vars	actor_opts
             r!   _build_solversPGAgent._build_solvers   sG   y0 7@AXAX7Y  	y0 7@AXAX7Y  	  	1 8ABZBZ8[  	  	1 8ABZBZ8[  --.K++/5D , FJ,,t':':KHD"499jFD|,J***aId&8&8*ED!$))YCD
r#   c           
         U R                   R                  U R                  5      nU/nU R                  5       (       a*  U R                  R                  U R
                  5      nXE/-  n[        R                  " X5      n[        R                  R                  UU R                  5       S [        R                  " U* US9S9nU R                  R                  U5      nU$ )N)minvalmaxvalinputsunits
activationkernel_initializer)s_normr~   rM   rQ   g_normrR   
NetBuilder	build_netrJ   layersdenserI   random_uniform_initializerr   unnormalize_tf)	r   net_nameinit_output_scale	norm_s_tf	input_tfs	norm_g_tfh	norm_a_tfrP   s	            r!   rT   PGAgent._build_net_actor   s    ((3II++**4995i;iX1A		q&*&:&:&<+/353P3P,=+=FW4Y   ZI ;;%%i0DKr#   c                    U R                   R                  U R                  5      nU/nU R                  5       (       a*  U R                  R                  U R
                  5      nX4/-  n[        R                  " X5      n[        R                  R                  USS [        R                  S9n[        R                  " US/5      nU R                  R                  U5      nU$ )Nr   r   rx   )r   r~   rM   rQ   r   rR   r   r   rJ   r   r   r   xavier_initializerreshaperb   r   )r   r   r   r   r   r   norm_val_tfval_tfs           r!   rV   PGAgent._build_net_critic   s    ((3II++**4995i;iX1A))//()-15;5N5N " PK
 **[2$/K]]))+6FMr#   c                 B   > [         TU ]  5         U R                  5         g r)   )r   _initialize_vars_sync_solversr&   s    r!   r   PGAgent._initialize_vars   s    	G
r#   c                 l    U R                   R                  5         U R                  R                  5         g r)   )r   syncr   r   s    r!   r   PGAgent._sync_solvers   s)    
r#   c                    U R                   R                  5          U R                  R                  5          SU l        U R	                  X5      S   nSnU R                  5       (       a  [        R                  " U R                  R                  5      nU(       as  [        R                  R                  " UR                  6 nX`R                  R                  -  nX`R                  R                   -  nX7-  nU R#                  U5      nSU l        S S S 5        S S S 5        WW4$ ! , (       d  f       N= f! , (       d  f       WW4$ = f)NFr   T)rd   re   rf   r   _eval_actor_enable_stoch_policyMathUtil	flip_coinr   ratenprandomrandnr;   r   r   std_calc_action_logp)r   r:   r@   r?   logprand_actionnorm_exp_noise	exp_noises           r!   _decide_actionPGAgent._decide_action   s    				!6!6!8d


1
 
#ad		"	"	$	$(()=)=)B)BC99??AGG4.
0066
6.${{6)
.!''7$!$
 "9	" d7N# "9!8		" d7Ns#   D2CD!D2!
D/	+D22
Ec                     U R                   =(       aM    U R                  U R                  R                  :H  =(       d#    U R                  U R                  R                  :H  $ r)   )enable_training_modeModeTRAIN	TRAIN_ENDr   s    r!   r   PGAgent._enable_stoch_policy   sF     HTZZ499??%B &G%)ZZ4993F3F%FHr#   c                 8   [         R                  " USU R                  5       /5      nU R                  5       (       a'  [         R                  " USU R	                  5       /5      OS nU R
                  XR                  U0nU R                  R                  U5      nU$ Nrx   )	r   r   rG   rQ   rH   rM   rR   rU   eval)r   r:   r@   feedr?   s        r!   r   PGAgent._eval_actor   sw    


1r4..012A59]]__

1r4--/01$AIIq))Q'D4 AHr#   c                 
   U R                   R                  5          U R                  R                  5          [        R                  " USU R                  5       /5      nU R                  5       (       a'  [        R                  " USU R                  5       /5      OS nU R                  XR                  U0nU R                  R                  U5      nS S S 5        S S S 5        W$ ! , (       d  f       N= f! , (       d  f       W$ = fr   )rd   re   rf   r   r   rG   rQ   rH   rM   rR   rW   r   )r   r:   r@   r   vals        r!   _eval_criticPGAgent._eval_critic   s    				!6!6!8
**QT0023
4a7;}}"**QT//12
3DaiiIIq)dNN%c "9	 J "9!8		 Js#   C3BC"C3"
C0	,C33
Dc                 Z    [        S5      nU R                  (       a  XR                  -  nU$ Nr   )intr   EXP_ACTION_FLAG)r   flagss     r!   _record_flagsPGAgent._record_flags  s'    FE***eLr#   c                   > [         TU ]  5         U R                  5       nU R                  5       n[        R
                  " U5      n[        R
                  " U5      nU R                  R                  5       nU R                  R                  5       nU R                  R                  SU5        U R                  R                  SU5        U R                  R                  SU5        U R                  R                  SU5        g )NCritic_LossCritic_Stepsize
Actor_LossActor_Stepsize)r   _train_step_update_critic_update_actorMPIUtil
reduce_avgr   get_stepsizer   loggerlog_tabular)r   critic_loss
actor_lossr   r   r    s        r!   r  PGAgent._train_step  s    	G%%'K##%J$$[1K##J/J((557O&&335NKKM;7KK-?KKL*5KK,n=
r#   c                 L   U R                   R                  U R                  5      nU R                   R                  SU5      nU R	                  5       (       a  U R                   R                  SU5      OS nU R                  U5      n[        R                  " X@R                  U R                  5      nU R                  X R                  X0R                  U0nU R                  R                  U R                  U R                   /U5      u  pgU R"                  R%                  U5        U$ )Nstatesgoals)replay_buffersample_local_mini_batch_sizegetrQ   _calc_updated_valsr   clipr3   r4   rM   rR   rN   rd   runr   r   r   rp   )r   idxr:   r@   tar_Vr   lossgradss           r!   r  PGAgent._update_critic  s    



#
#D$?$?
@Cx-A04w,TA##C(EGGE<<6EIIq))Q?D))--!4!4d6I6I JDQKDe$Kr#   c                    U R                   nU R                  R                  U R                  U5      nU R	                  5       nU R                  R                  SU5      nU(       a  U R                  R                  SU5      OS nU R                  R                  SU5      nU R                  U5      nU R                  XE5      nXx-
  n	U R                  X@R                  XPR                  X`R                  U	0n
U R                  R                  U R                  U R                  /U
5      u  pU R                   R#                  U5        U$ )Nr  r  actions)r   r  sample_filteredr  rQ   r  r  r   rM   rR   rP   rO   rd   r  r   r   r   rp   )r   keyr  rQ   r:   r@   r?   V_newV_oldr>   r   r  r  s                r!   r  PGAgent._update_actor(  s	   


C



,
,T-H-H#
NC}}Hx-A08w,dAy#.A##C(Ea#E
-CIIq))Q		1kk3GD))--!3!3T5G5G H$OKDU#Kr#   c                    U R                   R                  SU5      nU R                  S:X  a  UnU$ U R                   R                  U5      nU R                   R                  SU5      nU R	                  5       (       a  U R                   R                  SU5      OS nU R                   R                  U5      nU R                   R                  U[        R                  R                  5      nU R                   R                  U[        R                  R                  5      n	[        R                  " Xx5      n[        R                  " Xy5      n	U R                  XV5      n
U R                  X'   U R                  X'   X R                  U
-  -   nU$ )Nrewardsr   r  r  )r  r  r2   get_next_idxrQ   is_path_endcheck_terminal_flagr   	TerminateFailSuccr   logical_andr   r6   r7   )r   r  rnew_Vnext_idxs_nextg_nextis_endis_failis_succV_nexts              r!   r  PGAgent._calc_updated_vals<  s9   y#.A}}e" L ##005h!!%%h9f<@MMOOt!!%%gx8QUf!!--c2f""66sCMM<N<NOg""66sCMM<N<NOgv/gv/g  0ffofo--&((eLr#   c                 \   U R                   R                  nUS:  d   eU R                  5       nSX"-  -  [        R                  " [        R
                  " U5      SS9-  nUSU-  [        R                  " S[        R                  -  5      -  -  nXC* [        R                  " U5      -  -  nU$ )Nr   g      rx   ry      )r   r   rI   r   sumr   logpi)r   norm_action_deltasstdevr^   r   s        r!   r   PGAgent._calc_action_logpS  s      &&E199!!#F5=!BFF2995G+Hr$RRDD6MBFF1ruu9---DGbffUm##DKr#   c                     U R                  X5      nU R                  R                  U5      nU R                  R                  R                  U R                  US   5        g r   )r   rb   r   r   envlog_valr   )r   r:   r@   r   norm_vals        r!   _log_valPGAgent._log_val^  sI    


A
!C}}&&s+HJJNN477HQK0
r#   c                 n   > [         TU ]  U5        U R                  R                  U R                  5        g r)   )r   _build_replay_bufferr  add_filter_keyr   )r   buffer_sizer    s     r!   rF  PGAgent._build_replay_bufferd  s-    	G -%%d&:&:;
r#   )r   rP   r   r   r   rU   rO   r   r   r   rW   rR   rM   rN   r6   r4   r3   rb   r7   )+__name__
__module____qualname____firstlineno__NAMErD   r   r   r|   rF   rE   r   r   r}   r   r   r%   r-   r0   r_   rc   ro   rs   r   r   rT   rV   r   r   r   r   r   r   r   r  r  r  r  r   rC  rF  __static_attributes____classcell__)r    s   @r!   r
   r
      s    	$-&&- 6.((//

2"H
>.""

(H$(.	 r#   r
   )(numpyr   tensorflow.compat.v1compatv1rJ   	Exception
tensorflowcopy*pybullet_envs.deep_mimic.learning.tf_agentr   4pybullet_envs.deep_mimic.learning.solvers.mpi_solverr   )pybullet_envs.deep_mimic.learning.tf_util
deep_mimiclearningtf_utilr   2pybullet_envs.deep_mimic.learning.nets.net_buildernetsnet_builderr   /pybullet_envs.deep_mimic.learning.tf_normalizerr   )pybullet_envs.deep_mimic.learning.rl_utilrl_utilRLUtilpybullet_utils.loggerr   pybullet_utils.mpi_utilmpi_utilr  pybullet_utils.math_util	math_utilr   )pybullet_envs.deep_mimic.env.action_spacer    pybullet_envs.deep_mimic.env.envr   r
    r#   r!   <module>rm     si    ##  > J : : G G H : : ( ) + A 0
Ng N+  s   	A7 7
BB