
    niQ                         S r SSKJr  SSKJr  SSKJr  SSKrSSKrSSKr SSKJ	s  J
r  SSKJr  SS jrSS jrS	 rS
 rS rS rS rS rS rS rSS jrSS jrg! \ a    SSKr N?f = f)z Utilities for the PPO algorithm.    )absolute_import)division)print_functionN)
device_libc           	         [        U [        [        45      (       a-  [        R                  " U  Vs/ s H  n[        X!5      PM     sn6 $ Uc%  U R                  [        R                  " U 5      5      $ [        R                  " [        R                  " U5      S   /U R                  SS R                  5       -   5      n[        R                  " XU5      $ s  snf )zReset all variables in a nested tuple to zeros.

Args:
  variables: Nested tuple or list of variaables.
  indices: Batch indices to reset, defaults to all.

Returns:
  Operation.
Nr      )
isinstancetuplelisttfgroupreinit_nested_varsassign
zeros_likezerosshapeas_listscatter_update)	variablesindicesvariabler   s       Z/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/agents/ppo/utility.pyr   r      s     	E4=))88IVI(;IVWW_BMM)455HHbhhw'*+iooab.A.I.I.KKLEY77 Ws   Cc           
         [        U [        [        45      (       a:  [        R                  " [        X5       VVs/ s H  u  p4[        X45      PM     snn6 $ Uc  U R                  U5      $ [        R                  " XU5      $ s  snnf )zAssign tensors to matching nested tuple of variables.

Args:
  variables: Nested tuple or list of variables to update.
  tensors: Nested tuple or list of tensors to assign.
  indices: Batch indices to assign to; default to all.

Returns:
  Operation.
)	r	   r
   r   r   r   zipassign_nested_varsr   r   )r   tensorsr   r   tensors        r   r   r   2   sy     	E4=))88GJ9G^	_G^3C8
X
.G^	_a a_G$$Y99	 
`s   B
c                 J  ^ [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      n[         R                  " [         R                  " [         R                  " U4S j[         R                  " [         R                  " X@-  S/5      SS/5      [         R                  " U SS2S4   5      SS5      SS/5      S/5      n[         R                  " [         R                  " U5      S5      $ )zDiscounted Monte-Carlo returns.r   Nc                    > UTU -  -   $ N aggcurdiscounts     r   <lambda>#discounted_return.<locals>.<lambda>L       3C#7    r   Freturn)r   ranger   valuecastfloat32reverse	transposescanr   check_numericsstop_gradient)rewardlengthr%   timestepmaskreturn_s     `   r   discounted_returnr:   F   s    XXfll1o++,(	$'"VAtG_4bjj	A$JJll
''7,,rzz$-!=1vF--q"u.5:<=q6C FGC	I'
 
		2++G4h	??r)   c                    [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      n[         R                  " U 5      n[        U5       HI  nXp-  nU[         R                  " U SS2SS24   [         R                  " U SS2SS24   5      /S5      -  n MK     XsU-  [         R                  " USS2US24   [         R                  " USS2U* S24   5      S/5      -  -  n[         R                  " [         R                  " Xg-  5      S5      $ )zN-step discounted return.r   Nr*   r+   )
r   r,   r   r-   r.   r/   r   concatr3   r4   )	r5   r-   r6   r%   windowr7   r8   r9   _s	            r   fixed_step_returnr?   R   s   XXfll1o++,(	$'"VAtG_4bjj	A$MM&!'=aG		6!QR%="--q"#v2O"PRSTTF  
v		QZ"--a&k(:;Q?!A A A'			2++DN;X	FFr)   c                    [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      nX`-  X1-  SU-
  -  -   nXc-  U-  n[         R                  " Xs/S5      n[         R                  " [         R                  " [         R                  " S [         R                  " [         R                  " US/5      / SQ5      [         R                  " USS2S4   5      SS5      SS/5      S/5      n[         R                  " [         R                  " U5      S	5      $ )
zTD-lambda returns.r   N   c                     US   US   U -  -   $ )Nr   r   r!   )r#   r$   s     r   r&   lambda_return.<locals>.<lambda>h   s    3q6CFSL#8r)   )r   rA   r   r*   Fr   r+   )r   r,   r   r-   r.   r/   stackr0   r1   r2   r   r3   r4   )	r5   r-   r6   r%   lambda_r7   r8   sequencer9   s	            r   lambda_returnrG   _   s   XXfll1o++,(	$'"VAtG_4bjj	A$]X-W==(_w&(XXx*A.(JJll
''8,,rzz(QC8)DbmmTYZ[]_Z_T`FaUV% ()c	+'
 
		2++G4h	??r)   c                   ^ [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      n[         R                  " USS2SS24   [         R                  " USS2SS24   5      /S5      nU TU-  -   U-
  n[         R                  " [         R                  " [         R                  " U4S j[         R                  " [         R                  " XW-  S/5      SS/5      [         R                  " USS2S4   5      SS5      SS/5      S/5      n[         R                  " [         R                  " U5      S5      $ )z!Generalized Advantage Estimation.r   Nr*   c                    > UTU -  -   $ r    r!   r"   s     r   r&   "lambda_advantage.<locals>.<lambda>v   r(   r)   r   F	advantage)r   r,   r   r-   r.   r/   r<   r   r0   r1   r2   r3   r4   )	r5   r-   r6   r%   r7   r8   
next_valuedeltarK   s	      `     r   lambda_advantagerN   n   s$   XXfll1o++,(	$'"VAtG_4bjj	A$yy%12,eArsFm(DEqI*
8j(
(5
0%jjll
''7,,rzz$,<q!fEr}}UZ[\^`[`UaGbUV% ()c	+)
 
		2++I6	DDr)   c                 p   SU-  SU-  pTS[         R                  " [         R                  " XE-
  5      S5      [         R                  " X -
  S-  [         R                  " U5      -  S5      -   [         R                  " US5      -   [         R                  " US5      -
  U R                  S   R                  -
  -  $ )z?Epirical KL divergence of two normals with diagonal covariance.rA   g      ?r*   )r   
reduce_sumexpr   r-   )mean0logstd0mean1logstd1	logstd0_2	logstd1_2s         r   diag_normal_klrX   |   s    Wa'kY	bffY%:;R@2==}q266),,bD2 246MM)R4PQi,-/4{{2/D/DE 
F Fr)   c                     S[         R                  " S[         R                  -  5      -  U-
  nSX -
  [        R                  " U5      -  S-  -  n[        R
                  " X4-   S5      $ )z1Log density of a normal with diagonal covariance.g      rA   r*   )mathlogpir   rQ   rP   )meanlogstdlocconstantr-   s        r   diag_normal_logpdfra      sT    DHHQ[))F2(
3:/!3
3%	x'	,,r)   c                     U R                   S   R                  [        R                  " S[        R                  -  [        R
                  -  5      -  nU[        R                  " SU-  S5      -   S-  $ )z7Empirical entropy of a normal with diagonal covariance.r*   rA   r   )r   r-   rZ   r[   r\   er   rP   )r]   r^   r`   s      r   diag_normal_entropyrd      sS    ZZ^!!DHHQ[466-A$BB(
R]]1v:q1
1Q	66r)   c                      [         R                  " 5       n U  Vs/ s H   oR                  S:X  d  M  UR                  PM"     sn$ s  snf )z0List of GPU device names detected by TensorFlow.GPU)r   list_local_devicesdevice_typename)local_device_protosxs     r   available_gpusrl      s:    "557-	H-Q%1G&!&&-	HH	Hs
   AAc                 :   U=(       d    SS0n[         R                  " [        5      nU  H~  u  pEUc  M
  UR                  5        H`  u  pg[        R
                  " XuR                  5      (       d  M,  [        R                  " XvUR                  5      nX6   R                  U5        Mb     M     U H8  nXc;  d  M
  [        R                  R                  SR                  U5      5        M:     / nUR                  5        Hy  u  piU	 Vs/ s H  n[        R                  " US/5      PM     n	n[        R                  " U	S5      n	UR                  [        R                  R!                  US-   U-   U	5      5        M{     [        R                  R#                  U5      $ s  snf )a>  Create histogram summaries of the gradient.

Summaries can be grouped via regexes matching variables names.

Args:
  grad_vars: List of (gradient, variable) tuples as returned by optimizers.
  groups: Mapping of name to regex for grouping summaries.
  scope: Name scope for this operation.

Returns:
  Summary tensor.
all.*!No variables matching '{}' group.r*   r   /collectionsdefaultdictr   itemsrematchri   subappendr   loggingwarnformatreshaper<   summary	histogrammerge)
	grad_varsgroupsscopegroupedgradvarri   pattern	summariesgradss
             r   gradient_summariesr      s;    $fe_&##D)'id|	'88	$	$vvgSXX.T" (  djjoo9@@FG  )]]_kd056RZZrd#E6IIeQERZZ))%#+*<eDE % 
		)	$$ 7s   "Fc                 ,   U=(       d    SS0n[         R                  " [        5      nU  Hw  nUR                  5        H`  u  pV[        R
                  " XdR                  5      (       d  M,  [        R                  " XeUR                  5      nX5   R                  U5        Mb     My     U H8  nXS;  d  M
  [        R                  R                  SR                  U5      5        M:     / nUR                  5        Hy  u  pPU  Vs/ s H  n[        R                  " US/5      PM     n n[        R                  " U S5      n UR                  [        R                  R!                  US-   U-   U 5      5        M{     [        R                  R#                  U5      $ s  snf )a&  Create histogram summaries for the provided variables.

Summaries can be grouped via regexes matching variables names.

Args:
  vars_: List of variables to summarize.
  groups: Mapping of name to regex for grouping summaries.
  scope: Name scope for this operation.

Returns:
  Summary tensor.
rn   ro   rp   r*   r   rq   rr   )vars_r   r   r   r   ri   r   r   s           r   variable_summariesr      s1    $fe_&##D)'c	'88	$	$vvgSXX.S! ( 
 djjoo9@@FG  )]]_kd.34esRZZbT"eE4IIeQERZZ))%#+*<eDE % 
		)	$$ 5s   >"Fr    )N	gradients)Nweights)__doc__
__future__r   r   r   rs   rZ   rv   tensorflow.compat.v1compatv1r   	Exception
tensorflowtensorflow.python.clientr   r   r   r:   r?   rG   rN   rX   ra   rd   rl   r   r   r!   r)   r   <module>r      s    ' &  %   	## 08&:(	@
G@EF-7I%B%}  s   	A 
A+*A+