
    ni                         S r SSKJr  SSKJr  SSKJr  SSKrSSKrSSKrSSKJ	s  J
r  SSKJr  S rSS jrS	 rS
 rS rS rS rS rS rS rS rSS jrSS jrg)z Utilities for the PPO algorithm.    )absolute_import)division)print_functionN)
device_libc                     [        U [        [        45      (       a  [        U 5      " S U  5       5      $ [        R
                  " U S5      $ )zCreate variables matching a nested tuple of tensors.

Args:
  tensors: Nested tuple of list of tensors.

Returns:
  Nested tuple or list of variables.
c              3   8   #    U  H  n[        U5      v   M     g 7fN)create_nested_vars).0tensors     c/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/minitaur/agents/ppo/utility.py	<genexpr>%create_nested_vars.<locals>.<genexpr>&   s     J'+F33's   F)
isinstancetuplelisttypetfVariable)tensorss    r   r
   r
      s;     %''=J'JJJ	We	$$    c           	         [        U [        [        45      (       a-  [        R                  " U  Vs/ s H  n[        X!5      PM     sn6 $ Uc%  U R                  [        R                  " U 5      5      $ [        R                  " [        R                  " U5      S   /U R                  SS R                  5       -   5      n[        R                  " XU5      $ s  snf )zReset all variables in a nested tuple to zeros.

Args:
  variables: Nested tuple or list of variaables.
  indices: Indices along the first dimension to reset, defaults to all.

Returns:
  Operation.
Nr      )r   r   r   r   groupreinit_nested_varsassign
zeros_likezerosshapeas_listscatter_update)	variablesindicesvariabler   s       r   r   r   *   s     	E4=))88IVI(;IVWW_BMM)455HHbhhw'*+iooab.A.I.I.KKLEY77 Ws   Cc           
          [        U [        [        45      (       a:  [        R                  " [        X5       VVs/ s H  u  p#[        X#5      PM     snn6 $ U R                  U5      $ s  snnf )zAssign tensors to matching nested tuple of variables.

Args:
  variables: Nested tuple or list of variables to update.
  tensors: Nested tuple or list of tensors to assign.

Returns:
  Operation.
)r   r   r   r   r   zipassign_nested_varsr   )r"   r   r$   r   s       r   r'   r'   =   sa     	E4=))88GJ9G^	_G^3C8
X
.G^	_a a			'	"" 
`s   A'
c                 J  ^ [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      n[         R                  " [         R                  " [         R                  " U4S j[         R                  " [         R                  " X@-  S/5      SS/5      [         R                  " U SS2S4   5      SS5      SS/5      S/5      n[         R                  " [         R                  " U5      S5      $ )zDiscounted Monte-Carlo returns.r   Nc                    > UTU -  -   $ r	    aggcurdiscounts     r   <lambda>#discounted_return.<locals>.<lambda>S       3C#7r   r   Freturn)r   ranger   valuecastfloat32reverse	transposescanr   check_numericsstop_gradient)rewardlengthr.   timestepmaskreturn_s     `   r   discounted_returnrB   M   s    XXfll1o++,(	$'"VAtG_4bjj	A$JJll
''7,,rzz$-!=1vF--q"u.5:<=q6C FGC	I'
 
		2++G4h	??r   c                    [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      n[         R                  " U 5      n[        U5       HI  nXp-  nU[         R                  " U SS2SS24   [         R                  " U SS2SS24   5      /S5      -  n MK     XsU-  [         R                  " USS2US24   [         R                  " USS2U* S24   5      S/5      -  -  n[         R                  " [         R                  " Xg-  5      S5      $ )zN-step discounted return.r   Nr2   r3   )
r   r4   r   r5   r6   r7   r   concatr;   r<   )	r=   r5   r>   r.   windowr?   r@   rA   _s	            r   fixed_step_returnrG   Y   s   XXfll1o++,(	$'"VAtG_4bjj	A$MM&!'=aG		6!QR%="--q"#v2O"PRSTTF  
v		QZ"--a&k(:;Q?!A A A'			2++DN;X	FFr   c                    [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      nX`-  X1-  SU-
  -  -   nXc-  U-  n[         R                  " Xs/S5      n[         R                  " [         R                  " [         R                  " S [         R                  " [         R                  " US/5      / SQ5      [         R                  " USS2S4   5      SS5      SS/5      S/5      n[         R                  " [         R                  " U5      S	5      $ )
zTD-lambda returns.r   N   c                     US   US   U -  -   $ )Nr   r   r*   )r,   r-   s     r   r/   lambda_return.<locals>.<lambda>o   s    3q6CFSL#8r   )r   rI   r   r2   Fr   r3   )r   r4   r   r5   r6   r7   stackr8   r9   r:   r   r;   r<   )	r=   r5   r>   r.   lambda_r?   r@   sequencerA   s	            r   lambda_returnrO   f   s   XXfll1o++,(	$'"VAtG_4bjj	A$]X-W==(_w&(XXx*A.(JJll
''8,,rzz(QC8)DbmmTYZ[]_Z_T`FaUV% ()c	+'
 
		2++G4h	??r   c                   ^ [         R                  " U R                  S   R                  5      n[         R                  " USSS24   USS2S4   :  [         R
                  5      n[         R                  " USS2SS24   [         R                  " USS2SS24   5      /S5      nU TU-  -   U-
  n[         R                  " [         R                  " [         R                  " U4S j[         R                  " [         R                  " XW-  S/5      SS/5      [         R                  " USS2S4   5      SS5      SS/5      S/5      n[         R                  " [         R                  " U5      S5      $ )z!Generalized Advantage Estimation.r   Nr2   c                    > UTU -  -   $ r	   r*   r+   s     r   r/   "lambda_advantage.<locals>.<lambda>}   r1   r   r   F	advantage)r   r4   r   r5   r6   r7   rD   r   r8   r9   r:   r;   r<   )	r=   r5   r>   r.   r?   r@   
next_valuedeltarS   s	      `     r   lambda_advantagerV   u   s$   XXfll1o++,(	$'"VAtG_4bjj	A$yy%12,eArsFm(DEqI*
8j(
(5
0%jjll
''7,,rzz$,<q!fEr}}UZ[\^`[`UaGbUV% ()c	+)
 
		2++I6	DDr   c                 p   SU-  SU-  pTS[         R                  " [         R                  " XE-
  5      S5      [         R                  " X -
  S-  [         R                  " U5      -  S5      -   [         R                  " US5      -   [         R                  " US5      -
  U R                  S   R                  -
  -  $ )z?Epirical KL divergence of two normals with diagonal covariance.rI   g      ?r2   )r   
reduce_sumexpr   r5   )mean0logstd0mean1logstd1	logstd0_2	logstd1_2s         r   diag_normal_klr`      s    Wa'kY	bffY%:;R@2==}q266),,bD2 246MM)R4PQi,-/4{{2/D/DE 
F Fr   c                     S[         R                  " S[         R                  -  5      U-   -  nSX -
  [        R                  " U5      -  S-  -  n[        R
                  " X4-   S5      $ )z1Log density of a normal with diagonal covariance.g      rI   r2   )mathlogpir   rY   rX   )meanlogstdlocconstantr5   s        r   diag_normal_logpdfri      sT    TXXa$''k*V34(
3:/!3
3%	x'	,,r   c                     U R                   S   R                  [        R                  " S[        R                  -  [        R
                  -  5      -  nU[        R                  " SU-  S5      -   S-  $ )z7Empirical entropy of a normal with diagonal covariance.r2   rI   r   )r   r5   rb   rc   rd   er   rX   )re   rf   rh   s      r   diag_normal_entropyrl      sS    ZZ^!!DHHQ[466-A$BB(
R]]1v:q1
1Q	66r   c                      [         R                  " 5       n U  Vs/ s H   oR                  S:X  d  M  UR                  PM"     sn$ s  snf )z0List of GPU device names detected by TensorFlow.GPU)r   list_local_devicesdevice_typename)local_device_protosxs     r   available_gpusrt      s:    "557-	H-Q%1G&!&&-	HH	Hs
   AAc                 :   U=(       d    SS0n[         R                  " [        5      nU  H~  u  pEUc  M
  UR                  5        H`  u  pg[        R
                  " XuR                  5      (       d  M,  [        R                  " XvUR                  5      nX6   R                  U5        Mb     M     U H8  nXc;  d  M
  [        R                  R                  SR                  U5      5        M:     / nUR                  5        Hy  u  piU	 Vs/ s H  n[        R                  " US/5      PM     n	n[        R                  " U	S5      n	UR                  [        R                  R!                  US-   U-   U	5      5        M{     [        R                  R#                  U5      $ s  snf )a>  Create histogram summaries of the gradient.

Summaries can be grouped via regexes matching variables names.

Args:
  grad_vars: List of (gradient, variable) tuples as returned by optimizers.
  groups: Mapping of name to regex for grouping summaries.
  scope: Name scope for this operation.

Returns:
  Summary tensor.
all.*!No variables matching '{}' group.r2   r   /collectionsdefaultdictr   itemsrematchrq   subappendr   loggingwarnformatreshaperD   summary	histogrammerge)
	grad_varsgroupsscopegroupedgradvarrq   pattern	summariesgradss
             r   gradient_summariesr      s;    $fe_&##D)'id|	'88	$	$vvgSXX.T" (  djjoo9@@FG  )]]_kd056RZZrd#E6IIeQERZZ))%#+*<eDE % 
		)	$$ 7s   "Fc                 ,   U=(       d    SS0n[         R                  " [        5      nU  Hw  nUR                  5        H`  u  pV[        R
                  " XdR                  5      (       d  M,  [        R                  " XeUR                  5      nX5   R                  U5        Mb     My     U H8  nXS;  d  M
  [        R                  R                  SR                  U5      5        M:     / nUR                  5        Hy  u  pPU  Vs/ s H  n[        R                  " US/5      PM     n n[        R                  " U S5      n UR                  [        R                  R!                  US-   U-   U 5      5        M{     [        R                  R#                  U5      $ s  snf )a&  Create histogram summaries for the provided variables.

Summaries can be grouped via regexes matching variables names.

Args:
  vars_: List of variables to summarize.
  groups: Mapping of name to regex for grouping summaries.
  scope: Name scope for this operation.

Returns:
  Summary tensor.
rv   rw   rx   r2   r   ry   rz   )vars_r   r   r   r   rq   r   r   s           r   variable_summariesr      s1    $fe_&##D)'c	'88	$	$vvgSXX.S! ( 
 djjoo9@@FG  )]]_kd.34esRZZbT"eE4IIeQERZZ))%#+*<eDE % 
		)	$$ 5s   >"Fr	   )N	gradients)Nweights)__doc__
__future__r   r   r   r{   rb   r~   tf.compat.v1compatv1r   tensorflow.python.clientr   r
   r   r'   rB   rG   rO   rV   r`   ri   rl   rt   r   r   r*   r   r   <module>r      sm    ' &  %   	   /%8&# 	@
G@EF-7I%B%r   