
    ni                         S r SSKJr  SSKJr  SSKJr  SSKrSSKrSSKr SSKJ	s  J
r  \R                  " SS5      rS
S jrS
S	 jrg! \ a    SSKr N*f = f)z*Network definitions for the PPO algorithm.    )absolute_import)division)print_functionNNetworkOutputz"policy, mean, logstd, value, statec                    [         R                  R                  R                  U R                  S9n[         R
                  " U R                  S5      n[         R                  " U[         R                  " U5      S   [         R                  " U5      S   [        R                  " [        R                  UR                  R                  5       SS S5      /5      n[         R                  " S5         UnU R                   HE  n	[         R                  R                  R!                  X[         R"                  R$                  5      nMG     [         R                  R                  R!                  UU[         R&                  US9n
[         R(                  " S	U
R                  SS [         R*                  U5      n[         R,                  " US
   [         R                  " U
5      S   [         R                  " U
5      S   /S/U
R                  R.                  S-
  -  -   5      nSSS5        [         R                  " S5         UnU R0                   HE  n	[         R                  R                  R!                  X[         R"                  R$                  5      nMG     [         R                  R                  R!                  USS5      S   nSSS5        [         R2                  " W
S5      n
[         R2                  " WS	5      n[         R2                  " WS5      n[         R                  R4                  R7                  U
[         R8                  " U5      5      n[;        XXU5      $ ! , (       d  f       GNC= f! , (       d  f       N= f)a  Independent feed forward networks for policy and value.

The policy network outputs the mean action and the log standard deviation
is learned as independent parameter vector.

Args:
  config: Configuration object.
  action_size: Length of the action vector.
  observations: Sequences of observations.
  unused_length: Batch of sequence lengths.
  state: Batch of initial recurrent states.

Returns:
  NetworkOutput tuple.
factor绽|=r         Npolicyweights_initializerlogstdNNvalue.r   mean)tfcontriblayersvariance_scaling_initializerinit_mean_factorrandom_normal_initializerinit_logstdreshapeshape	functoolsreduceoperatormulas_listvariable_scopepolicy_layersfully_connectednnrelutanhget_variablefloat32tilendimsvalue_layerscheck_numericsdistributionsMultivariateNormalDiagexpr   )configaction_sizeobservationsunused_lengthstatemean_weights_initializerlogstd_initializerflat_observationsxsizer   r   r   r   s                 W/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/agents/networks.pyfeed_forward_gaussianr=       s      ZZ..KK$$ L &33F4F4FNjjhh|Qhh|Qx||#))113AB7<0  
"A$$
**


+
+ARUUZZ
@a %::,,Q-8-/WWAY - [D __Xtzz!"~rzzCUVFWWVJ'hhtnQ'$):;qcTZZEUEUXYEY>ZZ\F # 
!A##
**


+
+ARUUZZ
@a $JJ--aD9&AE	 "
 
		4	($VX.&


E7
+%::##::4P&	vVE	::) #" "!s   0D,M:BM*
M'*
M8c                    [         R                  R                  R                  U R                  S9n[         R
                  " U R                  S5      n[         R                  R                  R                  U R                  S   5      n[         R                  " U[         R                  " U5      S   [         R                  " U5      S   [        R                  " [        R                  UR                  R!                  5       SS S5      /5      n[         R"                  " S5         Un	U R                  SS  HE  n
[         R                  R                  R%                  X[         R&                  R(                  5      n	MG     [         R&                  R+                  XyX4[         R,                  5      u  p[         R                  R                  R%                  U	U[         R.                  US	9n[         R0                  " S
UR                  SS [         R,                  U5      n[         R2                  " US   [         R                  " U5      S   [         R                  " U5      S   /S/UR                  R4                  S-
  -  -   5      nSSS5        [         R"                  " S5         Un	U R6                   HE  n
[         R                  R                  R%                  X[         R&                  R(                  5      n	MG     [         R                  R                  R%                  U	SS5      S   nSSS5        [         R8                  " WS5      n[         R8                  " WS
5      n[         R8                  " WS5      n[         R                  R:                  R=                  U[         R>                  " U5      5      n[A        XXU5      $ ! , (       d  f       GNC= f! , (       d  f       N= f)a  Independent recurrent policy and feed forward value networks.

The policy network outputs the mean action and the log standard deviation
is learned as independent parameter vector. The last policy layer is
recurrent and uses a GRU cell.

Args:
  config: Configuration object.
  action_size: Length of the action vector.
  observations: Sequences of observations.
  length: Batch of sequence lengths.
  state: Batch of initial recurrent states.

Returns:
  NetworkOutput tuple.
r   r
   r   r   r   Nr   r   r   r   r   r   r   )!r   r   r   r   r   r   r   rnnGRUBlockCellr$   r   r   r   r   r    r!   r"   r#   r%   r&   r'   dynamic_rnnr*   r(   r)   r+   r,   r-   r.   r/   r0   r1   r   )r2   r3   r4   lengthr6   r7   r8   cellr9   r:   r;   r   r   r   r   s                  r<   recurrent_gaussianrE   P   s   "  ZZ..KK$$ L &33F4F4FN		$	$V%9%9"%=	>$jjhh|Qhh|Qx||#))113AB7<0  
"A$$Sb)
**


+
+ARUUZZ
@a *uu  &DHA::,,Q-8-/WWAY - [D __Xtzz!"~rzzCUVFWWVJ'hhtnQ'$):;qcTZZEUEUXYEY>ZZ\F # 
!A##
**


+
+ARUUZZ
@a $JJ--aD9&AE	 "
 
		4	($VX.&


E7
+%::##::4P&	vVE	::- #" "!s   &E O$BO
O
O")N)__doc__
__future__r   r   r   collectionsr   r    tensorflow.compat.v1compatv1r   	Exception
tensorflow
namedtupler   r=   rE        r<   <module>rQ      s\    1 &  %   ## &&8\]-;`1;m  s   	A	 	
AA