
    h}l              
       0   S r SSKJrJr  SSKJrJrJrJr  SSK	r
SSKrSSKJr  SSKJr  SSKJrJrJr  SSKJr  \" S	S
S9r\" SSS9r\" SSS9r\" SSS9r\" SSS9r\" SSS9r\" SSS9r " S S
\5      rS\R<                  S\R<                  4S jr " S S\5      r  " S S\ 5      r! " S S\5      r" " S S\5      r# " S  S\5      r$ " S! S\5      r% " S" S#5      r& S+S$\RN                  S%\(S&\\)\*\4      S\4S' jjr+S(\S)\S\R<                  4S* jr,g),zProbability distributions.    )ABCabstractmethod)AnyOptionalTypeVarUnionN)spaces)nn)	BernoulliCategoricalNormal)get_action_dimSelfDistributionDistribution)boundSelfDiagGaussianDistributionDiagGaussianDistribution$SelfSquashedDiagGaussianDistribution SquashedDiagGaussianDistributionSelfCategoricalDistributionCategoricalDistribution SelfMultiCategoricalDistributionMultiCategoricalDistributionSelfBernoulliDistributionBernoulliDistribution#SelfStateDependentNoiseDistributionStateDependentNoiseDistributionc                     ^  \ rS rSrSrU 4S jr\S\\R                  \
\R                  \R                  4   4   4S j5       r\S\S\4S j5       r\S\R                   S\R                   4S	 j5       r\S\\R                      4S
 j5       r\S\R                   4S j5       r\S\R                   4S j5       rSS\S\R                   4S jjr\S\R                   4S j5       r\S\
\R                   \R                   4   4S j5       rSrU =r$ )r      z&Abstract base class for distributions.c                 0   > [         TU ]  5         S U l        g N)super__init__distributionself	__class__s    `/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/distributions.pyr#   Distribution.__init__   s         returnc                     g)zCreate the layers and parameters that represent the distribution.

Subclasses must define this, but the arguments and return type vary between
concrete classes.N r&   argskwargss      r(   proba_distribution_net#Distribution.proba_distribution_net        r*   r&   c                     g)z3Set parameters of the distribution.

:return: self
Nr-   r.   s      r(   proba_distributionDistribution.proba_distribution'   r3   r*   xc                     g)zh
Returns the log likelihood

:param x: the taken action
:return: The log likelihood of the distribution
Nr-   r&   r7   s     r(   log_probDistribution.log_prob.   r3   r*   c                     g)zl
Returns Shannon's entropy of the probability

:return: the entropy, or None if no analytical form is known
Nr-   r&   s    r(   entropyDistribution.entropy7   r3   r*   c                     g)zT
Returns a sample from the probability distribution

:return: the stochastic action
Nr-   r=   s    r(   sampleDistribution.sample?   r3   r*   c                     g)zy
Returns the most likely action (deterministic output)
from the probability distribution

:return: the stochastic action
Nr-   r=   s    r(   modeDistribution.modeG   r3   r*   deterministicc                 P    U(       a  U R                  5       $ U R                  5       $ )z[
Return actions according to the probability distribution.

:param deterministic:
:return:
)rD   rA   )r&   rF   s     r(   get_actionsDistribution.get_actionsP   s     99;{{}r*   c                     g)z[
Returns samples from the probability distribution
given its parameters.

:return: actions
Nr-   r.   s      r(   actions_from_params Distribution.actions_from_params[   r3   r*   c                     g)z
Returns samples and the associated log probabilities
from the probability distribution given its parameters.

:return: actions and log prob
Nr-   r.   s      r(   log_prob_from_params!Distribution.log_prob_from_paramsd   r3   r*   )r$   F)__name__
__module____qualname____firstlineno____doc__r#   r   r   r
   Moduletuple	Parameterr1   r   r5   thTensorr:   r   r>   rA   rD   boolrH   rK   rN   __static_attributes____classcell__r'   s   @r(   r   r      s_   0! ryy%PRPYPY[][g[gPgJh?h9i   !1 GW   ")) 		   ")),   		   bii  	 	")) 	 bii   uRYY		=Q7R  r*   tensorr+   c                 z    [        U R                  5      S:  a  U R                  SS9n U $ U R                  5       n U $ )a  
Continuous actions are usually considered to be independent,
so we can sum components of the ``log_prob`` or the entropy.

:param tensor: shape: (n_batch, n_actions) or (n_batch,)
:return: shape: (n_batch,) for (n_batch, n_actions) input, scalar for (n_batch,) input
   dim)lenshapesum)r_   s    r(   sum_independent_dimsrg   n   s>     6<<1" M Mr*   c            	       2  ^  \ rS rSrSrS\4U 4S jjrSS\S\S\\	R                  \	R                  4   4S jjrS	\S
\R                  S\R                  S\4S jrS\R                  S\R                  4S jrS\\R                     4S jrS\R                  4S jrS\R                  4S jrSS
\R                  S\R                  S\S\R                  4S jjrS
\R                  S\R                  S\\R                  \R                  4   4S jrSrU =r$ )r   }   z
Gaussian distribution with diagonal covariance matrix, for continuous actions.

:param action_dim:  Dimension of the action space.

action_dimc                 J   > [         TU ]  5         Xl        S U l        S U l        g r!   )r"   r#   rj   mean_actionslog_stdr&   rj   r'   s     r(   r#   !DiagGaussianDistribution.__init__   s"    $ r*   
latent_dimlog_std_initr+   c                     [         R                  " XR                  5      n[         R                  " [        R
                  " U R                  5      U-  SS9nX44$ )ap  
Create the layers and parameter that represent the distribution:
one output will be the mean of the Gaussian, the other parameter will be the
standard deviation (log std in fact to allow negative values)

:param latent_dim: Dimension of the last layer of the policy (before the action layer)
:param log_std_init: Initial value for the log standard deviation
:return:
Trequires_grad)r
   Linearrj   rX   rY   ones)r&   rp   rq   rl   rm   s        r(   r1   /DiagGaussianDistribution.proba_distribution_net   sB     yy__=,,rwwt7,FVZ[$$r*   r&   rl   rm   c                 t    [         R                  " U5      UR                  5       -  n[        X5      U l        U $ )zi
Create the distribution given its parameters (mean, std)

:param mean_actions:
:param log_std:
:return:
)rY   	ones_likeexpr   r$   )r&   rl   rm   
action_stds       r(   r5   +DiagGaussianDistribution.proba_distribution   s/     \\,/'++-?
"<<r*   actionsc                 N    U R                   R                  U5      n[        U5      $ )z
Get the log probabilities of actions according to the distribution.
Note that you must first call the ``proba_distribution()`` method.

:param actions:
:return:
)r$   r:   rg   )r&   r}   r:   s      r(   r:   !DiagGaussianDistribution.log_prob   s%     $$--g6#H--r*   c                 H    [        U R                  R                  5       5      $ r!   )rg   r$   r>   r=   s    r(   r>    DiagGaussianDistribution.entropy   s    #D$5$5$=$=$?@@r*   c                 6    U R                   R                  5       $ r!   )r$   rsampler=   s    r(   rA   DiagGaussianDistribution.sample   s      ((**r*   c                 .    U R                   R                  $ r!   )r$   meanr=   s    r(   rD   DiagGaussianDistribution.mode   s      %%%r*   rF   c                 B    U R                  X5        U R                  US9$ N)rF   r5   rH   )r&   rl   rm   rF   s       r(   rK   ,DiagGaussianDistribution.actions_from_params   s$    6m<<r*   c                 L    U R                  X5      nU R                  U5      nX44$ )z
Compute the log probability of taking an action
given the distribution parameters.

:param mean_actions:
:param log_std:
:return:
rK   r:   )r&   rl   rm   r}   r:   s        r(   rN   -DiagGaussianDistribution.log_prob_from_params   s+     **<A==)  r*   )rj   r$   rm   rl   )g        rP   )rQ   rR   rS   rT   rU   intr#   floatrW   r
   rV   rX   r1   r   rY   rZ   r5   r:   r   r>   rA   rD   r[   rK   rN   r\   r]   r^   s   @r(   r   r   }   s7   3 % %E %TYZ\ZcZcegeqeqZqTr %*:<))NPii	%	.		 	.bii 	.A")), A+		 +&bii &=		 =BII =^b =oqoxox =
! !RYY !SXY[YbYbdfdmdmYmSn ! !r*   c                     ^  \ rS rSrSrSS\S\4U 4S jjjrS\S\	R                  S\	R                  S	\4U 4S
 jjrSS\	R                  S\\	R                     S	\	R                  4U 4S jjjrS	\\	R                     4S jrS	\	R                  4U 4S jjrS	\	R                  4U 4S jjrS\	R                  S\	R                  S	\\	R                  \	R                  4   4S jrSrU =r$ )r      z
Gaussian distribution with diagonal covariance matrix, followed by a squashing function (tanh) to ensure bounds.

:param action_dim: Dimension of the action space.
:param epsilon: small value to avoid NaN due to numerical imprecision.
rj   epsilonc                 >   > [         TU ]  U5        X l        S U l        g r!   )r"   r#   r   gaussian_actions)r&   rj   r   r'   s      r(   r#   )SquashedDiagGaussianDistribution.__init__   s    $59r*   r&   rl   rm   r+   c                 &   > [         TU ]  X5        U $ r!   )r"   r5   )r&   rl   rm   r'   s      r(   r5   3SquashedDiagGaussianDistribution.proba_distribution   s     	"<9r*   r}   r   c                    > Uc  [         R                  U5      n[        TU ]  U5      nU[        R
                  " [        R                  " SUS-  -
  U R                  -   5      SS9-  nU$ )Nra      rb   )TanhBijectorinverser"   r:   rY   rf   logr   )r&   r}   r   r:   r'   s       r(   r:   )SquashedDiagGaussianDistribution.log_prob   sd     #+33G< 7#$45 	BFF266!gqj.4<<"?@aHHr*   c                     g r!   r-   r=   s    r(   r>   (SquashedDiagGaussianDistribution.entropy   s     r*   c                 j   > [         TU ]  5       U l        [        R                  " U R                  5      $ r!   )r"   rA   r   rY   tanhr%   s    r(   rA   'SquashedDiagGaussianDistribution.sample   s'     % 0wwt,,--r*   c                 j   > [         TU ]  5       U l        [        R                  " U R                  5      $ r!   )r"   rD   r   rY   r   r%   s    r(   rD   %SquashedDiagGaussianDistribution.mode   s&     %wwt,,--r*   c                 `    U R                  X5      nU R                  X0R                  5      nX44$ r!   )rK   r:   r   )r&   rl   rm   actionr:   s        r(   rN   5SquashedDiagGaussianDistribution.log_prob_from_params  s/    )),@==)>)>?r*   )r   r   ư>r!   )rQ   rR   rS   rT   rU   r   r   r#   r   rY   rZ   r5   r   r:   r>   rA   rD   rW   rN   r\   r]   r^   s   @r(   r   r      s    :3 : : :2BD))VXV_V_	-		 Xbii=P \^\e\e  ")), 
.		 .
.bii .
   RYY  SXY[YbYbdfdmdmYmSn    r*   c                     ^  \ rS rSrSrS\4U 4S jjrS\S\R                  4S jr	S\
S	\R                  S\
4S
 jrS\R                  S\R                  4S jrS\R                  4S jrS\R                  4S jrS\R                  4S jrSS	\R                  S\S\R                  4S jjrS	\R                  S\\R                  \R                  4   4S jrSrU =r$ )r   i  z_
Categorical distribution for discrete actions.

:param action_dim: Number of discrete actions
rj   c                 .   > [         TU ]  5         Xl        g r!   )r"   r#   rj   rn   s     r(   r#    CategoricalDistribution.__init__  s    $r*   rp   r+   c                 F    [         R                  " XR                  5      nU$ )a  
Create the layer that represents the distribution:
it will be the logits of the Categorical distribution.
You can then get probabilities using a softmax.

:param latent_dim: Dimension of the last layer
    of the policy network (before the action layer)
:return:
)r
   ru   rj   r&   rp   action_logitss      r(   r1   .CategoricalDistribution.proba_distribution_net  s     		*oo>r*   r&   r   c                 "    [        US9U l        U $ Nlogits)r   r$   r&   r   s     r(   r5   *CategoricalDistribution.proba_distribution  s    '}=r*   r}   c                 8    U R                   R                  U5      $ r!   )r$   r:   r&   r}   s     r(   r:    CategoricalDistribution.log_prob#  s      ))'22r*   c                 6    U R                   R                  5       $ r!   )r$   r>   r=   s    r(   r>   CategoricalDistribution.entropy&  s      ((**r*   c                 6    U R                   R                  5       $ r!   r$   rA   r=   s    r(   rA   CategoricalDistribution.sample)        ''))r*   c                 T    [         R                  " U R                  R                  SS9$ Nra   rb   )rY   argmaxr$   probsr=   s    r(   rD   CategoricalDistribution.mode,  s    yy**00a88r*   rF   c                 B    U R                  U5        U R                  US9$ r   r   r&   r   rF   s      r(   rK   +CategoricalDistribution.actions_from_params/  $    .m<<r*   c                 L    U R                  U5      nU R                  U5      nX#4$ r!   r   r&   r   r}   r:   s       r(   rN   ,CategoricalDistribution.log_prob_from_params4  )    **=9==)  r*   )rj   r$   rP   )rQ   rR   rS   rT   rU   r   r#   r
   rV   r1   r   rY   rZ   r5   r:   r>   rA   rD   r[   rK   rW   rN   r\   r]   r^   s   @r(   r   r     s    %3 %  !< RYY [v 3		 3bii 3+ +*		 *9bii 9= =4 =\^\e\e =
!")) !biiQSQZQZFZ@[ ! !r*   c                     ^  \ rS rSrSrS\\   4U 4S jjrS\S\R                  4S jr
S\S	\R                  S\4S
 jrS\R                  S\R                  4S jrS\R                  4S jrS\R                  4S jrS\R                  4S jrSS	\R                  S\S\R                  4S jjrS	\R                  S\\R                  \R                  4   4S jrSrU =r$ )r   i:  zx
MultiCategorical distribution for multi discrete actions.

:param action_dims: List of sizes of discrete action spaces
action_dimsc                 .   > [         TU ]  5         Xl        g r!   r"   r#   r   r&   r   r'   s     r(   r#   %MultiCategoricalDistribution.__init__A      &r*   rp   r+   c                 Z    [         R                  " U[        U R                  5      5      nU$ )a+  
Create the layer that represents the distribution:
it will be the logits (flattened) of the MultiCategorical distribution.
You can then get probabilities using a softmax on each sub-space.

:param latent_dim: Dimension of the last layer
    of the policy network (before the action layer)
:return:
)r
   ru   rf   r   r   s      r(   r1   3MultiCategoricalDistribution.proba_distribution_netE  s%     		*c$2B2B.CDr*   r&   r   c                     [         R                  " U[        U R                  5      SS9 Vs/ s H  n[	        US9PM     snU l        U $ s  snf )Nra   rb   r   )rY   splitlistr   r   r$   )r&   r   r   s      r(   r5   /MultiCategoricalDistribution.proba_distributionS  sJ     EGHH]\`aeaqaq\rxyDz{Dz5[6Dz{ |s   A	r}   c           
          [         R                  " [        U R                  [         R                  " USS95       VVs/ s H  u  p#UR                  U5      PM     snnSS9R                  SS9$ s  snnf r   )rY   stackzipr$   unbindr:   rf   )r&   r}   distr   s       r(   r:   %MultiCategoricalDistribution.log_probY  s]    xx7:4;L;LbiiX_efNg7hi7h|tT]]6"7hiop

#!#*	is   A.
c                     [         R                  " U R                   Vs/ s H  oR                  5       PM     snSS9R	                  SS9$ s  snf r   )rY   r   r$   r>   rf   r&   r   s     r(   r>   $MultiCategoricalDistribution.entropy_  s@    xxD4E4EF4ED4EFANRRWXRYYFs   Ac                     [         R                  " U R                   Vs/ s H  oR                  5       PM     snSS9$ s  snf r   )rY   r   r$   rA   r   s     r(   rA   #MultiCategoricalDistribution.sampleb  s1    xx43D3DE3D43DE1MMEs   >c           
          [         R                  " U R                   Vs/ s H"  n[         R                  " UR                  SS9PM$     snSS9$ s  snf r   )rY   r   r$   r   r   r   s     r(   rD   !MultiCategoricalDistribution.modee  s=    xx$BSBSTBS$4::15BSTZ[\\Ts   )ArF   c                 B    U R                  U5        U R                  US9$ r   r   r   s      r(   rK   0MultiCategoricalDistribution.actions_from_paramsh  r   r*   c                 L    U R                  U5      nU R                  U5      nX#4$ r!   r   r   s       r(   rN   1MultiCategoricalDistribution.log_prob_from_paramsm  r   r*   r   r$   rP   )rQ   rR   rS   rT   rU   r   r   r#   r
   rV   r1   r   rY   rZ   r5   r:   r>   rA   rD   r[   rK   rW   rN   r\   r]   r^   s   @r(   r   r   :  s    'DI '  .?Ayy	)		 bii Z ZN		 N]bii ]= =4 =\^\e\e =
!")) !biiQSQZQZFZ@[ ! !r*   c                     ^  \ rS rSrSrS\4U 4S jjrS\S\R                  4S jr	S\
S	\R                  S\
4S
 jrS\R                  S\R                  4S jrS\R                  4S jrS\R                  4S jrS\R                  4S jrSS	\R                  S\S\R                  4S jjrS	\R                  S\\R                  \R                  4   4S jrSrU =r$ )r   is  zd
Bernoulli distribution for MultiBinary action spaces.

:param action_dim: Number of binary actions
r   c                 .   > [         TU ]  5         Xl        g r!   r   r   s     r(   r#   BernoulliDistribution.__init__z  r   r*   rp   r+   c                 F    [         R                  " XR                  5      nU$ )z
Create the layer that represents the distribution:
it will be the logits of the Bernoulli distribution.

:param latent_dim: Dimension of the last layer
    of the policy network (before the action layer)
:return:
)r
   ru   r   r   s      r(   r1   ,BernoulliDistribution.proba_distribution_net~  s     		*.>.>?r*   r&   r   c                 "    [        US9U l        U $ r   )r   r$   r   s     r(   r5   (BernoulliDistribution.proba_distribution  s    %];r*   r}   c                 R    U R                   R                  U5      R                  SS9$ r   )r$   r:   rf   r   s     r(   r:   BernoulliDistribution.log_prob  s'      ))'26616==r*   c                 P    U R                   R                  5       R                  SS9$ r   )r$   r>   rf   r=   s    r(   r>   BernoulliDistribution.entropy  s%      ((*..1.55r*   c                 6    U R                   R                  5       $ r!   r   r=   s    r(   rA   BernoulliDistribution.sample  r   r*   c                 V    [         R                  " U R                  R                  5      $ r!   )rY   roundr$   r   r=   s    r(   rD   BernoulliDistribution.mode  s    xx))//00r*   rF   c                 B    U R                  U5        U R                  US9$ r   r   r   s      r(   rK   )BernoulliDistribution.actions_from_params  r   r*   c                 L    U R                  U5      nU R                  U5      nX#4$ r!   r   r   s       r(   rN   *BernoulliDistribution.log_prob_from_params  r   r*   r   rP   )rQ   rR   rS   rT   rU   r   r#   r
   rV   r1   r   rY   rZ   r5   r:   r>   rA   rD   r[   rK   rW   rN   r\   r]   r^   s   @r(   r   r   s  s    'C '
 
 
!: 299 Yr >		 >bii >6 6*		 *1bii 1= =4 =\^\e\e =
!")) !biiQSQZQZFZ@[ ! !r*   c                     ^  \ rS rSr% Sr\S   \S'   \\   \S'   \\S'   \	R                  \S'   \	R                  \S'   \	R                  \S	'        S(S
\S\S\S\S\S\4U 4S jjjrS\	R                  S\	R                  4S jrS)S\	R                  S\SS4S jjr S*S\S\S\\   S\\R$                  \R&                  4   4S jjrS\S\	R                  S\	R                  S\	R                  S\4
S jrS\	R                  S\	R                  4S jrS\\	R                     4S  jrS\	R                  4S! jrS\	R                  4S" jrS\	R                  S\	R                  4S# jr S+S\	R                  S\	R                  S\	R                  S$\S\	R                  4
S% jjrS\	R                  S\	R                  S\	R                  S\\	R                  \	R                  4   4S& jrS'rU =r$ ),r   i  a  
Distribution class for using generalized State Dependent Exploration (gSDE).
Paper: https://arxiv.org/abs/2005.05719

It is used to create the noise exploration matrix and
compute the log probability of an action with that noise.

:param action_dim: Dimension of the action space.
:param full_std: Whether to use (n_features x n_actions) parameters
    for the std instead of only (n_features,)
:param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure
    a positive standard deviation (cf paper). It allows to keep variance
    above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.
:param squash_output: Whether to squash the output using a tanh function,
    this ensures bounds are satisfied.
:param learn_features: Whether to learn features for gSDE or not.
    This will enable gradients to be backpropagated through the features
    ``latent_sde`` in the code.
:param epsilon: small value to avoid NaN due to numerical imprecision.
r   bijectorlatent_sde_dimweights_dist_latent_sdeexploration_matexploration_matricesrj   full_std	use_explnsquash_outputlearn_featuresr   c                    > [         TU ]  5         Xl        S U l        S U l        S U l        X0l        X l        X`l        XPl	        U(       a  [        U5      U l        g S U l        g r!   )r"   r#   rj   r  rl   rm   r	  r  r   r  r   r  )r&   rj   r  r	  r
  r  r   r'   s          r(   r#   (StateDependentNoiseDistribution.__init__  sW     	$" " ,(1DM DMr*   rm   r+   c                    U R                   (       aT  [        R                  " U5      US:*  -  nXS:  -  U R                  -   n[        R                  " U5      S-   US:  -  nX$-   nO[        R                  " U5      nU R
                  (       a  U$ U R                  c   e[        R                  " U R                  U R                  5      R                  UR                  5      U-  $ )z
Get the standard deviation from the learned parameter
(log of it by default). This ensures that the std is positive.

:param log_std:
:return:
r         ?)r	  rY   rz   r   log1pr  r  rv   rj   todevice)r&   rm   below_thresholdsafe_log_stdabove_thresholdstds         r(   get_std'StateDependentNoiseDistribution.get_std  s     >> !ffWoA>O"k2T\\AL!xx5;!LO!3C &&/C==J""...wwt**DOO<??ORUUUr*   
batch_sizeNc                     U R                  U5      n[        [        R                  " U5      U5      U l        U R                  R                  5       U l        U R                  R                  U45      U l        g)z~
Sample weights for the noise exploration matrix,
using a centered Gaussian distribution.

:param log_std:
:param batch_size:
N)r  r   rY   
zeros_liker  r   r  r  )r&   rm   r  r  s       r(   sample_weights.StateDependentNoiseDistribution.sample_weights  s\     ll7#"2==#5s;#0088:$($5$5$=$=zm$L!r*   rp   rq   c                 j   [         R                  " XR                  5      nUc  UOUU l        U R                  (       a+  [
        R                  " U R                  U R                  5      O [
        R                  " U R                  S5      n[         R                  " XR-  SS9nU R                  U5        XE4$ )a  
Create the layers and parameter that represent the distribution:
one output will be the deterministic action, the other parameter will be the
standard deviation of the distribution that control the weights of the noise matrix.

:param latent_dim: Dimension of the last layer of the policy (before the action layer)
:param log_std_init: Initial value for the log standard deviation
:param latent_sde_dim: Dimension of the last layer of the features extractor
    for gSDE. By default, it is shared with the policy network.
:return:
ra   Trs   )	r
   ru   rj   r  r  rY   rv   rX   r  )r&   rp   rq   r  mean_actions_netrm   s         r(   r1   6StateDependentNoiseDistribution.proba_distribution_net  s     99ZA -;,BjCG=="''$--t?VXV]V]^b^q^qstVu,,w5TJG$((r*   r&   rl   
latent_sdec                 $   U R                   (       a  UOUR                  5       U l        [        R                  " U R                  S-  U R                  U5      S-  5      n[        U[        R                  " X@R                  -   5      5      U l	        U $ )z|
Create the distribution given its parameters (mean, std)

:param mean_actions:
:param log_std:
:param latent_sde:
:return:
r   )
r  detachr  rY   mmr  r   sqrtr   r$   )r&   rl   rm   r!  variances        r(   r5   2StateDependentNoiseDistribution.proba_distribution  sl     *.)<)<:*BSBSBU55))1,dll7.Cq.HI"<LL9P1QRr*   r}   c                 $   U R                   b  U R                   R                  U5      nOUnU R                  R                  U5      n[	        U5      nU R                   b1  U[
        R                  " U R                   R                  U5      SS9-  nU$ r   )r  r   r$   r:   rg   rY   rf   log_prob_correction)r&   r}   r   r:   s       r(   r:   (StateDependentNoiseDistribution.log_prob.  s}    ==$#}}44W=&$$--.>?'1==$t}}@@AQRXYZZHr*   c                 d    U R                   b  g [        U R                  R                  5       5      $ r!   )r  rg   r$   r>   r=   s    r(   r>   'StateDependentNoiseDistribution.entropy=  s+    ==$ #D$5$5$=$=$?@@r*   c                     U R                  U R                  5      nU R                  R                  U-   nU R                  b  U R                  R                  U5      $ U$ r!   )	get_noiser  r$   r   r  forward)r&   noiser}   s      r(   rA   &StateDependentNoiseDistribution.sampleD  sP    t//0##((50==$==((11r*   c                     U R                   R                  nU R                  b  U R                  R                  U5      $ U$ r!   )r$   r   r  r/  r   s     r(   rD   $StateDependentNoiseDistribution.modeK  s7    ##((==$==((11r*   c                 f   U R                   (       a  UOUR                  5       n[        U5      S:X  d"  [        U5      [        U R                  5      :w  a   [        R
                  " XR                  5      $ UR                  SS9n[        R                  " XR                  5      nUR                  SS9$ r   )
r  r#  rd   r  rY   r$  r  	unsqueezebmmsqueeze)r&   r!  r0  s      r(   r.  )StateDependentNoiseDistribution.get_noiseQ  s    #'#6#6ZJ<M<M<O
z?a3z?c$:S:S6T#T55%9%9::  ))a)0
z#<#<=}}}##r*   rF   c                 D    U R                  XU5        U R                  US9$ r   r   )r&   rl   rm   r!  rF   s        r(   rK   3StateDependentNoiseDistribution.actions_from_params]  s(     	zBm<<r*   c                 N    U R                  XU5      nU R                  U5      nXE4$ r!   r   )r&   rl   rm   r!  r}   r:   s         r(   rN   4StateDependentNoiseDistribution.log_prob_from_paramsd  s-     **<*M==)  r*   )r  rj   r  r$   r   r  r  r  r  r  rm   rl   r	  r  )TFFFr   )ra   )g       NrP   ) rQ   rR   rS   rT   rU   r   __annotations__r   r   rY   rZ   r[   r   r#   r  r  rW   r
   rV   rX   r1   r   r5   r:   r>   rA   rD   r.  rK   rN   r\   r]   r^   s   @r(   r   r     sQ   * ~&&SM!YY))#
 #$!! ! 	!
 ! ! ! !.Vryy VRYY V4Mbii MS M M  \`))-2)KSTW=)	ryy",,&	')61ACUWU^U^lnlulu	,"		 bii A")), A		 bii 
$BII 
$")) 
$ in=II=02		=GIyy=ae=	=!II!02		!GIyy!	ryy"))#	$! !r*   c                   >  ^  \ rS rSrSrSS\4U 4S jjjr\S\R                  S\R                  4S j5       r
\S\R                  S\R                  4S j5       r\S	\R                  S\R                  4S
 j5       rS\R                  S\R                  4S jrSrU =r$ )r   il  z
Bijective transformation of a probability distribution
using a squashing function (tanh)

:param epsilon: small value to avoid NaN due to numerical imprecision.
r   c                 .   > [         TU ]  5         Xl        g r!   )r"   r#   r   )r&   r   r'   s     r(   r#   TanhBijector.__init__t  s    r*   r7   r+   c                 .    [         R                  " U 5      $ r!   )rY   r   r7   s    r(   r/  TanhBijector.forwardx  s    wwqzr*   c                 L    SU R                  5       U * R                  5       -
  -  $ )zh
Inverse of Tanh

Taken from Pyro: https://github.com/pyro-ppl/pyro
0.5 * torch.log((1 + x ) / (1 - x))
g      ?)r  rB  s    r(   atanhTanhBijector.atanh|  s"     aggiA2**,.//r*   yc                     [         R                  " U R                  5      R                  n[        R                  U R                  SU-   SU-
  S95      $ )z#
Inverse tanh.

:param y:
:return:
g      r  )minmax)rY   finfodtypeepsr   rE  clamp)rG  rM  s     r(   r   TanhBijector.inverse  sB     hhqww##!!!''dSjcCi'"HIIr*   c                 |    [         R                  " S[         R                  " U5      S-  -
  U R                  -   5      $ )Nr  r   )rY   r   r   r   r9   s     r(   r)   TanhBijector.log_prob_correction  s+    vvcBGGAJ!O+dll:;;r*   )r   r   )rQ   rR   rS   rT   rU   r   r#   staticmethodrY   rZ   r/  rE  r   r)  r\   r]   r^   s   @r(   r   r   l  s       299    0 0ryy 0 0 	J299 	J 	J 	J<RYY <299 < <r*   r   action_spaceuse_sdedist_kwargsc                    Uc  0 n[        U [        R                  5      (       a%  U(       a  [        O[        nU" [        U 5      40 UD6$ [        U [        R                  5      (       a  [        [        U R                  5      40 UD6$ [        U [        R                  5      (       a  [        [        U R                  5      40 UD6$ [        U [        R                  5      (       aJ  [        U R                  [        5      (       d   SU R                   S35       e[        U R                  40 UD6$ [!        S[#        U 5       S35      e)aZ  
Return an instance of Distribution for the correct type of action space

:param action_space: the input action space
:param use_sde: Force the use of StateDependentNoiseDistribution
    instead of DiagGaussianDistribution
:param dist_kwargs: Keyword arguments to pass to the probability distribution
:return: the appropriate Distribution object
zMulti-dimensional MultiBinary(z<) action space is not supported. You can flatten it instead.zIError: probability distribution, not implemented for action spaceof type zJ. Must be of type Gym Spaces: Box, Discrete, MultiDiscrete or MultiBinary.)
isinstancer	   Boxr   r   r   Discreter   r   nMultiDiscreter   r   nvecMultiBinaryr   NotImplementedErrortype)rS  rT  rU  clss       r(   make_proba_distributionra    s#    ,

++18->V>,/?;??	L&//	2	2&s<>>':JkJJ	L&"6"6	7	7+D1B1B,CS{SS	L&"4"4	5	5NNC
 
 	y+LNN+;;wx	y 
 %\^^C{CC!L)* +XX
 	
r*   	dist_true	dist_predc           
         U R                   UR                   :X  d   S5       e[        U[        5      (       a  [        U [        5      (       d   e[        R                  " UR
                  U R
                  5      (       d!   SUR
                   SU R
                   35       e[        R                  " [        U R                  UR                  5       VVs/ s H$  u  p#[        R                  R                  X#5      PM&     snnSS9R                  SS9$ [        R                  R                  U R                  UR                  5      $ s  snnf )z
Wrapper for the PyTorch implementation of the full form KL Divergence

:param dist_true: the p distribution
:param dist_pred: the q distribution
:return: KL(dist_true||dist_pred)
z2Error: input distributions should be the same typez5Error: distributions must have the same input space: z != ra   rb   )r'   rW  r   npallcloser   rY   r   r   r$   distributionskl_divergencerf   )rb  rc  pqs       r(   rh  rh    s&    )"5"55k7kk5 )9::)%ABBBB{{!!9#8#8
 
 	vB9CXCXBYY]^g^s^s]tu	v 
 xx>A)BXBXZcZpZp>qr>qdaR++A1>qr
 #!#*	 --i.D.DiF\F\]] ss   +E
)FN)-rU   abcr   r   typingr   r   r   r   numpyre  torchrY   	gymnasiumr	   r
   torch.distributionsr   r   r   &stable_baselines3.common.preprocessingr   r   r   r   r   r   r   r   r   rZ   rg   r   r   r   r   r   r   r   Spacer[   dictstrra  rh  r-   r*   r(   <module>ru     s}     # 0 0     > > A-^D &'EMgh '.*2T( $ &&CKde #*+MUs#t  #$?G^_ &-.S[|&} #R3 Rj ryy O!| O!d5 '? 5 p0!l 0!f6!< 6!r/!L /!dD!l D!N(< (<X `d 
,, 
)- 
DLTRUWZRZ^D\ 
 
F^\ ^l ^ryy ^r*   