ó
    ëhÈ<  ã                   ó  • S SK JrJr  S SKrS SKrS SKJr  S SKJr  S SK	J
r
Jr  S SKJr  S SKJr   " S S	\R                   5      r " S
 S\5      r " S S\5      r\R(                  SSSS4S\S\S\\   S\\R                      S\S\S\\\\R                            S\\\\R                            S\\R                      4S jjr " S S\R                   5      r " S S\5      rS\\\   \\\\   4   4   S\\\   \\   4   4S jrg)é    )ÚOptionalÚUnionN)Úspaces)Únn)Úget_flattened_obs_dimÚis_image_space)Ú
TensorDict)Ú
get_devicec                   ól   ^ • \ rS rSrSrS
S\R                  S\SS4U 4S jjjr\	S\4S j5       r
S	rU =r$ )ÚBaseFeaturesExtractoré   z©
Base class that represents a features extractor.

:param observation_space: The observation space of the environment
:param features_dim: Number of features extracted.
Úobservation_spaceÚfeatures_dimÚreturnNc                 óJ   >• [         TU ]  5         US:”  d   eXl        X l        g )Nr   )ÚsuperÚ__init__Ú_observation_spaceÚ_features_dim)Úselfr   r   Ú	__class__s      €Ú_/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/torch_layers.pyr   ÚBaseFeaturesExtractor.__init__   s(   ø€ Ü‰ÑÔØ˜aÓÐÐØ"3ÔØ)Õó    c                 ó   • U R                   $ )z2The number of features that the extractor outputs.)r   )r   s    r   r   Ú"BaseFeaturesExtractor.features_dim   s   € ð ×!Ñ!Ð!r   )r   r   )r   )Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__ÚgymÚSpaceÚintr   Úpropertyr   Ú__static_attributes__Ú__classcell__©r   s   @r   r   r      sF   ø† ññ*¨#¯)©)ð *À3ð *Èt÷ *ð *ð ð"˜có "ó ö"r   r   c                   ó†   ^ • \ rS rSrSrS\R                  SS4U 4S jjrS\R                  S\R                  4S jr
S	rU =r$ )
ÚFlattenExtractoré!   zª
Feature extract that flatten the input.
Used as a placeholder when feature extraction is not needed.

:param observation_space: The observation space of the environment
r   r   Nc                 ól   >• [         TU ]  U[        U5      5        [        R                  " 5       U l        g ©N)r   r   r   r   ÚFlattenÚflatten)r   r   r   s     €r   r   ÚFlattenExtractor.__init__)   s'   ø€ Ü‰ÑÐ*Ô,AÐBSÓ,TÔUÜ—z’z“|ˆr   Úobservationsc                 ó$   • U R                  U5      $ r-   ©r/   ©r   r1   s     r   ÚforwardÚFlattenExtractor.forward-   s   € Ø|‰|˜LÓ)Ð)r   r3   )r   r   r   r    r!   r"   r#   r   ÚthÚTensorr5   r&   r'   r(   s   @r   r*   r*   !   s?   ø† ñð$¨#¯)©)ð $¸÷ $ð* B§I¡Ið *°"·)±)÷ *ò *r   r*   c            	       ó–   ^ • \ rS rSrSr  SS\R                  S\S\SS4U 4S jjjr	S	\
R                  S\
R                  4S
 jrSrU =r$ )Ú	NatureCNNé1   a  
CNN from DQN Nature paper:
    Mnih, Volodymyr, et al.
    "Human-level control through deep reinforcement learning."
    Nature 518.7540 (2015): 529-533.

:param observation_space: The observation space of the environment
:param features_dim: Number of features extracted.
    This corresponds to the number of unit for the last layer.
:param normalized_image: Whether to assume that the image is already normalized
    or not (this disables dtype and bounds checks): when True, it only checks that
    the space is a Box and has 3 dimensions.
    Otherwise, it checks that it has expected dtype (uint8) and bounds (values in [0, 255]).
r   r   Únormalized_imager   Nc                 ó´  >• [        U[        R                  5      (       d   SSU 345       e[        TU ]  X5        [        USUS9(       d   SU S35       eUR                  S   n[        R                  " [        R                  " USS	S
SS9[        R                  " 5       [        R                  " SSS
SSS9[        R                  " 5       [        R                  " SSSSSS9[        R                  " 5       [        R                  " 5       5      U l        [        R                  " 5          U R                  [        R                  " UR!                  5       S    5      R#                  5       5      R                  S   nS S S 5        [        R                  " [        R$                  " WU5      [        R                  " 5       5      U l        g ! , (       d  f       NS= f)Nz-NatureCNN must be used with a gym.spaces.Box zobservation space, not F)Úcheck_channelsr<   z3You should use NatureCNN only with images not with a«  
(you are probably using `CnnPolicy` instead of `MlpPolicy` or `MultiInputPolicy`)
If you are using a custom environment,
please check it using our env checker:
https://stable-baselines3.readthedocs.io/en/master/common/env_checker.html.
If you are using `VecNormalize` or already normalized channel-first images you should pass `normalize_images=False`: 
https://stable-baselines3.readthedocs.io/en/master/guide/custom_env.htmlr   é    é   é   )Úkernel_sizeÚstrideÚpaddingé@   é   é   é   )Ú
isinstancer   ÚBoxr   r   r   Úshaper   Ú
SequentialÚConv2dÚReLUr.   Úcnnr7   Úno_gradÚ	as_tensorÚsampleÚfloatÚLinearÚlinear)r   r   r   r<   Ún_input_channelsÚ	n_flattenr   s         €r   r   ÚNatureCNN.__init__A   sx  ø€ ô Ð+¬V¯Z©Z×8Ñ8ð 	
Ø;Ø%Ð&7Ð%8Ð9ð;
ó 	
Ð8ô 	‰ÑÐ*Ô9ô Ð/ÀÐXh×ið 
	
ð)Ø):Ð(;ð <WðWó
	
Ðið -×2Ñ2°1Ñ5ÐÜ—=’=ÜIŠIÐ&¨¸À!ÈQÑOÜGŠG‹IÜIŠIb˜"¨!°A¸qÑAÜGŠG‹IÜIŠIb˜"¨!°A¸qÑAÜGŠG‹IÜJŠJ‹Ló
ˆŒô ZŠZ\ØŸ™¤§¢Ð.?×.FÑ.FÓ.HÈÑ.NÓ!O×!UÑ!UÓ!WÓX×^Ñ^Ð_`ÑaˆI÷ ô —m’m¤B§I¢I¨i¸Ó$FÌÏÊË	ÓRˆ÷ \ús   Ä*AG	Ç	
Gr1   c                 óB   • U R                  U R                  U5      5      $ r-   )rU   rO   r4   s     r   r5   ÚNatureCNN.forwardj   s   € Ø{‰{˜4Ÿ8™8 LÓ1Ó2Ð2r   )rO   rU   )i   F)r   r   r   r    r!   r"   r#   r$   Úboolr   r7   r8   r5   r&   r'   r(   s   @r   r:   r:   1   sk   ø† ñð$  Ø!&ñ	'SàŸ9™9ð'Sð ð'Sð ð	'Sð
 
÷'Sð 'SðR3 B§I¡Ið 3°"·)±)÷ 3ò 3r   r:   FTÚ	input_dimÚ
output_dimÚnet_archÚactivation_fnÚsquash_outputÚ	with_biasÚpre_linear_modulesÚpost_linear_modulesr   c           	      ó¸  • U=(       d    / nU=(       d    / n/ n[        U5      S:”  a€  U H  n	UR                  U	" U 5      5        M     UR                  [        R                  " XS   US95        U H  n	UR                  U	" US   5      5        M     UR                  U" 5       5        [	        [        U5      S-
  5       HŒ  n
U H  n	UR                  U	" X*   5      5        M     UR                  [        R                  " X*   X*S-      US95        U H  n	UR                  U	" X*S-      5      5        M!     UR                  U" 5       5        MŽ     US:”  aZ  [        U5      S:”  a  US   OU nU H  n	UR                  U	" U5      5        M     UR                  [        R                  " X±US95        U(       a$  UR                  [        R
                  " 5       5        U$ )a`  
Create a multi layer perceptron (MLP), which is
a collection of fully-connected layers each followed by an activation function.

:param input_dim: Dimension of the input vector
:param output_dim: Dimension of the output (last layer, for instance, the number of actions)
:param net_arch: Architecture of the neural net
    It represents the number of units per layer.
    The length of this list is the number of layers.
:param activation_fn: The activation function
    to use after each layer.
:param squash_output: Whether to squash the output using a Tanh
    activation function
:param with_bias: If set to False, the layers will not learn an additive bias
:param pre_linear_modules: List of nn.Module to add before the linear layers.
    These modules should maintain the input tensor dimension (e.g. BatchNorm).
    The number of input features is passed to the module's constructor.
    Compared to post_linear_modules, they are used before the output layer (output_dim > 0).
:param post_linear_modules: List of nn.Module to add after the linear layers
    (and before the activation function). These modules should maintain the input
    tensor dimension (e.g. Dropout, LayerNorm). They are not used after the
    output layer (output_dim > 0). The number of input features is passed to
    the module's constructor.
:return: The list of layers of the neural network
r   )ÚbiasrH   éÿÿÿÿ)ÚlenÚappendr   rT   ÚrangeÚTanh)r\   r]   r^   r_   r`   ra   rb   rc   ÚmodulesÚmoduleÚidxÚlast_layer_dims               r   Ú
create_mlpro   n   s  € ðH ,×1¨rÐØ-×3°Ðà€GÜ
ˆ8ƒ}qÓã(ˆFØN‰N™6 )Ó,Ö-ñ )ð 	‰”r—y’y °Q©K¸iÑHÔIó *ˆFØN‰N™6 (¨1¡+Ó.Ö/ñ *ð 	‰‘}“Ô'ä”S˜“] QÑ&Ö'ˆÛ(ˆFØN‰N™6 (¡-Ó0Ö1ñ )ð 	‰”r—y’y ¡°¸q¹Ñ0AÈ	ÑRÔSã)ˆFØN‰N™6 (°©7Ñ"3Ó4Ö5ñ *ð 	‰‘}“Ö'ñ (ð Aƒ~Ü),¨X«¸Ó):˜ "šÀ	ˆã(ˆFØN‰N™6 .Ó1Ö2ñ )ð 	‰”r—y’y À)ÑLÔMÞØ‰”r—w’w“yÔ!Ø€Nr   c                   ón  ^ • \ rS rSrSr SS\S\\\   \\	\\   4   4   S\
\R                     S\\R                  \	4   SS4
U 4S	 jjjrS
\R                   S\\R                   \R                   4   4S jrS
\R                   S\R                   4S jrS
\R                   S\R                   4S jrSrU =r$ )ÚMlpExtractoréº   aó  
Constructs an MLP that receives the output from a previous features extractor (i.e. a CNN) or directly
the observations (if no features extractor is applied) as an input and outputs a latent representation
for the policy and a value network.

The ``net_arch`` parameter allows to specify the amount and size of the hidden layers.
It can be in either of the following forms:
1. ``dict(vf=[<list of layer sizes>], pi=[<list of layer sizes>])``: to specify the amount and size of the layers in the
    policy and value nets individually. If it is missing any of the keys (pi or vf),
    zero layers will be considered for that key.
2. ``[<list of layer sizes>]``: "shortcut" in case the amount and size of the layers
    in the policy and value nets are the same. Same as ``dict(vf=int_list, pi=int_list)``
    where int_list is the same for the actor and critic.

.. note::
    If a key is not specified or an empty list is passed ``[]``, a linear network will be used.

:param feature_dim: Dimension of the feature vector (can be the output of a CNN)
:param net_arch: The specification of the policy and value networks.
    See above for details on its formatting.
:param activation_fn: The activation function to use for the networks.
:param device: PyTorch device.
Úfeature_dimr^   r_   Údevicer   Nc                 óŽ  >• [         TU ]  5         [        U5      n/ n/ nUnUn[        U[        5      (       a%  UR                  S/ 5      n	UR                  S/ 5      n
OU=pšU	 H@  nUR                  [        R                  " X{5      5        UR                  U" 5       5        UnMB     U
 H@  nUR                  [        R                  " X‹5      5        UR                  U" 5       5        UnMB     Xpl	        X€l
        [        R                  " U6 R                  U5      U l        [        R                  " U6 R                  U5      U l        g )NÚpiÚvf)r   r   r
   rI   ÚdictÚgetrh   r   rT   Úlatent_dim_piÚlatent_dim_vfrL   ÚtoÚ
policy_netÚ	value_net)r   rs   r^   r_   rt   r}   r~   Úlast_layer_dim_piÚlast_layer_dim_vfÚpi_layers_dimsÚvf_layers_dimsÚcurr_layer_dimr   s               €r   r   ÚMlpExtractor.__init__Ó   s  ø€ ô 	‰ÑÔÜ˜FÓ#ˆØ&(ˆ
Ø%'ˆ	Ø'ÐØ'Ðô h¤×%Ñ%à%Ÿ\™\¨$°Ó3ˆNØ%Ÿ\™\¨$°Ó3‰Nà.6Ð6ˆNã,ˆNØ×ÑœbŸišiÐ(9ÓJÔKØ×Ñ™m›oÔ.Ø .Òñ -ó
 -ˆNØ×ÑœRŸYšYÐ'8ÓIÔJØ×Ñ™]›_Ô-Ø .Òñ -ð /ÔØ.Ôô Ÿ-š-¨Ð4×7Ñ7¸Ó?ˆŒÜŸš¨	Ð2×5Ñ5°fÓ=ˆr   Úfeaturesc                 óF   • U R                  U5      U R                  U5      4$ )z…
:return: latent_policy, latent_value of the specified network.
    If all layers are shared, then ``latent_policy == latent_value``
)Úforward_actorÚforward_critic©r   r…   s     r   r5   ÚMlpExtractor.forwardü   s%   € ð
 ×!Ñ! (Ó+¨T×-@Ñ-@ÀÓ-JÐJÐJr   c                 ó$   • U R                  U5      $ r-   )r}   r‰   s     r   r‡   ÚMlpExtractor.forward_actor  s   € Ø‰˜xÓ(Ð(r   c                 ó$   • U R                  U5      $ r-   )r~   r‰   s     r   rˆ   ÚMlpExtractor.forward_critic  s   € Ø~‰~˜hÓ'Ð'r   )rz   r{   r}   r~   )Úauto)r   r   r   r    r!   r$   r   Úlistrx   ÚstrÚtyper   ÚModuler7   rt   r   r8   Útupler5   r‡   rˆ   r&   r'   r(   s   @r   rq   rq   º   sß   ø† ñð: )/ñ'>àð'>ð ˜˜S™	 4¨¨T°#©Y¨Ñ#7Ð7Ñ8ð'>ð ˜BŸI™I‘ð	'>ð
 b—i‘i nÑ%ð'>ð 
÷'>ð '>ðRK §	¡	ð K¨e°B·I±I¸r¿y¹yÐ4HÑ.Iô Kð) b§i¡ið )°B·I±Iô )ð( r§y¡yð (°R·Y±Y÷ (ò (r   rq   c            	       ó‚   ^ • \ rS rSrSr  SS\R                  S\S\SS4U 4S jjjr	S	\
S\R                  4S
 jrSrU =r$ )ÚCombinedExtractori
  aû  
Combined features extractor for Dict observation spaces.
Builds a features extractor for each key of the space. Input from each space
is fed through a separate submodule (CNN or MLP, depending on input shape),
the output features are concatenated and fed through additional MLP network ("combined").

:param observation_space:
:param cnn_output_dim: Number of features to output from each CNN submodule(s). Defaults to
    256 to avoid exploding network sizes.
:param normalized_image: Whether to assume that the image is already normalized
    or not (this disables dtype and bounds checks): when True, it only checks that
    the space is a Box and has 3 dimensions.
    Otherwise, it checks that it has expected dtype (uint8) and bounds (values in [0, 255]).
r   Úcnn_output_dimr<   r   Nc                 ó<  >• [         TU ]  USS9  0 nSnUR                  R                  5        HJ  u  pg[	        XsS9(       a  [        XrUS9XF'   XR-  nM%  [        R                  " 5       XF'   U[        U5      -  nML     [        R                  " U5      U l
        XPl        g )NrH   )r   r   )r<   )r   r<   )r   r   r   Úitemsr   r:   r   r.   r   Ú
ModuleDictÚ
extractorsr   )	r   r   r—   r<   r›   Útotal_concat_sizeÚkeyÚsubspacer   s	           €r   r   ÚCombinedExtractor.__init__  s˜   ø€ ô 	‰ÑÐ*¸ÐÑ;à+-ˆ
àÐØ.×5Ñ5×;Ñ;Ö=‰MˆCÜ˜h×JÜ"+¨HÐdtÑ"u
‘Ø!Ñ3Ò!ô #%§*¢*£,
‘Ø!Ô%:¸8Ó%DÑDÒ!ñ >ô Ÿ-š-¨
Ó3ˆŒð /Õr   r1   c                 ó¨   • / nU R                   R                  5        H  u  p4UR                  U" X   5      5        M      [        R                  " USS9$ )NrH   )Údim)r›   r™   rh   r7   Úcat)r   r1   Úencoded_tensor_listr   Ú	extractors        r   r5   ÚCombinedExtractor.forward4  sI   € Ø Ðà"Ÿo™o×3Ñ3Ö5‰NˆCØ×&Ñ&¡y°Ñ1BÓ'CÖDñ 6ävŠvÐ)¨qÑ1Ð1r   )r   r›   )é   F)r   r   r   r    r!   r   ÚDictr$   r[   r   r	   r7   r8   r5   r&   r'   r(   s   @r   r–   r–   
  s`   ø† ñð$ "Ø!&ñ	/à!Ÿ;™;ð/ð ð/ð ð	/ð
 
÷/ð /ð42 Jð 2°2·9±9÷ 2ò 2r   r–   c                 óº   • [        U [        5      (       a  X p!X4$ [        U [        5      (       d   S5       eSU ;   d   S5       eSU ;   d   S5       eU S   U S   p!X4$ )aÇ  
Get the actor and critic network architectures for off-policy actor-critic algorithms (SAC, TD3, DDPG).

The ``net_arch`` parameter allows to specify the amount and size of the hidden layers,
which can be different for the actor and the critic.
It is assumed to be a list of ints or a dict.

1. If it is a list, actor and critic networks will have the same architecture.
    The architecture is represented by a list of integers (of arbitrary length (zero allowed))
    each specifying the number of units per layer.
   If the number of ints is zero, the network will be linear.
2. If it is a dict,  it should have the following structure:
   ``dict(qf=[<critic network architecture>], pi=[<actor network architecture>])``.
   where the network architecture is a list as described in 1.

For example, to have actor and critic that share the same network architecture,
you only need to specify ``net_arch=[256, 256]`` (here, two hidden layers of 256 units each).

If you want a different architecture for the actor and the critic,
then you can specify ``net_arch=dict(qf=[400, 300], pi=[64, 64])``.

.. note::
    Compared to their on-policy counterparts, no shared layers (other than the features extractor)
    between the actor and the critic are allowed (to prevent issues with target networks).

:param net_arch: The specification of the actor and critic networks.
    See above for details on its formatting.
:return: The network architectures for the actor and the critic
z@Error: the net_arch can only contain be a list of ints or a dictrv   zAError: no key 'pi' was provided in net_arch for the actor networkÚqfzBError: no key 'qf' was provided in net_arch for the critic network)rI   r   rx   )r^   Ú
actor_archÚcritic_archs      r   Úget_actor_critic_archr¬   <  s|   € ô< (œD×!Ñ!Ø"*Kð Ð"Ð"ô	 ˜(¤D×)Ñ)ÐmÐ+mÓmÐ)ØxÓÐdÐ!dÓdÐØxÓÐeÐ!eÓeÐØ"*¨4¡.°(¸4±.KØÐ"Ð"r   ) Útypingr   r   Ú	gymnasiumr"   Útorchr7   r   r   Ú&stable_baselines3.common.preprocessingr   r   Ú%stable_baselines3.common.type_aliasesr	   Ústable_baselines3.common.utilsr
   r“   r   r*   r:   rN   r$   r   r’   r[   ro   rq   r–   rx   r‘   r”   r¬   © r   r   Ú<module>r´      sb  ðß "ã Û Ý Ý ç XÝ <Ý 5ô"˜BŸI™Iô "ô(*Ð,ô *ô :3Ð%ô :3ðB &(§W¡WØØØ:>Ø;?ñIØðIàðIð 3‰iðIð ˜Ÿ	™	‘?ð	Ið
 ðIð ðIð !  d¨2¯9©9¡oÑ!6Ñ7ðIð " $ t¨B¯I©I¡Ñ"7Ñ8ðIð 
ˆ")‰)_õIôXM(2—9‘9ô M(ô`/2Ð-ô /2ðd%# E¨$¨s©)°T¸#¸tÀC¹y¸.Ñ5IÐ*IÑ$Jð %#ÈuÐUYÐZ]ÑU^Ð`dÐehÑ`iÐUiÑOjõ %#r   