
    h$                     j    S SK Jr  S SKJrJrJr  S SKrS SKJ	r	J
r
  S SKJr  S SKJr   " S S\	5      rg)	    )OrderedDict)AnyOptionalUnionN)Envspaces)EnvSpec)GymStepReturnc                     ^  \ rS rSr% Sr\" SS5      r\R                  \	S'          S#S\
S\S	\\
   S
\S\S\S\4U 4S jjjrS\
SS4S jrS\R                  S\\
\R                  4   4S jrS\\
\R                  4   S\
S\R                  4S jrS
\S\S\
S\R(                  4S jrS\\\\
\R                  4   4   4S jrSSS.S\\
   S\\   S\\\\\
\R                  4   4   \4   4S jjrS\\R                  \
4   S\4S jrS\\
\R                  4   S\\
\R                  4   S\\\\4      S\R:                  4S jrS\\R                     4S  jrS$S! jr S"r!U =r"$ )%BitFlippingEnv   a  
Simple bit flipping env, useful to test HER.
The goal is to flip all the bits to get a vector of ones.
In the continuous variant, if the ith action component has a value > 0,
then the ith bit will be flipped. Uses a ``MultiBinary`` observation space
by default.

:param n_bits: Number of bits to flip
:param continuous: Whether to use the continuous actions version or not,
    by default, it uses the discrete one
:param max_steps: Max number of steps, by default, equal to n_bits
:param discrete_obs_space: Whether to use the discrete observation
    version or not, ie a one-hot encoding of all possible states
:param image_obs_space: Whether to use an image observation version
    or not, ie a greyscale image of the state
:param channel_first: Whether to use channel-first or last image.
zBitFlippingEnv-v0zno-entry-pointstateNn_bits
continuous	max_stepsdiscrete_obs_spaceimage_obs_spacechannel_firstrender_modec                   > [         TU ]  5         Xpl        U(       a  SOSU l        U R	                  XEU5      U l        [        R                  " U5      U l        U(       a,  [        R                  " SSU4[        R                  S9U l        O[        R                  " U5      U l        X l        X@l        XPl        [        R"                  " U4U R
                  S   R$                  S9U l        Uc  UnX0l        SU l        g )	N)   $   r   )r   r   r   r   )shapedtypedesired_goalr   r   )super__init__r   image_shape_make_observation_spaceobservation_spacer   MultiBinary
_obs_spaceBoxnpfloat32action_spaceDiscreter   r   r   onesr   r   r   current_step)	selfr   r   r   r   r   r   r   	__class__s	           h/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/envs/bit_flipping_env.pyr   BitFlippingEnv.__init__!   s     	&*7;[
 "&!=!=>Pci!j ,,V4 &

2q	 TD & 7D$"4.GGVIT5K5KN5[5a5abI"    seedreturnc                 :    U R                   R                  U5        g N)r$   r1   )r,   r1   s     r.   r1   BitFlippingEnv.seedD   s    T"r0   c           
      d  ^ U R                   (       aO  TR                  [        R                  5      m[	        [        U4S j[        [        T5      5       5       5      5      $ U R                  (       a  [        R                  " U R                  5      n[        R                  " TR                  [        R                  5      S-  [        R                  " U[        T5      -
  [        R                  S945      nUR                  U R                  5      R                  [        R                  5      $ T$ )z>
Convert to discrete space if needed.

:param state:
:return:
c              3   :   >#    U  H  nTU   S U-  -  v   M     g7f)   N ).0ir   s     r.   	<genexpr>3BitFlippingEnv.convert_if_needed.<locals>.<genexpr>T   s     F4Eq58ad?4Es      r   )r   astyper&   int32intsumrangelenr   prodr    concatenateuint8zerosreshape)r,   r   sizeimages    `  r.   convert_if_needed BitFlippingEnv.convert_if_neededG   s     ""LL*E sFE#e*4EFFGG774++,DNNELL$:S$@"((4RUV[R\K\dfdldlBm#noE==!1!1299"((CCr0   
batch_sizec                    [        U[        5      (       av  [        R                  " U5      R	                  US5      nUSS2SS24   S[        R
                  " [        U R                  5      5      -  -  S:  R                  [        5      nU$ U R                  (       a3  UR	                  US5      SS2S[        U R                  5      24   S-  nU$ [        R                  " U5      R	                  US5      nU$ )z
Convert to bit vector if needed.

:param state: The state to be converted, which can be either an integer or a numpy array.
:param batch_size: The batch size.
:return: The state converted into a bit vector.
r   Nr   r   r>   )

isinstancerA   r&   arrayrI   arangerD   r   r?   r   )r,   r   rN   
bit_vectors       r.   convert_to_bit_vector$BitFlippingEnv.convert_to_bit_vector\   s     eS!!%00R@J%ad+qBIIc$**o4N/NOSTT\\]`aJ
 	 !!z26q:KC

O:K7KLsRJ  %00R@Jr0   c                    U(       a  U(       a  [        S5      eU(       a_  [        R                  " [        R                  " SU-  5      [        R                  " SU-  5      [        R                  " SU-  5      S.5      $ U(       a  [        R                  " [        R                  " SSU R
                  [        R                  S9[        R                  " SSU R
                  [        R                  S9[        R                  " SSU R
                  [        R                  S9S.5      $ [        R                  " [        R                  " U5      [        R                  " U5      [        R                  " U5      S.5      $ )a  
Helper to create observation space

:param discrete_obs_space: Whether to use the discrete observation version
:param image_obs_space: Whether to use the image observation version
:param n_bits: The number of bits used to represent the state
:return: the environment observation space
z5Cannot use both discrete and image observation spacesr8   )observationachieved_goalr   r   r>   )lowhighr   r   )	
ValueErrorr   Dictr)   r%   r    r&   rG   r#   )r,   r   r   r   s       r.   r!   &BitFlippingEnv._make_observation_spaceo   s)    /TUU ;;#)??1f9#=%+__QY%?$*OOAvI$>   ;;#):: ".. hh	$ &,ZZ ".. hh	& %+JJ ".. hh	% . {{%11&9!'!3!3F!; & 2 26 :
 	
r0   c           	         [        SU R                  U R                  R                  5       5      4SU R                  U R                  R                  5       5      4SU R                  U R                  R                  5       5      4/5      $ )zF
Helper to create the observation.

:return: The current observation.
rW   rX   r   )r   rL   r   copyr   r,   s    r.   _get_obsBitFlippingEnv._get_obs   st      6 6tzz7H IJ $"8"89J"KL!7!78I8I8N8N8P!QR
 	
r0   )r1   optionsrc   c                    Ub  U R                   R                  U5        SU l        U R                   R                  5       U l        U R                  5       0 4$ )Nr   )r$   r1   r+   sampler   ra   )r,   r1   rc   s      r.   resetBitFlippingEnv.reset   sI     OO  &__++-
}}""r0   actionc                    U R                   (       a&  SU R                  US:     -
  U R                  US:  '   OSU R                  U   -
  U R                  U'   U R                  5       n[        U R	                  US   US   S5      R                  5       5      nUS:H  nU =R                  S-  sl        SU0nU R                  U R                  :  nX#XFU4$ )z-
Step into the env.

:param action:
:return:
r   r   rX   r   N
is_success)r   r   ra   floatcompute_rewarditemr+   r   )r,   rh   obsreward
terminatedinfo	truncateds          r.   stepBitFlippingEnv.step   s     ??%&FQJ)?%?DJJvz"!"TZZ%7!7DJJvmmot**3+?^ATVZ[``bcq[
Qj)%%7	J477r0   rX   r   _infoc                    [        U[        5      (       a  SnOfU R                  (       a+  [        UR                  5      S:  a  UR                  S   OSnO*[        UR                  5      S:  a  UR                  S   OSnU R                  X$5      nU R                  X5      n[        R                  R                  X-
  SS9nUS:  R                  [        R                  5      * $ )Nr      r   r   )axis)rP   rA   r   rD   r   rT   r&   linalgnormr?   r'   )r,   rX   r   ru   rN   distances         r.   rl   BitFlippingEnv.compute_reward   s     mS))J!!36}7J7J3Ka3O,,Q/UVJ36}7J7J3Ka3O,,Q/UVJ11,K22=M 99>>-">R>HA%%bjj111r0   c                     U R                   S:X  a  U R                  R                  5       $ [        U R                  5        g )N	rgb_array)r   r   r_   printr`   s    r.   renderBitFlippingEnv.render   s0    {*::??$$djjr0   c                     g r4   r9   r`   s    r.   closeBitFlippingEnv.close   s    r0   )r$   r(   r   r+   r   r   r   r    r   r"   r   r   )
   FNFFThuman)r2   N)#__name__
__module____qualname____firstlineno____doc__r	   specr&   ndarray__annotations__rA   boolr   strr   r1   r   rL   rT   r   r\   r!   dictra   tuplerf   r
   rs   r   r'   rl   r   r   __static_attributes____classcell__)r-   s   @r.   r   r      s    $ &(89D::  #'#( %""!! ! C=	!
 !! ! ! ! !F# # #rzz eCO6L *5bjj+A s WYWaWa &8
$ 8
QU 8
_b 8
gmgrgr 8
t
$sE#rzz/$::; 
 (,t#}#6>tn#	tCsBJJ//0$6	7#85S1 8m 8(2"3

?32CHbjjCY2bjkopsuxpxkybz2	2&,  r0   r   )collectionsr   typingr   r   r   numpyr&   	gymnasiumr   r   gymnasium.envs.registrationr	   %stable_baselines3.common.type_aliasesr
   r   r9   r0   r.   <module>r      s(    # ' '  ! / ?cS cr0   