
    hP                     f    S SK JrJr  S SKrS SKrS SKJr  S SKJ	r	   " S S\R                  5      rg)    )OptionalUnionNspaces)GymStepReturnc                   >  ^  \ rS rSrSr     SS\S\S\S\S\4
U 4S jjjrS\S\S	S
4S jrS	\	\
\R                  4   4S jrSS jrS\\\R                  4   S	\4S jrSS\
S	S
4S jjrS
S
S.S\\   S\\	   S	\\	\
\R                  4   \	4   4U 4S jjjrSrU =r$ )SimpleMultiObsEnv
   u
  
Base class for GridWorld-based MultiObs Environments 4x4  grid world.

.. code-block:: text

    ____________
   | 0  1  2   3|
   | 4|¯5¯¯6¯| 7|
   | 8|_9_10_|11|
   |12 13  14 15|
   ¯¯¯¯¯¯¯¯¯¯¯¯¯¯

start is 0
states 5, 6, 9, and 10 are blocked
goal is 15
actions are = [left, down, right, up]

simple linear state env of 15 states but encoded with a vector and an image observation:
each column is represented by a random vector and each row is
represented by a random image, both sampled once at creation time.

:param num_col: Number of columns in the grid
:param num_row: Number of rows in the grid
:param random_start: If true, agent starts in random position
:param channel_last: If true, the image will be channel last, else it will be channel first
num_colnum_rowrandom_startdiscrete_actionschannel_lastc           
        > [         TU ]  5         SU l        U(       a
  / SQU l        O	/ SQU l        X0l        X@l        U(       a  [        R                  " S5      U l        O[        R                  " SSS5      U l        [        R                  " [        R                  " SSU R                  4[        R                  S9[        R                  " SS	U R                  [        R                  S9S
.S9U l        SU l        SU l        SU l        SU l        / SQU l        U R)                  5         Xl        / U l        U R/                  X5        [1        U R,                  5      S-
  U l        g )N   )@   r      )r   r   r      r   r   )r   dtype   vecimgr   d    )leftdownrightup)super__init__vector_sizeimg_sizer   r   r   Discreteaction_spaceBoxDictnpfloat64uint8observation_spacecount	max_countlogstate
action2strinit_possible_transitionsr   state_mappinginit_state_mappinglen	max_state)selfr   r   r   r   r   	__class__s         h/home/james-whalen/.local/lib/python3.13/site-packages/stable_baselines3/common/envs/multi_input_envs.pyr"   SimpleMultiObsEnv.__init__&   s    	'DM'DM( 0 & 2D &

1a 6D!'zz!Q)9)9(;2::Nzz!S$--rxxH"
 

9&&(:<1T//014    returnNc                 x   [         R                  R                  XR                  45      n[         R                  R                  SSUSS4[         R                  S9n[        U5       HP  n[        U5       H>  nU R                  R                  X5   XF   R                  U R                  5      S.5        M@     MR     g)z
Initializes the state_mapping array which holds the observation values for each state

:param num_col: Number of columns.
:param num_row: Number of rows.
r   r   r   r   r   N)
r)   randomr#   randintr+   ranger3   appendreshaper$   )r7   r   r   col_vecsrow_imgsijs          r9   r4   $SimpleMultiObsEnv.init_state_mappingQ   s     99##W.>.>$?@99$$Qgr2->bhh$OwA7^""))(+hkFYFYZ^ZgZgFh*ij $  r;   c                 4    U R                   U R                     $ )zd
Uses the state to get the observation mapping.

:return: observation dict {'vec': ..., 'img': ...}
)r3   r0   r7   s    r9   get_state_mapping#SimpleMultiObsEnv.get_state_mappinga   s     !!$**--r;   c                 L    / SQU l         / SQU l        / SQU l        / SQU l        g)aS  
Initializes the transitions of the environment
The environment exploits the cardinal directions of the grid by noting that
they correspond to simple addition and subtraction from the cell id within the grid

- up => means moving up a row => means subtracting the length of a column
- down => means moving down a row => means adding the length of a column
- left => means moving left by one => means subtracting 1
- right => means moving right by one => means adding 1

Thus one only needs to specify in which states each action is possible
in order to define the transitions of the environment
)r                  )r   r      rN         )r   r   rM      rO   rP   )r   rR   rU   rS   rT   rQ   N)left_possibledown_possibleright_possibleup_possiblerI   s    r9   r2   +SimpleMultiObsEnv.init_possible_transitionsi   s$     3030r;   actionc                 d   U R                   (       d  [        R                  " U5      nU =R                  S-  sl        U R                  nSnU R                  U R
                  ;   a  US:X  a  U =R                  S-  sl        OU R                  U R                  ;   a&  US:X  a   U =R                  U R                  -  sl        OuU R                  U R                  ;   a  US:X  a  U =R                  S-  sl        O?U R                  U R                  ;   a%  US:X  a  U =R                  U R                  -  sl        U R                  U R                  :H  nU(       a  SOUnU R                  U R                  :  nUnSU R                  U    SU S	U R                   3U l        U R                  5       X6US
U04$ )aK  
Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Accepts an action and returns a tuple (observation, reward, terminated, truncated, info).

:param action:
:return: tuple (observation, reward, terminated, truncated, info).
r   gr   rM   rN   g      ?zWent z
 in state z, got to state 
got_to_end)r   r)   argmaxr-   r0   rV   rW   r   rX   rY   r6   r.   r1   r/   rJ   )r7   r[   
prev_staterewardr]   	truncated
terminateds          r9   stepSimpleMultiObsEnv.step|   sP    $$YYv&F

a
ZZ
::+++!JJ!OJZZ4---&A+JJ$,,&JZZ4...6Q;JJ!OJZZ4+++!JJ$,,&JZZ4>>1
"JJ/	
4??623:j\Y]YcYcXde%%'YWaHbbbr;   modec                 .    [        U R                  5        g)z2
Prints the log of the environment.

:param mode:
N)printr/   )r7   re   s     r9   renderSimpleMultiObsEnv.render   s     	dhhr;   )seedoptionsrj   rk   c                   > Ub  [         TU ]  US9  SU l        U R                  (       d  SU l        O/[
        R                  R                  SU R                  5      U l        U R                  U R                     0 4$ )z
Resets the environment state and step count and returns reset observation.

:param seed:
:return: observation dict {'vec': ..., 'img': ...}
)rj   r   )
r!   resetr-   r   r0   r)   r>   r?   r6   r3   )r7   rj   rk   r8   s      r9   rm   SimpleMultiObsEnv.reset   sf     GMtM$
  DJ**1dnn=DJ!!$**-r11r;   )r1   r&   r-   r   rW   r$   rV   r/   r.   r6   r   r,   r   rX   r0   r3   rY   r#   )r   r   TTT)r<   N)human)__name__
__module____qualname____firstlineno____doc__intboolr"   r4   dictstrr)   ndarrayrJ   r2   r   r   rc   rh   r   tuplerm   __static_attributes____classcell__)r8   s   @r9   r	   r	   
   s   : !!%!)5)5 )5 	)5
 )5 )5 )5Vk# k k k .4RZZ#8 .1&#c5bjj1 #cm #cJ3 T  .2T 2Xc] 2HTN 2V[\`adfhfpfpap\qsw\wVx 2 2r;   r	   )typingr   r   	gymnasiumgymnumpyr)   r   %stable_baselines3.common.type_aliasesr   Envr	    r;   r9   <module>r      s&    "    ?m2 m2r;   