
    h                        S r SSKJr  SSKJr  SSKJr  SSKrSSK	J
r
  SSKJrJr  SSKJr  SS	KJr   " S
 S\5      r " S S\5      r " S S\5      rg)z4Vectorizes reward function to work with `VectorEnv`.    )annotations)Callable)AnyN)Env)	VectorEnvVectorRewardWrapper)	ArrayType)transform_rewardc                  :   ^  \ rS rSrSrSU 4S jjrSS jrSrU =r$ )TransformReward   a  A reward wrapper that allows a custom function to modify the step reward.

Example with reward transformation:
    >>> import gymnasium as gym
    >>> from gymnasium.spaces import Box
    >>> def scale_and_shift(rew):
    ...     return (rew - 1.0) * 2.0
    ...
    >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
    >>> envs = TransformReward(env=envs, func=scale_and_shift)
    >>> _ = envs.action_space.seed(123)
    >>> obs, info = envs.reset(seed=123)
    >>> obs, rew, term, trunc, info = envs.step(envs.action_space.sample())
    >>> envs.close()
    >>> obs
    array([[-4.6343064e-01,  9.8971417e-05],
           [-4.4488689e-01, -1.9375233e-03],
           [-4.3118435e-01, -1.5342437e-03]], dtype=float32)
c                0   > [         TU ]  U5        X l        g)zInitialize LambdaReward wrapper.

Args:
    env (Env): The vector environment to wrap
    func: (Callable): The function to apply to reward
N)super__init__func)selfenvr   	__class__s      d/home/james-whalen/.local/lib/python3.13/site-packages/gymnasium/wrappers/vector/vectorize_reward.pyr   TransformReward.__init__%   s     		    c                $    U R                  U5      $ )zApply function to reward.r   )r   rewards     r   rewardsTransformReward.rewards0   s    yy  r   r   )r   r   r   z Callable[[ArrayType], ArrayType]r   r	   returnr	   	__name__
__module____qualname____firstlineno____doc__r   r   __static_attributes____classcell__r   s   @r   r   r      s    (	! !r   r   c                  F   ^  \ rS rSrSr      SU 4S jjrSS jrSrU =r$ )VectorizeTransformReward5   aY  Vectorizes a single-agent transform reward wrapper for vector environments.

An example such that applies a ReLU to the reward:
    >>> import gymnasium as gym
    >>> from gymnasium.wrappers import TransformReward
    >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
    >>> envs = VectorizeTransformReward(envs, wrapper=TransformReward, func=lambda x: (x > 0.0) * x)
    >>> _ = envs.action_space.seed(123)
    >>> obs, info = envs.reset(seed=123)
    >>> obs, rew, term, trunc, info = envs.step(envs.action_space.sample())
    >>> envs.close()
    >>> rew
    array([-0., -0., -0.])
c                P   > [         TU ]  U5        U" [        5       40 UD6U l        g)zConstructor for the vectorized lambda reward wrapper.

Args:
    env: The vector environment to wrap.
    wrapper: The wrapper to vectorize
    **kwargs: Keyword argument for the wrapper
N)r   r   r   wrapper)r   r   r,   kwargsr   s       r   r   !VectorizeTransformReward.__init__E   s%     	su//r   c                h    [        U5       H"  u  p#U R                  R                  U5      X'   M$     U$ )z=Iterates over the reward updating each with the wrapper func.)	enumerater,   r   )r   r   irs       r   r    VectorizeTransformReward.rewardsV   s.    f%DA))!,FI &r   )r,   )r   r   r,   z&type[transform_reward.TransformReward]r-   r   r   r   r'   s   @r   r)   r)   5   s3    00 80 	0" r   r)   c                  B   ^  \ rS rSrSr  S     SU 4S jjjrSrU =r$ )
ClipReward]   a  A wrapper that clips the rewards for an environment between an upper and lower bound.

Example with clipped rewards:
    >>> import numpy as np
    >>> import gymnasium as gym
    >>> envs = gym.make_vec("MountainCarContinuous-v0", num_envs=3)
    >>> envs = ClipReward(envs, 0.0, 2.0)
    >>> _ = envs.action_space.seed(123)
    >>> obs, info = envs.reset(seed=123)
    >>> for _ in range(10):
    ...     obs, rew, term, trunc, info = envs.step(0.5 * np.ones((3, 1)))
    ...
    >>> envs.close()
    >>> rew
    array([0., 0., 0.])
c                B   > [         TU ]  U[        R                  UUS9  g)zConstructor for ClipReward wrapper.

Args:
    env: The vector environment to wrap
    min_reward: The min reward for each step
    max_reward: the max reward for each step
)
min_reward
max_rewardN)r   r   r
   r5   )r   r   r8   r9   r   s       r   r   ClipReward.__init__o   s)     	''!!	 	 	
r    )NN)r   r   r8   float | np.ndarray | Noner9   r<   )r    r!   r"   r#   r$   r   r%   r&   r'   s   @r   r5   r5   ]   s6    ( 1504	

 .
 .	
 
r   r5   )r$   
__future__r   collections.abcr   typingr   numpynp	gymnasiumr   gymnasium.vectorr   r   gymnasium.vector.vector_envr	   gymnasium.wrappersr
   r   r)   r5   r;   r   r   <module>rF      sI    : " $    ; 1 /"!) "!J%2 %P$
) $
r   