
    ni                     v    S r SSKJr  SSKJr  SSKJr   SSKJs  Jr  SSKJr  S	S jrg! \	 a    SSK
r Nf = f)
zEIn-graph simulation step of a vectorized algorithm with environments.    )absolute_import)division)print_functionN   )streaming_meanc           	        ^ ^^^^^^^^^ UU UU4S jmUU UU4S jnUUUUU4S jmUUU4S jn[         R                  " S5         [         R                  " T5      m[         R                  " U5      n[         R                  " S5         [         R                  " [         R
                  " [        T 5      [         R                  S9SS	S
9m[         R                  " [         R
                  " [        T 5      [         R                  S9SSS
9mSSS5        [        R                  " S[         R                  5      m[        R                  " S[         R                  5      m[         R                  " UU 4S jU 4S j5      m[         R                  " [         R                  " [         R                  " T5      S   [         R                  5      UU4S j[        5      n[         R                   " U/5         U" 5       nSSS5        [         R                   " W/5         [         R                  " [         R"                  " T R$                  5      SS2S4   [         R                  5      m[         R                  " [         R                  " [         R                  " T5      S   [         R                  5      UU4S j[        5      nSSS5        [         R                   " W/5         [         R&                  R)                  U" 5       XgU/5      n	SSS5        [         R                   " W	/5         [         R*                  " T R$                  5      [         R*                  " T5      sn
mSSS5        W
TU	4sSSS5        $ ! , (       d  f       GNl= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       Nb= f! , (       d  f       g= f)a$  Simulation step of a vecrotized algorithm with in-graph environments.

Integrates the operations implemented by the algorithm and the environments
into a combined operation.

Args:
  batch_env: In-graph batch environment.
  algo: Algorithm instance implementing required operations.
  log: Tensor indicating whether to compute and return summaries.
  reset: Tensor causing all environments to reset.

Returns:
  Tuple of tensors containing done flags for the current episodes, possibly
  intermediate scores for the episodes, and a summary tensor.
c                   > U R                   R                  S:X  d   e[        R                  " U [        R                  5      n[        R                  " U 5      nTR                  U 5      [        R                  " TX5      [        R                  " TX5      /n[        R                  " U5         TR                  U 5      sSSS5        $ ! , (       d  f       g= f)zReset environments, intermediate scores and durations for new episodes.

Args:
  agent_indices: Tensor containing batch indices starting an episode.

Returns:
  Summary tensor.
r   N)	shapendimstf
zeros_likefloat32resetscatter_updatecontrol_dependenciesbegin_episode)agent_indiceszero_scoreszero_durations	reset_opsalgo	batch_envlengthscores       ]/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/agents/tools/simulate.py_define_begin_episode'simulate.<locals>._define_begin_episode-   s     $$)))--rzz:K]]=1N&
%<
&-@I
 
	 	 	+. 
,	+	+s   -C
Cc            	      ^  > TR                   S-   n [        R                  " [        T5      5      nTR	                  X5      u  p#UR                  TR                  R                  5        [        R                  " TR                  U5      /5         T
R                  TR                  5      nT	R                  [        R                  " [        T5      [        R                  5      5      nSSS5        [        R                  " WW/5         [        R                  " [        T5      5      nTR                  XTR                  TR                  TR                  TR                   5      nSSS5        [        R                   R#                  UW/5      $ ! , (       d  f       N= f! , (       d  f       N@= f)a  Request actions from the algorithm and apply them to the environments.

Increments the lengths of all episodes and increases their scores by the
current reward. After stepping the environments, provides the full
transition tuple to the algorithm.

Returns:
  Summary tensor.
r   N)observr   rangelenperform	set_shapeactionr
   r   simulate
assign_addrewardonesint32
experiencedonesummarymerge)prevobr   r$   step_summary	add_score
inc_lengthexperience_summaryr   r   r   r   s          r   _define_stepsimulate.<locals>._define_stepA   s8    !FHHS^,M<<>F
Y%%++,		 	 )"4"4V"<!=	>""9#3#34i$$RWWS^RXX%FGj 
? 
	 	 )Z!8	9hhs9~.m??=)BRBR+4+;+;Y^^YM]M]_ 
: ::\+=>?? 
?	> 
:	9s   AFAF
F
F,c                   > U R                   R                  S:X  d   eTR                  [        R                  " TU 5      5      nTR                  [        R
                  " [        R                  " TU 5      [        R                  5      5      n[        R                  " X/5         TR                  U 5      sSSS5        $ ! , (       d  f       g= f)zNotify the algorithm of ending episodes.

Also updates the mean score and length counters used for summaries.

Args:
  agent_indices: Tensor holding batch indices that end their episodes.

Returns:
  Summary tensor.
r   N)	r
   r   submitr   gathercastr   r   end_episode)r   submit_scoresubmit_lengthr   r   mean_length
mean_scorer   s      r   _define_end_episode%simulate.<locals>._define_end_episodeX   s     $$)))$$RYYum%DEL&&rwwryy/OQSQ[Q['\]M		 	 ,!>	?m, 
@	?	?s   $B??
Cc            	        > [         R                  " [         R                  " T[         R                  " TR                  [         R
                  5      5      U4S j[        5      n [         R                  " [         R                  " T[         R                  " TR                  [         R
                  5      5      U4S j[        5      n[         R                  R                  X/5      $ )z^Reset the average score and duration, and return them as summary.

Returns:
  Summary string.
c                  `   > [         R                  R                  ST R                  5       5      $ )Nr=   r   r,   scalarclear)r=   s   r   <lambda>5simulate.<locals>._define_summaries.<locals>.<lambda>p   s    RZZ->->|ZM]M]M_-`    c                  `   > [         R                  R                  ST R                  5       5      $ )Nr<   rB   )r<   s   r   rE   rF   t   s    

(9(9-IZIZI\(]rG   )	r   condlogical_andr8   countboolstrr,   r-   )score_summarylength_summarylogr<   r=   s     r   _define_summaries#simulate.<locals>._define_summariesi   s     GGBNN3"''1# $%`!M WWR^^RWW[&&WW ]_bdN ::];<<rG   r%   simulate_temporary)dtypeFr   )namer   N c                  B   > [         R                  " [        T 5      5      $ N)r   r    r!   r   s   r   rE   simulate.<locals>.<lambda>   s    288C	N+CrG   c                     > [         R                  " [         R                  " T R                  5      S S 2S4   [         R                  5      $ )Nr   )r   r8   wherer+   r)   rY   s   r   rE   rZ      s.    RWW
 A&N2rG   r   c                     > T " T5      $ rX   rV   )r   r   s   r   rE   rZ      s    6KM6ZrG   c                     > T " T5      $ rX   rV   )r>   r   s   r   rE   rZ      s    6I-6XrG   )r   
name_scopeconvert_to_tensorvariable_scopeVariablezerosr!   r   r)   r   StreamingMeanrI   r8   r
   rL   rM   r   r\   r+   r,   r-   identity)r   r   rP   r   r3   rQ   r   stepr9   r,   r+   r   r>   r   r   r<   r=   r   s   ```        @@@@@@@r   r%   r%      sw   "/ /(@ @.- -"= 
}}Z 


s
#C  'E			/	0kk"((3y>DeRYZe{{288C	N"((CUQYZf 
1  --b"**=J ..r2::>KGGE#C F2 3MGGBGGBHH]$;A$>$&GG-.Z\_aM		 	 -	1^d 
2		 	 $	(ggbhhy~~6q!t<bhhGmGGBGGBHH]$;A$>$&GG-.XZ]_k 
) 
	 	 +	/

  "3"5}K!XYg 
0		 	 '	+KK	/U1CkdE 
,- !  
1	0 
2	1	(	( 
0	/	+	+) ! s   AOBM.	C$O-N 5OB&N:O(N#O 8N4O.
M=	8O 
N	
O
N 	O#
N1	-O4
O	>O
O)TF)__doc__
__future__r   r   r   tensorflow.compat.v1compatv1r   	Exception
tensorflow r   r%   rV   rG   r   <module>ro      s>    L &  %## q   s   	+ 
88