
    niA%                         S r SSKJr  SSKJr  SSKJr  SSKrSSKr SSKJs  J	r
  SSKJr  \R                  " SS	5      r " S
 S\5      rg! \ a    SSKr
 N1f = f)zDExecute operations in a loop and coordinate logging and checkpoints.    )absolute_import)division)print_functionN   )streaming_meanPhasezNname, writer, op, batch, steps, feed, report_every, log_every,checkpoint_everyc                   V    \ rS rSrSrSS jr    SS jrSS jrS rS r	S	 r
S
 rSrg)Loop#   a"  Execute operations in a loop and coordinate logging and checkpoints.

Supports multiple phases, that define their own operations to run, and
intervals for reporting scores, logging summaries, and storing checkpoints.
All class state is stored in-graph to properly recover from checkpoints.
Nc                 r   Xl         Uc  [        R                  " SSSS9OUU l        Uc$  [        R                  " [        R
                  5      OUU l        Uc$  [        R                  " [        R
                  5      OUU l        Uc$  [        R                  " [        R
                  5      OUU l        / U l	        g)a}  Execute operations in a loop and coordinate logging and checkpoints.

The step, log, report, and report arguments will get created if not
provided. Reset is used to indicate switching to a new phase, so that the
model can start a new computation in case its computation is split over
multiple training steps.

Args:
  logdir: Will contain checkpoints and summaries for each phase.
  step: Variable of the global step (optional).
  log: Tensor indicating to the model to compute summary tensors.
  report: Tensor indicating to the loop to report the current mean score.
  reset: Tensor indicating to the model to start a new computation.
Nr   Fglobal_step)name)
_logdirtfVariable_stepplaceholderbool_log_report_reset_phases)selflogdirsteplogreportresets         Y/home/james-whalen/.local/lib/python3.13/site-packages/pybullet_envs/agents/tools/loop.py__init__Loop.__init__+   sy     L?C|"++a];QUDJ+.;rww'CDI.4n2>>"''*&DL-2]"..)DKDL    c
                 6   [         R                  " U[         R                  5      n[         R                  " U[         R                  5      n[         R                  " U[         R                  5      nU	=(       d    0 n	UR
                  R                  b  UR
                  R                  c  [        S5      eU R                  =(       aZ    [         R                  R                  [        R                  R                  U R                  U5      [         R                  " 5       SS9n
U R                  X#U5      nUR
                  R                  S:X  a  SOUR
                  S   R                   nU R"                  R%                  ['        XX[)        U5      XUU5	      5        g)a  Add a phase to the loop protocol.

If the model breaks long computation into multiple steps, the done tensor
indicates whether the current score should be added to the mean counter.
For example, in reinforcement learning we only have a valid score at the
end of the episode.

Score and done tensors can either be scalars or vectors, to support
single and batched computations.

Args:
  name: Name for the phase, used for the summary writer.
  done: Tensor indicating whether current score can be used.
  score: Tensor holding the current, possibly intermediate, score.
  summary: Tensor holding summary string to write if not an empty string.
  steps: Duration of the phase in steps.
  report_every: Yield mean score every this number of steps.
  log_every: Request summaries via `log` tensor every this number of steps.
  checkpoint_every: Write checkpoint every this number of steps.
  feed: Additional feed dictionary for the session run call.

Raises:
  ValueError: Unknown rank for done or score tensors.
Nz1Rank of 'done' and 'score' tensors must be known.<   )
flush_secsr   r   )r   convert_to_tensorr   float32stringshapendims
ValueErrorr   summary
FileWriterospathjoinget_default_graph_define_stepvaluer   append_Phaseint)r   r   donescorer,   stepsreport_every	log_everycheckpoint_everyfeedwriteropbatchs                r   	add_phaseLoop.add_phaseA   s'   D bgg.D  

3E""7BII6G:2Dzz5;;#4#4#<JKK\\ Qbjj33
T\\4("*>*>*@R 4 QF			4	0B""a'AU[[^-A-AELLtRE
D		!"r"   c              #   z  #    UR                  U R                  5      nSn U(       a  XC:  a  gU R                  U5      u  pgnXvR                  -  U-   n	XR                  -  U:  a@  Sn
U
S-  n
[        R
                  R                  U
R                  UR                  X5      5        X:  UR                  U R                  '   UR                  =(       a&    U R                  XR                  UR                  5      UR                  U R                  '   U R                  XR                  UR                   5      UR                  U R"                  '   UR                  UR$                  UR                  5      u  ppEU R                  XR                  UR&                  5      (       a  U R)                  XU5        U R                  XR                  UR                   5      (       a  Uv   U(       aO  UR                  (       a>  [+        S U R,                   5       5      nX}-  U-   nUR                  R/                  X5        GM  7f)a  Run the loop schedule for a specified number of steps.

Call the operation of the current phase until the global step reaches the
specified maximum step. Phases are repeated over and over in the order they
were added.

Args:
  sess: Session to use to run the phase operation.
  saver: Saver used for checkpointing.
  max_step: Run the operations until the step reaches this limit.

Yields:
  Reported mean scores.
r   z4
--------------------------------------------------
z)Phase {} (phase step {}, global step {}).c              3   8   #    U  H  oR                   v   M     g 7fNr9   .0phases     r   	<genexpr>Loop.run.<locals>.<genexpr>   s     B\EKK\   N)runr   _find_current_phaser9   r   logginginfoformatr   r=   r   r>   _is_every_stepsr@   r;   r   r:   r   r?   r<   _store_checkpointmaxr   add_summary)r   sesssavermax_stepr   
steps_maderI   epochsteps_in
phase_stepmessager,   
mean_scorelongest_phasesummary_steps                  r   rM   Loop.runq   s     ((4::&KJ
	k-#77DeH;;&1j	KK	*	,*>>


uzz:KL!)!6ejj$||  ^#33JU__] jj"&"6"6z;;7<7I7I#Kejj59XXehh

5S2g;			j++u7M7M	N	NtK8			j++u7I7I	J	J	U\\ BT\\BB,x7  73 s   H9H;c                 Z   ^ T(       d  g[        XU-   5      n[        U4S jU 5       5      $ )zDetermine whether a periodic event should happen at this step.

Args:
  phase_step: The incrementing step.
  batch: The number of steps progressed at once.
  every: The interval of the periode.

Returns:
  Boolean of whether the event should happen.
Fc              3   8   >#    U  H  oS -   T-  S:H  v   M     g7f)r   r   N )rH   r   everys     r   rJ   'Loop._is_every_steps.<locals>.<genexpr>   s     A=4qE!Q&=s   )rangeany)r   r\   r@   re   covered_stepss      ` r   rR   Loop._is_every_steps   s+     *5&89MA=AAAr"   c                     [        S U R                   5       5      n[        X-  5      nX-  nU R                   H&  nXER                  :  a  XSU4s  $ XER                  -  nM(     g)a  Determine the current phase based on the global step.

This ensures continuing the correct phase after restoring checkoints.

Args:
  global_step: The global number of steps performed across all phases.

Returns:
  Tuple of phase object, epoch number, and phase steps within the epoch.
c              3   8   #    U  H  oR                   v   M     g 7frE   rF   rG   s     r   rJ   +Loop._find_current_phase.<locals>.<genexpr>   s     ;lU[[lrL   N)sumr   r6   r9   )r   r   
epoch_sizerZ   r[   rI   s         r   rN   Loop._find_current_phase   s\     ;dll;;J)*E'H	KK	X%%++h r"   c                   ^^	 UR                   R                  S:X  a  US   nUR                   R                  S:X  a  US   n[        R                  " S[        R
                  5      m	[        R                  " XU/5         [        R                  " U[        R                  " U5      SS2S4   5      m[        R                  " [        R                  " U5      UU	4S j[        R                  5      nSSS5        [        R                  " W/5         [        R                  " U R                  T	R                  [        5      n[        R                   " U5      S   nU R                  R!                  U5      nSSS5        [        R                  " WW/5         [        R"                  " U5      XWW4sSSS5        $ ! , (       d  f       N= f! , (       d  f       NZ= f! , (       d  f       g= f)a  Combine operations of a phase.

Keeps track of the mean score and when to report it.

Args:
  done: Tensor indicating whether current score can be used.
  score: Tensor holding the current, possibly intermediate, score.
  summary: Tensor holding summary string to write if not an empty string.

Returns:
  Tuple of summary tensor, mean score, and new global step. The mean score
  is zero for non reporting steps.
r   Nrd   c                  &   > TR                  T 5      $ rE   )submit)
done_score
score_means   r   <lambda>#Loop._define_step.<locals>.<lambda>   s    *:K:KJ:Wr"   )r)   r*   r   StreamingMeanr   r'   control_dependenciesgatherwherecond
reduce_anyno_opr   clearfloatr   
assign_addidentity)
r   r7   r8   r,   submit_scorer^   rY   	next_steprt   ru   s
           @@r   r2   Loop._define_step   sQ    zz1$Zd{{ADke--b"**=J		 	 $w!7	899UBHHTN1a4$89jWWR]]402WY[YaYabl 
9 
	 	 ,	0774<<)9)95Aj88E?1%j**''
3i 
1 
	 	 *i!8	9[[!:*D 
:	9 
9	8 
1	0 
:	9s&   >A2F7A%GG7
G
G
G'c                     U R                   (       a  U(       d  g[        R                  R                  U R                   5        [        R
                  R                  U R                   S5      nUR                  XU5        g)a  Store a checkpoint if a log directory was provided to the constructor.

The directory will be created if needed.

Args:
  sess: Session containing variables to store.
  saver: Saver used for checkpointing.
  global_step: Step number of the checkpoint name.
Nz
model.ckpt)r   r   gfileMakeDirsr.   r/   r0   save)r   rV   rW   r   filenames        r   rS   Loop._store_checkpoint   sL     <<uHHdll#ww||DLL,7H	JJt{+r"   )r   r   r   r   r   r   )NNNNrE   )__name__
__module____qualname____firstlineno____doc__r    rA   rM   rR   rN   r2   rS   __static_attributes__rd   r"   r   r
   r
   #   s<    8 "!%."`*8XB &E:,r"   r
   )r   
__future__r   r   r   collectionsr.   tensorflow.compat.v1compatv1r   	Exception
tensorflow r   
namedtupler5   objectr
   rd   r"   r   <module>r      sd    K &  %  	## 			 

H,6 H,  s   	A 
AA