
    6bi*H                        S r SSKrSSKrSSKJr  SSKJr  SSK	J
r
  SSKJr  SSKJr  SrS	rS
rS r\" S/ S9 " S S5      5       r\" S/ S9\R(                  " S5       " S S\5      5       5       r\" S5       " S S\
5      5       rg)z"Python module for evaluation loop.    N)
tf_logging)deprecation)ModelCheckpoint)	optimizer)keras_exportg      N@   c                     [         R                  R                  U 5      nUR                  5       nUR	                  5        Vs1 s H  o3R                  S5      S   iM     sn$ s  snf )a  Lists all the attributes in a checkpoint.

Checkpoint keys are paths in a checkpoint graph, and attribute is the first
element in the path. e.g. with a checkpoint key
"optimizer/iter/.ATTRIBUTES/VARIABLE_VALUE", optimizer is the attribute. The
attribute is also used to save/restore a variable in a checkpoint,
e.g. tf.train.Checkpoint(optimizer=optimizer, model=model).

Args:
  ckpt_dir_or_file: Directory with checkpoints file or path to checkpoint.

Returns:
  Set of attributes in a checkpoint.
/r   )tftrainload_checkpointget_variable_to_shape_mapkeyssplit)ckpt_dir_or_filereadervariable_mapnames       ^/home/james-whalen/.local/lib/python3.13/site-packages/tf_keras/src/utils/sidecar_evaluator.pylist_checkpoint_attributesr   !   sU     XX%%&67F335L+7+<+<+>?+>4JJsOA+>???s   A"zkeras.utils.SidecarEvaluator)v1c                   4    \ rS rSrSr   SS jrS rS rSrg)	SidecarEvaluator5   a  A class designed for a dedicated evaluator task.

`SidecarEvaluator` is expected to be run in a process on a separate machine
from the training cluster. It is meant for the purpose of a dedicated
evaluator, evaluating the metric results of a training cluster which has one
or more workers performing the training, and saving checkpoints.

The `SidecarEvaluator` API is compatible with both Custom Training Loop
(CTL), and TF-Keras `Model.fit` to be used in the training cluster. Using
the model (with compiled metrics) provided at `__init__`, `SidecarEvaluator`
repeatedly performs evaluation "epochs" when it finds a checkpoint that has
not yet been used. Depending on the `steps` argument, an eval epoch is
evaluation over all eval data, or up to certain number of steps (batches).
See examples below for how the training program should save the checkpoints
in order to be recognized by `SidecarEvaluator`.

Since under the hood, `SidecarEvaluator` uses `model.evaluate` for
evaluation, it also supports arbitrary TF-Keras callbacks. That is, if one
or more callbacks are provided, their `on_test_batch_begin` and
`on_test_batch_end` methods are called at the start and end of a batch, and
their `on_test_begin` and `on_test_end` are called at the start and end of
an evaluation epoch. Note that `SidecarEvaluator` may skip some checkpoints
because it always picks up the latest checkpoint available, and during an
evaluation epoch, multiple checkpoints can be produced from the training
side.

Example:
```python
model = tf.keras.models.Sequential(...)
model.compile(metrics=tf.keras.metrics.SparseCategoricalAccuracy(
    name="eval_metrics"))
data = tf.data.Dataset.from_tensor_slices(...)

tf.keras.SidecarEvaluator(
    model=model,
    data=data,
    # dir for training-saved checkpoint
    checkpoint_dir='/tmp/checkpoint_dir',
    steps=None,  # Eval until dataset is exhausted
    max_evaluations=None,  # The evaluation needs to be stopped manually
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir='/tmp/log_dir')]
).start()
```

`SidecarEvaluator.start` writes a series of summary files which can be
visualized by tensorboard (which provides a webpage link):

```bash
$ tensorboard --logdir=/tmp/log_dir/validation
...
TensorBoard 2.4.0a0 at http://host:port (Press CTRL+C to quit)
```

If the training cluster uses a CTL, the `checkpoint_dir` should contain
checkpoints that track both `model` and `optimizer`, to fulfill
`SidecarEvaluator`'s expectation. This can be done by a
`tf.train.Checkpoint` and a `tf.train.CheckpointManager`:

```python
# Same `checkpoint_dir` supplied to `SidecarEvaluator`.
checkpoint_dir = ...
checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
checkpoint_manager = tf.train.CheckpointManager(
    checkpoint, checkpoint_dir=..., max_to_keep=...)
checkpoint_manager.save()
```

If the training cluster uses TF-Keras `Model.fit` API, a
`tf.keras.callbacks.ModelCheckpoint` should be used, with
`save_weights_only=True`, and the `filepath` should have 'ckpt-{epoch}'
appended:

```python
# Same `checkpoint_dir` supplied to `SidecarEvaluator`.
checkpoint_dir = ...
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
    save_weights_only=True)
model.fit(dataset, epochs, callbacks=[model_checkpoint])
```
Nc                     Xl         X l        X0l        [        R                  " S[
        [        R                  S9U l        XPl        X@l	        U=(       d    / U l
        g)a  Initializes an `SidecarEvaluator` object.

Args:
  model: Model to use for evaluation. The model object used here should
    be a `tf.keras.Model`, and should be the same as the one that is
    used in training, where `tf.keras.Model`s are checkpointed. The
    model should have one or more metrics compiled before using
    `SidecarEvaluator`.
  data: The input data for evaluation. `SidecarEvaluator` supports all
    data types that TF-Keras `model.evaluate` supports as the input data
    `x`, such as a `tf.data.Dataset`.
  checkpoint_dir: Directory where checkpoint files are saved.
  steps: Number of steps to perform evaluation for, when evaluating a
    single checkpoint file. If `None`, evaluation continues until the
    dataset is exhausted. For repeated evaluation dataset, user must
    specify `steps` to avoid infinite evaluation loop.
  max_evaluations: Maximum number of the checkpoint file to be
    evaluated, for `SidecarEvaluator` to know when to stop. The
    evaluator will stop after it evaluates a checkpoint filepath ending
    with '<ckpt_name>-<max_evaluations>'. If using
    `tf.train.CheckpointManager.save` for saving checkpoints, the kth
    saved checkpoint has the filepath suffix '<ckpt_name>-<k>' (k=1 for
    the first saved), and if checkpoints are saved every epoch after
    training, the filepath saved at the kth epoch would end with
    '<ckpt_name>-<k>. Thus, if training runs for n epochs, and the
    evaluator should end after the training finishes, use n for this
    parameter. Note that this is not necessarily equal to the number of
    total evaluations, since some checkpoints may be skipped if
    evaluation is slower than checkpoint creation. If `None`,
    `SidecarEvaluator` will evaluate indefinitely, and the user must
    terminate evaluator program themselves.
  callbacks: List of `keras.callbacks.Callback` instances to apply
    during evaluation. See
    [callbacks](/api_docs/python/tf/tf_keras/callbacks).

iterations)r   initial_valuedtypeN)modeldatacheckpoint_dirr   Variable_ITERATIONS_UNINITIALIZEDint64_iterationsmax_evaluationssteps	callbacks)selfr    r!   r"   r(   r'   r)   s          r   __init__SidecarEvaluator.__init__   sK    X 
	,;;3((

  /
"b    c                 @    [         R                  " S[         S35        g)Nz(No checkpoints appear to be found after a   seconds. Please check if you are properly using a `tf.train.Checkpoint/CheckpointManager` or `tf.keras.callbacks.ModelCheckpoint(save_weights_only=True)` to save checkpoints by the training. See `tf.keras.SidecarEvaluator` doc for recommended flows of saving checkpoints.F)logginginfo_CHECKPOINT_TIMEOUT_SEC)r*   s    r   _timeout_fnSidecarEvaluator._timeout_fn   s&    6&' (%%		
 r-   c                    U R                   R                  (       ap  [        U R                   R                  [        R                  5      (       a=  [        R
                  R                  U R                   U R                   R                  S9nOO[        R
                  R                  U R                  S9n[        R
                  R                  U R                   US9n[        R
                  R                  U R                  [        U R                  S9 GH  n UR                  U5      R                  5         [        U5      nSU;  a  U R                   R                  U5        U R                   R                  (       al  [        U R                   R                  [        R                  5      (       d9  U R                   R                  R                   R#                  U R                  5        U R                  R3                  5       [4        :X  aL  [        U R                   R                  [        R                  5      (       d  [7        S	U R                   S
35      e[*        R,                  " SU 35        U R                   R9                  U R:                  U R<                  U R>                  SS9  0 nU R                   R@                   HI  nURC                  5       n[        U[D        5      (       a  URG                  U5        M;  XURH                  '   MK     [*        R,                  " SSRK                  URM                  5        V	V
s/ s H  u  pU	 SU
R3                  5        3PM     sn
n	5      5        U RN                  (       d  GMm  U RN                  [Q        URS                  S5      S   5      ::  d  GM  [*        R,                  " S5          g   g! [        R$                  R&                  4 aj  n[        U[        R$                  R(                  5      (       a  Ue[*        R,                  " SU SUR.                  R0                   SU 35         SnAGM:  SnAff = fs  sn
n	f )zStarts the evaluation loop.)r    r   )iter)timeout
timeout_fnr    zESidecarEvaluator encountered an error when loading the checkpoint at z. Retrying. Error: z: NzCVariable `iterations` cannot be loaded from the checkpoint file at zQ. Please ensure `iterations` is included in the checkpoint saved during training.zDEvaluation starts: Model weights loaded from latest checkpoint file    )r(   r)   verbosezEnd of evaluation. Metrics: %s =-r   z2Last checkpoint evaluated. SidecarEvaluator stops.)*r    r   
isinstance	Optimizerr   r   
Checkpointr&   checkpoints_iteratorr"   r1   r2   restoreexpect_partialr   load_weightsr   assignerrorsOpErrorUnavailableErrorr/   r0   	__class____name__numpyr$   RuntimeErrorevaluater!   r(   r)   metricsresultdictupdater   joinitemsr'   intr   )r*   
checkpointoptimizer_checkpointlatest_checkpointcheckpoint_attributesereturn_metricsmetricrN   r   values              r   startSidecarEvaluator.start   s   ::JJJ  )"5"5%
 %
 ,,jjDJJ,@,@ - J $&88#6#6D<L<L#6#M ,,jj,@ - J "$!>!>+'' "? "

. ""#45DDF(B%)% "77JJ++,=> ::''
JJ((''1 1 JJ((33::4;K;KL.   &&(,EE"JJ(('' 
 #**.*=*=)> ?HH  LL##4"57 JJ		t~~q     N**,,fd++"))&1286;;/ - LL0 ,:+?+?+A+AKD  &%++-1+A ###$$,=,C,CC,H,L(MM H I"
> II%%' a!;!;<< G %%6$7 8kk2232aS:
 )ls    CO>!QQ'AQQ)r&   r)   r"   r!   r'   r    r(   )NNN)	rI   
__module____qualname____firstlineno____doc__r+   r2   r\   __static_attributes__ r-   r   r   r   5   s%    Pn 6)pqr-   r   z#keras.experimental.SidecarEvaluatorc                   ,   ^  \ rS rSrSrU 4S jrSrU =r$ )SidecarEvaluatorExperimentaliB  zDeprecated. Please use `tf.keras.utils.SidecarEvaluator` instead.

Caution: `tf.keras.experimental.SidecarEvaluator` endpoint is
  deprecated and will be removed in a future release. Please use
  `tf.keras.utils.SidecarEvaluator`.
c                 R   > [         R                  " S5        [        TU ]  " U0 UD6  g )Nz`tf.keras.experimental.SidecarEvaluator` endpoint is deprecated and will be removed in a future release. Please use `tf.keras.utils.SidecarEvaluator`.)r/   warningsuperr+   )r*   argskwargsrH   s      r   r+   %SidecarEvaluatorExperimental.__init__L  s(    1	

 	$)&)r-   rc   )rI   r^   r_   r`   ra   r+   rb   __classcell__rH   s   @r   re   re   B  s    * *r-   re   z+keras.callbacks.SidecarEvaluatorModelExportc                   <   ^  \ rS rSrSrU 4S jrSS jrS rSrU =r	$ )SidecarEvaluatorModelExportiU  a  Callback to save the best TF-Keras model.

It expands the functionality of the existing ModelCheckpoint callback to
enable exporting the best models after evaluation with validation dataset.

When using the `SidecarEvaluatorModelExport` callback in conjunction with
`keras.utils.SidecarEvaluator`, users should provide the `filepath`, which
is the path for this callback to export model or save weights to, and
`ckpt_filepath`, which is where the checkpoint is available to extract
the epoch number from. The callback will then export the model that the
evaluator deems as the best (among the checkpoints saved by the training
counterpart) to the specified `filepath`. This callback is intended to be
used by SidecarEvaluator only.

Example:

```python
model.compile(loss=..., optimizer=...,
              metrics=['accuracy'])
sidecar_evaluator = keras.utils.SidecarEvaluator(
    model=model,
    data=dataset,
    checkpoint_dir=checkpoint_dir,
    max_evaluations=1,
    callbacks=[
        SidecarEvaluatorModelExport(
            export_filepath=os.path.join(checkpoint_dir,
                                  'best_model_eval',
                                  'best-model-{epoch:04d}'),
            checkpoint_filepath=os.path.join(checkpoint_dir,
            'ckpt-{epoch:04d}'),
            save_freq="eval",
            save_weights_only=True,
            monitor="loss",
            mode="min",
            verbose=1,
        ),
    ],
)
sidecar_evaluator.start()
# Model weights are saved if evaluator deems it's the best seen so far.

Args:
    export_filepath: Path where best models should be saved by this
      `SidecarEvaluatorModelExport` callback. Epoch formatting options, such
      as `os.path.join(best_model_dir, 'best-model-{epoch:04d}')`, can be
      used to allow saved model to preserve epoch information in the file
      name. SidecarEvaluatorModelExport will use the "training epoch" at
      which the checkpoint was saved by training to fill the epoch
      placeholder in the path.
    checkpoint_filepath: Path where checkpoints were saved by training. This
      should be the same as what is provided to `filepath` argument of
      `ModelCheckpoint` on the training side, such as
      `os.path.join(checkpoint_dir, 'ckpt-{epoch:04d}')`.
c                 8   > [         TU ]  " SUSS.UD6  X l        g )NT)filepathsave_best_onlyrc   )rh   r+   _checkpoint_filepath)r*   export_filepathcheckpoint_filepathrj   rH   s       r   r+   $SidecarEvaluatorModelExport.__init__  s-     	
$	
 	
 %8!r-   c                     U R                  U R                  5      nUb,  [        [        R                  " SU5      S   5      S-
  U l        gSU l        g)z.Updates export_index to the latest checkpoint.Nz.*ckpt-(?P<ckpt>\d+)ckpt   r   )1_get_most_recently_modified_file_matching_patternrs   rS   rematchexport_index)r*   logsmost_recent_filepaths      r   on_test_begin)SidecarEvaluatorModelExport.on_test_begin  sd     BB)) 	
  +HH46JK
   !"Dr-   c                 d    U =R                   S-  sl         U R                  U R                  SUS9  g)z3Saves best model at the end of an evaluation epoch.ry   N)epochbatchr~   )epochs_since_last_save_save_modelr}   )r*   r~   s     r   on_test_end'SidecarEvaluatorModelExport.on_test_end  s0     	##q(#t004Hr-   )rs   r}   )N)
rI   r^   r_   r`   ra   r+   r   r   rb   rl   rm   s   @r   ro   ro   U  s    6p8"(I Ir-   ro   )ra   r{   
tensorflowr   tensorflow.python.platformr   r/   tensorflow.python.utilr   tf_keras.src.callbacksr   tf_keras.src.optimizersr    tensorflow.python.util.tf_exportr   _PRINT_EVAL_STEP_EVERY_SECr$   r1   r   r   deprecated_endpointsre   ro   rc   r-   r   <module>r      s    ) 	  = . 2 - 9!   @( ,4I I 5IX 3;!!"GH*#3 * I <*" ;<ZI/ ZI =ZIr-   