
    6bi                         S r SSKrSSKJs  Jr  SSKJr  SSKJ	r
  SSKJr  SSKJr  SSKJr  SSKJr  \" 5       \" S	/ S
9 " S S\5      5       5       rg)z,Sharpness Aware Minimization implementation.    N)data_adapter)deserialize)Model)register_keras_serializable)serialize_keras_object)keras_exportz4keras.models.experimental.SharpnessAwareMinimization)v1c                   n   ^  \ rS rSrSrSU 4S jjrS rS rU 4S jr\	SU 4S jj5       r
S rS	 rS
rU =r$ )SharpnessAwareMinimization   a&  Sharpness aware minimization (SAM) training flow.

Sharpness-aware minimization (SAM) is a technique that improves the model
generalization and provides robustness to label noise. Mini-batch splitting
is proven to improve the SAM's performance, so users can control how mini
batches are split via setting the `num_batch_splits` argument.

Args:
  model: `tf.keras.Model` instance. The inner model that does the
    forward-backward pass.
  rho: float. The gradients scaling factor. Defaults to `0.05`.
  num_batch_splits: int. The number of mini batches to
    split into from each data batch. If None, batches are not split into
    sub-batches. Defaults to `None`.
  name: string. The name of the SAM model. Defaults to `None`.

Reference:
  [Pierre Foret et al., 2020](https://arxiv.org/abs/2010.01412)
c                 D   > [         TU ]  US9  Xl        X l        X0l        g )N)name)super__init__modelrhonum_batch_splits)selfr   r   r   r   	__class__s        j/home/james-whalen/.local/lib/python3.13/site-packages/tf_keras/src/models/sharpness_aware_minimization.pyr   #SharpnessAwareMinimization.__init__6   s#    d#
 0    c                    [         R                  " U5      u  p#nU R                  bA  [        R                  " X R                  5      n[        R                  " X0R                  5      nOU/nU/n/ n/ n[        XV5       GH  u  p/ n[        R                  " 5        nU R                  U	5      nU R                  X5      nSSS5        UR                  W5        U R                  R                  nWR                  WU5      nU R                  U5      nU R                  US-   -  n[        UU5       HK  u  nnUU-  nU R                  UU[        R                  R!                  5       5        UR                  U5        MM     [        R                  " 5        nU " U	5      nU R                  X5      nSSS5        UR                  X5      n[#        U5      S:X  a  U H  nUR                  U/5        M     O'[        UU5       H  u  nnUR                  U5        M     [        X5       H6  u  nnU R                  UU* [        R                  R!                  5       5        M8     GM     / nU H'  nUR                  [        R$                  " USS95        M)     U R&                  R)                  [        UW5      5        [        R*                  " USS9nU R,                  R/                  X=U5        U R0                   Vs0 s H  nUR2                  UR5                  5       _M      sn$ ! , (       d  f       GNe= f! , (       d  f       GN= fs  snf )zThe logic of one SAM training step.

Args:
  data: A nested structure of `Tensor`s. It should be of structure
    (x, y, sample_weight) or (x, y).

Returns:
  A dict mapping metric names to running average values.
Ng-q=r   )axis)r   unpack_x_y_sample_weightr   tfsplitzipGradientTaper   compiled_lossappendtrainable_variablesgradient_gradients_order2_normr   _distributed_apply_epsilon_w
distributeget_strategylen
reduce_sum	optimizerapply_gradientsconcatcompiled_metricsupdate_statemetricsr   result)r   dataxysample_weightx_splity_splitgradients_all_batchespred_all_batchesx_batchy_batchepsilon_w_cachetapepredlossr"   	gradientsgradients_order2_normscaler#   variable	epsilon_wgradient_all_batchesms                           r   
train_step%SharpnessAwareMinimization.train_step<   s    +CCDIm  ,hhq"7"78Ghhq"7"78GcGcG " #G 5G O"dzz'*))'8 # ##D)"&**"@"@d,?@I$($?$?	$J!HH 5 =>E&))5H&I"($u,	11i)C)C)E  &&y1 'J "dG}))'8 # d@I()Q. )H)00(< !* 7:472H2 )//97 (+#(#)
 11yj"--*D*D*F(? !6P 	$9 R]]+?aHI %:&&s96I'JKyy)2**1MB,0LL9Lq
"L99[ #"" #"8 :s   #LL-3%L?
L*	-
L<	c                 $    U R                  U5      $ )zForward pass of SAM.

SAM delegates the forward pass call to the wrapped model.

Args:
  inputs: Tensor. The model inputs.

Returns:
  A Tensor, the outputs of the wrapped model for given `inputs`.
)r   )r   inputss     r   callSharpnessAwareMinimization.call   s     zz&!!r   c                    > [         TU ]  5       nUR                  [        U R                  5      U R
                  S.5        U$ )N)r   r   )r   
get_configupdater   r   r   )r   configr   s     r   rM   %SharpnessAwareMinimization.get_config   s;    #%/

;xx	
 r   c                    > [         R                  " U5      n[        UR                  S5      US9nX1S'   [        TU ]  X5      $ )Nr   )custom_objects)copydeepcopydeserialize_layerpopr   from_config)clsrO   rR   r   r   s       r   rW   &SharpnessAwareMinimization.from_config   sD     v&!JJw
  ww"6::r   c                    [        [        R                  R                  5       [        R                  R                  R
                  [        R                  R                  R                  45      (       aU  S n[        R                  R                  R                  R                  U[        R                  R                  5       X5        g UR                  U5        g )Nc                 @    U R                   R                  US U4SS9  g )Nc                 $    U R                  U5      $ N)
assign_add)r2   r3   s     r   <lambda>cSharpnessAwareMinimization._distributed_apply_epsilon_w.<locals>.distribute_apply.<locals>.<lambda>   s    ar   F)argsgroup)extendedrN   )strategyvarrC   s      r   distribute_applyQSharpnessAwareMinimization._distributed_apply_epsilon_w.<locals>.distribute_apply   s)    !!((0#	 ) r   )
isinstancer   r&   r'   experimentalParameterServerStrategyCentralStorageStrategy__internal__interimmaybe_merge_callr^   )r   re   rC   rd   rf   s        r   r%   7SharpnessAwareMinimization._distributed_apply_epsilon_w   s    MM&&(**BB**AA
 
 OO&&..?? "--"<"<"> NN9%r   c           
          [         R                  " [         R                  " U Vs/ s H  o"c  M  [         R                  " U5      PM     sn5      5      nU$ s  snf r]   )r   normstack)r   r?   gradrq   s       r   r$   1SharpnessAwareMinimization._gradients_order2_norm   sB    wwHH	N	mbggdm	NO
  Os
   AA)r   r   r   )g?NNr]   )__name__
__module____qualname____firstlineno____doc__r   rF   rJ   rM   classmethodrW   r%   r$   __static_attributes____classcell__)r   s   @r   r   r      sA    (1D:L" ; ;&0 r   r   )ry   rS   tensorflow.compat.v2compatv2r   tf_keras.src.enginer   tf_keras.src.layersr   rU   tf_keras.src.modelsr   'tf_keras.src.saving.object_registrationr   %tf_keras.src.saving.serialization_libr    tensorflow.python.util.tf_exportr   r    r   r   <module>r      sY    3  ! ! , @ % O H : DL^ ^ M ^r   