
    6bi'                         S r SSKJs  Jr  SSKJr  SSKJr  SSK	J
r
  SSKJr  \" SSS/S	9 " S
 S\R                  5      5       rg)zNadam optimizer implementation.    N)backend_config)optimizer_v2)learning_rate_schedule)keras_exportzkeras.optimizers.legacy.Nadamzkeras.optimizers.Nadam)v1c                   v   ^  \ rS rSrSrSr     SU 4S jjrS rS rU 4S jr	SS jr
SS	 jrU 4S
 jrSrU =r$ )Nadam   a  Optimizer that implements the NAdam algorithm.
Much like Adam is essentially RMSprop with momentum, Nadam is Adam with
Nesterov momentum.

Args:
  learning_rate: A Tensor or a floating point value.  The learning rate.
  beta_1: A float value or a constant float tensor. The exponential decay
    rate for the 1st moment estimates.
  beta_2: A float value or a constant float tensor. The exponential decay
    rate for the exponentially weighted infinity norm.
  epsilon: A small constant for numerical stability.
  name: Optional name for the operations created when applying gradients.
    Defaults to `"Nadam"`.
  **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
    `clipnorm`, `global_clipnorm`.
    If `clipvalue` (float) is set, the gradient of each weight
    is clipped to be no higher than this value.
    If `clipnorm` (float) is set, the gradient of each weight
    is individually clipped so that its norm is no higher than this value.
    If `global_clipnorm` (float) is set the gradient of all weights is
    clipped so that their global norm is no higher than this value.

Usage Example:
  >>> opt = tf.keras.optimizers.legacy.Nadam(learning_rate=0.2)
  >>> var1 = tf.Variable(10.0)
  >>> loss = lambda: (var1 ** 2) / 2.0
  >>> step_count = opt.minimize(loss, [var1]).numpy()
  >>> "{:.1f}".format(var1.numpy())
  9.8

Reference:
  - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf).
Tc                   > UR                  SS5      US'   UR                  SU5      n[        U[        R                  5      (       a  [        S5      e[        TU ]  " U40 UD6  U R                  SUR                  SU5      5        U R                  SU R                  5        U R                  SU5        U R                  SU5        U=(       d    [        R                  " 5       U l        S U l        g )	Nschedule_decaygMbp?decaylrzdThe Nadam optimizer does not support tf.keras.optimizers.LearningRateSchedules as the learning rate.learning_ratebeta_1beta_2)popget
isinstancer   LearningRateSchedule
ValueErrorsuper__init__
_set_hyper_initial_decayr   epsilon_m_cache)selfr   r   r   r   namekwargs	__class__s          ^/home/james-whalen/.local/lib/python3.13/site-packages/tf_keras/src/optimizers/legacy/nadam.pyr   Nadam.__init__D   s     !**%5u=w

471FF
 
 !  	((D-)HI!4!45&)&):."8"8":    c           	      h   US   R                   R                  nU R                  cV  U R                  S/ USS[        R
                  R                  S9U l        U R                  R                  U R                  5        U H  nU R                  US5        M     U H  nU R                  US5        M     g )Nr   momentum_cacheonesF)shapedtypeinitializer	trainableaggregationmv)
r(   
base_dtyper   
add_weighttfVariableAggregationONLY_FIRST_REPLICA_weightsappendadd_slot)r   var_list	var_dtypevars       r!   _create_slotsNadam._create_slotsa   s    QK%%00	==  OO "22EE , DM MM  /CMM#s#  CMM#s# r#   c                 4   [         R                  " U R                  SU5      5      n[         R                  " U R                  SU5      5      n[         R                  " U R                  SU5      5      n[         R                  " U R                  S-   U5      n[         R                  " U R                  S-   U5      n[         R                  " SU5      n	USS[         R
                  " XR                  U-  5      -  -
  -  n
USS[         R
                  " XR                  U-  5      -  -
  -  n[         R                  " U R                  U5      U
-  nX R                  R                  L aP  [         R                  " [         R                  R                  R                  U R                  XR                  S	95      nX-  n[        UU* [         R                  " U R                   U5      UUU
USU-
  SU-
  SU
-
  SU-
  SU-
  S[         R
                  " Xg5      -
  S
9X1U4'   g )Nr   r   r         gQ?g      ?g      ?use_locking)lr_tneg_lr_tr   beta_1_tbeta_2_tm_tm_t_1one_minus_beta_1_tone_minus_beta_2_tone_minus_m_tone_minus_m_schedule_newone_minus_m_schedule_nextv_t_prime_denominator)r0   identity
_get_hypercast
iterationspowr   _m_cache_readr   r(   compatr   assign_use_lockingdictconvert_to_tensorr   )r   
var_devicer7   apply_stater@   rB   rC   
local_step	next_step
decay_baserD   rE   m_schedule_newm_schedule_nexts                 r!   _prepare_localNadam._prepare_localu   s   {{4???IFG;;txCD;;txCDWWT__q0)<
GGDOOa/;	WWT9-
#
,?,?*,LMNN
 #
,?,?),KLMM
 !3!3Y?#E+++[[		##MM>?P?P $ N
 )0/3U((yA 8| 8|)%(>%9&)O&;"%x(D"D0
+,r#   c                 l   > [         R                  " U R                  5      U l        [        TU ]  U5      $ N)r0   rL   r   rQ   r   _prepare)r   r6   r    s     r!   rb   Nadam._prepare   s*      [[7w))r#   c                 8   UR                   UR                  R                  pTU=(       d    0 R                  XE45      =(       d    U R	                  XE5      nU R                  US5      nU R                  US5      nXS   -  n	US   U-  US   U-  -   n
[        R                  R                  R                  XzU R                  S9n
XS   -  nUS   U-  US	   [        R                  " U5      -  -   n[        R                  R                  R                  XU R                  S9nXS
   -  nUS   U	-  US   U-  -   nX&S   U-  [        R                  " U5      US   -   -  -
  n[        R                  R                  R                  X/U R                  S9R                  $ )Nr,   r-   rI   rB   rF   r>   rJ   rC   rG   rK   rH   rE   r@   r   )devicer(   r.   r   _fallback_apply_stateget_slotr0   rR   r   rS   rT   squaresqrtop)r   gradr8   rX   rW   r7   coefficientsr,   r-   g_primerD   	m_t_primev_t	v_t_primem_t_barvar_ts                   r!   _resource_apply_denseNadam._resource_apply_dense   s    #

CII,@,@I#)r..#
 ?''
> 	 MM#s#MM#s#&@AA$q(/0478 	 iill!!!d6G6G!H'BCC	:&*\ .
IIdO.  iill!!!d6G6G!H'>??	)G37#i/0 	 6*W4GGIi!88
 
 yy||""34;L;L"MPPPr#   c                 H   UR                   UR                  R                  peU=(       d    0 R                  XV45      =(       d    U R	                  XV5      nU R                  US5      nU R                  US5      n	XS   -  n
XS   -  n[        R                  R                  R                  XUS   -  U R                  S9n[        R                  " U/5         U R                  XU5      n[        R                  " X5      nS S S 5        WUS   -  nUS   U
-  US	   U-  -   nX-  US
   -  n[        R                  R                  R                  XUS   -  U R                  S9n[        R                  " U/5         U R                  XU5      n[        R                  " UU5      nS S S 5        WUS   -  n[        R                  " U5      US   -   nU R                  UUUS   U-  U-  5      n[        R                  " UUU/6 $ ! , (       d  f       GN	= f! , (       d  f       Nv= f)Nr,   r-   rI   rF   rB   r>   rJ   rH   rE   rG   rC   rK   r   rA   )re   r(   r.   r   rf   rg   r0   rR   r   rS   rT   control_dependencies_resource_scatter_addgatherri   group)r   rk   r8   indicesrX   rW   r7   rl   r,   r-   rm   m_scaled_g_valuesrD   	m_t_slicern   rq   v_scaled_g_valuesro   	v_t_slicerp   v_prime_sqrt_plus_eps
var_updates                         r!   _resource_apply_sparseNadam._resource_apply_sparse   s(    #

CII,@,@I#)r..#
 ?''
> 	 MM#s#MM#s#&@AA !0D#EEiill!!<
++9J9J " 
 $$cU+,,Q9JKC		#/I , -H II	)G37#i/0 	 "[L9M,NNiill!!<
++9J9J " 
 $$cU+,,Q9JKC		#w/I , -D EE	 "	 2\)5L L//$w.1FF


 xx*gs3449 ,+  ,+s   )H9*H
H
H!c                    > [         TU ]  5       nUR                  U R                  S5      U R                  U R                  S5      U R                  S5      U R
                  S.5        U$ )Nr   r   r   )r   r   r   r   r   )r   
get_configupdate_serialize_hyperparameterr   r   )r   configr    s     r!   r   Nadam.get_config   sg    #%!%!?!?#" ,,88B88B<<
	
 r#   )r   rQ   r   )gMbP?g?g+?gHz>r	   ra   )__name__
__module____qualname____firstlineno____doc___HAS_AGGREGATE_GRADr   r9   r^   rb   rs   r   r   __static_attributes____classcell__)r    s   @r!   r	   r	      sM    
 D  :$('
R*Q<-5^ r#   r	   )r   tensorflow.compat.v2rR   v2r0   tf_keras.srcr   tf_keras.src.optimizers.legacyr   !tf_keras.src.optimizers.schedulesr    tensorflow.python.util.tf_exportr   OptimizerV2r	    r#   r!   <module>r      sS    & ! ! ' 7 D : # "AB_L$$ _	_r#   