
    6bi                         S r SSKJs  Jr  SSKJr  SSKJr  SSK	J
r
  \" 5       \
" SSS/ S	9 " S
 S\R                  5      5       5       r\R                   R                  S\R                  5      \l         g)z!RMSprop optimizer implementation.    N)	optimizer)register_keras_serializable)keras_exportz%keras.optimizers.experimental.RMSpropzkeras.optimizers.RMSpropz-keras.dtensor.experimental.optimizers.RMSprop)v1c                   j   ^  \ rS rSrSr              SU 4S jjrU 4S jrS rU 4S jrSr	U =r
$ )	RMSprop   a  Optimizer that implements the RMSprop algorithm.

The gist of RMSprop is to:

- Maintain a moving (discounted) average of the square of gradients
- Divide the gradient by the root of this average

This implementation of RMSprop uses plain momentum, not Nesterov momentum.

The centered version additionally maintains a moving average of the
gradients, and uses that average to estimate the variance.

Args:
    learning_rate: Initial value for the learning rate:
        either a floating point value,
        or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance.
        Defaults to 0.001.
    rho: float, defaults to 0.9. Discounting factor for the old gradients.
    momentum: float, defaults to 0.0. If not 0.0., the optimizer tracks the
        momentum value, with a decay rate equals to `1 - momentum`.
    epsilon: A small constant for numerical stability. This epsilon is
        "epsilon hat" in the Kingma and Ba paper (in the formula just before
        Section 2.1), not the epsilon in Algorithm 1 of the paper.
        Defaults to `1e-7`.
    centered: Boolean. If `True`, gradients are normalized by the estimated
        variance of the gradient; if False, by the uncentered second moment.
        Setting this to `True` may help with training, but is slightly more
        expensive in terms of computation and memory. Defaults to `False`.
    {{base_optimizer_keyword_args}}

Usage:

>>> opt = tf.keras.optimizers.RMSprop(learning_rate=0.1)
>>> var1 = tf.Variable(10.0)
>>> loss = lambda: (var1 ** 2) / 2.0  # d(loss) / d(var1) = var1
>>> opt.minimize(loss, [var1])
>>> var1.numpy()
9.683772

Reference:
    - [Hinton, 2012](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) # noqa: E501
c                    > [         TU ]  " SUUUU	U
UUUUS.	UD6  U R                  U5      U l        X l        X0l        X@l        XPl        g )N)	weight_decayclipnorm	clipvalueglobal_clipnormuse_emaema_momentumema_overwrite_frequencyjit_compilename )super__init___build_learning_rate_learning_raterhomomentumepsiloncentered)selflearning_rater   r   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__s                   Y/home/james-whalen/.local/lib/python3.13/site-packages/tf_keras/src/optimizers/rmsprop.pyr   RMSprop.__init__M   se    $ 	 	
%+%$;#	
 	
 #77F      c                   > [         TU ]  U5        [        U S5      (       a  U R                  (       a  g SU l        / U l        U H.  nU R                  R                  U R                  US5      5        M0     / U l        U R                  S:  a4  U H.  nU R                  R                  U R                  US5      5        M0     / U l	        U R                  (       a5  U H.  nU R                  R                  U R                  US5      5        M0     g g )N_builtTvelocityr   r   average_gradient)r   buildhasattrr%   _velocitiesappendadd_variable_from_reference
_momentumsr   _average_gradientsr   )r   var_listvarr    s      r!   r(   RMSprop.buildq   s    h4""t{{C##00jA 
 ==1&&44S*E  
 #%==''..44S:LM   r#   c                    [         R                  " U R                  UR                  5      nU R	                  U5      nU R
                  U R                  U      nSnU R                  S:  a  U R                  U R                  U      nSnU R                  (       a  U R                  U R                  U      nU R                  n[        U[         R                  5      (       Ga  UR                  X-  5        UR                  [         R                  " [         R                   " UR"                  5      SU-
  -  UR$                  5      5        U R                  (       az  UR                  X-  5        UR                  [         R                  " UR"                  SU-
  -  UR$                  5      5        U[         R                   " U5      -
  U R&                  -   n	OXPR&                  -   n	[         R(                  " XR$                  5      n
[         R                  " X1R"                  -  [         R*                  R-                  U
5      -  UR$                  5      nU R                  S:  aB  UR                  U R                  U-  5        UR                  U5        UR/                  U* 5        gUR                  U* 5        gUR                  X-  SU-
  [         R                   " U5      -  -   5        U R                  (       aC  UR                  X-  SU-
  U-  -   5        U[         R                   " U5      -
  U R&                  -   n	OXPR&                  -   n	X1-  [         R*                  R-                  U	5      -  nU R                  S:  a4  UR                  U R                  U-  U-   5        UR/                  U* 5        gUR/                  U* 5        g)z=Update step given gradient and the associated model variable.Nr      )tfcastr   dtype_var_keyr*   _index_dictr   r-   r   r.   r   
isinstanceIndexedSlicesassignscatter_addsquarevaluesindicesr   gathermathrsqrt
assign_add)r   gradientvariablelrvar_keyr&   r   average_gradr   denominatordenominator_slices	increments               r!   update_stepRMSprop.update_step   s   WWT''8--)##D$4$4W$=>==1t'7'7'@AH==2243C3CG3LMLhhh 0 011OOCN+    IIhoo.!c':H<L<L
 }}##C$67(($$ 1s73X5E5E
 '<)@@4<<O&5!#;8H8H!I((__$rww}}5G'HH  I
 }}q  89$$Y/##XI.$$iZ0 OOCNa#g89L-LLM}}##C$6!c'X9M$MN&<)@@4<<O&5k(BBI}}q  89 DE##XI.##YJ/r#   c                    > [         TU ]  5       nUR                  U R                  U R                  5      U R
                  U R                  U R                  U R                  S.5        U$ )N)r   r   r   r   r   )	r   
get_configupdate_serialize_hyperparameterr   r   r   r   r   )r   configr    s     r!   rO   RMSprop.get_config   s]    #%!%!?!?''" xx MM<< MM
	
 r#   )	r.   r%   r   r-   r*   r   r   r   r   )gMbP?g?g        gHz>FNNNNFgGz?d   Tr   )__name__
__module____qualname____firstlineno____doc__r   r(   rL   rO   __static_attributes____classcell__)r    s   @r!   r   r      sP    )Z  #"!H4:0x r#   r   z{{base_optimizer_keyword_args}})rY   tensorflow.compat.v2compatv2r4   tf_keras.src.optimizersr   'tf_keras.src.saving.object_registrationr    tensorflow.python.util.tf_exportr   	Optimizerr   replacebase_optimizer_keyword_argsr   r#   r!   <module>re      s    ( ! ! - O : +3		ti!! t tn //))%y'L'Lr#   