
    hg.                        % S SK r S SKJr  S SKJrJrJrJrJrJ	r	  SSK
Jr  SSKJr  SSKJrJr  \\\4   r\\\\   \4   r\\\\   \4   rSS	S
S.r\\\\\\4   4   \S'   SSSS\S   \S   S	S.r\\\\\\4   4   \S'   \R6                  " S5      \S   4\S   \S   \S   \S   \	" \\S   5      \S   S	S.S\S\S\S\S\S\S\S\4S jjj5       r\R6                  " S5      \S   4\S   \S   \S   \S   \S   \	" \\S   5      S	S.S\S\S\S\S\S\S\S\4S  jjj5       r\R6                  " S!5      \S   \S   \	" \\S   5      S	S".S\S\S\S\S\4
S# jj5       r " S$ S%\5      r / S&Qr!g)'    N)defaultdict)DictListOptionalTupleUnioncast   )get_array_ops)registry)FloatsXd	Generator        T      ?)L2L2_is_weight_decay	grad_clipSGD_DEFAULTSgMbP?g?g+?g:0yE>r   r   )
learn_ratebeta1beta2epsr   r   r   ADAM_DEFAULTSzRAdam.v1r   r   r   r   r   )r   r   r   r   r   r   use_averagesr   c                $    [        U UUUUUUUSS9	$ )NT)r   r   r   r   r   r   r   	use_radam	Optimizer)r   r   r   r   r   r   r   r   s           J/home/james-whalen/.local/lib/python3.13/site-packages/thinc/optimizers.pyRAdamr       s,     -!
 
    zAdam.v1)r   r   r   r   r   r   r   c                $    [        U UUUUUUUSS9	$ )NF)r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   s           r   Adamr#   8   s,     -!
 
r!   zSGD.v1)r   r   r   r   c          
           [        U UUUSSUS9$ )Nr   )r   r   r   r   r   r   r   )r   r   r   r   r   s        r   SGDr%   Q   s&     -! r!   c                      \ rS rSr% Sr\\\4   \S'   \\\4   \S'   \	\\\4      \S'   \\
\4   \S'   \\\4   \S'   \\\4   \S'   \\S	'   \\S
'   \\S'   \\S'   \\S'   \\S'   \\S'   \\S'   \\\	\         \S'   / SQr\S   \S   \S   \S   \S	   SSSS.S
\S\S\S\S\S	\S\S\S\4S jjrS rS rSS.S\\\
4   S\S \S!\4S" jjrS# rS$ rS%rg&)'r   e   zDo various flavours of stochastic gradient descent, with first and
second order momentum. Currently support 'vanilla' SGD, Adam, and RAdam.
mom1mom2averages	schedules	nr_update	last_seenr   r   b1b2r   r   r   r   _radam_buffer)r(   r)   r*   r+   r,   r-   r   r   r.   r/   r   r   r   r   r0   r   r   TF)r   r   r   r   r   r   r   r   r   c                   0 U l         0 U l        U(       a  0 U l        OSU l        0 U l        [	        [
        5      U l        [	        [
        5      U l        U R                  SU5        U R                  SU5        U R                  SU5        U R                  SU5        U R                  SU5        U R                  SU5        Xl	        Xl
        [        S5       V
s/ s H  n
/ S	QPM	     sn
U l        gs  sn
f )
a  
Initialize an optimizer.

learn_rate (float): The initial learning rate.
L2 (float): The L2 regularization term.
beta1 (float): First-order momentum.
beta2 (float): Second-order momentum.
eps (float): Epsilon term for Adam etc.
grad_clip (float): Gradient clipping.
use_averages (bool): Whether to track moving averages of the parameters.
use_radam (bool): Whether to use the RAdam optimizer.
L2_is_weight_decay (bool): Whether to interpret the L2 parameter as a
    weight decay term, in the style of the AdamW optimizer.
Nr   r   r.   r/   r   r   
   )NNN)r(   r)   r*   r+   r   intr,   r-   _set_attr_or_scheduler   r   ranger0   )selfr   r   r   r   r   r   r   r   r   _s              r   __init__Optimizer.__init__   s    6 		DM DM$S)$S)"";	:""<<""4/""4/""5#.""4,""4:?)D)Q0)DDs   C0c                 T   [        U[        [        [        45      (       a  [	        XU5        g [        U[
        5      (       a  [        U5      nX R                  U'    [	        X[        U5      5        g ! [        [        4 a$  nSU S[        U5       SU 3n[        U5      eS nAff = f)NzInvalid schedule for 'z' (z)
)
isinstancefloatboolr3   setattrlistiterr+   nextStopIteration	TypeErrortype
ValueError)r6   namevalueeerrs        r   r4   Optimizer._set_attr_or_schedule   s    eeT3/00D&%&&U#(NN4 &DK0!9- &.tfCU}CsK o%&s   A3 3B'B""B'c                     U R                   R                  5        H  u  p [        U5      n[        XU5        M     g ! [         a    [	        X5      n N(f = f)N)r+   itemsrA   rB   getattrr>   )r6   keyschedulerG   s       r   step_schedulesOptimizer.step_schedules   sP    !^^113MC+X Du% 4 ! +*+s   =AAr   )lr_scalerN   weightsgradientrR   c                   [        U5      S:  a  X#4$ [        U5      nU R                  U==   S-  ss'   U R                  U   nU R                  S:w  a"  U R                  (       d  X0R                  U-  -  nU R
                  (       a  UR                  X0R
                  5      nU R                  (       a  U R                  XRX4X5      u  p#O`U R                  S:  a&  U R                  S:  a  U R                  XRX4X5      u  p#O*U R                  S:  a  [        eX$U R                  -  U-  -  nUS-  nU R                  S:w  a2  U R                  (       a!  X$U R                  -  U R                  -  U-  -  nU R                  bU  XR                  ;  a'  UR                  UR                   SS9U R                  U'   UR#                  U R                  U   X&5        X#4$ )zCall the optimizer with weights and a gradient. The key is the
identifier for the parameter, usually the node ID and parameter name.
r
   r   r   float32)dtype)lenr   r,   r   r   r   clip_gradientr   _radamr.   r/   _adamNotImplementedErrorr   r*   allocshapeupdate_averages)r6   rN   rS   rT   rR   opsnr_upds          r   __call__Optimizer.__call__   s    x=1$$G$sq $77a< 7 7'))H>>((>>BH>> $h#!GX WWs]tww} $

h#!GX WWs]%%$//1H<<GA77a<D33$//1DGG;gEEG==$--'%(YYw}}IY%Nc"c 2GD  r!   c                    XPR                   ;  a(  UR                  UR                  5      U R                   U'   XPR                  ;  a(  UR                  UR                  5      U R                  U'   UR	                  X"R                  5      nUR	                  X3R                  5      nU R
                  U   U R                   U   U R                  U   S.n	U R                  U R                  U R                  /U R                  SU R                  S.n
SnU	S   U	S   pU
S   u  pX-  nUSU-
  US	-  -  -  nX-  nUSU-
  U-  -  nU	S
==   S-  ss'   U
S   [        U	S
   S-  5         nU	S
   US   :X  a  US   US	   nnOU	S
   US'   XS
   -  nS	SU-
  -  S-
  nUS	U	S
   -  U-  SU-
  -  -
  nUUS'   US:  aC  [        R                  " SU-
  US-
  -  US-
  -  US	-
  -  U-  U-  US	-
  -  5      SXS
   -  -
  -  nOU(       a  SSXS
   -  -
  -  nOSnUUS	'   US:  aN  U
S   S:w  a  XzS   * U
S   -  U-  -  nUR                  R                  U5      U
S   -   nUU* U
S   -  UU-  -  -  nO/US:  a)  U
S   S:w  a  XzS   * U
S   -  U-  -  nUU* U
S   -  U-  -  nUR                  XrR                   5      UR                  XR                   5      4$ )N)stepexp_avg
exp_avg_sqr   )lrbetasr   weight_decaybufferTrf   rg   ri   r
      re   rk   r2   r         r   rj   rh   r   )r(   alloc1fsizer)   	reshape1fr,   r   r.   r/   r   r0   r3   mathsqrtxp	reshape_fr^   )r6   r`   rS   gradrR   rN   ra   
weights_1Dgradient_1Dstategroupdegenerated_to_sgdrf   rg   r   r   bufferedN_sma	step_sizebeta2_t	N_sma_maxdenoms                         r   rZ   Optimizer._radam   sV   ii [[6DIIcNii [[6DIIcN]]7LL9
mmD))4
 NN3'yy~))C.
 //ggtww'88((
 "#I.l0CW~ 	
q5y[!^44
AI,,f?3uV}r'9#:;=HQK''{HQK9E9-HQKV},GQY!+IE&M 1G ;q7{ KKEHQK z II[qy" 1}& qy" 	
    !1}& -//1	 $1uf'=#=>		#HQK A:^$)^44uT{BZOO
FFKK
+eEl:E9*uT{2goFFJ]^$)^44uT{BZOO
9*uT{2W<<JMM*mm4MM+zz2
 	
r!   c                    UR                  X"R                  5      nUR                  X3R                  5      nXPR                  ;  a(  UR                  UR                  5      U R                  U'   XPR                  ;  a(  UR                  UR                  5      U R                  U'   U R                  U   n	U R                  U   n
U R
                  nU R                  nSX-  -
  nSX-  -
  nU R                  US-  -  U-  nU R                  nUR                  XxXXUX-  5      u  pxpXR                  U'   XR                  U'   UR                  XrR                  5      UR                  XR                  5      4$ )Nr   g      ?)rr   rq   r(   rp   r)   r.   r/   r   r   adamrv   r^   )r6   r`   rS   rT   rR   rN   ra   rx   ry   r(   r)   r.   r/   fix1fix2rh   r   s                    r   r[   Optimizer._adamH  s4   ]]7LL9
mmHmm<ii [[6DIIcNii [[6DIIcNyy~yy~WWWWbj!bj!__tSy(4/hh.1hhTbm/
+
 		#		#MM*mm4MM+~~6
 	
r!   )	r   r0   r*   r-   r(   r)   r,   r+   r   N)__name__
__module____qualname____firstlineno____doc__r   KeyTr   __annotations__r   strr   r3   r<   r=   r   	__slots__r   
FloatOrSeqr8   r4   rP   r   rb   rZ   r[   __static_attributes__ r!   r   r   r   e   s    tX~

tX~
tD(N+,,CN##D#ID#III	JIOXh/011I, 't,)'2)'2'. -k :!#',E,E 	,E
 ,E ,E ,E ,E ,E ,E !,E\&& '!38_'! '! 	'! '!RL
\
r!   r   )r#   r    r%   r   r   r   )"rs   collectionsr   typingr   r   r   r   r   r	   backendsr   configr   typesr   r   r3   r   r   r<   r   IntOrSeqr   r=   r   r   
optimizersr    r#   r%   objectr   __all__r   r!   r   <module>r      s    # ; ; #  &S#X5$u+y01
d3i*+ 4d3eT3.//0  
t
k*5tCudC/001  
Z *<8 &g.%g.#E*"4(#D-8L*MN)+6  	
 
 	    !0 
Y*<8 #4(%g.%g.#E*)+6#D-8L*MN 	 	
  
     0 
X "$'(5#D,7K*LM 	 	
   &{
 {
| Qr!   