
    h                     @    S SK JrJr  SSKJr  S	S jrS rS rS
S jrg)    )Modelnormal_init   )registryc           
      D    [        S[        [        XX#S.S S S S.SU0S9nU$ )Nprecomputable_affine)nOnInFnP)Wbpaddropout_rate)initdimsparamsattrs)r   forwardr   )r	   r
   r   r   dropoutmodels         X/home/james-whalen/.local/lib/python3.13/site-packages/spacy/ml/_precomputable_affine.pyPrecomputableAffiner      s6    5T2w'E L    c                 Z  ^ ^^^^^^	 T R                  S5      mT R                  S5      mT R                  S5      m	T R                  S5      mT R                  S5      mT R                  R                  TR                  S   S-   TT-  T	-  SS	9nT R                  R                  TTR                  TT-  T	-  T45      S
USS  S9  UR                  UR                  S   TTT	45      nT R                  R                  R                  T R                  S5      S5      US'   UUU UUUU	4S jnX44$ )Nr   r	   r   r
   r   r      F)zerosT)trans2outr   c                   > U u  pUR                   S:X  d   eUR                  S   T:X  d   UR                  5       eUR                  S   T:X  d   UR                  5       eT	R                  S[        T	X5      5        TU   nUR	                  UR                  S   T
T-  45      nT	R                  SUR                  SS95        UR	                  UR                  S   TT-  45      nTR                  S5      nUR	                  TT-  T
T-  45      nT	R                  R                  UR	                  UR                  S   TT-  45      U5      nT	R                  R                  XS	S
9nUR	                  TTT
T45      nUR                  S5      nT	R                  SU5        UR	                  UR                  S   T
T45      $ )N   r   r   r   r   r   )axis)r   r   r   r!   Ttrans1)r   r   r   r!   r   )	ndimshapeinc_grad&_backprop_precomputable_affine_paddingreshapesum	transposeopsgemm)dY_idsdYidsXfWopfidXfdWopfir   Xr   r   r
   r	   r   s          r   backwardforward.<locals>.backward"   s    ww!||xx{b *"((* xx{b *"((* uDUBTUsVZZ!b2g./sBFFFN+ZZ!b2g./L)rBwR01iinnRZZ!b2g(>?Gt4RR 01!!,/sF#{{CIIaL"b122r   )	get_dim	get_paramr,   alloc2fr&   r-   r)   xpsqueeze)
r   r5   is_trainYfr6   r   r   r
   r	   r   s
   ``   @@@@@r   r   r      s	   	t	B	t	B	t	B	t	BA			1771:>27R<u		EB	IINN1aiib2r 23DbfNM	RXXa["b"-	.B
 IILL  !7;BqE#3 #3J <r   c                 @   UR                   S   nU R                  S5      nU R                  S5      nU R                  S5      nU R                  R                  US:  SS9nU R                  R	                  XqR                  X6U-  5      SS9nUR                  S	XFU45      $ )
Nr   r   r   r	   fdtypeTr#   r   )r&   r8   r,   asarrayr-   r)   )	r   r/   r0   nBr   r   r	   maskd_pads	            r   r(   r(   J   s    	!B	t	B	t	B	t	B 99S1WC0DIINN4BR!8NFE==!RR))r   Nc           
        ^ ^^^^ T R                  S5      (       a%  T R                  S5      R                  5       (       a  gT R                  S5      mT R                  S5      mT R                  S5      mT R                  S5      nT R                  R                  TTTU5      nT R                  R                  TT5      mT R                  R                  STTT5      nT R                  n[        XdR                  [        UR                  R                  ST-  U-  5      5      S	9n[        XeR                  SS	9nT R                  SU5        T R                  S
T5        T R                  SU5        UR                  ST4SS9nXvR                  R                  R                  SSUR                  5      -  nUR!                  USS9nUR                  SU4SS9nXR                  R                  R#                  SSUR$                  S9R'                  UR                  5      -  nUU UUU4S jn	Sn
SnSnT R                  S5      R)                  5       nT R                  S
5      R)                  5       m[+        U5       H  nU	" Xx5      nT R                  R                  R-                  U5      nT R                  R                  R/                  U5      n[1        US-
  5      U
:  a<  UT R                  R                  R                  U5      -  nT R                  SU5        M  [1        U5      U:  a  TU-  mT R                  S
T5        M    g   g)ah  This is like the 'layer sequential unit variance', but instead
of taking the actual inputs, we randomly generate whitened data.

Why's this all so complicated? We have a huge number of inputs,
and the maxout unit makes guessing the dynamics tricky. Instead
we set the maxout weights to values that empirically result in
whitened outputs given whitened inputs.
r   Nr   r	   r   r
   r   g      ?)meanr   r   i  r@   rA   r   i  ig        )locscalesizec                   > TR                  US S 5      nTR                  R                  U R                  S   TT-  4SS9nUR	                  UR                  S   T-  TT-  45      nTR                  R                  X0R                  5       U5        UR	                  UR                  S   TT45      nUT-  nTR                  R                  U5      nTS:  a  TR                  R                  U5      S   $ X3S:  -  $ )Nr   r@   rA   r   )	predictr,   allocr&   r)   scatter_addflattenrC   maxout)	r0   tokvecshiddensvectorsr   r   r   r	   r   s	       r   rO   init.<locals>.predict   s    ---))//399Q<b"9/E//7==#3b#8"r'"BC		g{{}g>//7==#3R"<=1))##G,799##G,Q//l++r   g{Gz?
   )	has_paramr9   anyr8   r,   alloc4fr:   r   r&   floatr;   sqrt	set_paramrP   randomuniformrC   normalrL   r)   copyrangevarrH   abs)r   r5   Yr
   r   r   r,   r0   rT   rO   tol_vartol_meant_maxt_iacts1rd   rH   r   r   r	   r   s   `                @@@@r   r   r   a   s    s 4 8 8 : :	t	B	t	B	t	B	t	B		"b"b)A		"b!A
))

Ar2r
*C
))CCuSVV[[rB-G'HIA
c993
/C	OOC	OOC	OOE3
))T2Jc)
*C66==  D#))44C
++c+
%Ciir
#i.Gvv}}##3W\\#JRR G, , GHE!!#A!!#AU|%iillu%yy||  'sSy>W$""3''AOOC#Y("IAOOC# r   )g?)NN)		thinc.apir   r   utilr   r   r   r(   r    r   r   <module>ro      s!    ( 	5p*.Br   