
    h2                         S SK r SSKJr  SSKJrJrJr  SSKJr  SSK	J
r
JrJrJrJrJrJr  SSKJr  SS	KJr  SS
KJr  \R(                  " S5       " S S\5      5       r\b  \R.                  " SSSS5      rOSrS rg)    N   )registry)cublascupycupyx)DeviceTypes)is_cupy_arrayis_mxnet_gpu_arrayis_tensorflow_gpu_arrayis_torch_cuda_arraymxnet2xptensorflow2xptorch2xp   )_custom_kernels)NumpyOps)OpsCupyOpsc                   Z  ^  \ rS rSrSr\r\r S2S\	S\
SS4S jjrSS.S	 jrU 4S
 jrS3U 4S jjrS3U 4S jjrS3U 4S jjrS3U 4S jjrS4S jrS5S jrS6U 4S jjrU 4S jrU 4S jrS3S jrS3S jr     S7S\S\S\S\S\4
U 4S jjjr     S7S\S\S\S\S\4
U 4S jjjrS3S\4U 4S jjjrS3S\4U 4S jjjrS8U 4S jjrS8U 4S  jjr S3U 4S! jjr!S3U 4S" jjr"S# r#SS$.U 4S% jjr$SS$.U 4S& jjr%U 4S' jr&U 4S( jr'U 4S) jr(U 4S* jr)U 4S+ jr*U 4S, jr+S- r,S. r- S9S/ jr.S:S0 jr/S1r0U =r1$ );r      r   device_type	device_idreturnNc                     Xl         X l        g N)r   r   )selfr   r   kwargss       Q/home/james-whalen/.local/lib/python3.13/site-packages/thinc/backends/cupy_ops.py__init__CupyOps.__init__   s     '"    )
byte_orderc                    [        U[        R                  5      (       d  UR                  5       nU(       a/  UR                  R                  U5      n[        R                  " XS9nU$ )Ndtype)
isinstancenumpyndarraygetr%   newbyteorderasarray)r   datar"   r%   s       r   to_numpyCupyOps.to_numpy    sH    $..88:DJJ++J7E==3Dr!   c                 n   > UR                   S;   a  [        R                  " X5      $ [        TU ]	  X5      $ Nfloat32float64)r%   r   
gather_addsuper)r   tableindices	__class__s      r   r4   CupyOps.gather_add(   s1    ;;00"--e==7%e55r!   c                 f   > UR                   S;   a  [        R                  " XS9$ [        TU ]	  XS9$ Nr1   inplace)r%   r   dishr5   r   Xr=   r8   s      r   r>   CupyOps.dish.   s2    77,,"'';;7<<33r!   c                    > UR                   UR                   :X  a%  UR                   S;   a  [        R                  " XUS9$ [        TU ]	  XUS9$ r;   )r%   r   backprop_dishr5   r   dYr@   r=   r8   s       r   rC   CupyOps.backprop_dish4   sG    77bhh177.D#D"00HH7((@@r!   c                 h   > UR                   S;   a  [        R                  " XSS9$ [        TU ]	  XS9$ Nr1   g      @r=   	thresholdr<   )r%   r   gelur5   r?   s      r   rK   CupyOps.gelu:   s4    77,,"''cJJ7<<33r!   c                    > UR                   UR                   :X  a&  UR                   S;   a  [        R                  " XUSS9$ [        TU ]	  XUS9$ rH   )r%   r   backprop_gelur5   rD   s       r   rN   CupyOps.backprop_gelu@   sJ    77bhh177.D#D"00SVWW7((@@r!   c                 T   [        U[        R                  5      (       d  [        U[        R                  5      (       a  [        S5      eU(       a  UR                  nU(       a  UR                  nUc  U R
                  R                  X5      $ U R
                  R                  XUS9  U$ )NzaEncountered a numpy array when processing with cupy. Did you call model.ops.asarray on your data?)out)r&   r'   r(   
ValueErrorTxpdot)r   xyrQ   trans1trans2s         r   gemmCupyOps.gemmF   s    a'':a+G+G?  AA;77;;q$$GGKK#K&Jr!   c                 Z   [        U5      (       a  U R                  R                  XS9nOm[        U5      (       a  [	        U5      nOQ[        U5      (       a  [        U5      nO5[        U5      (       a  [        U5      nOU R                  R                  XS9nUb  UR                  USS9nU$ )Nr$   F)r%   copy)r	   rT   r+   r   r   r   r   r
   r   arrayastype)r   r,   r%   r^   s       r   r+   CupyOps.asarrayV   s    GGOODO6E &&TNE$T**!$'E%%TNEGGMM$M4ELLu5L9Er!   c                   > U(       d  [        S5      e[        [        S U 5       5      5      S:w  a  [        S5      e[        [        S U 5       5      5      S:w  a  [        S5      e[        [        S U 5       5      5      S:w  a  [        S5      e[        S	 U 5       5      (       a  US
   R                  S;  a  [
        TU ]  X5      $ [        R                  " X5      $ )zPerform padding on a list of arrays so that they each have the same
length, by taking the maximum dimension across each axis. This only
works on non-empty sequences with the same `ndim` and `dtype`.
zCannot pad empty sequencec              3   8   #    U  H  oR                   v   M     g 7fr   )ndim.0seqs     r   	<genexpr>CupyOps.pad.<locals>.<genexpr>p   s     ,t88t   r   z)Cannot pad sequences with different ndimsc              3   8   #    U  H  oR                   v   M     g 7fr   r$   rd   s     r   rg   rh   r   s     -99ri   z*Cannot pad sequences with different dtypesc              3   >   #    U  H  oR                   S S v   M     g7f)r   N)shaperd   s     r   rg   rh   t   s     1DS99QR=D   z4Cannot pad sequences that differ on other dimensionsc              3   >   #    U  H  oR                   S    v   M     g7f)C_CONTIGUOUSN)flagsrd   s     r   rg   rh   x   s     =99^,rm   r   )r2   r3   int32int64)rR   lensetallr%   r5   padr   )r   seqsround_tor8   s      r   rv   CupyOps.padh   s     899s,t,,-2HIIs---.!3IJJs1D112a7STT ====a W
 B
 7;t..""422r!   c                 n   > UR                   S;   a  [        R                  " U5      $ [        TU ]	  U5      $ r0   )r%   r   maxoutr5   )r   r@   r8   s     r   r{   CupyOps.maxout   s0    77,,"))!,,7>!$$r!   c                    > UR                   S;   a'  UR                   S:X  a  [        R                  " XU5      $ [        TU ]	  XU5      $ Nr1   rq   )r%   r   backprop_maxoutr5   )r   rE   whichPr8   s       r   r   CupyOps.backprop_maxout   s@    88--%++2H"222a@@7*2a88r!   c                 0    U(       d  XS:  -  $ XS:  -  nU$ Nr    )r   r@   r=   s      r   reluCupyOps.relu   s!    A;QJAHr!   c                 0    U(       d  XS:  -  $ XS:  -  nU$ r   r   )r   rE   Yr=   s       r   backprop_reluCupyOps.backprop_relu   s!    Q<
!e	r!   slopeoffsetmin_valmax_valr=   c           	      z   > UR                   S;   a  [        R                  " UUUUUUS9$ [        TU ]	  UUUUUUS9$ )Nr1   )r=   r   r   r   r   )r%   r   clipped_linearr5   )r   r@   r   r   r   r   r=   r8   s          r   r   CupyOps.clipped_linear   sb     77,,"11  7) *  r!   c           
         > UR                   UR                   :X  a*  UR                   S;   a  [        R                  " UUUUUUUS9$ [        TU ]	  UUUUUUUS9$ )Nr1   )r   r   r   r   r=   )rE   r@   r   r   r   r   r=   )r%   r   backprop_clipped_linearr5   )	r   rE   r@   r   r   r   r   r=   r8   s	           r   r   CupyOps.backprop_clipped_linear   sw     77bhh177.D#D"::  72 3  r!   c                    > UR                   UR                   :X  a%  UR                   S;   a  [        R                  " XUS9$ [        TU ]	  XUS9$ r;   )r%   r   backprop_hard_swishr5   rD   s       r   r   CupyOps.backprop_hard_swish   sG    77bhh177.D#D"66rgNN7.rg.FFr!   c                    > UR                   UR                   :X  a%  UR                   S;   a  [        R                  " XUS9$ [        TU ]	  XUS9$ r;   )r%   r   backprop_hard_swish_mobilenetr5   rD   s       r   r   %CupyOps.backprop_hard_swish_mobilenet   sH    77bhh177.D#D"@@PWXX788PPr!   c                 n   > UR                   S;   a  [        R                  " XUS9$ [        TU ]	  XU5      $ Nr1   rI   )r%   r   mishr5   )r   r@   rJ   r=   r8   s       r   r   CupyOps.mish   s4    77,,"''iPP7<g66r!   c                    > UR                   UR                   :X  a%  UR                   S;   a  [        R                  " XXCS9$ [        TU ]	  XX45      $ r   )r%   r   backprop_mishr5   )r   rE   r@   rJ   r=   r8   s        r   r   CupyOps.backprop_mish   sK    77bhh177.D#D"00w  7(	CCr!   c                 h   > UR                   S;   a  [        R                  " XSS9$ [        TU ]	  XS9$ Nr1   g      1@rI   r<   )r%   r   swishr5   r?   s      r   r   CupyOps.swish   s4    77,,"((tLL7==44r!   c                    > UR                   UR                   s=:X  a  UR                   :X  a)  O  O&UR                   S;   a  [        R                  " XX4SS9$ [        TU ]	  XX4S9$ r   )r%   r   backprop_swishr5   )r   rE   r@   r   r=   r8   s        r   r   CupyOps.backprop_swish   sY    77bhh)!'')agg9O.O"11qT  7)")DDr!   c                 ~    S n[         R                  " U" U5      S5      nU[         R                  " X$5      U-  -  nU$ )Nc                 P    U R                  S5      n[        R                  " U5      $ )N)reshaper   nrm2)r@   X_vecs     r   frobenius_norm-CupyOps.clip_gradient.<locals>.frobenius_norm  s    IIbME;;u%%r!   g-q=)r   maximumminimum)r   gradientrJ   r   	grad_norms        r   clip_gradientCupyOps.clip_gradient   s;    	& LL!95A	DLL6BBr!   lengthsc                   > UR                   S;   a(  Ub  UR                   S:X  a  [        R                  " XUS9$ [        TU ]	  XUS9$ )zGiven an (M, N) sequence of vectors, return an (M, N*(nW*2+1)) sequence.
The new sequence is constructed by concatenating nW preceding and succeeding
vectors onto each column in the sequence, to extract a window of features.
r1   rq   r   )r%   r   seq2colr5   )r   rf   nWr   r8   s       r   r   CupyOps.seq2col
  sH    
 99..Ow}}7"**3GDD7?3G?<<r!   c                   > UR                   S;   a(  Ub  UR                   S:X  a  [        R                  " XUS9$ [        TU ]	  XUS9$ Nr1   rq   r   )r%   r   backprop_seq2colr5   )r   rE   r   r   r8   s       r   r   CupyOps.backprop_seq2col  sH    88--Ow}}7"33BGLL7+BG+DDr!   c                    > UR                   S;   a$  UR                   S:X  a  [        R                  " XS9$ [        TU ]	  X5        g r   )r%   r   reduce_meanr5   r   r@   r   r8   s      r   r   CupyOps.reduce_mean  s9    77,,'1I"..qBBG+r!   c                    > UR                   S;   a&  UR                   S:X  a  [        R                  " X5      $ [        TU ]	  X5        g r~   )r%   r   backprop_reduce_meanr5   )r   d_meansr   r8   s      r   r   CupyOps.backprop_reduce_mean$  s9    ==22w}}7O"77IIG(:r!   c                    > UR                   S;   a&  UR                   S:X  a  [        R                  " X5      $ [        TU ]	  X5        g r~   )r%   r   
reduce_maxr5   r   s      r   r   CupyOps.reduce_max*  s9    77,,'1I"--a99Gq*r!   c                    > UR                   S;   a7  UR                   S:X  a'  UR                   S:X  a  [        R                  " XU5      $ [        TU ]	  XU5        g r~   )r%   r   backprop_reduce_maxr5   )r   d_maxesr   r   r8   s       r   r   CupyOps.backprop_reduce_max0  sJ    MM33w&("66wwOOG'@r!   c                    > UR                   S;   a&  UR                   S:X  a  [        R                  " X5      $ [        TU ]	  X5      $ r~   )r%   r   
reduce_sumr5   r   s      r   r   CupyOps.reduce_sum:  s<    77,,'1I"--a997%a11r!   c                    > UR                   S;   a&  UR                   S:X  a  [        R                  " X5      $ [        TU ]	  X5      $ r~   )r%   r   backprop_reduce_sumr5   )r   d_sumsr   r8   s      r   r   CupyOps.backprop_reduce_sum@  s<    <<11gmmw6N"66vGG7.v??r!   c                 .    [         R                  " X5      $ r   )r   hash)r   idsseeds      r   r   CupyOps.hashF  s    ##C..r!   c                 <    U R                   R                  XU5        g r   )_xp2scatter_add)r   r6   r7   valuess       r   r   CupyOps.scatter_addI  s    		ef5r!   c
           
          [        X5        [        X5        [        X5        [        X(SU-
  SU-
  XqX45        UR                  S5        XX44$ )Nr   r   )_check_compatible_shapeadam_kernelfill)
r   weightsr   mom1mom2beta1beta2eps
learn_ratemod_rates
             r   adamCupyOps.adamL  sP     	 2..!e)QYd	
 	a$,,r!   c                 T    [        5       R                  XX4S9nU R                  U5      $ )N)periodrQ   )r   position_encoder+   )r   NDr   rQ   	positionss         r   r   CupyOps.position_encodeY  s(    J..qF.L	||I&&r!   )r   r   )gpur   )F)NFFr   )r   )      ?        r   r   F)g      4@F)r   )i'  N)2__name__
__module____qualname____firstlineno__namer   rT   r   r   r   intr   r-   r4   r>   rC   rK   rN   rZ   r+   rv   r{   r   r   r   floatboolr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __static_attributes____classcell__)r8   s   @r   r   r      s   D	BD BC#&#;>#	# ,0 64A4A $34%9   	
    @  	
     >G$ G GQD Q Q7D5E
 +/ 
= 
= 37 E E,;+A2@/6 VY-' 'r!   z9T grad, T lr, T one_minus_beta1, T one_minus_beta2, T epszT param, T m, T vzm += one_minus_beta1 * (grad - m);
        v += one_minus_beta2 * (grad * grad - v);
        param -= lr * m / (sqrt(v) + eps);r   c                     U R                   UR                   :w  a'  SU R                    SUR                    3n[        U5      eg )Nz!arrays have incompatible shapes: z and )rl   rR   )uvmsgs      r   r   r   k  s;    ww!''1!''%yIo r!   )r'    r   compatr   r   r   typesr   utilr	   r
   r   r   r   r   r   r   	numpy_opsr   opsr   r   ElementwiseKernelr   r   r   r!   r   <module>r     s      ( (       
iF'c F' F'R
 ((C	. 	K Kr!   