
    h                        S SK r S SKrS SKJr  S SKJr  S SKJr  S SK	J
r
  S\R                  S\R                  S\R                  S\R                  S	\R                  S
\R                  4S jr\R                  " \5      r\R                  " S S S S.5      " \5      r\" 5       " \5      r    SS\ R                  S\ R                  S\ R                  S\S\ R                  S-  S\S-  S\ R$                  S-  S\ R                  4S jjrg)    N)indexed_dot_autotune)b_bin_fn)softcappingBLOCK_BBLOCK_DGROUP_B
HAS_VALIDSEVEN_DSHIFTc                    [         R                  " SS9n[         R                  " X^5      n[         R                  " Xo5      nUU-  nUU-  nUU-  n[        UU-
  U5      nUUU-  U-  -   nUU-  U-  n[         R                  " SU5      UU-  -   U-  nU(       a  [         R
                  " X<U-  -   5      n[         R                  " SU5      UU-  -   nXUS S 2S 4   -  UUS S S 24   -  -   -   nU(       a  [         R
                  " U5      n O [         R
                  " UUS S S 24   U:  SS9n [         R
                  " X+U(       a  US-   OU-  -   5      n!UU!S S 2S 4   U	-  US S S 24   U
-  -   -   n"U(       a  [         R
                  " U"5      n#O [         R
                  " U"US S S 24   U:  SS9n#[         R                  " SU5      UU-  -   nUU-   n$U U#-  R                  [         R                  5      n%[         R                  " U%S5      R                  U$R                  R                  5      * n&[         R                  " U$U&UU:  S9  g )Nr   )axisg        )maskother   )r   )tl
program_idcdivminarangeloadtofloat32sumdtype
element_ty
atomic_add)'ECIndsValidsOutBD	stride_eb	stride_ed	stride_cv	stride_cd	stride_ib	stride_vbB_BINr   r   r   r	   r
   r   pidnum_b_chunksnum_d_chunksnum_d_in_groupgroup_idfirst_pid_bgroup_size_bpid_bpid_doffs_boffs_de_ptrseindsc_ptrscout_ptrsdotneg_dots'                                          W/home/james-whalen/.local/lib/python3.13/site-packages/cut_cross_entropy/indexed_dot.py_indexed_neg_dot_forward_kernelr?      s   , --Q
C771&L771&L|+Nn$HW$K|k17;LC.0L@AE>!l2Eii7#ego5:Ff"445YYq'"UW_4FfQWo-	F47O0KKLFGGFOGGFa1!4C@774u
&IIJD$q$w-)+fT1Wo	.IIJFGGFOGGFa1!4C@YYq'"UW_4FV|Hq5**RZZ
 Cvvc1~  !:!:;;GMM(G&1*5    c                     U S   U S   -  S:H  $ )Nr#   r   r    argss    r>   <lambda>rE   H   s    tCy4	?:a?r@   c                     U S   S L$ )Nr    rB   rC   s    r>   rE   rE   I   s    4>#=r@   c                     g)N   rB   rC   s    r>   rE   rE   J   s    r@   )r
   r	   r   r7   r:   r8   shiftvalidssoftcap	out_dtypereturnc                 h  ^ ^	 UR                   S:X  d   eT R                   S:X  d   eUR                   S:X  d   eUR                  S5      T R                  S5      :X  d   eUR                  S5      T R                  S5      :X  d   eUb$  UR                   S:X  d   eUR                  S5      m	OT R                  S5      m	T R                  T	4[        R                  S9nS[
        [           4U	U 4S jjn[        U   " T UUUUT	T R                  S5      T R                  S5      T R                  S5      UR                  S5      UR                  S5      UR                  S5      Uc  SOUR                  S5      [        T	5      US9  Ub  [        Xu5      nUc  T R                  nUR                  U5      nU$ )Nr      r   )r   rM   c                    > [         R                  " TU S   5      [         R                  " TR                  S5      U S   5      -  4$ )Nr   r   r   )tritonr   size)METAr"   r7   s    r>   grid,indexed_neg_dot_forward_kernel.<locals>.gridg   s7    AtI/&++affQii2YY[[r@   )r*   r   )ndimrR   	new_zerostorchr   tupleintr?   strider   r   r   r   )
r7   r:   r8   rI   rJ   rK   rL   outrT   r"   s
   `        @r>   indexed_neg_dot_forward_kernelr]   P   s    99>>66Q;;66Q;;99Q<166!9$$$66!9q	!!!{{aKKNFF1I
++qd%--+
0C\eCj \ \ $D)				q					A^q!1qk$ #'GG	
&&
CJr@   )FNNN)rX   rQ   triton.languagelanguager   cut_cross_entropy.tl_autotuner   cut_cross_entropy.tl_utilsr   cut_cross_entropy.utilsr   	constexprr?   jit
heuristicsTensorboolfloatr   r]   rB   r@   r>   <module>ri      sB      > / /76 \\76  \\!76" \\#76$ %76& LL'76( <<)76t #)**-L"M "("3"3?=!# "##  #7"89X"Y  "& $(4||4||4 ,,4 	4
 LL44 T\4 {{T!4 \\4r@   