
    h+"                        S SK r S SKJr  \" 5       (       d  S rgS SKrS SKJr  SSKJ	r	J
r
  S rS r\R                  " \R                  " SS	S
SS.SSS9\R                  " S	SS
SS.SSS9\R                  " S	SS
SS.SSS9\R                  " SS	S
SS.SSS9\R                  " SSS
SS.SSS9\R                  " SSS
SS.SSS9\R                  " SSS
SS.SSS9\R                  " SS
S
SS.SSS9\R                  " SS
S
SS.SSS9\R                  " SS	SSS.SSS9\R                  " S	SSSS.SSS9\R                  " S	SSSS.SSS9\R                  " SS	SSS.SSS9\R                  " SSSSS.SSS9\R                  " SSSSS.SSS9\R                  " SSSSS.SSS9\R                  " SS
SSS.SSS9\R                  " SS
SSS.SSS9/\" 5       Q/ SQ\	\
SS.S9\R                  " SS 05      \R                   S\R"                  S\R"                  S\R"                  S\R"                  S\R"                  S\R"                  S\R"                  S\R"                  4S  j5       5       5       rS! rg)"    N)is_triton_availablec                     g N )abstate_xstate_wbiass        l/home/james-whalen/.local/lib/python3.13/site-packages/bitsandbytes/triton/int8_matmul_rowwise_dequantize.pyint8_matmul_rowwise_dequantizer      s           )early_config_pruneestimate_matmul_timec                    ^  U 4S j$ )Nc                 *   > U T   R                  5       $ r   )zero_)nargsnames    r   <lambda>init_to_zero.<locals>.<lambda>   s    U4[..0r   r   )r   s   `r   init_to_zeror      s	    00r   c                  .   / n S H  nS H  nS Hz  nS Hq  nUS::  a  SOSnU R                  [        R                  " X$USS	.UUS
95        S H5  nU R                  [        R                  " X$X6S	.UU[        S5      S95        M7     Ms     M|     M     M     U $ )N)               )       )r!   @   )r!   r"         r"   r   r   r   BLOCK_MBLOCK_NBLOCK_KSPLIT_K
num_stages	num_warps)r   r      r    C)r+   r,   pre_hook)appendtritonConfigr   )configsr+   block_mblock_kblock_nr,   split_ks          r   get_configs_io_boundr8      s    )J#'G#5)0BAA	"MM,3T[hi j+5*3 (5G#NN &07X_$t/9.7-9#->	!" (5 $6  ( $ *, r   r#   r$   r!   r%   r   r-   r*   r"   r   r   r   )MNK
   )r   
perf_modeltop_k)r3   keyprune_configs_byEVEN_Kc                 *    U S   U S   U S   -  -  S:H  $ )Nr;   r(   r)   r   r   )argss    r   r   r   N   s     49Y$y/0Q#RVW#Wr   has_biasr&   r'   r(   GROUP_Mr)   ACC_TYPEc                    [         R                  " S5      n[         R                  " S5      n[         R                  " UU5      n[         R                  " UU5      nUU-  nUU-  n[        UUU-  -
  U5      nUU-  UU-  -   nUU-  U-  n UU-  [         R                  " SU5      -   n!U U-  [         R                  " SU5      -   n"[         R
                  " [         R                  " U!U-  U5      U5      n#[         R
                  " [         R                  " U"U-  U5      U5      n$UU-  [         R                  " SU5      -   n%U U#S S 2S 4   U-  U%S S S 24   U-  -   -   n UU%S S 2S 4   U-  U$S S S 24   U-  -   -   nUU-  [         R                  " SU5      -   n!U U-  [         R                  " SU5      -   n"[         R                  " UU$-   5      S S S 24   n&[         R                  " UU#-   5      S S 2S 4   n'[         R                  " UU4[         R                  S9n([        S[         R                  " UUU-  5      5       H  n)U(       a-  [         R                  " U 5      n*[         R                  " U5      n+OKUU)UU-  -  -
  n,[         R                  " U U%S S S 24   U,:  SS9n*[         R                  " UU%S S 2S 4   U,:  SS9n+U([         R                  " U*U+5      -  n(U UU-  U-  -  n UUU-  U-  -  nM     U&U'U(U	-  -  -  n(U(R                  UR                  R                  5      n(U
(       aH  [         R                  " UU"-   5      R                  UR                  R                  5      nU(US S S 24   -   n(UU!S S 2S 4   U-  U"S S S 24   U-  -   -   nU!U:  S S 2S 4   U"U:  S S S 24   -  n-US:X  a  [         R                  " UU(U-S9  g [         R                   " UU(U-S9  g )Nr   r   )dtypeg        )maskother)rI   )tl
program_idcdivminarangemax_contiguousmultiple_ofloadzerosint32rangedottorH   
element_tystore
atomic_add).ABr.   r   state_x_ptrstate_w_ptrr9   r:   r;   	divfactorrD   	stride_am	stride_ak	stride_bk	stride_bn	stride_cm	stride_cnr&   r'   r(   rE   r)   rA   rF   pidpid_zgrid_mgrid_nwidthgroup_id
group_sizepid_mpid_nrmrnramrbnrkw_factorx_factoracckr   r   k_remainingrI   s.                                                 r   _int8_matmul_rowwise_dequantizery   1   s   x mmAa G$G$& %<(W"44g>
7"cJ&67u*-W_ryyG44W_ryyG44rAvw ?IrAvw ?IW_ryyG44QW	)BtQwK),CCDAtGy(3tQw<)+CCD W_ryyG44W_ryyG4477;,-dAg677;,-ag6 hh):q"''!Ww%678AGGAJGGAJ!w'8"99GGABtQwK+$=SIGGABq$wK+$=SI266!Q<C7"Y..A7"Y..A 9 (cIo67ffQWW''(774"9%((););<DT1W%CAtGy(2dAg;+BBCQ4 BFD!G#44a<HHQ$'MM!St,r   c                   ^^ SnUc  SOSnU R                   nU R                  S5      S:  a%  U R                  S5      S:  a  U R                  5       n UR                  S5      S:  a%  UR                  S5      S:  a  UR                  5       nU R                  S   UR                  S   :X  d   S5       eU R                  u  mnUR                  u  n	m[        R
                  " TT4U[        R                  S9n
[        R                  nUU4S jn[        U   " U UU
UUUTTUUUU R                  S5      U R                  S5      UR                  S5      UR                  S5      U
R                  S5      U
R                  S5      SUS9  U
$ )	NgA@?r   r   zincompatible dimensions)devicerH   c                 x   > [         R                  " TU S   5      [         R                  " TU S   5      -  U S   4$ )Nr&   r'   r)   )r1   rM   )METAr9   r:   s    r   r   0int8_matmul_rowwise_dequantize.<locals>.<lambda>   s5    V[[DO<v{{1dS\o?^^`den`opr   r-   )rE   rF   )
r{   stride
contiguousshapetorchemptyfloat16rK   float32ry   )r   r   r	   r
   r   r_   rD   r{   r;   _crF   gridr9   r:   s                @@r   r   r      sO   )	1!88A;?qxx{QA88A;?qxx{QAwwqzQWWQZ'B)BB'ww1ww1KKAvU]]C::p'-HHQKHHQKHHQKHHQKHHQKHHQK'	
* r   )r    bitsandbytes.triton.triton_utilsr   r   r1   triton.languagelanguagerK   matmul_perf_modelr   r   r   r8   autotuner2   
heuristicsjit	constexprry   r   r   r   <module>r      s    @  K14 __ MMccbUVWdeqrsMMccbUVWdeqrsMMcbRTUVcdpqrMMbSRTUVcdpqrMMccbUVWdeqrsMMcbRTUVcdpqrMMbSRTUVcdpqrMMcbRTUVcdpqrMMbRBSTUbcopqMMcccVWXefrstMMcccVWXefrstMMcbSUVWdeqrsMMbSSUVWdeqrsMMcccVWXefrstMMcbRTUVcdpqrMMbSRTUVcdpqrMMcbRTUVcdpqrMMbRBSTUbcopq)
* "#+
. 0BRfqst36 W	

 ZZQ- ,,Q-$ %Q-& 'Q-( )Q-* +Q-, -Q-. /Q-0 ,,1Q- 7BQ-f*r   