
    h&
                        S SK r S SKrS SKJr  \" 5       (       d  S\R                  4S jrgS SKrS SKJr	  \R                  " \R                  " 0 SS9\R                  " 0 SS9\R                  " 0 SS9\R                  " 0 S	S9\R                  " 0 S
S9\R                  " 0 SS	S9\R                  " 0 SS	S9\R                  " 0 SS	S9\R                  " 0 S	S	S9\R                  " 0 S
S	S9\R                  " 0 SS9\R                  " 0 SS9\R                  " 0 SS9\R                  " 0 S	S9/S/S9\R                  S\	R                  S\	R                  S\	R                  S\	R                  4S j5       5       rS\R                  4S jrg)    N)is_triton_availablexc                     g )N )r   s    o/home/james-whalen/.local/lib/python3.13/site-packages/bitsandbytes/triton/quantize_columnwise_and_transpose.py!quantize_columnwise_and_transposer   	   s           )
num_stages            )r   	num_warps)r   
n_elements)configskeyMN
BLOCK_SIZEP2c                    [         R                  " SS9nUn	[         R                  " SU5      n
X:  nX-  nX-   n[         R                  " X-   US9n[         R                  " U5      n[         R
                  " [         R                  " XS5      SS9n[         R                  R                  SUU-  -  5      nX-  nUU
-   n[         R                  " UU-   UUS9  [         R                  " X(-   U5        g )Nr   )axis)maskg     _@)
tl
program_idarangeloadabsmaxwhere	libdevicellrintstore)x_ptr
output_ptroutput_maxsr   r   r   r   r   pidblock_start	p2_arangep2_arange_maskr   offsetsr   abs_xmax_valoutput	new_startnew_offsetss                       r   "_quantize_columnwise_and_transposer2      s    < mm#IIa$	"&GGEO.9q	&&.;!D$$Ua'k%:;G	)+
k)6G
"G,r	   c                   ^ U R                   u  p[        R                  " X!U R                  [        R                  S9n[        R                  " U R                   S   U R                  [        R
                  S9n[        S[        R                  " [        R                  " U5      5      -  5      nU R                  (       a  UR                  (       d   eUR                  5       mU4S jn[        U   " XUTXXS9  X44$ )N)devicedtyper
   r   c                 :   > [         R                  " TU S   5      4$ )Nr   )tritoncdiv)metar   s    r   <lambda>3quantize_columnwise_and_transpose.<locals>.<lambda>I   s    V[[T,5GHJr	   )r   r   )shapetorchemptyr4   int8float16intmathceillog2is_cudanumelr2   )r   r   r   r/   r'   r   gridr   s          @r   r   r   @   s    wwQ!((%**Ekk!''!*QXXU]]Styy1./0yyV^^++\\^
J*40KUVfgo""r	   )rB   r=    bitsandbytes.triton.triton_utilsr   Tensorr   r7   triton.languagelanguager   autotuneConfigjit	constexprr2   r   r	   r   <module>rP      s|     @U\\   
 __MM"+MM"+MM"+MM"+MM",MM"a8MM"a8MM"a8MM"a8MM"q9MM"*MM"*MM"*MM"*
  N#& ZZ-
 <<- <<- LL- LL- '(-4#U\\ #r	   