
    h                     2   S SK r S SKrS SKJr  \R
                  S\R                  S\R                  4S j5       r   SS\ R                  S\ R                  S\ R                  S\	S	\ R                  S
\ R                  4S jjr\R
                  S\R                  S\R                  S\R                  4S j5       rSS jr\R
                  S\R                  S\R                  S\R                  4S j5       r\R
                  S\R                  4S j5       rg)    NQUANT_BLOCK
SPLIT_SIZEc                     [         R                  " SS9nXv-  nU[         R                  " SU5      -   n	X:  n
[        X	X#X5      n[         R                  " X-   X5        g )Nr   axis)tl
program_idarange"dequant_8bit_blockwise_kernel_utilstore)a_ptrout_ptrcode_ptr
absmax_ptrnr   r   pidblock_startoffsetsmaskout_dqs               i/home/james-whalen/.local/lib/python3.13/site-packages/bitsandbytes/backends/triton/kernels_8bit_quant.pydequant_8bit_kernelr      sV     --Q
C"KBIIa44G;D/VZhFHHW-    aabsmaxquant_state_codequant_blocksizedtypeoutc           	          U R                  5       nUc-  Uc  [        S5      e[        R                  " XU R                  S9nSn[
        R                  " Xg5      4n[        U   " U UUUUUU5        U$ )Nz'If out is None, dtype must be specified)r   device   )numel
ValueErrortorch
empty_liker!   tritoncdivr   )	r   r   r   r   r   r   r   r   grids	            r   dequant_8bit_blockwiser*   -   sz     	
	A
{=FGGqahh?JKK&(D		 Jr   
BLOCK_SIZE	CODE_SIZESPLIT_NUM_BLOCKSc                 \   [         R                  " S5      U-  n[         R                  " SXu-  5      n	X-  U	-   n
X:  n[         R                  " X
-   USS9n[	        XXeU5      u  p[         R
                  " X(-   [         R                  " SU5      -   U5        [         R
                  " X:-   XS9  g )Nr           )r   other)r   )r   r	   r
   load#quantize_8bit_blockwise_kernel_utilr   )A_ptrr   r   r   
n_elementsr+   r,   r-   block_start_idx
thread_idxr   r   A	quantizedr   s                  r   quantize_8bit_blockwise_kernelr9   S   s     mmA&)99O1.;<J*Z7GD
d#6A;A`pqIHHZ)BIIa9I,JJFSHHW	5r   c                    U R                  5       nXR* -  * nUc+  [        R                  " U4U R                  U R                  S9nUc1  [        R
                  " U R                  5       [        R                  S9nSn[        R                  " Xg5      4n[        U   " U UUUUUUR                  5       US9  UR                  U R                  5      nXC4$ )N)r!   r   r      )r3   r   r   r   r4   r+   r,   r-   )r#   r%   emptyr!   r   r&   flattenuint8r'   r(   r9   reshapeshape)	r7   code	blocksizer   r   r   blockssplit_num_blocksr)   s	            r   quantize_blockwise_tritonrF   k   s    		AJF~fYqxxqwwG
{qyy{%++>KK13D"4(**,) ++agg
C;r   N_PER_THc                    [         R                  " XU45      n[         R                  " [         R                  " U5      SS9nXVS S 2S 4   -  n[         R                  " USS5      n[         R
                  " XC4[         R                  S9n[         R                  " XC4US-
  [         R                  S9n	[        S5       HT  n
X-   S-  n[         R                  " X-   5      nX|:  n[         R                  " XU5      n[         R                  " XU5      n	MV     [         R                  " X-   5      n[         R                  " X-   5      n[         R                  " X~-
  5      n[         R                  " X-
  5      n[         R                  " UU:*  X5      R                  [         R                  5      n[         R                  " UX4-  45      nUU4$ )Nr<   r   g      g      ?r;         )r   r@   maxabsclampzerosint32fullranger1   wheretor?   )r   r   r,   r+   rG   
a_reshapedr   a_normalizedlower_pivotupper_pivot_pivotval	is_higher	lower_val	upper_val
lower_dist
upper_distr8   quantized_flats                       r   r2   r2      sn    A*56J VVBFF:&Q/Fq$w/L88L$4L((H1BK''80)a-rxxPK 1X*q0ggh&' &	hhy=hhyu=  ./I./I01J01Jz1;LOOPRPXPXYI ZZ	J,A+CDN6!!r   c                     [         R                  " X-   USS9R                  [         R                  5      n[         R                  " X&-   U5      nX-  n[         R                  " X8-   USSS9n	Xy-  n
U
$ )Nr   )r0   r/   
evict_last)r   r0   eviction_policy)r   r1   rS   r?   )r   r   r   r   r   r+   r   scaled_int8absmax_offsetsr   r   s              r   r   r      sf     	Q/22288<A''(,-K*NWWZ0t3XdeF!FMr   )@   NN)NN)r%   r'   triton.languagelanguager   jit	constexprr   Tensorintr   r*   r9   rF   r2   r    r   r   <module>rn      sO     . . . . .* ||LL ll 	
 ;; 
L 6 6 ||6 ll6 6.: '" ||'" 	'"
 ll'" '"T   r   