
    hU                     &   % S SK JrJrJrJr  S SKrS SKJr  S SKJ	r	  S SK
JrJrJrJrJrJrJrJrJrJrJr  S SKJrJr  SSKJrJrJrJrJrJrJ r   / S	Qr!S
 r"Sq#\\$   \%S'   S r&S r'0 r(\\\$   \\\$   \\\$   \)4   4   4   \%S'    " S S\5      r* " S S5      r+S3S jr,S\RZ                  S\\)   4S jr.S r/S r0\Rb                  4S jr2S r3\Rb                  4S jr4\Rb                  4S jr5SS\Rl                  \Rn                  SS4S  jr8\Rl                  4S! jr9S" r:SS\Rn                  4S# jr;SS\Rn                  4S$ jr<SS\Rl                  \Rn                  S4S% jr=  S4S& jr>SS\Rb                  \R~                  S4S' jr@SS\Rb                  \R~                  4S( jrA\Rb                  \Rb                  S4S)\RZ                  S*\R                  S+\R                  S,\\C   S\RZ                  4
S- jjrDS. rES/\\)S04   S1\S\\)S04   4S2 jrFg)5    )DictListOptionalTupleN)TorchDispatchMode)int_scaled_matmul)MappingTypeZeroPointDomain)_choose_qparams_affine_dont_preserve_zero_choose_qparams_affine_tinygemm _dequantize_affine_no_zero_point_dequantize_affine_tinygemm_quantize_affine_no_zero_point_quantize_affine_tinygemmchoose_qparams_affinedequantize_affinequantize_affine)check_cpu_versioncheck_xpu_version   )GranularityPerAxisPerBlockPerGroupPerRow	PerTensorPerToken)compute_error%_quantize_activation_per_token_absmax$_quant_int8_dynamic_per_token_linear dynamically_quantize_per_channeldequantize_per_tensordequantize_per_channelget_groupwise_affine_qparamspack_tinygemm_scales_and_zeros unpack_tinygemm_scales_and_zeros-groupwise_affine_quantize_tensor_from_qparams/groupwise_affine_dequantize_tensor_from_qparams groupwise_affine_quantize_tensor"groupwise_affine_dequantize_tensorper_token_dynamic_quantget_group_qparams_symmetric"recommended_inductor_config_setterc                     [         R                  R                  U 5      n[         R                  R                  X-
  5      nS[         R                  " X#-  5      -  $ )N   )torchlinalgnormlog10)xyPsPns       T/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/utils.pyr   r   ?   sB    			1	B			15	!BBG$$$    _cur_fqnc                    ^  U 4S jnU$ )Nc                 
   > Tq g N)r:   )moduleinputfqns     r8   forward_hook'_get_logging_hook.<locals>.forward_hookK   s    r9    )r@   rA   s   ` r8   _get_logging_hookrD   J   s     r9   c                 j    U R                  5        H  u  pUR                  [        U5      5        M!     g r=   )named_modulesregister_forward_pre_hookrD   )modelnamemods      r8   _apply_logging_hookrK   R   s+    ((*	%%&7&=> +r9   _fqn_to_op_to_shape_to_countc                       \ rS rSrSS jrSrg)LoggingTensorMode]   rC   Nc                    Uc  0 nU" U0 UD6nUR                    SUR                   3nSnU HH  n[        U[        R                  5      (       d  M$  U[        [        UR                  5      5      S-   -  nMJ     US:w  a  US S n[        [        ;  a  0 [        [        '   U[        [           ;  a  0 [        [           U'   U[        [           U   ;  a  S[        [           U   U'   [        [           U   U==   S-  ss'   U$ )N. z, r   r   )

__module____name__
isinstancer0   Tensorstrlistshaper:   rL   )	selffunctypesargskwargsrsop_name	shape_strargs	            r8   __torch_dispatch__$LoggingTensorMode.__torch_dispatch__^   s    >F4"6"//*!DMM?;	C#u||,,Scii1D88	  ?!#2I7757(26x@@>@(27;8B7KKIJ(27;IF$X.w7	BaGB	r9   )rC   N)rU   rT   __qualname____firstlineno__rd   __static_attributes__rC   r9   r8   rN   rN   ]   s    r9   rN   c                   2    \ rS rSrS rS rS rS rS rSr	g)	_MultiInputv   c                 $    [        U5      U l        g r=   )rY   values)r[   inputss     r8   __init___MultiInput.__init__w   s    6lr9   c                 <    U R                   R                  U5        U $ r=   )rm   append)r[   r?   s     r8   	add_input_MultiInput.add_inputz   s    5!r9   c                 2    [        U R                  U   5      $ r=   )rj   rm   )r[   slices     r8   __getitem___MultiInput.__getitem__~   s    4;;u-..r9   c                     U R                    Vs/ s H4  n[        U[        R                  5      (       a  UR	                  5       OUPM6     snU l         g s  snf r=   )rm   rV   r0   rW   cudar[   vals     r8   rz   _MultiInput.cuda   sA    LPKK
LWS*S%,,77CHHJS@K
 
   ;Ac                     U R                    Vs/ s H4  n[        U[        R                  5      (       a  UR	                  5       OUPM6     snU l         g s  snf r=   )rm   rV   r0   rW   xpur{   s     r8   r   _MultiInput.xpu   sA    KO;;
KVCC66CGGIC?;
 
r~   )rm   N)
rU   rT   rf   rg   ro   rs   rw   rz   r   rh   rC   r9   r8   rj   rj   v   s    #/


r9   rj   c           	          Ub/  U R                   U:w  a  [        SU SU SU R                    S35      eUb8  U R                  5       U:w  a#  [        SU SU SU R                  5        S35      eg g )NzExpected Tensor argument z to have dtype z
, but got z	 instead.z to have size )dtype
ValueErrorsize)
tensor_argarg_namer   r   s       r8   _guard_dtype_sizer      s    Z--6'zzR\RbRbQcclm
 	
 JOO-5'zvZPZP_P_PaObbkl
 	
 6r9   r4   returnc                     / n[        [        U R                  5      S-
  5       H  nUR                  S5        M     UR                  U R                  S   5        U$ )Nr   )rangelenrZ   rr   )r4   
block_size_s      r8   _get_per_token_block_sizer      sM    J3qww<!#$! %aggbk"r9   c                 b   [         R                  n[        U R                  5      n[	        [        U5      S-
  5       H  nSX#'   M	     [        R                  nSnSnSnU R                  [        R                  :X  a  [        R                  OS n[        U UUUUUUUS9u  p[        XXXFU5      nX4$ )Nr   gh㈵>i   )scale_dtype)r	   	SYMMETRICrY   rZ   r   r   r0   int8r   float16float32r   r   )tmapping_typer   ir   eps	quant_min	quant_maxr   scale
zero_point	quantizeds               r8   r   r      s    ((LaggJ3z?Q&'
 (JJE
C II#$77emm#;%--K-		E  	u%II r9   c                 H    [        U 5      u  pV[        XVXU5      nUb  Xs-   nU$ )zY
like F.linear, but with int8 dynamic quantization of activation,
and a quantized weight
)r   _quant_int8_per_token_matmul)r4   w_vals_int8_tw_scalesbias	out_dtypex_vals_int8x_scalesmm_outs           r8   r    r       s7     B!DK)}	F Mr9   c                 N   U R                   [        R                  :X  d   SU R                    S35       eUR                   [        R                  :X  d   SUR                    S35       eUR                   [        R                  [        R                  4;   d   SUR                    35       eU R                  SU R                  S   5      n[        XRUR                  SS5      5      nXc-  R
                  " / U R                  SS QUR                  S   P76 nUR                  U5      nU$ )a  
Quantized matmul of int8 operands that accumulates to int32 and returns
output_dtype. For now, this is written for approximate numerical
Assumes that activation and weight quantization are symmetric,
i.e. act_zp and w_zp is 0.
Assumes that weight quantization is per-channel.

see
https://github.com/google/gemmlowp/blob/master/doc/quantization.md
for an overview of quantized matmul compute

in scalar form, assuming output_dtype is fp32 and zw == 0:

  Y_i_j_fp32 = sx * sw dot(X_i, W_j)
zx dtype z not yet supportedzw dtype z?x_scales needs to be a torch.float32 or torch.bfloat16 but got r   r   N)	r   r0   r   floatbfloat16reshaperZ   r   to)r   r   r   r   output_dtypetmpy_dot_scaledr5   s           r8   r   r      s6   . 

* 
;$$%%78* %**, 
=&&''9:, >>   J(..IYZ	 $ 

b+"3"3B"7
8C$S9I9I"a9PQL		 )) 				3B		!-!3!3B!7	A
 	
\AHr9   c                 >   U R                  5       S:X  d   S5       e[        R                  " [        R                  5      R                  nSU R
                  S   4n[        R                  n[        R                  n[        U UUUUUUUS9u  p[        XXX1U5      n
XU	4$ )zn
assumes symmetric quantization
assumes axis == 0
assumes dense memory format
TODO(future): relax ^ as needed
   only support 2d Tensorsr   )target_dtyper   r   r   zero_point_dtype)dimr0   finfor   r   rZ   int64r	   r   r   r   )r4   r   r   r   r   r   r   r   r   r   quants              r8   r!   r!     s     557a<222<
++emm
$
(
(CQWWQZJ{{((L-	!)	E 	u,9E ##r9   c           	          U R                   nU R                  nUR                  5       S:X  d   SUR                  5        35       e[        XXXSS9nU$ )Nr   zscale size: r   )rZ   r   numelr   )int_reprr   r   r   r   input_dtypedequantizeds          r8   r"   r"   3  sS    J..K;;=A=ekkm_==#eK r9   c           	          U R                  5       S:X  d   S5       eU R                  5       n U R                  S   S4nU R                  n[	        XXXSS9nUR                  5       nU$ )Nr   r   r   r   r   )r   r   rZ   r   r   )r   scaleszero_pointsr   r   r   r   s          r8   r#   r#   >  si    <<>Q9 99
 zz|H..#Q'J..K#f;K --/Kr9         Fc                    X R                   S   :  a  U R                   S   nUS:  d   eU R                   S   U-  S:X  d   eU R                  5       S:X  d   eUS::  d
   SU 35       e[        R                  n[        R
                  nSU4n	Sn
SU-  S-
  nUc  SnUnU[        R                  :w  a  UO[        R
                  nU[        R                  :X  a  U(       d  [        U UU	UU
UUUUS9	u  pOBU[        R                  :X  a  U(       d  [        U UU	UU
UUUUS9	u  pO[        U UU	UU
UUUUS9	u  pUR                  US	9R                  U R                   S   S5      UR                  US	9R                  U R                   S   S5      4$ )
Nr   r   r   r      z-only n_bit smaller than 8 is supported, got: gư>)r   r   r   )rZ   r   r	   
ASYMMETRICr0   int32r
   INTFLOATr   r   r   r   r   )wn_bit	groupsizer   zero_point_domainpreserve_zeror   r   r   r   r   r   r   r   r   r   s                   r8   r$   r$   O  s    772;GGBK	q==772;"a'''557a<<A:NFugNN:))L;;LYJI51I
{K"o&9&99u{{  O111-;#-

z 
o11	1-E#-

z 2#-

 88%8 ((R8*-- ;H ;gaggaj" r9   c                    [        U SX!R                  5       S9  [        USUS9  U R                  5       n[        R                  " U R                  S5      UR                  S5      /U5      R                  SS5      R                  5       $ )Nr   )r   r   zerosr   r   rS   )r   r   r   r0   cat	unsqueeze	transpose
contiguous)r   r   r   r   s       r8   r%   r%     su    fhe**,GeWE2
**,C		  $# 	
 
2r		
r9   c                 |    U R                   S   S:X  d   e[        R                  " U R                  SS5      SS5      $ )Nr   r   r   rS   r   )rZ   r0   splitr   )scales_and_zeross    r8   r&   r&     s<    !!"%***;;'11"b91bAAr9   c           	         US:  d   eX@R                   S   :  a"  UR                   S   S:X  a  U R                   S   nU R                   S   U-  S:X  d   eU R                  5       S:X  d   eSU4n[        R                  nSnSU-  S-
  n	U[        R
                  :X  a  [        n
OHU[        R                  :X  a  [        n
O-[        [        R                  :X  a  [        n
O[        SU 35      eU
" U UUUUUU	5      nU R                   S   S:  a  [        UR                  5      (       dS  [        UR                  5      (       d9  US S 2S S S24   S-  US S 2SS S24   -  R                  [        R                   5      n[        UR                  5      (       a9  US S 2SS S24   S-  US S 2S S S24   -  R                  [        R                   5      nU$ )Nr   r   r   r   z Unrecognized zero point domain: r   )rZ   r   r0   r   r
   r   r   r   r   NONEr   r   r   devicer   r   uint8)r   r   r   r   r   r   r   r   r   r   _quantize_affineint_datas               r8   r'   r'     s    q==772;6<<#3q#8GGBK	772;"a'''557a<<YJ;;LI51IO///*	o33	34	O00	09;<M;NOPP	H 	wwr{Q"8??33"8??33 SqS)Q."add(1CCGGTHX__-- QTT*a/(2ss72CCGGTHOr9   c                    US:  d   eU R                  5       S:X  d   eU R                  [        R                  :X  d  U R                  S   S:  a  [        U R                  5      (       d  U R                  [        R                  5      nUS-	  nUS-  n[        R                  " U R                  S   U R                  S   S-  4[        R                  U R                  S9n	[        U R                  5      (       d  XyS S 2S S S24'   XS S 2SS S24'   OXS S 2S S S24'   XyS S 2SS S24'   OU n	XIR                  S   :  a"  UR                  S   S:X  a  U	R                  S   nU	R                  S   U-  S:X  d   eSU4n
[        R                  nSnSU-  S-
  nU[        R                  :X  a  [        nO!U[        R                  :X  a  [        nO[         nU" U	U
UUUUUUR                  S9$ )	Nr   r   r   r      r   )r   r   r   )r   r   r0   r   rZ   r   r   r   r   r   r   r
   r   r   r   r   r   )w_int4x8r   r   r   r   r   data	high_bitslow_bitsw_int32r   r   r   r   _dequantize_affines                  r8   r(   r(     s    q==<<>Q%++%);a)?(//**{{5;;'AI	$;++^^Aq 1A 56++??

 "(//22(B!G (B1H'B!G )B1H ==$$b)9Q)>MM"%	==y(A---YJ++KI51IO///.	o33	38=\\	 	r9   c           	      V    [        U UUUUUS9u  pg[        XXqX$S9n[        XgU5      n	X4$ )N)r   r   )r   )r$   r'   r%   )
r   r   r   r   r   r   r   r   r   r   s
             r8   r)   r)     sM     1	+#MF =	5H 6fUK%%r9   c                 6    [        U5      u  pE[        XXRU5      $ r=   )r&   r(   )r   r   r   r   r   r   s         r8   r*   r*   '  s%     55EFMF:%	 r9   c                 \   X R                   S   :  a  U R                   S   nUS:  d   eU R                   S   U-  S:X  d   eU R                  5       S:X  d   eUS::  d
   SU 35       eSU4nUc*  [        R                  " U R                  5      R
                  n0 nSUS'   [        SS5       H  nSUS-
  -  * SUS-
  -  S-
  4Xx'   M     Xq   u  p[        U UU[        R                  U	U
UUUS	9	u  pUR                  U R                   S   S5      UR                  U R                   S   S5      4$ )
Nr   r   r   r   r   zunsupported n_bit: )r   r   	   )r   r   r   r   r   r   )
rZ   r   r0   r   r   r   r   r   r   r   )r   r   r   	precisionr   r   r   rangesr   r   r   r   r   s                r8   r,   r,   4  sC    772;GGBK	q==772;"a'''557a<<A:4,UG44:YJ
{kk!''"&&FF1I1a[QU|_aAElQ&67	 !=I-	ZZ"
E ==R(**<*<QWWQZ*LLLr9   c           	          [        XX#U5      u  pVSnSUS-
  -  S-
  nSUS-
  -  * nSSKJn	  U	" XXhU[        R                  U5      n
XU4$ )Nr   r   r   r   )8_quantized_decomposed_quantize_per_channel_group_wrapper)r,   torchao._executorch_opsr   r0   r   )r   r   
group_sizer   r   r   r   max_intmin_intr   w_int8s              r8   group_quantize_tensor_symmetricr   [  sp     0	*MF EEAI"Geai !G F	57EJJ
F 5  r9   r?   r   r   r   c                     [         R                  n[        U 5      nSnSn[        R                  nU R
                  n	[        U UUUUUUUUS9	u  p[        U UU
UUUU5      n[        UUU
UUUUU	S9nU$ )Nir   )r   r   r   r   )	r	   r   r   r0   r   r   r   r   r   )r?   r   r   r   r   r   r   r   quant_dtyper   r   r   qdqs                 r8   r+   r+   u  s     ))L*51JII**K;;L/)
F 		A 
	!	
B Ir9   c                  z   S[         R                  R                  l        S[         R                  R                  l        S[         R                  R                  l        S[         R                  R                  l        S[         R                  R                  R                  l        [         R                  " S5        g)ai  
Set inductor config to use the following optimizations which have been showed to improve performance for quantized models:
    coordinate_descent_tuning = True
    coordinate_descent_check_all_directions = True
    force_fuse_int_mm_with_mul = True
    fx_graph_cache = True
    triton.unique_kernel_names = True
    torch.set_float32_matmul_precision("high")
ThighN)
r0   	_inductorconfigcoordinate_descent_tuning'coordinate_descent_check_all_directionsforce_fuse_int_mm_with_mulfx_graph_cachetritonunique_kernel_namesset_float32_matmul_precisionrC   r9   r8   r-   r-     sr     8<EOO4EIEOOB8<EOO5,0EOO)8<EOO!!5	&&v.r9   input_shape.granularityc                    [        U[        5      (       a  U $ [        U[        5      (       a$  [        U 5      nSX!R                  '   [        U5      $ [        U[        5      (       ah  UR                  n[        U5      [        U 5      :X  d   SU SU  35       e[        [        U5      5       H  nX   X#   -  S:X  a  M   SU  SU 35       e   U$ [        U[        [        45      (       a  S[        U 5      S-
  -  U S   4-   $ [        U[        5      (       aO  U S   UR                  -  S:X  d   S	U S    S
UR                   35       eS[        U 5      S-
  -  UR                  4-   $ [        SU 35      e)zGet the block size based on the input shape and granularity type.
Args:
    input_shape: The input tensor shape possibly more than 2 dimensions
    granularity: The granularity type of the quantization
r   zBlock size z8 must have the same number of dimensions as input shape r   zNot all shapes in input shape z are divisible by block size )r   r   zLast dimension of input z  is not divisible by group size zUnsupported Granularity: )rV   r   r   rY   axistupler   r   r   r   r   r   r   r   r   )r  r  r   r   s       r8   get_block_sizer    s    +y))	K	)	)+&
'(
##$Z  	K	*	* ++
:#k"22 	
*%]^i]jk	
2 s:'A>JM1Q6 0=Z[eZfg6 ( 	K&(!3	4	4s;'!+,B/AAA	K	*	*2!7!771< 	
&{2&77WXcXnXnWop	
< s;'!+,0F0F/HHH
0>
??r9   )NN)r   r   )Gtypingr   r   r   r   r0   torch.utils._python_dispatchr   torchao.kernelr   %torchao.quantization.quant_primitivesr	   r
   r   r   r   r   r   r   r   r   r   torchao.utilsr   r   r  r   r   r   r   r   r   r   __all__r   r:   rX   __annotations__rD   rK   rL   intrN   rj   r   rW   r   r   r    r   r   r!   r"   r#   r   r   r$   r%   r&   r'   r(   r)   r*   r   r,   r   r   r   r+   r-   r  rC   r9   r8   <module>r     s}   / .  :   
  *% (3- ?  dSM4tHSM3,>'??@@ 
) 2
 
,
 $s) B2 9x$@ BG  EJMM & 
..%++CL 9> "B @U@U)` %++6v 
..%++&4 		 mm&&$MR mm&&!8  %}}$)MM	+<<++ kk+ 
%	+
 \\+\/$@sCx@/:@
38_@r9   