
    hY                        S SK r S SKJr  S SKJrJrJrJrJr  S SK	J
r
JrJr  SSKJr  / SQr\ R                   R"                  r " S S	\ R$                  5      r " S
 S\ R(                  R*                  5      r\ R.                  R0                  S 5       r " S S\5      r " S S\5      r\ R.                  R0                  S 5       r " S S\5      r " S S\5      r\ R.                  R0                  S 5       r " S S\5      r  " S S\5      r!g)    N)return_and_correct_aliasing)$_quant_int8_dynamic_per_token_lineardequantize_per_channel dynamically_quantize_per_channel groupwise_affine_quantize_tensor unpack_tinygemm_scales_and_zeros)check_cpu_versioncheck_xpu_versionfind_multiple   )ZeroPointDomain)$Int8DynamicallyQuantizedLinearWeight#Int8WeightOnlyQuantizedLinearWeight#Int4WeightOnlyQuantizedLinearWeightc                       \ rS rSrSr\S 5       rS r\S 5       rS r	S r
S rS	 rS
 rS rS rS rS r\S 5       r\S 5       r\SS j5       r\S 5       rSrg)QuantizedLinearWeightBase&   a  
Base quantized tensor subclass for quantized linear weights. When the from_float method is used,
to create an instance of any QuantizedLinearWeightBase, we assume the input
weight is oriented the way it is in a normal linear op, i.e. out-channels x in-channels.

The shape and dtype of the tensor subclass represent how the tensor subclass looks externally,
regardless of the internal representation's type or orientation.
c                    UR                   US'   UR                  SS5      (       a  UR                  S5      OUR                  US'   SU;   d   eUR                  SS5      (       a   eSUS'   [        R                  R
                  " X40 UD6$ )NdevicelayoutFdtyperequires_grad)r   getr   torchTensor_make_wrapper_subclass)clsint_data
transposedshapeargskwargss         W/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/subclass.py__new__!QuantizedLinearWeightBase.__new__0   s    #??x$*JJx$?$?FJJx X__ 	x &   ::ou5555"'||223HHH    c                     Xl         X l        g Nr   r   )selfr   r   r!   r"   s        r#   __init__"QuantizedLinearWeightBase.__init__;   s     $r&   c                     g r(    act_mat	w_qtensorbiass      r#   _quantized_op'QuantizedLinearWeightBase._quantized_op@       r&   c                     U R                   R                   SU R                  5        SU R                   SU R                   SU R
                   SU R                   S3$ )Nz(data=z, shape=z	, device=z, dtype=z, requires_grad=))	__class____name__
dequantizer    r   r   r   r*   s    r#   __repr__"QuantizedLinearWeightBase.__repr__D   s_    ~~&&'vdoo.?-@ Ukk](4::,6FtGYGYFZZ[]	
r&   c                     g r(   r.   r;   s    r#   r:   $QuantizedLinearWeightBase.dequantizeJ       r&   c                     g r(   r.   r;   s    r#   int_repr"QuantizedLinearWeightBase.int_reprM   r@   r&   c                     g r(   r.   r;   s    r#   q_params"QuantizedLinearWeightBase.q_paramsP   r@   r&   c                 @    U R                  [        R                  5      $ r(   )tor   float16r;   s    r#   halfQuantizedLinearWeightBase.halfS   s    wwu}}%%r&   c                     [         R                  R                  R                  " U0 UD6u  p4pVUc  U R                  OUnUc  U R
                  OUnUb  UO[         R                  nUUUS.nU$ )N)r   r   memory_format)r   _C_nn	_parse_tor   r   preserve_format)r*   r!   r"   r   r   _rM   s          r#   _get_to_kwargs(QuantizedLinearWeightBase._get_to_kwargsV   sp    */((,,*@*@$*Q&*Q'q &F#m

*6ME<Q<Q 	 *

 r&   c                     g r(   r.   r*   fns     r#   _apply_fn_to_data+QuantizedLinearWeightBase._apply_fn_to_datad   r@   r&   c                     g r(   r.   r;   s    r#   _change_shape'QuantizedLinearWeightBase._change_shapeg   r@   r&   c                     g r(   r.   r;   s    r#   __tensor_flatten__,QuantizedLinearWeightBase.__tensor_flatten__j   r@   r&   c                     g r(   r.   )r   tensor_data_dicttensor_attributes
outer_sizeouter_strides        r#   __tensor_unflatten__.QuantizedLinearWeightBase.__tensor_unflatten__m   s     	r&   c                     g r(   r.   )r   input_floats     r#   
from_float$QuantizedLinearWeightBase.from_floats   r5   r&   Nc                    Uc  0 OUnU[         R                  R                  R                  L aD  US   US   [	        U5      S:  a  US   OS pvnUR
                  (       a   eU R                  XVU5      $  [         R                  R                  5          U" U0 UD6sS S S 5        $ ! , (       d  f       g = f! [         a    [        SU 35         g f = f)Nr   r      z ERR: subclass doesn't implement )r   nn
functionallinearlenr   r3   rN   DisableTorchFunctionSubclass	Exceptionprint)r   functypesr!   r"   mat1r1   r2   s           r#   __torch_function__,QuantizedLinearWeightBase.__torch_function__y   s    ~6588&&---QQt9q=Qd "D
 !++++$$Td;;	=668T,V, 988 	=4TF;<	=s0   4B5 B$	B5 $
B2.B5 2B5 5CCc           
      |   U[         R                  R                  [         R                  R                  4;   Ga'  US   R	                  5       (       Ga  US   R
                  (       a  U[         R                  R                  :X  a]  US   R                  S   US   R                  S   :X  d(   SUS   R                   SUS   R                   S35       eUS   US   US   pvnOmUS   R                  S   US   R                  S   :X  d(   SUS   R                   SUS   R                   S	35       eUS   US   [        U5      S:X  a  S OUS   pvnU R                  XVU5      $ U[         R                  R                  L a,  [        XXCS   R                  [        R                  5      5      $ U[         R                  R                  L a,  [        XXCS   R                  [        R                  5      5      $ U[         R                  R                  L aO  US   R                  (       + US   l        US   R!                  US   R                  S S S2   5      n[        XXH5      $ U[         R"                  R                  L aA  [        UUUUS   R$                  " USS  0 UD6R                  [        R                  5      5      $ g )
Nr   r   rl   zneed mat1 shape: z finaldim to match mat2 shape: z first dim z final dimto match mat2 shape: z
 first dim)atenmmdefaultaddmmis_floating_pointis_cudar    rp   r3   detachr   rX   r   clonetr   r[   _to_copyrH   )	r   rt   ru   r!   r"   rv   r1   r2   news	            r#   __torch_dispatch__,QuantizedLinearWeightBase.__torch_dispatch__   s    TWW__djj&8&899Q))++Qtzz)))Aw}}R(DGMM!,<< 'Q 7004QkK<
 GGG "& Aw}}R(DGMM!,<< 'Q 7,,0GMM?*F<
 GGINDQ "& $$Td;;4;;&&&.FG$=$=ell$K  4::%%%.FG$=$=ekk$J  466>>!%)!W%7%7!7DGq'''Qdd(;<C.t6GG4==(((.Q

DH//AA%++N	  )r&   r)   )r.   N)r9   
__module____qualname____firstlineno____doc__staticmethodr$   r+   r3   r<   r:   rB   rE   rJ   rS   rX   r[   r^   classmethodre   ri   rw   r   __static_attributes__r.   r&   r#   r   r   &   s     I I%
  
&  
  
 = =$ 7 7r&   r   c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )ConstructTensorSubclass   c                 :   > [         TU ]  5         Xl        X l        g r(   )superr+   r!   r"   )r*   r!   r"   r8   s      r#   r+    ConstructTensorSubclass.__init__   s    	r&   c                     g r(   r.   )r*   xs     r#   forwardConstructTensorSubclass.forward   r@   r&   c                 h    UR                  5       u  p#U Vs/ s H  n[        X5      PM     sn$ s  snf r(   )r^   getattr)r*   tensor_subclass_instancefieldsrR   fields        r#   right_inverse%ConstructTensorSubclass.right_inverse   s0    ,??A	FLMfU08fMMMs   /r!   r"   )	r9   r   r   r   r+   r   r   r   __classcell__r8   s   @r#   r   r      s    
N Nr&   r   c                      [        U 0 UD6$ r(   )r   r   s     r#   from_qtensor_components_int8dynr      s    /@@@r&   c                       \ rS rSrS rSrg)ConstructTensorSubclassInt8Dyn   c                 H    [        X/U R                  Q70 U R                  D6$ r(   )r   r!   r"   r*   r   q_scaless      r#   r   &ConstructTensorSubclassInt8Dyn.forward   s*    .
!%
.2kk
 	
r&   r.   Nr9   r   r   r   r   r   r.   r&   r#   r   r          
r&   r   c                      ^  \ rS rSrSr\r\SU 4S jj5       rSU 4S jjr	\S 5       r
SS jrS rS rS	 rS
 rS rS r\ SS j5       r\SS j5       rSrU =r$ )r      z
A Tensor subclass that when applied to a weight used in a linear op/module, changes the
linear op to a dynamically quantized linear op with symmetric per-token and per-channel
quantization on the activation and weight respectively.
c                 N   > Uc  UR                   nXVS'   [        TU ]  " XX440 UD6$ Nr   r   r   r$   )r   r   r   r   r    r   r"   r8   s          r#   r$   ,Int8DynamicallyQuantizedLinearWeight.__new__   s.    =NNEwwsjJ6JJr&   c                 0   > X l         [        TU ]	  X5        g r(   )r   r   r+   )r*   r   r   r   r    r   r"   r8   s          r#   r+   -Int8DynamicallyQuantizedLinearWeight.__init__   s     .r&   c                 X    [        XR                  UR                  X R                  5      $ r(   )r   r   r   r   r/   s      r#   r3   2Int8DynamicallyQuantizedLinearWeight._quantized_op   s&    3''););T==
 	
r&   c                    [         R                  " U R                  R                  U R                  R                  U R                  R
                  S9n[        U R                  R                  5       U R                  UUc  U R
                  OU5      R                  U R
                  5      nU R                  (       d  U$ UR                  5       $ )zA
Obtain the dequantized version of the quantized tensor subclass
r   r   )r   zerosr   r    r   r   r   r   r   rH   r   )r*   r   zero_pointsdq_ts       r#   r:   /Int8DynamicallyQuantizedLinearWeight.dequantize   s     kkMM(<(<DMMDWDW

 &MMOOMM-DJJU	

 "TZZ. 	  ??t88r&   c                 p    U R                   (       a  U R                  $ U R                  R                  5       $ )zA
Get the internal integer representation of the quantized tensor
)r   r   r   r;   s    r#   rB   -Int8DynamicallyQuantizedLinearWeight.int_repr  s$     !%t}}FT]]__5FFr&   c                     SU R                   0$ )z6
Get the quantization scales for the quantized tensor
r   r   r;   s    r#   rE   -Int8DynamicallyQuantizedLinearWeight.q_params  s     DMM**r&   c                     U R                   " U0 UD6nU R                  " U R                  R                  US   5      U R                  R                  US   5      U R
                  U R                  40 UD6$ Nr   )rS   r8   r   rH   r   r   r    r*   r!   r"   s      r#   rH   'Int8DynamicallyQuantizedLinearWeight.to  sn    $$d5f5~~MMVH-.MMVH-.OOJJ	

 
 	
r&   c                     U R                  U" U R                  5      U" U R                  5      U R                  U R                  U R
                  S9$ Nr   )r8   r   r   r   r    r   rV   s     r#   rX   6Int8DynamicallyQuantizedLinearWeight._apply_fn_to_data#  sE    ~~t}}t}}OOJJ**  
 	
r&   c                 v    U R                  U R                  U R                  U R                  XR                  S9$ r   )r8   r   r   r   r   r*   r    s     r#   r[   2Int8DynamicallyQuantizedLinearWeight._change_shape.  s1    ~~MM4==$//5

  
 	
r&   c                 P    SS/U R                   U R                  U R                  /4$ )Nr   r   )r   r    r   r;   s    r#   r^   7Int8DynamicallyQuantizedLinearWeight.__tensor_flatten__3  s%    J'$//4::tzz)RRRr&   c           	      >    US   US   peUu  pxn	U " UUUUc  UOUU	US9$ )Nr   r   )r   stridesr.   )
r   ra   rb   rc   rd   r   r   r   r    r   s
             r#   re   9Int8DynamicallyQuantizedLinearWeight.__tensor_unflatten__7  sF     .j9;KJ;W(#4 
5'EZ 
 	
r&   c                 
   Uc  UR                   n[        XU[        R                  5      u  pVnUR	                  5       R                  5       n[        U [        5      (       d  UR	                  5       nU " UUSUR                  US9$ )z
Method used to convert a linear weight tensor to an instance of the
Int8DynamicallyQuantizedLinearWeight subclass.

Example usage::

    model.lin_mod.weight = (
        Int8DynamicallyQuantizedLinearWeight.from_float(model.lin_mod.weight)
    )
Fr   )	r   r   r   int8
contiguousr   
issubclassr   r    )	r   rh   qminqmaxr   
w_int_reprw_scalesrR   r   s	            r#   ri   /Int8DynamicallyQuantizedLinearWeight.from_floatF  s     =%%E #CtUZZ#

a ((*,,.#CDD**,H
 	
r&   r   r(   NN)i   N)r9   r   r   r   r   r   subclass_constructorr   r$   r+   r3   r:   rB   rE   rH   rX   r[   r^   r   re   ri   r   r   r   s   @r#   r   r      s     :K K/ 
 

9$G+



S PT
 
  
  
r&   r   c                      [        U 0 UD6$ r(   )r   r   s     r#   from_qtensor_components_int8wor   j      .???r&   c                       \ rS rSrS rSrg)ConstructTensorSubclassInt8woio  c                 H    [        X/U R                  Q70 U R                  D6$ r(   )r   r!   r"   r   s      r#   r   %ConstructTensorSubclassInt8wo.forwardp  s*    -
!%
.2kk
 	
r&   r.   Nr   r.   r&   r#   r   r   o  r   r&   r   c                   ,    \ rS rSrSr\r\S 5       rSr	g)r   iv  z
A Tensor subclass that when applied to a weight used in a linear op/module,
changes the linear op to a weight-only quantized linear op with symmetric
per-channel quantization on the weight.
c                 n   U R                   n[        R                  " U R                  SU R                  S   5      UR
                  R                  U R                   5      5      UR                  -  nUR                  " / U R                  S S QUR                  S   P76 nUb  XB-  nUR                  U5      $ )Nrz   )r   r   r|   reshaper    r   rH   r   )r0   r1   r2   
orig_dtypeys        r#   r3   1Int8WeightOnlyQuantizedLinearWeight._quantized_op  s    ]]
HHGMM"$56""%%gmm4   	! 	
 II7w}}Sb)71772;7IAttJr&   r.   N)
r9   r   r   r   r   r   r   r   r3   r   r.   r&   r#   r   r   v  s!     9   r&   r   c                      [        U 0 UD6$ r(   )r   r   s     r#   from_qtensor_components_int4wor     r   r&   c                       \ rS rSrS rSrg)ConstructTensorSubclassInt4woi  c                 H    [        X/U R                  Q70 U R                  D6$ r(   )r   r!   r"   )r*   r   scales_and_zeross      r#   r   %ConstructTensorSubclassInt4wo.forward  s*    -
)-
6:kk
 	
r&   r.   Nr   r.   r&   r#   r   r     r   r&   r   c                     ^  \ rS rSrSr\r\SS\R                  SS4U 4S jj5       r
U 4S jr\S	 5       rS
 rS rS rS rS rS rS r\ SS j5       r\SS\R                  SS4S j5       r\SS\R                  S4S j5       rSrU =r$ )r   i  z
A Tensor subclass that when applied to a weight used in a linear op/module,
changes that linear op to a weight-only int4 quantized linear op with groupwise
affine quantization on the weight.
      FNc
                 N   > U	c  UR                   n	XS'   [        TU ]  " XX440 U
D6$ r   r   )r   r   r   r   r    	groupsizeinner_k_tileszero_point_domainpreserve_zeror   r"   r8   s              r#   r$   +Int4WeightOnlyQuantizedLinearWeight.__new__  s2     =$**EwwsjJ6JJr&   c
                 `   > X l         XPl        X`l        Xpl        Xl        [
        TU ]  X5        g r(   )r   r   r   r   r   r   r+   )r*   r   r   r   r    r   r   r   r   r   r"   r8   s              r#   r+   ,Int4WeightOnlyQuantizedLinearWeight.__init__  s/    $ !1"*!2*.r&   c                    U R                  5       nU R                  nU R                  SU R                  S   5      R	                  [
        R                  5      n [        U R                  S   S5      n[
        R                  R                  R                  U SXPR                  S   -
  45      n [        U R                  5      (       aF  [        R                  U R                  5       UR                   UR"                  UR$                  5      nGO['        U R                  5      (       a  UR(                  [*        R,                  :X  dE  [        R/                  U R                  5       UR                   UR"                  UR$                  5      nO[        R1                  U R                  5       UR                   UR"                  UR$                  S   UR$                  S   5      nOD[        R/                  U R                  5       UR                   UR"                  UR$                  5      nUR2                  (       a  UR                  S   OUR                  S   nUS S 2S U24   nUR                  " / US S QUP76 nUb  Xb-  nUR	                  U5      $ )Nrz      r   r   )sizer   r   r    rH   r   bfloat16r   rm   rn   padr	   r   r{   _weight_int4pack_mm_for_cpur   r   r   r   r
   r   r   INT_weight_int4pack_mm)_weight_int4pack_mm_with_scales_and_zerosr   )r0   r1   r2   orig_act_sizer   pad_sizer   orig_out_featuress           r#   r3   1Int4WeightOnlyQuantizedLinearWeight._quantized_op  s$   ]]
 //"gmmB&78;;ENNK r!2D9((%%))'Ax--PRBS7S3TU W^^,,00""$""##**	A w~~..../2E2EE,,&&(&&''..	 BB&&(&&''..q1..q1 ((""$""##**	A $-#7#7IOOBY__R=P 	 a####$II=}Sb)=+<=IAttJr&   c                 P   U R                   (       d  U R                  S   OU R                  S   nU R                  [        R                  " XR
                  U R                  S9U S 5      nU R                   (       a  UOUR                  5       nUR                  U R                  5      $ )Nr   r   r   )	r   r    r3   r   eyer   r   r   rH   )r*   	eye_shapew_dqs      r#   r:   .Int4WeightOnlyQuantizedLinearWeight.dequantize  sq    )-DJJqMdjjm	!!IIi4::Fd

 tDFFHwwtzz""r&   c                     U R                   $ r(   )r   r;   s    r#   rB   ,Int4WeightOnlyQuantizedLinearWeight.int_repr  s    }}r&   c                 8    [        U R                  5      u  pXS.$ )N)r   q_zero_points)r   r   )r*   scalesr   s      r#   rE   ,Int4WeightOnlyQuantizedLinearWeight.q_params  s#    >!!
 #AAr&   c           
      B   U R                   " U0 UD6nU R                  " U R                  R                  US   5      U R                  R                  US   5      U R
                  U R                  U R                  U R                  U R                  U R                  40 UD6$ r   )rS   r8   r   rH   r   r   r    r   r   r   r   r   s      r#   rH   &Int4WeightOnlyQuantizedLinearWeight.to  s    $$d5f5~~MMVH-.!!$$VH%56OOJJNN""

 

 
	
r&   c                     U R                  U" U R                  5      U" U R                  5      U R                  U R                  U R
                  U R                  U R                  U R                  U R                  S9	$ r   )
r8   r   r   r   r    r   r   r   r   r   rV   s     r#   rX   5Int4WeightOnlyQuantizedLinearWeight._apply_fn_to_data&  si    ~~t}}t$$%OOJJNN""**  

 
	
r&   c                     U R                  U R                  U R                  U R                  UU R                  U R
                  U R                  U R                  U R                  S9	$ r   )	r8   r   r   r   r   r   r   r   r   r   s     r#   r[   1Int4WeightOnlyQuantizedLinearWeight._change_shape5  s[    ~~MM!!OONN""**  

 
	
r&   c                     SS/U R                   U R                  U R                  U R                  U R                  U R
                  U R                  44$ )Nr   r   )r   r    r   r   r   r   r   r;   s    r#   r^   6Int4WeightOnlyQuantizedLinearWeight.__tensor_flatten__B  sQ    ./OOJJNN""JJ2
 
 	
r&   c                 P    US   US   peUu  nnn	n
nnnU " UUUUc  UOUU	U
UUUUS9
$ )Nr   r   )r   r   r   r   r.   )r   ra   
attributesrc   rd   r   r   r   r    r   r   r   r   r   s                 r#   re   8Int4WeightOnlyQuantizedLinearWeight.__tensor_unflatten__M  sk     Z(/0 # 	
'EZ/' 
 	
r&   c                 ~    Uc  UR                   nU R                  UUUUUS9u  pxpn
U " UUU	UR                  UUUUUS9	$ )z
Method used to convert a linear weight tensor to an instance of the
Int4WeightOnlyQuantizedLinearWeight subclass.

Example usage::

    model.lin_mod.weight = (
        Int4WeightOnlyQuantizedLinearWeight.from_float(model.lin_mod.weight)
    )
)r   r   )r   r   r   )r   to_qtensor_componentsr    )r   rh   r   r   r   r   r   r   r   r   inner_k_tilss              r#   ri   .Int4WeightOnlyQuantizedLinearWeight.from_floatn  st    ( =%%E %%"3+ &  	HJ< /'

 
	
r&   c           	         US;   d   eUS;   d   eUR                   u  pg[        US5      n[        US5      n	[        R                  R                  R                  USX-
  SX-
  45      n[        USUUR                  UUS9u  p[        UR                  5      (       a  [        R                  X5      nO[        R                  X5      nXSX#4$ )	N)   r   @       )r      rl   r   r   r   r*  )r   r   r   F)r    r   r   rm   rn   r  r   r   r	   r   r{   #_convert_weight_to_int4pack_for_cpu_convert_weight_to_int4pack)r   rh   r   r   r   r   r
  orig_in_featuresin_featuresout_featuresinput_int4x8r   r   s                r#   r#  9Int4WeightOnlyQuantizedLinearWeight.to_qtensor_components  s     ....	))).9.?.?+ $$4d;$%6:hh))--.<3ST
 *J##/'*
& [//00??H 77TH5)JJr&   )r   r   r   r   r   r   )r9   r   r   r   r   r   r   r   r   FLOATr$   r+   r3   r:   rB   rE   rH   rX   r[   r^   r   re   ri   r#  r   r   r   s   @r#   r   r     s     9 )//K K$/2 2  2 h#B


	
 
 JN
	 
:  )//)
 )
V  )//#K #Kr&   r   )"r   torch.utils._python_dispatchr   torchao.quantization.utilsr   r   r   r   r   torchao.utilsr	   r
   r   quant_primitivesr   __all__opsr{   r   r   rm   Moduler   _dynamoallow_in_graphr   r   r   r   r   r   r   r   r   r.   r&   r#   <module>r<     s    D   yy~~^ ^BNehhoo N A A
%< 
F
+D F
R @ @
$; 
 *N  2 @ @
$; 
cK*C cKr&   