
    hRS                     N   S SK r S SKJrJrJrJrJr  S SKrS SKJ	r	  S SK
J	s  Jr  S SKJr  S SKJr  SSKJrJr  SSKJr  SSKJrJrJr  \R4                  R6                  r/ S	QrS'S
 jr\R<                  \R<                  4S\R>                  S\R>                  S\R>                  S\ S\ S\RB                  S\RB                  4S jjr" " S S\R                  RF                  5      r$S\R<                  \R<                  \$S4S\R                  RF                  S\ S\\    S\%S\\   S\RB                  S\RB                  S\\R                  RF                     S\%4S jjr& S(S jr' " S S\5      r(S  r) " S! S"\R                  RF                  5      r* S)S\R                  RF                  S\ S\%S\RB                  S\RB                  S\\R                  RF                     S\%4S# jjr+S\R                  RF                  S\ S\%S\RB                  S\RB                  4
S$ jr, " S% S&\5      r-g)*    N)AnyCallableDictOptionalType)	is_device)find_multiple   )MappingTypedequantize_affine)	Quantizer)group_quantize_tensor_symmetric groupwise_affine_quantize_tensorper_token_dynamic_quant)WeightOnlyInt4LinearInt4WeightOnlyQuantizerInt8DynActInt4WeightQuantizerc                 D    X-  S:H  nUb  XS-  -  S:H  nU=(       a    U$ U$ )Nr       )k	groupsizeinner_k_tilesk_divisible_by_groupsize%k_divisible_by_16_times_inner_k_tiless        c/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/linear_quant_modules.py_check_linear_int4_kr   )   s:     }1 01R5G0HA0M-'Q,QQ##    xweight_int4packscales_and_zerosout_featuresr   	precisionscales_precisionc                 T   U R                  5       nU R                  SUS   5      n [        U R                  R                  S5      (       ab  [
        R                  R                  R                  U R                  U5      UUUR                  U5      5      R                  U R                  S9nOa[
        R                  R                  R                  U R                  U5      UUUR                  U5      5      R                  U R                  S9nUS S U4-   n	UR                  U	5      nU$ )Ncpudtype)sizereshaper   devicetypetorchopsaten_weight_int4pack_mm_for_cputor)   _weight_int4pack_mm)
r   r    r!   r"   r   r#   r$   origin_x_sizec	new_shapes
             r   linear_forward_int4r7   1   s     FFHM			"mB'(A&&IINN66DDO 01	

 "177"
 	
 IINN..DDO 01	

 "177"
 	
 cr"l_4I			)AHr   c                      ^  \ rS rSr% SS/r\\S'   \\S'   \R                  \S'   SSSSS\R                  \R                  4S\S\S	\S
\S\R                  S\R                  SS4U 4S jjjrS\R                  S\R                  4S jrSrU =r$ )r   O   in_featuresr"   weightFN      r   r   r#   r$   returnc
           
      
  > [         T
U ]  5         [        XU5      (       + U l        U R                  (       a  Xl        [        US5      nXl        X l        U(       a   S5       eX@l        X`l	        Xpl
        Xl        Xl        Ub  [        S5      eUS-  S:X  d   S5       eXS-  -  S:X  d   S5       e[        UR                  S	5      (       a:  U R!                  S
["        R$                  " UUS-  4["        R&                  US95        OCU R!                  S
["        R$                  " US-  XS-  -  SUS-  4["        R(                  US95        XPl        U R!                  S["        R$                  " X-  US4U R                  US95        g )N   zrequire bias=False-Please specify 'precision' instead of 'dtype'r=   r   zrequire out_features % 8 == 0r   z-require in_features % (innerKTiles * 16) == 0r'   r;      )r)   r,       r!   )super__init__r   paddingorigin_in_featuresr	   r:   r"   r,   r   r   r#   r$   
ValueErrorr   r-   register_bufferr.   zerosuint8int32r)   )selfr:   r"   biasr,   r)   r   r   r#   r$   	__class__s             r   rE   WeightOnlyInt4Linear.__init__U   s    	/VV<<&1#'T:K&(---x"*" 0LMMa1$E&EE$b01Q6 	
;	
6 V[[%((  $#q(  ++!
   $)#(:;%*	  ++!	 
KK)<;++	
r   inputc           	         U R                   (       a.  [        R                  " USU R                  U R                  -
  4S9n[        UU R                  U R                  U R                  U R                  U R                  U R                  5      $ )Nr   pad)rF   FrT   r:   rG   r7   r;   r!   r"   r   r#   r$   rM   rQ   s     r   forwardWeightOnlyInt4Linear.forward   so    <<EE%a)9)9D<S<S)S%TUE"KK!!NNNN!!
 	
r   )
r,   r)   r   r:   r   rG   r"   rF   r#   r$   )__name__
__module____qualname____firstlineno____constants__int__annotations__r.   Tensorbfloat16r)   rE   rW   __static_attributes____classcell__rO   s   @r   r   r   O   s    "N3MLL !&(-E
E
 E
 E
 E
 ;;E
  ++E
 
E
 E
N
U\\ 
ell 
 
r   r   Fmoduler   padding_allowedskip_layer_funclinear_classcopy_weightsc	                 F   U R                  5        GH  u  p[        U
[        R                  5      (       a  U
R                  c  Ub  U" U
R
                  5      (       d  [        U
R                  X5      (       d  U(       a  U" U
R                  U
R                  SU
R
                  R                  UUUUS9nU(       a?  U
R
                  R                  [        R                  " S5      :w  a  U
R
                  Ul        [        X	U5        M  M  [        U
UUUUUUUU5	        GM     g )NF)rN   r,   r   r   r#   r$   meta)named_children
isinstancennLinearrN   r;   r   r:   r"   r,   r.   setattr_replace_linear_int4)re   r   r   rf   rg   r#   r$   rh   ri   namechild
new_linears               r   rq   rq      s     ,,. ubii((

" (0M0M %U%6%6	QQ")%%&& <<..'"/'%5	
  ELL$7$75<<;O$O(-J%j1% #( ! 
; /r   c           	      (    [        U UUUU[        S9  g )N)rh   )rq   r   )re   r   r   rf   rg   s        r   replace_linear_int4rv      s     )r   c                      ^  \ rS rSrSSS\R
                  " S5      \R                  4S\S\S\	\   S	\R
                  S
\R                  SS4U 4S jjjr\R                  " 5       S\R                  R                  S\\\R"                  4   4S j5       rS\R                  R                  S\R                  R                  4S jrS\R                  R                  S\S\S\R                  R                  4S jrSrU =r$ )r         Tr=   cudar   rf   r   r,   r#   r>   Nc                 ~   > [         TU ]  5         US;   d   eUS;   d   eX0l        Xl        X l        X@l        XPl        g )N)rB      r=   )rC   @   r<   ry   )rD   rE   r   r   rf   r,   r#   )rM   r   rf   r   r,   r#   rO   s         r   rE    Int4WeightOnlyQuantizer.__init__   sH     		)))....*'%4$*&/r   modelc           	      <   UR                  5       nUR                  5        GHv  u  p4[        U[        R                  R
                  5      (       d  M1  UR                  b  M@  UR                  nUR                  n[        R                  " SU SU SU 35        X`R                  -  S:X  d   SU SU R                   S35       eUR                  R                  n[        X`R                  U R                  5      (       dv  U R                   (       aE  SS KJs  Jn  [        R&                  " SU S	35        [)        US
5      n	UR*                  " USX-
  4S9nO [        R&                  " SU S3S-   5        GMR  [-        USU R                  U R.                  5      u  p[1        U
R2                  R4                  S5      (       aN  [        R6                  R8                  R;                  U
R=                  U R2                  5      U R                  5      nOM[        R6                  R8                  R?                  U
R=                  U R2                  5      U R                  5      nUR=                  U R2                  5      X# S3'   UR=                  U R2                  5      X# S3'   GMy     U$ )Nlinear: , in=, out=r   require in_features: % self.groupsize: == 0	warning: - is padded to satisfy in_features % 1024 == 0r@   rS   P is skipped, int4 requires that in_features is 32, 64, or is divisible by 1024, =and that groupsize and inner_k_tiles*16 evenly divide into itr|   r'   .weightz.scales_and_zeros) 
state_dictnamed_modulesrm   r.   rn   ro   rN   r"   r:   logginginfor   r;   datar   r   rf   torch.nn.functional
functionalwarningr	   rT   r   r#   r   r,   r-   r/   r0   #_convert_weight_to_int4pack_for_cpur2   _convert_weight_to_int4pack)rM   r   cur_state_dictfqnmodr"   r:   r;   rU   padded_in_featuresw_int4x8r!   r    s                r   _create_quantized_state_dict4Int4WeightOnlyQuantizer._create_quantized_state_dict   sA    ))+++-HC#uxx//CHH4D"//!ooxuE+f\NST"^^3q8 *;-7I$..IYY^_8 +1C1C  ++77'u,YZ .;;-M*!"",>,L(M"  'u,|}]^ !/ONNNN	0, X__11599		JJ$KK4d6H6H $ ',iinn&P&P DKK0$2D2D'O 3B2D2DT[[2Qg/<L<O<OKK=&789c .h r   c           
          [        UU R                  U R                  U R                  S U R                  U R                  S9  U$ )N)rg   r#   r$   )rq   r   r   rf   r#   rM   r   s     r   _convert_for_runtime,Int4WeightOnlyQuantizer._convert_for_runtime;  s>    NN   nn!^^	
 r   argskwargsc                 j    U R                  U5      nU R                  U5      nUR                  USS9  U$ NF)strictr   r   load_state_dictrM   r   r   r   r   s        r   quantize Int4WeightOnlyQuantizer.quantizeG  <     66u=
))%0j7r   )r,   r   r   rf   r#   )rY   rZ   r[   r\   r.   r,   ra   r^   boolr   r)   rE   no_gradrn   Moduler   strr`   r   r   r   r   rb   rc   rd   s   @r   r   r      s     $'($||F3!&00 0  }	0
 0 ;;0 
0 0& ]]_8XX__8	c5<<	 8 8t
%((// 
ehhoo 
XX__-0<?	 r   r   c                 r   [        U [        R                  [        R                  [        R                  " [        R                  5      R                  S9n SnSUS-
  -  * n	SUS-
  -  S-
  n
SU4n[        UUUU[        R                  U	U
US9n[        R                  R                  R                  XU5      nU$ )N)scale_dtypezero_point_dtypeepsr|   rB   r
   )output_dtype)
r   r.   float32finfor   r   int8rn   r   linear)r   weight_int8rN   scalesrJ   r"   r   output_precisionn_bit	quant_min	quant_max
block_sizew_dqr5   s                 r   linear_forward_8da4wr   Q  s     	 	MMKK&**		A E	"#Ieai 1$IYJ

%	D 	""1D1A
 Hr   c                   :  ^  \ rS rSr% SS/r\\S'   \\S'   \R                  \S'   \R                  \S'    SSSS\R                  \R                  4S\S\S	\S
\R                  S\R                  SS4U 4S jjjrS\R                  S\R                  4S jrSrU =r$ )Int8DynActInt4WeightLineari  r:   r"   r;   rN   TNry   r   r#   r$   r>   c	                   > [         T	U ]  5         X-  S:X  d   SU SU S35       eXl        X l        X`l        Xpl        Ub  [        S5      eU R                  S[        R                  " X!4[        R                  S95        U R                  S[        R                  " X!U-  4US95        U R                  S	[        R                  " X!U-  4US95        U(       a%  U R                  S
[        R                  " X'S95        g S U l        g )Nr   r   z % groupsize:r   rA   r;   r(   r   rJ   rN   )rD   rE   r:   r"   r   r#   rH   rI   r.   rJ   r   rN   )
rM   r:   r"   rN   r,   r)   r   r#   r$   rO   s
            r   rE   #Int8DynActInt4WeightLinear.__init__  s    	 &!+ 	
";-}YKuM	
+ '(" #LMM 	KK35::F	
 	KKi78&	
 	KKi78&	
   \)STDIr   rQ   c           
          UR                  U R                  5      n[        UU R                  U R                  U R
                  U R                  U R                  U R                  U R                  5      $ N)	r2   r#   r   r;   rN   r   rJ   r"   r   rV   s     r   rW   "Int8DynActInt4WeightLinear.forward  sY    ( $KKIIKKJJNNNN	
 		
r   )rN   r   r:   r"   r#   )rY   rZ   r[   r\   r]   r^   r_   r.   r`   r   r)   rE   rW   rb   rc   rd   s   @r   r   r     s    "N3MLL
,, !&(-88 8 8 ;;8  ++8 
8 8t
U\\ 
ell 
 
r   r   c                   ^^^^^^ SSK Jn  S[        R                  R                  S[
        S[        4UU4S jjnS[        R                  R                  S[        R                  R                  4UUUUU4S jjn	U" X	U5        g )Nr   ))_replace_with_custom_fn_if_matches_filterrs   cur_fqnr>   c                    > [        U [        R                  5      =(       a    [        U R                  T5      =(       d    T$ r   )rm   rn   ro   r   r:   )rs   r   r   rf   s     r   	filter_fn(_replace_linear_8da4w.<locals>.filter_fn  s0    %+ 
 !2!2I>Q/	
r   c           
      6  > T" U R                   U R                  U R                  S LU R                  R                  TTTS9nT(       aP  U R                  R                  [
        R                  " S5      :w  a"  U R                  Ul        U R                  Ul        U$ )N)rN   r,   r   r#   r$   rk   )r:   r"   rN   r;   r,   r.   )rs   rt   ri   r   rh   r#   r$   s     r   replacement_fn-_replace_linear_8da4w.<locals>.replacement_fn  s}    !4'<<&&-

 ELL//5<<3GG %J#jjJOr   )torchao.quantization.quant_apir   r.   rn   r   r   r   )
re   r   rf   r#   r$   rh   ri   r   r   r   s
    ``````   r   _replace_linear_8da4wr     sd     Y
 
3 
4 
 

ehhoo %((//  $ .fiPr   c                 ,    [        U UUUU[        5        g r   )r   r   )re   r   rf   r#   r$   s        r   replace_linear_8da4wr     s     "r   c                   <  ^  \ rS rSrSS\R
                  \R
                  \R                  " S5      \R                  4S\	S\
S\R                  S\R                  S	\R                  S
\SS4U 4S jjjr\R                  " 5       S\R                  R                  S\\\R$                  4   4S j5       rS\R                  R                  S\R                  R                  4S jrS\R                  R                  S\S\S\R                  R                  4S jrSrU =r$ )r   i  ry   Fr'   r   rf   r#   r$   r,   mapping_typer>   Nc                 j   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        g r   )rD   rE   r   rf   r#   r$   r,   r   )rM   r   rf   r#   r$   r,   r   rO   s          r   rE   &Int8DynActInt4WeightQuantizer.__init__  s1     	'%4&/-=$*)5r   r   c           	         UR                  5       nUR                  5        GH  u  p4[        U[        R                  R
                  5      (       d  M1  UR                  nUR                  n[        R                  " SU SU SU 35        X`R                  -  S:X  d   SU SU R                   S35       eUR                  R                  n[        X`R                  5      (       dv  U R                  (       aE  SS KJs  Jn  [        R"                  " SU S	35        [%        US
5      n	UR&                  " USX-
  4S9nO [        R"                  " SU S3S-   5        GM8  [)        UR+                  U R,                  5      SU R                  U R.                  U R0                  S9u  n
nnU
R+                  U R2                  5      X# S3'   UR+                  U R2                  5      X# S3'   UR+                  U R2                  5      X# S3'   GM     U$ )Nr   r   r   r   r   r   r   r   r   r@   rS   r   r   r|   )r   r   z.scalesz.zeros)r   r   rm   r.   rn   ro   r"   r:   r   r   r   r;   r   r   rf   r   r   r   r	   rT   r   r2   r#   r$   r   r,   )rM   r   r   r   r   r"   r:   r;   rU   r   r   r   rJ   s                r   r   :Int8DynActInt4WeightQuantizer._create_quantized_state_dict*  s    ))+++-HC#uxx//"//!ooxuE+f\NST"^^3q8 *;-7I$..IYY^_8 +KHH++77'u,YZ .;;-M*!"",>,L(M"  'u,|}]^ !
 4IIdnn-NN))!%!2!2	 3>..2Mg/28))DKK2Hg/16$++1Ff~.U .X r   c                 t    [        UU R                  U R                  U R                  U R                  5        U$ r   )r   r   rf   r#   r   s     r   r   2Int8DynActInt4WeightQuantizer._convert_for_runtime]  s2    NN  NNNN	
 r   r   r   c                 j    U R                  U5      nU R                  U5      nUR                  USS9  U$ r   r   r   s        r   r   &Int8DynActInt4WeightQuantizer.quantizeh  r   r   )r,   r   r   rf   r#   r$   )rY   rZ   r[   r\   r.   r   r,   r   	SYMMETRICr^   r   r)   rE   r   rn   r   r   r   r`   r   r   r   r   rb   rc   rd   s   @r   r   r     s&     %!&(-$||E2$/$9$966 6 ;;	6
  ++6 6 "6 
6 6" ]]_0XX__0	c5<<	 0 0d	%((// 	ehhoo 	XX__-0<?	 r   r   )r
   Nr   )F).r   typingr   r   r   r   r   r.   torch.nnrn   r   r   rU   torchao.dtypes.utilsr   torchao.utilsr	   quant_primitivesr   r   unifiedr   utilsr   r   r   r/   r0   __all__r   ra   r`   r^   r)   r7   r   r   r   rq   rv   r   r   r   r   r   r   r   r   r   <module>r      si    6 6     * '   yy~~$ #^^$)NN||\\ ll 	
  {{ kk<X
588?? X
@ +/"^^$)NN*>2HHOO22 C=2 	2
 h'2 {{2 kk2 uxx'2 2l HL
bi bJ2jX
 X
D #QHHOO#Q#Q #Q {{	#Q
 kk#Q uxx'#Q #QLHHOO  {{	
 kk"WI Wr   