
    he              	          S SK r S SKrS SKJr  S SKrS SKJr  S SKJr  \ R                  " \
5      r\R                  \ R                  5        \ R                  " \R                  5      r\ R"                  " S5      r\R'                  \5        \R)                  \5         SS\R*                  S\S\S\4S	 jjr " S
 S\R2                  5      r0 4S\R2                  4S jjr " S S5      rg)    N)Optional)quantize_per_channel_groupz4%(asctime)s - %(name)s - %(levelname)s - %(message)svals
group_sizenbithas_weight_zerosc           
      v   US:  a  US::  d   eU(       a  SUS-
  -  * nSUS-
  -  S-
  nO
SnSU-  S-
  nU R                   u  pxU R                  SU5      n [        R                  " U SS9u  p[        R                  " U SS9u  pX-
  Xe-
  -  nU(       d  [        R
                  " U5      nOU[        R                  " X-  5      -
  nU R                  Xx5      n UR                  US5      nUR                  US5      n[        U UUUUU(       a  [        R                  O[        R                  US9nU(       d  S nXU4$ )N      r   )axis)inputscaleszero_points	quant_min	quant_maxdtyper   )
shapereshapetorchminmax
zeros_likeroundr   int8uint8)r   r   r   r   signedqminqmaxnkvmins_vmaxsgroup_scalesgroup_zerosgroup_qvalss                  X/home/james-whalen/.local/lib/python3.13/site-packages/torchao/experimental/quant_api.py	_quantizer)      s6    19""tax!dQh1$T	Q::DA<<J'DyyA&HEyyA&HEMdk2L&&|4U[[)=>><<D''2.L%%a,K,"ejjK k11    c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )UIntxWeightOnlyQuantizedLinearE   c                 :   > [         TU ]  5         Xl        X l        g )N)super__init___pack_weights_op
_linear_op)selfpack_weight_op	linear_op	__class__s      r(   r0   'UIntxWeightOnlyQuantizedLinear.__init__F   s    
 	 .#r*   c                 b   X l         X0l        [        XR                  U R                   SSS9u  pEnU* U-  n[        R                  " USS9U l        [        R                  " USS9U l        U R                  UR                  5       5      R                  SS9n[        R                  " USS9U l
        g )NTF)r   r   )requires_gradmps)device)r   r   r)   nn	Parameterweight_scalesweight_zerosr1   cputopacked_weights)r3   weightsr   r   weight_qvalsr>   r?   rB   s           r(   quantize_and_pack_weights8UIntxWeightOnlyQuantizedLinear.quantize_and_pack_weightsO   s    	$4=__dii$u5
1\ %}}4\\-uMLLUK..|/?/?/ABEEUES ll>Or*   c                    UR                  5       S:  d   eUR                  5       S:X  a=  U R                  UU R                  U R                  U R                  U R
                  5      $ UR                  SS nUR                  S   nU R                  R                  S   nU R                  UR                  SU5      U R                  U R                  U R                  U R
                  5      R                  " / UQUP76 $ )N   r   r   )dimr2   rB   r   r>   r?   r   r   )r3   x
lead_shaper!   r    s        r(   forward&UIntxWeightOnlyQuantizedLinear.forward\   s    uuw!||557a<??##""!!  WWQr]
GGBK$$Q'IIb!OO
 ' " "  !" 	"r*   )r2   r1   r   r   rB   r>   r?   )	__name__
__module____qualname____firstlineno__r0   rE   rL   __static_attributes____classcell__)r6   s   @r(   r,   r,   E   s    $P" "r*   r,   modulec           
         US   nUS   n[        U [        R                  5      (       a   eUS:  a  US::  d   eU R                  5        H  u  pE[        U[        R                  5      (       d  [	        XQ5        M1  UR
                  b   e[        [        [        R                  R                  SU S35      [        [        R                  R                  SU S35      S	9n[        XU5        UR                  UR                  X25        M     g )
Nr   r   r
      _pack_weight_bit_linear_fp_act_
bit_weight)r4   r5   )
isinstancer<   Linearnamed_children)_replace_linear_with_quantized_linear_mpsbiasr,   getattrr   opstorchaosetattrrE   weight)rT   kwargsr   r   namechildqlinears          r(   r^   r^   t   s    %J&>D&")),,,,19"",,.%++5eD::%%%4&uyy'8'8M$s:ST!II%%j'IG F'*--ellDM /r*   c                   v    \ rS rSrSSS.S\\   S\\   4S jjrS\R                  S\R                  4S	 jr	S
r
g)UIntxWeightOnlyLinearQuantizer   N)bitwidth	groupsizerl   rm   c                   US:w  a  [        S5      eXl        U[        R                  [        R                  [        R
                  4;  a  [        S5      eX l        Uc  Sn[        R                  SU S35        U[        SS5      ;  a  [        S	5      eX0l        Uc  S
n[        R                  SU S35        US;  a  [        S5      eX@l        g )Nr:   zHOnly device=mps is currently supported in UIntxWeightOnlyLinearQuantizerz[Only precisions float32, float16 & bfloat16 are supported in UIntxWeightOnlyLinearQuantizer   z&bitwidth not specified, defaulting to .r
   r   zDOnly bitwidts 1 to 7 are supported in UIntxWeightOnlyLinearQuantizer   z'groupsize not specified, defaulting to )    @   rq      zQOnly groupsizes 32, 64, 128 & 256 are supported in UIntxWeightOnlyLinearQuantizer)NotImplementedErrorr;   r   float32float16bfloat16
ValueError	precisionloggerwarningrangerl   rm   )r3   r;   rz   rl   rm   s        r(   r0   'UIntxWeightOnlyLinearQuantizer.__init__   s     U?%Z  !KU]]EMM5>>JJm  'NHNNCH:QOP5A;&V  %MINNDYKqQR..c  'Nr*   modelreturnc                     UR                  U R                  5      R                  U R                  5      n[        UU R                  U R
                  S.S9  U$ )N)r   r   )re   )rA   r;   rz   r^   rm   rl   )r3   r   s     r(   quantize'UIntxWeightOnlyLinearQuantizer.quantize   sH    %((81"nn	
 r*   )rl   r;   rm   rz   )rN   rO   rP   rQ   r   intr0   r<   Moduler   rR    r*   r(   rj   rj      sG     #'#'('
 3-(' C=('T	bii 	BII 	r*   rj   )T)loggingsystypingr   r   torch.nnr<   $torch.ao.quantization.fx._decomposedr   	getLoggerrN   r{   setLevelWARNINGStreamHandlerstdouthandler	Formatter	formattersetFormatter
addHandlerTensorr   boolr)   r   r,   r^   rj   r   r*   r(   <module>r      s     
    
		8	$    




+TU	   Y    '  TX'2
,,'2$''2/2'2FJ'2T+"RYY +"^ IK Nbii N,4 4r*   