
    h@M                        S SK r S SKJr  S SKJrJrJrJr  S SKrS SK	J
r
  S SKJr  S SKJrJr  S SKJrJrJrJrJrJr  S SKJrJrJrJrJr  S S	KJr  S S
KJ r   SSK!J"r"   " S S\ RF                  5      r$\ " S S\$5      5       r%\ " S S\$5      5       r&\ " S S\$5      5       r' " S S\'5      r(S\
S\\\$   \\$   4   4S jr)g)    N)	dataclass)AnyOptionalTupleUnion)AOBaseConfig)
e4m3_dtype)FP8Granularity_normalize_granularity)GranularityPerAxisPerGroupPerRow	PerTensorPerToken)_SUB_BYTE_INT_BOUNDS_SUB_BYTE_UINT_BOUNDSMappingTypeTorchAODTypeZeroPointDomain)Int4PackingFormat)_is_float8_type   _log_deprecation_warningc                       \ rS rSrSrSrg)FakeQuantizeConfigBase(   z7
Base class for representing fake quantization config.
 N)__name__
__module____qualname____firstlineno____doc____static_attributes__r       g/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/qat/fake_quantize_config.pyr   r   (   s     	r&   r   c                       \ rS rSr% Sr\r\R                  \S'   \	" 5       r
\\S'   Sr\\   \S'   Sr\\   \S'   S rS	rg)
Float8FakeQuantizeConfig0   a  
Config for float8 fake quantization, targeting :class:`~torchao.quantization.Float8Tensor`.

Args:
   dtype (torch.dtype): the dtype for float8 Tensor
   granularity (FP8Granularity): the granularity for the Tensor, currently either PerRow() or PerTensor()
   hp_value_lb (Optional[float]): the lower bound for high precision floating point value for calculating scale
   hp_value_ub (Optional[float]): the upper bound for high precision floating point value for calculating scale
dtypegranularityNhp_value_lbhp_value_ubc                 2   [        U R                  5      (       d  [        U R                   S35      e[        U R                  [
        5      (       a  [        S5      e[        U R                  5      [        [        4;  a  [        SU R                   35      eg)z7
Verify dtype and granularity are the ones we support.
z is not a float8 dtypez[Please specify the granularity object instead of the class, e.g. PerRow() instead of PerRowz.Expected PerRow or PerTensor granularity, got N)r   r+   
ValueError
isinstancer,   typer   r   selfs    r'   __post_init__&Float8FakeQuantizeConfig.__post_init__A   s     tzz**

|+ABCCd&&--m    !&))<<@AQAQ@RS  =r&   r   )r    r!   r"   r#   r$   r	   r+   torch__annotations__r   r,   r
   r-   r   floatr.   r5   r%   r   r&   r'   r)   r)   0   sD     $E5;;#"((K*#'K%'#'K%'r&   r)   c                   P    \ rS rSr% SrSr\\S'   \r	\
R                  \S'   S rSrg)	Int4WeightFakeQuantizeConfigQ   a  
Config for pint4 weight fake quantization that targets the numerics in the following preshuffled kernel:
    torch.ops.fbgemm.f8i4bf16_shuffled
    torch.ops.fbgemm.bf16i4bf16_shuffled
    torch.ops.fbgemm.bf16i4bf16_rowwise

Currently this only supports float8 input activations. It is expected to be used in conjunction with
:class:`~torchao.quantization.Float8DynamicActivationInt4WeightConfig`. In the future, we may extend
this to support bfloat16 as well.
   
group_sizeactivation_dtypec                 r    U R                   [        [        R                  4;  a  [	        S[         S35      eg )NzOnly z+ or torch.bfloat16 activation are supported)r?   r	   r7   bfloat16r0   r3   s    r'   r5   *Int4WeightFakeQuantizeConfig.__post_init__a   s9      U^^(DD
|#NO  Er&   r   N)r    r!   r"   r#   r$   r>   intr8   r	   r?   r7   r+   r5   r%   r   r&   r'   r;   r;   Q   s'    	 J$.ekk.r&   r;   c                   f  ^  \ rS rSr% Sr\\R                  \4   \	S'   \
\	S'   \\	S'   \R                  \	S'   \R                  \	S'   \\	S'   S	r\\	S
'   Sr\\	S'   Sr\\   \	S'   SS\R&                  \R(                  \R*                  S	SS4SSS.S\\R                  \4   S\\
\S4   S\\   S\R                  S\R                  S\S
\S\S\\   S\\   S\\   4S jjjrS rS\\
\S4   S\\   S\
4S jrS\\   S\\   S\4S jr\S\4S j5       r\S\4S j5       rS\S\4U 4S jjr Sr!U =r"$ )IntxFakeQuantizeConfigh   aM	  
Config for how to fake quantize weights or activations,
targeting integer dtypes up to torch.int8.

Args:
    dtype: dtype to simulate during fake quantization, e.g. torch.int8.
        For PyTorch versions older than 2.6, you may use `TorchAODType` to represent
        torch.int1 to torch.int7 instead, e.g. TorchAODType.INT4.
    granularity: granularity of scales and zero points, e.g. PerGroup(32).
        We also support the following strings:
           1) 'per_token': equivalent to PerToken()
           2) 'per_channel': equivalent to PerAxis(0)
           3) 'per_group': equivalent to PerGroup(group_size), must be combined
               with separate `group_size` kwarg, Alternatively, just set the
               `group_size` kwarg and leave this field empty.
    mapping_type: whether to use symmetric (default) or asymmetric quantization
        Alternatively, set `is_symmetric` (bool) and leave this field empty.
    scale_precision: scale dtype (default torch.fp32)
    zero_point_precision: zero point dtype (default torch.int32)
    zero_point_domain: whether zero point is in integer (default) or float domain
    is_dynamic: whether to use dynamic (default) or static scale and zero points
    range_learning (prototype): whether to learn scale and zero points during training
        (default false), not compatible with `is_dynamic`.

Keyword args:
    group_size: size of each group in per group fake quantization,
        can be set instead of `granularity`
    is_symmetric: whether to use symmetric or asymmetric quantization,
        can be set instead of `mapping_type`

Example usage::

    # Per token asymmetric quantization
    IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
    IntxFakeQuantizeConfig(torch.int8, PerToken(), MappingType.ASYMMETRIC)

    # Per channel symmetric quantization
    IntxFakeQuantizeConfig(torch.int4, "per_channel")
    IntxFakeQuantizeConfig(torch.int4, "per_channel", is_symmetric=True)
    IntxFakeQuantizeConfig(torch.int4, PerAxis(0), MappingType.SYMMETRIC)

    # Per group symmetric quantization
    IntxFakeQuantizeConfig(torch.int4, group_size=32)
    IntxFakeQuantizeConfig(torch.int4, group_size=32, is_symmetric=True)
    IntxFakeQuantizeConfig(torch.int4, "per_group", group_size=32, is_symmetric=True)
    IntxFakeQuantizeConfig(torch.int4, PerGroup(32), MappingType.SYMMETRIC)
r+   r,   mapping_typescale_precisionzero_point_precisionzero_point_domainT
is_dynamicFrange_learningNeps)r>   is_symmetricr>   rN   c
                B   Uc  [        S5      eXl        U R                  X*5      U l        U R	                  X;5      U l        X@l        XPl        X`l        Xpl	        Xl
        Xl        [        R                  [        R                  /nUR                  [!        ["        R$                  " 5       5      5        UR                  [!        [&        R$                  " 5       5      5        X;  a  [        SU< SU< 35      eU(       a  U(       a  [        S5      eU R)                  5         g )Nz/Please use ZeroPointDomain.NONE instead of NonezUnsupported dtype 'z', choose from z4`is_dynamic` is not compatible with `range_learning`)r0   r+   _get_granularityr,   _get_mapping_typerG   rH   rI   rJ   rK   rL   rM   r7   int8uint8extendlistr   keysr   r5   )r4   r+   r,   rG   rH   rI   rJ   rK   rL   rM   r>   rN   
all_dtypess                r'   __init__IntxFakeQuantizeConfig.__init__   s     $NOO
00I 22<N.$8!!2$, jj%++.
$388:;<$499;<="<A:N 
 .STTr&   c                     g)zS
For deprecation only, can remove after https://github.com/pytorch/ao/issues/2630.
Nr   r3   s    r'   r5   $IntxFakeQuantizeConfig.__post_init__   s     	r&   returnc                 `   Ub  US:w  a  Ub  [        SU-  5      e[        U[        5      (       a`  [        U[        [        [
        45      (       d  [        SU-  5      e[        U[        5      (       a  UR                  S:w  a  [        S5      eU$ US:X  a
  [        5       $ US:X  a	  [	        SS9$ US:X  a  Uc  [        S	5      e[        U5      $ [        U[        5      (       a  [        S
U< S/ SQ< 35      eUb  [        SU< S[        U5      < 35      eUc  [        S5      e[        U5      $ )aI  
Parse the `Granularity` represented in the args.

Granularity can be specified in one of three ways:
    1) `Granularity` object: one of PerToken(), PerAxis(), and PerGroup(group_size)
    2) str: one of 'per_token', 'per_channel', and 'per_group'
    3) None: `group_size` must be set instead, represents per group granularity
	per_groupz,`group_size` conflicts with granularity '%s'z!Granularity '%s' is not supportedr   z0Only axis=0 is supported for PerAxis granularity	per_tokenper_channel)axisz7Granularity was 'per_group' but no `group_size` was setzUnexpected granularity: 'z', must be one of )r_   r`   r^   zGranularity 'z' has unexpected type z9At least one of `granularity` or `group_size` must be set)	r0   r1   r   r   r   r   ra   strr2   )r4   r,   r>   s      r'   rP   'IntxFakeQuantizeConfig._get_granularity   sK    "{*'>L 
 k;//kHgx+HII !D{!RSS+w//K4D4D4I !STT +%:M)?"K'! M  J''S)) IK  "[ 13  K  
##r&   c                    Ub  Ub  [        S5      eUc  Uc  [        R                  $ Ub4  U[        R                  [        R                  4;  a  [        SU-  5      eU$ Uc   eU(       a  [        R                  $ [        R                  $ )z
Parse the `MappingType` represented in the args.

Mapping type can be specified in one of two ways:
    1): `MappingType` object: one of SYMMETRIC or ASYMMETRIC
    2): is_symmetric bool
z1Cannot set both `mapping_type` and `is_symmetric`z!MappingType '%s' is not supported)r0   r   	SYMMETRIC
ASYMMETRIC)r4   rG   rN   s      r'   rQ   (IntxFakeQuantizeConfig._get_mapping_type  s     #(@PQQ L$8((( #K$9$9;;Q;Q#RR !D|!STT '''((()))r&   c                     [        U R                  [        5      (       a  U R                  R                  $ [	        SU R                  -  5      e)zU
If this is per group granularity, return the group size.
Otherwise, throw an error.
z,`group_size` is undefined for %s granularity)r1   r,   r   r>   r0   r3   s    r'   r>   !IntxFakeQuantizeConfig.group_size0  sD     d&&11##...>AQAQQ r&   c                 <    U R                   [        R                  :H  $ )zD
Return True if mapping type is symmetric, else False (asymmetric).
)rG   r   re   r3   s    r'   rN   #IntxFakeQuantizeConfig.is_symmetric=  s    
   K$9$999r&   namevaluec                    > US:X  a  [         TU ]  S[        U5      5        gUS:X  a8  U(       a  [        R                  O[        R
                  n[         TU ]  SU5        g[         TU ]  X5        g)z2
Support setting `group_size` and `is_symmetric`.
r>   r,   rN   rG   N)super__setattr__r   r   re   rf   )r4   rl   rm   rG   	__class__s       r'   rp   "IntxFakeQuantizeConfig.__setattr__D  sY     <Gx?^#49;00{?U?ULG=G,r&   )	r+   rM   r,   rK   rG   rL   rH   rJ   rI   )#r    r!   r"   r#   r$   r   r7   r+   r   r8   r   r   r   rK   boolrL   rM   r   r9   float32int32INTrb   rC   rX   r5   rP   rQ   propertyr>   rN   r   rp   r%   __classcell__)rq   s   @r'   rE   rE   h   s   .` l*++[[ ++%&&J ND C%
 6:.2',}},1KK-<-@-@$#( %)'+(U[[,./( ;T12( {+	(
 ( $kk( +( ( ( e_( SM( tn(T:$;T12:$ SM:$ 
	:$x*{+* tn* 
	*@ 
C 
 
 :d : :
- 
-C 
- 
-r&   rE   c                       \ rS rSrSrS rSrg)FakeQuantizeConfigiR  z\
(Deprecated) Please use :class:`~torchao.quantization.qat.IntxFakeQuantizeConfig` instead.
c                     [        U 5        g )Nr   r3   s    r'   r5    FakeQuantizeConfig.__post_init__W  s
     &r&   r   N)r    r!   r"   r#   r$   r5   r%   r   r&   r'   rz   rz   R  s    'r&   rz   base_configr\   c                 	   SSK JnJn  SSKJn  SSKJnJnJnJ	nJ
nJn	  [        X5      (       au  [        [        R                  SU R                   ["        R$                  :H  S9n
[        [        R&                  U R(                  U R*                  ["        R$                  :H  S9nX4$ [        X5      (       Ga  Sn
U R,                  S	:X  aY  [.        R0                  [.        R2                  /nU R4                  U;  a  [7        S
U 35      e[9        S[        R:                  S9nX4$ U R,                  S:X  as  SSKJn  U R@                  [B        RD                  :X  a  U[G        U RH                  5         S   nOU R@                  n[        [        RJ                  U R(                  SUS9nX4$ [7        S[G        U 5       35      e[        X5      (       a  U R,                  S	:w  a  [7        S[G        U 5       S35      e[M        U RN                  5      u  nn[Q        U RR                  UU RT                  U RV                  S9n
[Q        U RX                  US9nX4$ [        X5      (       a'  [Q        [Z        []        5       S9n
[9        S[Z        S9nX4$ [        X5      (       aE  UR^                  (       a  U" U R`                  SSS9n
OSn
U" U R`                  SU Rb                  S9nX4$ [        X5      (       Ga  U R,                  S	:  d   S5       eU Rd                  S:X  d   S5       eU RX                  [        Rf                  :w  d   S5       eU R                   ["        Rh                  :X  d   S5       eU Rj                  ["        R$                  :X  d   S5       eU Rl                  b   S5       e[        [        R                  SSU Rl                  S9n
[        U RX                  U Rn                  U Rj                  U Rl                  S 9nX4$ [        X	5      (       a  U R,                  S	:  d   S5       eU Rd                  S:X  d   S5       eU R*                  ["        R$                  :X  d   S!5       eU RX                  [        Rf                  :w  d   S5       eU Rp                  b   S"5       eSn
[        U RX                  U RN                  U R*                  U Rp                  S 9nX4$ [7        S#U -  5      e)$a  
Given a base post-training quantization (PTQ) config, infer the corresponding
`FakeQuantizeConfigBase`s for both the activations and the weights.
This is called during the prepare phase of QAT.

Return a 2-tuple of (activation_config, weight_config) for fake quantization.
r   )NVFP4InferenceConfigNVFP4MMConfig)NVFP4FakeQuantizeConfig))Float8DynamicActivationFloat8WeightConfig'Float8DynamicActivationInt4WeightConfigInt4WeightOnlyConfig%Int8DynamicActivationInt4WeightConfig%Int8DynamicActivationIntxWeightConfigIntxWeightOnlyConfigr_   )r+   r,   rN   )r+   r>   rN   N   zPacking format must be one of r=   )r>   r?   r   )LAYOUT_TO_ZERO_POINT_DOMAINF)r+   r>   rN   rJ   zUnknown version on base config zOnly version 2 of z is supported)r+   r,   r-   r.   )r+   r,   )use_per_tensor_scaleuse_swizzled_scalesuse_triton_kernelTzOnly version 2+ is supportedunpacked_to_int8z"Only unpacked_to_int8 is supportedzOnly int2+ is supportedz/Only asymmetric activation mapping is supportedz*Only symmetric weight mapping is supportedz.Specifying weight_scale_dtype is not supported)rN   rH   )r+   r,   rG   rH   z#Only symmetric mapping is supportedz'Specifying scale_dtype is not supportedzUnexpected base config: %s)9torchao.prototype.mx_formatsr   r   torchao.prototype.qatr   torchao.quantizationr   r   r   r   r   r   r1   rE   r7   rR   act_mapping_typer   re   int4r>   rG   versionr   PLAINPRESHUFFLEDint4_packing_formatr0   r;   rA   torchao.quantization.quant_apir   rJ   r   NONEr2   layoutuint4r   r,   r)   r?   activation_value_lbactivation_value_ubweight_dtyper	   r   DYNAMICuse_dynamic_per_tensor_scaler   intx_packing_formatint1rf   weight_mapping_typeweight_scale_dtypeweight_granularityscale_dtype)r}   r   r   r   r   r   r   r   r   r   
act_configweight_configsupported_packing_formatsr   	zp_domainact_granularityr   s                    r'   _infer_fake_quantize_configsr   [  s
     +EE+**#$559N9NN


 /**"--$11[5J5JJ
~ &&u 
K	6	6
!#!''!--)% ..6OO 45N4OP  9!&M^ &&W   A% ,,0D0DD7[=O=O8PQRST	'99	2kk&11""+	MB &&u >tK?P>QRSS	K	K	K!#1${2C1DMRSS0F##1
-, ...'#77#77	

 1***
Z &&S 
K	I	I-

 5'
H &&A 
K	6	6  0%0%M%M$)"'J J/!,!I!I $);;
n &&e 
K	G	G""a'G)GG'..2DD 	
0	
D ''5::5P7PP5++{/E/EE 	
=	
E ..+2G2GG 	
8	
G --5 	
<	
5 ,JJ'::	

 /**#66$88'::	
8 &&- 
K	6	6""a'G)GG'..2DD 	
0	
D '';+@+@@ 	
1	
@ ''5::5P7PP5&&. 	
5	
. 
.**#//$11'33	
 && 5CDDr&   )*abcdataclassesr   typingr   r   r   r   r7   torchao.core.configr   torchao.float8.configr	   torchao.float8.inferencer
   r    torchao.quantization.granularityr   r   r   r   r   r   %torchao.quantization.quant_primitivesr   r   r   r   r   (torchao.quantization.quantize_.workflowsr   torchao.utilsr   utilsr   ABCr   r)   r;   rE   rz   r   r   r&   r'   <module>r      s     ! . .  , ,   G ) +	SWW 	 5  @ #9  , e-3 e- e-R'/ 'a'a'
8*+X6L-MMNa'r&   