
    hSm                    (   % S SK r S SKJrJr  S SKJrJrJrJrJ	r	J
r
  S SKrS SKJrJrJr  S SKJrJr  / SQr " S S\5      r " S	 S
\5      r " S S\5      r\R.                  R1                  \\/5        \R2                  \R4                  \R6                  \R8                  1r \R<                  S\R>                  S\R@                  S\RB                  S0r"\\
\RF                  \4   \	\$\$4   4   \%S'   \RL                  S\RN                  S\RP                  S\RR                  S\RT                  S\RV                  S\RX                  S\R<                  S\R>                  S\R@                  S\RB                  S0r-\\
\RF                  \4   \	\$\$4   4   \%S'   0 r.\\
\RF                  \4   \	\$\$4   4   \%S'   \RL                  S\RN                  S\RP                  S \RR                  S!\RT                  S"\RV                  S#\RX                  S$0r/\\
\RF                  \4   \	\$\$4   4   \%S%'   \R`                  S&\Rb                  S'\Rd                  S(\Rf                  S)\Rh                  S*\Rj                  S+\Rl                  S,0r.\-Ro                  \R`                  S\Rb                  S\Rd                  S\Rf                  S\Rh                  S\Rj                  S\Rl                  S05        \/Ro                  \Rp                  S\Rr                  S\Rt                  S \Rv                  S!\Rx                  S"\Rz                  S#\R|                  S$05        \-Ro                  \Rp                  S\Rr                  S\Rt                  S\Rv                  S\Rx                  S\Rz                  S\R|                  S05        \"Ro                  \.5        \"Ro                  \/5        \-R                  5       \"R                  5       :X  d   eS-r@\A" S5       V s/ s H  n \" U 5      PM     sn rB\R                  R                  S.S/5      rE\" \E5      rF " S0 S1\R                  R                  5      rI " S2 S3\R                  R                  5      rJS4 rKS5 rL\R                  " 5         SS6\R                  S7\	\$S84   S9\R                  S:\\R                     S;\RF                  S<\\
\$\O4      S=\\
\$\O4      S>\R                  4S? jj5       rP\F  SS6\R                  S7\\$   S9\R                  S:\\R                     S;\RF                  S<\\
\$\O\Q4      S=\\
\$\O\Q4      S>\R                  4S@ jj5       rRS6\R                  S7\\$   S9\R                  S:\\R                     S<\
\$\O4   S=\
\$\O4   S>\R                  4SA jrS  SS6\R                  S7\\$   S9\R                  S:\\R                     S;\RF                  S<\\
\$\O\Q4      S=\\
\$\O\Q4      S>\R                  4SB jjrT  SS6\R                  S7\	\$S84   S9\R                  S:\\R                     S<\\
\$\O4      S=\\
\$\O4      S>\R                  4SC jjrU  SS6\R                  S7\\$   S9\R                  S:\\R                     S;\RF                  S<\\
\$\O\Q4      S=\\
\$\O\Q4      S>\R                  4SD jjrV  SS6\R                  S7\	\$S84   S9\R                  S:\\R                     S<\\
\$\O4      S=\\
\$\O4      S>\R                  4SE jjrW  S\R                  SF.S6\R                  S7\	\$S84   S9\R                  S:\\R                     SG\RF                  S<\\
\$\O4      S=\\
\$\O4      S;\RF                  S>\R                  4SH jjjrY\FSS\R                  4S6\R                  S7\\$   S9\R                  S:\\R                     SG\RF                  S<\\
\$\O\Q4      S=\\
\$\O\Q4      S;\RF                  S>\R                  4SI jj5       rZ\R                  4S6\R                  S7\\$   S9\R                  S:\\R                     S<\
\$\O4   S=\
\$\O4   S;\RF                  S>\R                  4SJ jjr[\R                  4S6\R                  S7\\$   S9\R                  S:\\R                     S<\
\$\O4   S=\
\$\O4   S;\RF                  S>\R                  4SK jjr\  S\R                  SF.S6\R                  S7\	\$S84   S9\R                  S:\\R                     SG\RF                  S<\\
\$\O4      S=\\
\$\O4      S;\RF                  S>\R                  4SL jjjr]\R                  4S6\R                  S7\\$   S9\R                  S:\\R                     S<\
\$\O4   S=\
\$\O4   S;\RF                  S>\R                  4SM jjr^  S\R                  SF.S6\R                  S7\	\$S84   S9\R                  S:\\R                     SG\RF                  S<\\
\$\O4      S=\\
\$\O4      S;\RF                  S>\R                  4SN jjjr_SS\R                  4S6\R                  S7\	\$S84   S9\R                  S:\\R                     SO\RF                  S<\\
\$\O4      S=\\
\$\O4      SP\S>\R                  4SQ jjraSS\R                  4S6\R                  S7\	\$S84   S9\R                  S:\\R                     SO\RF                  S<\\
\$\O4      S=\\
\$\O4      SP\S>\	\R                  \R                  4   4SR jjrbSS\R                  4S6\R                  S7\	\$S84   S9\R                  S:\\R                     SO\RF                  S<\\
\$\O4      S=\\
\$\O4      SP\S>\	\R                  \R                  4   4SS jjrc\R                  " 5       SSSS\RB                  4S6\R                  ST\S7\	\$   SU\RF                  S<\\
\$\O4      S=\\
\$\O4      SV\\O   SW\\RF                     SX\\RF                     S>\	\R                  \R                  4   4SY jj5       rd\R                  " 5            SS6\R                  ST\S7\	\$   SU\RF                  S<\\
\$\O4      S=\\
\$\O4      SV\\O   SW\\RF                     SX\\RF                     S>\	\R                  \R                  4   4SZ jj5       re     SS6\R                  ST\S7\	\$   SU\RF                  S<\\
\$\O\Q4      S=\\
\$\O\Q4      SV\\O   SW\\RF                     SX\\RF                     S>\	\R                  \R                  4   4S[ jjrfSSSSSS\\R                  4S]\R                  S^\R                  ST\S7\	\$S84   SU\RF                  S<\\$   S=\\$   SV\\O   SW\\RF                     SX\\RF                     S_\QSP\S>\	\R                  \R                  4   4S` jjrg\F     SS6\\R                     ST\hS7\\$   SU\RF                  S<\\
\$\O\Q4      S=\\
\$\O\Q4      SV\\O   SW\\RF                     SX\\RF                     S>\	\R                  \R                  4   4Sa jj5       riSb\R                  Sc\$Sd\$S>\	\R                  \R                  \R                  4   4Se jrjS6\\R                     S7\\$   SU\RF                  S>\	\R                  \R                  \R                  \R                  4   4Sf jrkS6\R                  S7\\$   SU\RF                  Sg\R                  Sh\R                  Si\R                  Sj\R                  S>\R                  4Sk jrl SS6\R                  S7\\$   SU\RF                  Sg\R                  Sh\R                  Si\R                  Sj\R                  S;\\RF                     S>\R                  4Sl jjrm SSb\R                  Sm\R                  Sn\R                  Sc\$Sd\$S;\\RF                     4So jjrnSp\R                  Sq\OSr\OS>\R                  4Ss jro\R                  " 5       S SSStSuSvSwSxS\Sy.4Sz\R                  S9\R                  S{\R                  S|\qS}\$S~\
\RF                  S4   S\
\hS4   S\QS\rS>\s4S jj5       rtS\$S\$S>\Q4S jruS\R                  S9\R                  S{\R                  S\$S\
\\	\R                  4   S>\	4S jrwSSS\S\R                  SStSt\t4	Sz\R                  S\OSd\$S\QS}\$S\RF                  S\hS\QS\QS\S>\s4S jjry\R                  " 5       SxStSS.S\R                  S7\\$   S\$S\$S\$S\QS\OS>\	\R                  \R                  4   4S jj5       rzSz\R                  S\$S\$S>\R                  4S jr{Sz\R                  S9\R                  S\$S\$S>\R                  4
S jr|\R                  4Sz\R                  S9\R                  S\$S\$S;\RF                  S>\R                  4S jjr}\F\R2                  \R                  SS4Sz\R                  S7\\$   S\RF                  SW\RF                  S\\O   S\\O   S>\R                  4S jj5       r~S9\R                  S\R                  S>\R                  4S jr\R2                  4Sz\R                  S9\R                  S\RF                  S>\R                  4S jjr\R                  4Sz\R                  S9\R                  S;\RF                  S>\R                  4S jjr\" \ESt5      \R2                  4Sz\R                  S9\R                  S\RF                  S>\R                  4S jj5       r\" \ES5      \R2                  4Sz\R                  S9\R                  S\RF                  S>\R                  4S jj5       r\" \ESt5      \R                  4Sz\R                  S9\R                  S;\RF                  S>\R                  4S jj5       r\" \ES5      \R                  4Sz\R                  S9\R                  S;\RF                  S>\R                  4S jj5       rgs  sn f )    N)Enumauto)CallableDictListOptionalTupleUnion)_f32_to_floatx_unpacked_floatx_unpacked_to_f32_n_ones)_register_custom_op_register_meta_op)choose_qparams_affine"choose_qparams_affine_with_min_maxquantize_affinedequantize_affineMappingTypeZeroPointDomainTorchAODType_choose_qparams_affine_tinygemm)_choose_qparams_affine_dont_preserve_zero_choose_qparams_affine_floatx'_choose_qparams_and_quantize_affine_hqq+_choose_qparams_and_quantize_scale_only_hqq'_choose_qparams_and_quantize_affine_qqq_choose_scale_float8_choose_qparams_gguf_quantize_affine_no_zero_point_quantize_affine_tinygemm_quantize_affine_floatx_quantize_affine_float8_quantize_gguf _dequantize_affine_no_zero_point_dequantize_affine_tinygemm_dequantize_affine_floatx_dequantize_affine_qqq_dequantize_affine_float8_dequantize_gguf_fake_quantize_affine_fake_quantize_affine_cachemaskc                   B    \ rS rSrSr\" 5       r\" 5       r\" 5       rSr	g)r   7   a  How floating point number is mapped to integer number

symmetric mapping means floating point range is symmetrically mapped to integer range
let's say we have floating point range (-3.5, 10.2) and integer range (-8, 7) (int4)
we'll use (-10.2, 10.2) as the range for floating point and map that to (-8, 7)
e.g. scale = (10.2 - (-10.2)) / (7 - (-8))

SYMMETRIC_NO_CLIPPING_ERR is a variant of symmetric mapping, where the scale is the max of smin
and smax, where smin = min_val_neg / quant_min, and smax = max_val_pos / quant_max. By calculating
smin and smax individually, there can be less round error on negative values, and no out-of-range
of all floating point values.

asymmetric mapping means we just directly map the floating point range to integer range,
for the above example, we will map (-3.5, 10.2) to (-8, 7) and calculate quantization parameter
based on this mapping
e.g. scale = (10.2 - (-3.5)) / (7 - (-8))
 N)
__name__
__module____qualname____firstlineno____doc__r   	SYMMETRICSYMMETRIC_NO_CLIPPING_ERR
ASYMMETRIC__static_attributes__r.       _/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/quant_primitives.pyr   r   7   s    $ I $Jr8   r   c                   B    \ rS rSrSr\" 5       r\" 5       r\" 5       rSr	g)r   O   a8  Enum that indicate whether zero_point is in integer domain or floating point domain

integer domain: quantized_val = (float_val / scale) (integer) + zero_point (integer)
float domain: quantized_val = (float_val - (zero_point (float) - scale * mid_point)) / scale
none domain: quantized_val = (float_val / scale)
r.   N)
r/   r0   r1   r2   r3   r   INTFLOATNONEr7   r.   r8   r9   r   r   O   s     &CFE6Dr8   r   c                   z    \ rS rSrSr\" 5       r\" 5       r\" 5       r\" 5       r	\" 5       r
\" 5       r\" 5       rSrg)r   \   z?
Placeholder for dtypes that do not exist in PyTorch core yet.
r.   N)r/   r0   r1   r2   r3   r   INT1INT2INT3INT4INT5INT6INT7r7   r.   r8   r9   r   r   \   s:     6D6D6D6D6D6D6Dr8   r   )r      )   )i i  )i   i_DTYPE_TO_QVALUE_BOUNDS                               _DTYPE_TO_BIT_WIDTH_SUB_BYTE_UINT_BOUNDS)r   )rL   )rN   )irR   )i   )i   )i?   _SUB_BYTE_INT_BOUNDSr   rL   )r   rN   )r   rR   )r   r[   )r   r\   )r   r]   )r   rJ      torchaoFRAGMENTc                       \ rS rSrSr\S\R                  S\R                  4S j5       r\S\R                  S\R                  4S j5       r	Sr
g	)
_Round   z>
Implementation of generic round operation with backward STE.
xreturnc                 .    [         R                  " U5      $ N)torchround)ctxrf   s     r9   forward_Round.forward   s    {{1~r8   gyc                     U$ ri   r.   rl   ro   s     r9   backward_Round.backward   s    	r8   r.   N)r/   r0   r1   r2   r3   staticmethodrj   Tensorrm   rr   r7   r.   r8   r9   rd   rd      s[         %,, 5<<  r8   rd   c                       \ rS rSrSr\S\R                  S\R                  S\R                  4S j5       r	\S\R                  S\R                  4S j5       r
S	rg
)_RoundToFloat8   z@
Implementation of `tensor.to(float8_dtype)` with backward STE.
rf   float8_dtyperg   c                 $    UR                  U5      $ ri   )to)rl   rf   ry   s      r9   rm   _RoundToFloat8.forward   s    ttL!!r8   ro   c                 
    US 4$ ri   r.   rq   s     r9   rr   _RoundToFloat8.backward   s    4xr8   r.   N)r/   r0   r1   r2   r3   rt   rj   ru   dtyperm   rr   r7   r.   r8   r9   rw   rw      sf     " "EKK "ELL " " %,, 5<<  r8   rw   c                 >   U [         ;   a@  [        R                  " U 5      R                  [        R                  " U 5      R                  pCO#U [
        ;  a  [        SU  35      e[
        U    u  p4Uc  UnUc  UnX:  d   SU SU 35       eX$::  d   SU SU 35       eX4$ )a  Get quant_min and quant_max args based on dtype and also verify bounds.

Args:
    dtype: Target quantization dtype (e.g., torch.uint8, torch.int8, or FP8 types)
    quant_min: Minimum quantized value, or None to use dtype default
    quant_max: Maximum quantized value, or None to use dtype default

Returns:
    Tuple[int/float, int/float]: Validated (quant_min, quant_max) values

Raises:
    ValueError: If dtype is unsupported
    AssertionError: If quant_min/quant_max are out of bounds for dtype
zUnsupported dtype: z9quant_min out of bound for dtype, quant_min_lower_bound: z quant_min: z9quant_max out of bound for dtype, quant_max_upper_bound: z quant_max: )	FP8_TYPESrj   finfominmaxrK   
ValueError)r   	quant_min	quant_maxquant_min_lower_boundquant_max_upper_bounds        r9   _get_and_check_qmin_qmaxr      s     	KK""KK""  5 
-	-.ug6777Nu7U4)	)	- 	""7!8YK	Q-
 - 	""7!8YK	Q- r8   c                    [        U 5      [        U5      :X  d   e/ n/ nSn[        [        U 5      5       H  nX   X   :w  ar  X   S:  aj  X   X   -  S:X  d   SU SX    SU SX    35       eUR                  X   X   -  5        UR                  X   5        UR                  US-   5        US-  nM  UR                  X   5        X   S:w  a  UR                  U5        US-  nM     X#4$ )a  Given block_size and input size find the parameters for reduction:

Output:
    shape_for_reduction: the shape we use to `view` input to prepare it for reduction
    reduction_dims: the dims we'll do reduction over

Example::
    Input:
      block_size: (3, 3, 2, 10)
      input_size: (3, 3, 10, 10)

    Output:
      shape_for_reduction: (3, 3, 5, 2, 10)
      reduction_dim: [0, 1, 3, 4]
r   rL   zExpecting input size at z dimension: z" to be divisible by block_size at rM   )lenrangeappend)
block_size
input_sizeshape_for_reductionreduction_dimscur_dimis         r9   _get_reduction_paramsr     s*     z?c*o---NG3z?#=JM)jma.?=:=0A5 *1#\*-Hjkljmmy  {E  {H  zI  J5  &&z}
'EF&&z}5!!'A+.qLG  &&z}5 }!%%g.qLG# $$ ..r8   inputr   .scale
zero_pointoutput_dtyper   r   rg   c           	      $    [        U UUUUUU5      $ )a  
Args:
  input (torch.Tensor): original float32, float16 or bfloat16 Tensor
  block_size: (Tuple[int, ...]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
       e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
  scale (float): quantization parameter for affine quantization
  zero_point (int): quantization parameter for affine quantization
  output_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
  quant_min (Optional[int]): minimum quantized value for output Tensor, if not specified, it will be derived from dtype
  quant_max (Optional[int]): maximum quantized value for output Tensor, if not specified, it will be derived from dtype

Note:
  How can block_size represent different granularities?
  let's say we have a Tensor of size: (3, 3, 10, 10), here is the table showing how block_size represents different
  granularities:

   granularity type       |     block_size
     per_tensor           |    (3, 3, 10, 10)
     per_axis (axis=0)    |    (1, 3, 10, 10)
     per_axis (axis=1)    |    (3, 1, 10, 10)
 per_group (groupsize=2)  |    (3, 3, 10, 2)
 per_group (groupsize=2) for axis = 3 | (3, 3, 2, 10)


Output:
  quantized tensor with requested dtype
)_quantize_affiner   r   r   r   r   r   r   s          r9   r   r   @  s'    J  r8   c                     [        XEU5      u  pVU[        ;   a  [        R                  n[	        U UUUUU5      R                  U5      $ )a#  Quantize tensor using affine quantization with integer zero point domain.

Op definition that has compatible signatures with custom op library.

Args:
    input: Input tensor to quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    output_dtype: Target quantized dtype (e.g., torch.uint8, torch.int8)
    quant_min: Minimum quantized value, derived from dtype if None
    quant_max: Maximum quantized value, derived from dtype if None

Returns:
    Quantized tensor with requested dtype

Note:
    zero_point_domain is pre-defined as INT, meaning:
    quantized_val = (float_val / scale) (integer) + zero_point (integer)
)r   rW   rj   uint8_quantize_affine_no_dtype_castr{   r   s          r9   r   r   p  sR    < 4LYWI ,,{{) 	br8   c                    U R                   [        R                  [        R                  [        R                  4;   d   SU R                    35       e[        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        XR                  5       5      u  pgU R                  nU R                  U5      n Un	U H  n
SX'   M	     UR                  U	5      nUb&  UR                  5       S:  a  UR                  U	5      nOSn[        R                  " [        R                  U SU-  -  5      U-   XE5      nUR                  U5      nU$ )aa  Quantize tensor using affine quantization without dtype casting.

Performs quantization with integer zero point domain without casting to target dtype.

Args:
    input: Input tensor to quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    quant_min: Minimum quantized value
    quant_max: Maximum quantized value

Returns:
    Quantized tensor without dtype casting

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Quantize the input based on the quantization parameters scale and zero_point with zero_point_domain = INT
3. Reshape the quantized result to original shape
Unsupported input dtype: Got input dim:, block_size: rL   Nr         ?r   rj   float32float16bfloat16r   dimr   sizeshapeviewnumelclamprd   applyr   r   r   r   r   r   r   r   original_shapeshape_after_reductionr   quants               r9   r   r     sG   > ;;  1 
#5;;-0	1 
 z?eiik) 
^J<@) +@JJL+' [[NJJ*+E/#$  JJ,-E*"2"2"4q"8__%:;
 
KKUcEk*+j8)E JJ~&ELr8   c                     [        XEU5      u  pVU[        ;   a  [        R                  n[	        U UUUUU5      R                  U5      $ )a  Quantize tensor using affine quantization with float zero point domain for tinygemm.

Specialized quantization for tinygemm int4mm kernel where zero point is in floating point domain.

Args:
    input: Input tensor to quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    output_dtype: Target quantized dtype (e.g., torch.uint8, torch.int8)
    quant_min: Minimum quantized value, derived from dtype if None
    quant_max: Maximum quantized value, derived from dtype if None

Returns:
    Quantized tensor with requested dtype

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Quantize the input based on the quantization parameters scale and zero_point with zero_point_domain = FLOAT
3. Reshape the quantized result to original shape

Note:
    zero_point_domain is pre-defined as FLOAT, meaning:
    quantized_val = (float_val - (zero_point (float) - scale * mid_point)) / scale
)r   rW   rj   r   '_quantize_affine_tinygemm_no_dtype_castr{   r   s          r9   r    r      sS    F 4LYWI ,,{{2 	br8   c                    U R                   [        R                  [        R                  [        R                  4;   d   SU R                    35       e[        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        XR                  5       5      u  pgU R                  nU R                  U5      n Un	U H  n
SX'   M	     UR                  U	5      nUb&  UR                  5       S:  a  UR                  U	5      nOSnXT-   S-   S-  nX2U-  -
  n[        R                  " [        R                  X-
  U-  5      XE5      nUR                  U5      nU$ )a  Quantize tensor using affine quantization with float zero point domain without dtype casting.

Specialized quantization for tinygemm int4mm kernel where zero point is in floating point domain.

Args:
    input: Input tensor to quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    quant_min: Minimum quantized value
    quant_max: Maximum quantized value

Returns:
    Quantized tensor without dtype casting

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Quantize the input based on the quantization parameters scale and zero_point with zero_point_domain = FLOAT
3. Reshape the quantized result to original shape
r   r   r   rL   Nr   rM   r   )r   r   r   r   r   r   r   r   r   r   r   	mid_pointmin_valr   s                 r9   r   r     sZ   > ;;  1 
#5;;-0	1 
 z?eiik) 
^J<@) +@JJL+' [[NJJ*+E/#$  JJ,-E*"2"2"4q"8__%:;
 
&*a/I9,,GKKeo%>?VEJJ~&ELr8   c                     [        XEU5      u  pVU[        ;   a  [        R                  n[	        U UUUUU5      R                  U5      $ )a  Quantize tensor using affine quantization without zero point.

Specialized quantization for cases where zero point is not needed (e.g., floatx quantization).

Args:
    input: Input tensor to quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (ignored, should be None)
    output_dtype: Target quantized dtype (e.g., torch.uint8, torch.int8)
    quant_min: Minimum quantized value, derived from dtype if None
    quant_max: Maximum quantized value, derived from dtype if None

Returns:
    Quantized tensor with requested dtype

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Quantize the input based on the quantization parameters scale with zero_point_domain = NONE
3. Reshape the quantized result to original shape

Note:
    zero_point_domain is pre-defined as NONE, meaning:
    quantized_val = (float_val / scale) | This is primarily used for floatx quantization
    where we do not want to round values to nearest integer and instead scale and cast.
)r   rW   rj   r   ,_quantize_affine_no_zero_point_no_dtype_castr{   r   s          r9   r   r   O  sS    H 4LYWI ,,{{7 	br8   c                    U R                   [        R                  [        R                  [        R                  4;   d   SU R                    35       e[        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        XR                  5       5      u  pgU R                  nU R                  U5      n Un	U H  n
SX'   M	     UR                  U	5      nUb&  UR                  5       S:  a  UR                  U	5      nOSn[        R                  " [        R                  U SU-  -  5      XE5      nUR                  U5      nU$ )a  Quantize tensor using affine quantization without zero point and without dtype casting.

Specialized quantization for cases where zero point is not needed without casting to target dtype.

Args:
    input: Input tensor to quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (ignored, should be None)
    quant_min: Minimum quantized value
    quant_max: Maximum quantized value

Returns:
    Quantized tensor without dtype casting

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Quantize the input based on the quantization parameters scale with zero_point_domain = NONE
3. Reshape the quantized result to original shape
r   r   r   rL   Nr   r   r   r   s               r9   r   r     s=   > ;;  1 
#5;;-0	1 
 z?eiik) 
^J<@) +@JJL+' [[NJJ*+E/#$  JJ,-E*"2"2"4q"8__%:;
 
KKUcEk%:;YREJJ~&ELr8   r   input_dtypec                "    [        U UUUUUUUS9$ )a  
Args:
  input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
  block_size: (List[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
                           e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
  scale (Tensor): quantization parameter for affine quantization
  zero_point (Tensor): quantization parameter for affine quantization
  input_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
  quant_min (Optional[int]): minimum quantized value for input Tensor
  quant_max (Optional[int]): maximum quantized value for input Tensor
  output_dtype (torch.dtype): dtype for output Tensor, default is fp32

  Default value for zero_point is in integer domain, zero point is added to the quantized integer value during quantization

Output:
  dequantized Tensor, with requested dtype or fp32
r   )_dequantize_affiner   r   r   r   r   r   r   r   s           r9   r   r     s)    8 !	 	r8   c           	         U[         ;  a'  U R                  U:X  d   SU SU R                   35       eU[        R                  [        R                  [        R
                  4;   d
   SU 35       e[        XEU5      u  pV[        U UUUUUU5      $ )a  Dequantize tensor using affine dequantization with integer zero point domain.

Op definition that has compatible signatures with custom op library.

Args:
    input: Quantized tensor to dequantize
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    input_dtype: Expected dtype of input tensor (e.g., torch.uint8, torch.int8)
    quant_min: Minimum quantized value for input tensor
    quant_max: Maximum quantized value for input tensor
    output_dtype: Target output dtype (default: torch.float32)

Returns:
    Dequantized tensor with requested output dtype

Expected: , got: Unsupported output dtype: )rW   r   rj   r   r   r   r   !_dequantize_affine_no_dtype_checkr   s           r9   r   r     s    : //{{k) 	
WU[[M:	
)   3 
$L>2	3 
 4KIVI, r8   c                    [        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        XR                  5       5      u  pxU R                  n	U R                  U5      n Un
U H  nSX'   M	     UR                  U
5      nUb  UR                  U
5      nU R                  USS9nUb  XR                  U5      -
  nX-  nUR                  U	5      R                  U5      $ )a  Dequantize tensor using affine dequantization without dtype checking.

Converts quantized tensors to their high precision floating point representation.

Args:
    input: Quantized tensor to dequantize
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    quant_min: Minimum quantized value for input tensor
    quant_max: Maximum quantized value for input tensor
    output_dtype: Target output dtype (default: torch.float32)

Returns:
    Dequantized tensor with requested output dtype

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Dequantize the input based on the quantization parameters scale and zero_point
3. Reshape the quantized result to original shape and change dtype to the output_dtype
r   r   rL   T)copyr   r   r   r   r   r   r{   r   r   r   r   r   r   r   r   r   r   r   r   dequants                r9   r   r     s    > z?eiik) 
^J<@) +@JJL+' [[NJJ*+E/#$  JJ,-E__%:;
 hh|$h/GMM,77oG<<'**<88r8   c                    [        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        XR                  5       5      u  pxU R                  n	U R                  U5      n Un
U H  nSX'   M	     UR                  U
5      nUb   S5       eU R                  U5      nX-  nUR                  U	5      R                  U5      $ )a  Dequantize tensor using affine dequantization without zero point and without dtype checking.

Converts quantized tensors to their high precision floating point representation without zero point.

Args:
    input: Quantized tensor to dequantize
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (ignored, should be None)
    quant_min: Minimum quantized value for input tensor
    quant_max: Maximum quantized value for input tensor
    output_dtype: Target output dtype (default: torch.float32)

Returns:
    Dequantized tensor with requested output dtype

The op does the following:
1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. Dequantize the input based on the quantization parameters scale (no zero point)
3. Reshape the quantized result to original shape and change dtype to the output_dtype
r   r   rL   z>zero_point should be None for _dequantize_affine_no_zero_pointr   r   s                r9   /_dequantize_affine_no_zero_point_no_dtype_checkr   S  s    > z?eiik) 
^J<@) +@JJL+' [[NJJ*+E/#$  JJ,-E H hh|$GoG<<'**<88r8   c          	         U[         ;  a'  U R                  U:X  d   SU SU R                   35       eU[        R                  [        R                  [        R
                  4;   d
   SU 35       e[        XEU5      u  pV[        U UUUUUU5      $ )a  
Args:
  input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
  block_size: (List[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
                           e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
  scale (Tensor): quantization parameter for affine quantization
  zero_point (Tensor): quantization parameter for affine quantization, no zero point is used for this op
  input_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
  quant_min (Optional[int]): minimum quantized value for input Tensor
  quant_max (Optional[int]): maximum quantized value for input Tensor
  output_dtype (torch.dtype): dtype for output Tensor, default is fp32

  Default value for zero_point is in integer domain, zero point is added to the quantized integer value during quantization

Output:
  dequantized Tensor, with requested dtype or fp32
r   r   r   )rW   r   rj   r   r   r   r   r   r   s           r9   r$   r$     s    : //{{k) 	
WU[[M:	
)   3 
$L>2	3 
 4KIVI: r8   c                    [        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        XR                  5       5      u  pxU R                  n	U R                  U5      n Un
U H  nSX'   M	     UR                  U
5      nUb  UR                  U
5      nXT-   S-   S-  nX-
  nUR                  U5      nX-  nUb  X-  nUR                  U	5      R                  U5      $ )a  This function converts AQT tensors to their high precision floating point representation

The op does the following:
1. figure out the dimension for reduction based on block_size, also reshape the input to align with
   the shape after reduction
2. dequantize the input based on the quantization parameters scale and zero_point and args like zero_point_domain
3. reshape the quantized result to origianl shape and change dtype to the output_dtype
r   r   rL   rM   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                 r9   *_dequantize_affine_tinygemm_no_dtype_checkr     s   " z?eiik) 
^J<@) +@JJL+' [[NJJ*+E/#$  JJ,-E__%:;
 &*a/IGjj&GG<<'**<88r8   c          	         U[         ;  a'  U R                  U:X  d   SU SU R                   35       eU[        R                  [        R                  [        R
                  4;   d
   SU 35       e[        XEU5      u  pV[        U UUUUUU5      $ )a  
Args:
  input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
  block_size: (List[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
                           e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
  scale (Tensor): quantization parameter for affine quantization
  zero_point (Tensor): quantization parameter for affine quantization
  input_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
  quant_min (Optional[int]): minimum quantized value for input Tensor
  quant_max (Optional[int]): maximum quantized value for input Tensor
  output_dtype (torch.dtype): dtype for output Tensor, default is fp32

  Default value for zero_point is in floating point domain, zero point is subtracted from the floating point (unquantized)

Output:
  dequantized Tensor, with requested dtype or fp32
r   r   r   )rW   r   rj   r   r   r   r   r   r   s           r9   r%   r%     s    : //{{k) 	
WU[[M:	
)   3 
$L>2	3 
 4KIVI5 r8   quant_dtypezero_point_domainc           
          Uc  [        S5      eU[        R                  L a  Ub  [        S5      e[        U UUUUUUU5      u  pU	$ )aR  
General fake quantize op for quantization-aware training (QAT).
This is equivalent to calling `quantize_affine` + `dequantize_affine`
but without the dtype casts.

Args:
  input (torch.Tensor): original float32, float16 or bfloat16 Tensor
  block_size: (Tuple[int, ...]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
       e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
  scale (float): quantization parameter for affine quantization
  zero_point (int): quantization parameter for affine quantization
  quant_dtype (torch.dtype): desired quantized dtype for determining and validating quant_min and quant_max values.
  quant_min (Optional[int]): minimum quantized value for output Tensor, if not specified, it will be derived from dtype
  quant_max (Optional[int]): maximum quantized value for output Tensor, if not specified, it will be derived from dtype
  zero_point_domain (ZeroPointDomain): the domain that zero_point is in, should be either integer or float
    if zero_point is in integer domain, zero point is added to the quantized integer value during
    quantization
    if zero_point is in floating point domain, zero point is subtracted from the floating point (unquantized)
    value during quantization
    default is ZeroPointDomain.INT
/Please use ZeroPointDomain.NONE instead of None8zero_point should be None when zero_point_domain is NONE)r   r   r>   _do_fake_quantize_affine)
r   r   r   r   r   r   r   r   _fqs
             r9   r*   r*     s`    >  JKK	o22	2z7MSTT&	GQ Ir8   c           
          Uc  [        S5      eUc  Ub  [        S5      e[        U UUUUUUU5      u  p[        R                  " X:  X:*  5      n
X4$ )a0  
General fake quantize op for quantization-aware training (QAT).
This is equivalent to calling `quantize_affine` + `dequantize_affine`
but without the dtype casts.

Note: Compared to :func:`~torchao.quantization.quant_primitives._fake_quantize_affine`,
this consumes more memory and returns an additional outlier mask for
intermediate quantized values.

Args:
  Same as :func:`~torchao.quantization.quant_primitives._fake_quantize_affine`.

Returns:
  A 2-tuple of (
      final fake quantized values,
      outlier mask for intermediate quantized values
  )

r   r   )r   r   rj   logical_and)r   r   r   r   r   r   r   r   qdqmasks              r9   r+   r+   I  sq    :  JKK		"z'=STT&	GQ an@D:r8   c           
      P   U R                   n[        XEU5      u  pVU[        R                  :X  a  [        n	[
        n
OPU[        R                  :X  a  [        n	[        n
O/U[        R                  :X  a  [        n	[        n
O[        SU 35      eU	" U UUUUU5      nU
" UUUUUUUS9nX4$ )a  Helper function for fake quantization that returns both intermediate and final values.

Performs quantization followed by dequantization without dtype casting, returning both
the intermediate quantized values and the final dequantized values.

Args:
    input: Input tensor to fake quantize (float32, float16, or bfloat16)
    block_size: Granularity of quantization - size of tensor elements sharing same qparam
    scale: Quantization scale parameter
    zero_point: Quantization zero point parameter (optional)
    quant_dtype: Target quantized dtype for determining quant_min/quant_max
    quant_min: Minimum quantized value, derived from dtype if None
    quant_max: Maximum quantized value, derived from dtype if None
    zero_point_domain: Domain of zero point (INT, FLOAT, or NONE)

Returns:
    Tuple of (intermediate quantized values, final dequantized values)

Helper function for `_fake_quantize_affine` that returns both the
intermediate quantized values and the final dequantized values.
z Unrecognized zero point domain: r   )r   r   r   r<   r   r   r=   r   r   r>   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   s                r9   r   r   x  s    > ++K3KIVIO///9>	o33	3BG	o22	2GL;<M;NOPP	A 
	 
B 7Nr8   mapping_typetarget_dtypeepsscale_dtypezero_point_dtypec	                 <    [        U UR                  UUUUUUU5	      $ )a  
Args:
    input (torch.Tensor): fp32, bf16, fp16 input Tensor
    mapping_type (MappingType): determines how the qparams are calculated, symmetric or asymmetric
    block_size: (Tuple[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
      e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
    target_dtype (torch.dtype): dtype for target quantized Tensor
    quant_min (Optional[int]): minimum quantized value for target quantized Tensor
    quant_max (Optioanl[int]): maximum quantized value for target quantized Tensor
    eps (Optional[float]): minimum scale, if not provided, default to eps of input.dtype
    scale_dtype (torch.dtype): dtype for scale Tensor
    zero_point_dtype (torch.dtype): dtype for zero_point Tensor, defaults to torch.int32
    Now removed params:
        zero_point_domain (ZeroPointDomain): the domain that zero_point is in, defaults to Integer or None
        preserve_zero (bool): whether to preserve zero in the quantized Tensor, defaults to True

Output:
    Tuple of scales and zero_points Tensor with requested dtype
)_choose_qparams_affinename)	r   r   r   r   r   r   r   r   r   s	            r9   r   r     s2    > "
 
r8   c	                    [        X4U5      u  pEU[        R                  L d
   SU 35       eUc  U R                  nUc*  [        R
                  " U R                  5      R                  n[        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        X R                  5       5      u  pU R                  U	5      n [        R                  " X
SS9n[        R                  " X
SS9nUnUnX-
  [        XT-
  5      -  n[        R                  " XS9nXT-   S-   S-  nXU-  -   nUc  U R                  nUR!                  US	9nUR!                  XpR"                  S
9U4$ )a  
Specialized version of choose_qparams_affine

This is used for tinygemm int4mm kernel where zero point is in floating point domain
and zero does not have to be exactly representable.

Args:
    input (torch.Tensor): fp32, bf16, fp16 input Tensor
    mapping_type (MappingType): determines how the qparams are calculated, symmetric or asymmetric
    block_size: (Tuple[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
    target_dtype (torch.dtype): dtype for target quantized Tensor
    quant_min (Optional[int]): minimum quantized value for target quantized Tensor
    quant_max (Optioanl[int]): maximum quantized value for target quantized Tensor
    eps (Optional[float]): minimum scale, if not provided, default to eps of input.dtype
    scale_dtype (torch.dtype): dtype for scale Tensor
    zero_point_dtype (torch.dtype): dtype for zero_point Tensor

Output:
    Tuple of scales and zero_points Tensor with requested dtype
Unsupported mapping type: r   r   Fr   keepdimr   rL   rM   r   r   device)r   r   r6   r   rj   r   r   r   r   r   r   r   aminamaxfloatr   r{   r   )r   r   r   r   r   r   r   r   r   r   r   r   max_valmin_val_negmax_val_posr   r   r   s                     r9   r   r     sr   @ 4LYWI;111 
$\N31 kk
{kk%++&**z?eiik) 
^J<@) +@JJL+' JJ*+EjjEBGjjEBG KK&%	0E*FFEKK'E &*a/Iy00J ;;%56J88+ll8;ZGGr8   c	                    [        X4U5      u  pEU[        R                  :X  d
   SU 35       eUc  U R                  nUc*  [        R
                  " U R                  5      R                  n[        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        X R                  5       5      u  pU R                  U	5      n [        R                  " X
SS9n[        R                  " X
SS9nUnUnX-
  [        XT-
  5      -  n[        R                  " XS9nU[         R#                  X-  5      -
  n[        R                  " UXE5      nUc  [        R$                  nUR'                  XpR(                  S9UR'                  US94$ )	a|  Specialized version of choose_qparams_affine with zero_point_domain=ZeroPointDomain.INT and preserve_zero=False.

Args:
    input (torch.Tensor): fp32, bf16, fp16 input Tensor
    mapping_type (MappingType): determines how the qparams are calculated, asymmetric only
    block_size: (Tuple[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
    target_dtype (torch.dtype): dtype for target quantized Tensor
    quant_min (Optional[int]): minimum quantized value for target quantized Tensor
    quant_max (Optioanl[int]): maximum quantized value for target quantized Tensor
    eps (Optional[float]): minimum scale, if not provided, default to eps of input.dtype
    scale_dtype (torch.dtype): dtype for scale Tensor
    zero_point_dtype (torch.dtype): dtype for zero_point Tensor
    Now removed params default values:
        zero_point_domain (ZeroPointDomain): the domain that zero_point is in, defaults to Integer
        preserve_zero (bool): whether to preserve zero in the quantized Tensor, defaults to False

Output:
    Tuple of scales and zero_points Tensor with requested dtype
r   r   r   Fr   r   r   r   )r   r   r6   r   rj   r   r   r   r   r   r   r   r   r   r   r   rd   r   int32r{   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                    r9   r   r   -  s{   < 4LYWI;111 
$\N31 kk
{kk%++&**z?eiik) 
^J<@) +@JJL+' JJ*+EjjEBGjjEBG KK&%	0E*FFEKK'EV\\+*=>>JZ>J ;;88+ll8;Z]] >K >  r8   Tr   r   preserve_zeroc                    Uc  [        S5      e[        XEU5      u  pVU[        R                  [        R                  [        R
                  4;   d
   SU 35       eU b  Uc   S5       eU R                  UR                  :X  d   S5       eUc  U R                  nUc*  [        R                  " U R                  5      R                  nU R                  nU
(       aW  [        R                  " U [        R                  " U 5      5      n[        R                  " U[        R                  " U5      5      nOU nUnU[        R                  :X  d  U[        R                  :X  Ga  U[        R                  :X  a,  [        R                  " U* U5      nU[        Xe-
  5      S-  -  nOOU[        R                  :X  d   eU[        U5      -  nU[        U5      -  nUU:  n[        R                  " UUU5      nU
(       d  [        S5      eU[         R"                  :X  a  [        S5      eU[         R$                  :X  a  SnO([        R&                  " U[)        Xe-   S	-   S-  5      5      n[        R*                  " XS
9nOU[        R
                  :X  d   eX-
  [        R,                  " [        Xe-
  5      XS9-  n[        R*                  " XS
9nU[         R$                  :X  a  SnOU[         R.                  :X  aE  U[0        R3                  X-  5      -
  n[        R*                  " UXV5      nU	c  [        R4                  n	O,U[         R"                  :X  d   S5       eXe-   S	-   S-  nXU-  -   nUb  UR7                  U	S9nUR7                  XR                  S9U4$ )aL  A variant of :func:`~torchao.quantization.quant_primitives.choose_qparams_affine`
operator that pass in min_val and max_val directly instead of deriving these from a single input.
This is used for observers in static quantization where min_val and max_val may be obtained through
tracking all the data in calibration data set.

Args:
  Mostly same as :func:`~torchao.quantization.quant_primitives.choose_qparams_affine`. with one
  difference: instead of passing in `input` Tensor and use that to calculate min_val/max_val
  and then scale/zero_point, we pass in min_val/max_val directly
Nr   r   z@Need to provide `min_val` and `max_val`, got: {min_val, max_val}z]Expecting `min_val` and `max_val` to have the same dtype, got: {min_val.dtype, max_val.dtype}rM   zBpreserve_zero == False is not supported for symmetric quantizationzbzero_point_domain should be ZeroPointDomain.INT or ZeroPointDomain.NONE for symmetric quantizationrL   r   r   zGzero_point must be in FLOAT/INT/None domain for asymmetric quantizationr   )r   r   r   r4   r5   r6   r   rj   r   r   r   r   
zeros_liker   r   wherer   r=   r>   	full_likeintr   tensorr<   rd   r   r   r{   )r   r   r   r   r   r   r   r   r   r   r   r   scale_devicer   r   r   sminsmaxr   r   r   s                        r9   r   r   q  sU   0  JKK3LYWI--  3 
$L>2	3  7#6 J6 ==GMM) g) mm
{kk'--(,,>>Lii)9)9')BCii)9)9')BC 	---;@@@ ;000))[L+>K5)>#?!#CDE;#H#HHHH y!11Dy!11D$;DKKdD1ET   5 55 t   4 44JY5JQ5NRS4S0TUJE+{55555*ell)'(/
 
 E+ 4 44J/"5"55"V\\+2E%FFJZFJ'#(;; $(=(== Y= #.2a7I %y'88J]])9]:
88+nn8=zIIr8   c	                    [        X4U5      u  pEU[        R                  R                  [        R                  R                  [        R
                  R                  4;   d
   SU 35       eUc  U R                  nUc*  [        R                  " U R                  5      R                  n[        U5      U R                  5       :X  d   SU R                  5        SU 35       e[        X R                  5       5      u  pU R                  U	5      n [        R                  " X
SS9n[        R                   " X
SS9n[        R"                  " U[        R$                  " U5      5      n[        R&                  " U[        R$                  " U5      5      nU[        R                  R                  :X  d  U[        R                  R                  :X  a  U[        R                  R                  :X  a,  [        R&                  " U* U5      nU[)        XT-
  5      S-  -  nOYU[        R                  R                  :X  d   eU[)        U5      -  nU[)        U5      -  nUU:  n[        R*                  " UUU5      n[        R,                  " U[/        XT-   S-   S-  5      5      n[        R0                  " XS9nOU[        R
                  R                  :X  d   eX-
  [)        XT-
  5      -  n[        R0                  " XS9nU[2        R5                  X-  5      -
  n[        R0                  " UXE5      nUc  [        R6                  nUR9                  XpR:                  S	9UR9                  US
94$ )aW  op definition that has compatible signatures with custom op library

The op does the following:
1. figure out the dimension for reduction based on block_size
2. find min_val/max_val based on the dimension for reduction
3. calculate quantization parameters based on min_val/max_val based on args like `preserve_zero`
   and `zero_point_domain`
r   r   r   Fr   rM   rL   r   r   r   )r   r   r4   r   r5   r6   r   rj   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rd   r   r   r{   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                       r9   r   r     s   ( 4LYWI""--22##  3 
$L>2	3  kk
{kk%++&**z?eiik) 
^J<@) +@JJL+' JJ*+EjjEBGjjEBG))GU%5%5g%>?K))GU%5%5g%>?K 	--222;@@EEE ;00555))[L+>K5)>#?!#CDE;#H#H#M#MMMM y!11Dy!11D$;DKKdD1E__UC1F1Ja0O,PQ
E+{55:::::*eI4I.JJE+k.A!BB
[[YB
#${{88+ll8;Z]] >K >  r8   wnum_bits
group_sizec                   ^^ US:X  d
   SU 35       eU R                   u  mmUSST4;   d
   SU 35       eU R                  nUS:X  a  TnUT:  Ga  U R                  SU45      n SU-  S-
  nUS-   S-  n[        R                  " [        R
                  " U 5      SSS	9nUSU-  -  n[        R                  X-  5      R                  5       nXu-  n[        R                  " US
U5      nXu-
  R                  5       U-  nUU4S jn	U	" U5      nU	" U5      n[        R                  " [        R
                  " U5      SSS	9n
U
S-  n
X-  R                  5       R                  SS5      R                  [        R                  5      nUR                  5       U
-  nU
R                  SS5      R                  [        R                  S9n
UR                  TS5      R                  5       U
-  R                  [        R                  S9nOSUS-
  -  S-
  n[        R                  " [        R
                  " U 5      SSS	9n
X-  n
[        R                  X
-  5      R                  5       n[        R                  " Xt* U5      nUR                  5       U
-  n[        R                   " / [        R                  US9nU
SSU-
  -  -  n
U
R                  TS5      R                  5       R                  [        R                  5      n
XvX4$ )NrO   Unsupported num_bits = rX      Unsupported groupsize = rM   rL   T)r   r   c                 J   > U R                  TT45      R                  5       n U $ ri   reshape
contiguousr  size_ksize_ns    r9   	reshape_w:_choose_qparams_and_quantize_affine_qqq.<locals>.reshape_wZ  $    		66*+668AHr8   g     _@rI   rJ   r   r   rS   )r   r   r  rj   r   absrd   r   r   r   halfrk   r{   int8r   r  r   )r  r  r  orig_device	max_q_val
half_q_vals_groupq_ww_refr  	s_channelt_int8r  r  s               @@r9   r   r   ;  s   
 q=>3H:>>=WWNFF"c6**S.Fzl,SS*((KR
FIIr:&'xK!O	!m)
 **UYYq\2t<1y=  ll1;'++-kk#q),!'')G3	 n%  JJuyy/TB	U	#**,224=@@L	)%%b!,//ekk/B	 ??62.99;iGKK** L 
 (Q,'!+	 JJuyy|R>		 ll1=)--/kk#z95
Y&,,rKHQ1x<((	%%fb1<<>AA%++N	))r8   c                 8   U R                   n[        XR                  5       5      u  pEU R                  U5      n [        R
                  " XSS9n[        R                  " XSS9nSnSn	U[        X-
  5      S-  -  n
Xv-
  [        X-
  5      -  n
Un[        US   -  S:X  d   eS[        US   -  4n[        XR                  5       5      u  pEU
R                  U5      n
UR                  U5      nUR                  5       nU H  nSX'   M	     [        R                  " [        R                  " U
5      USS9n[        R                  " [        R                  " U5      USS9nSnSnU[        UU-
  5      -  nU[        UU-
  5      -  nUR                  U5      nUR                  U5      n[        R                  " U
U-  UU5      n[        R                  " UU-  UU5      nUR                  U5      UR                  U5      UR                  U5      UR                  U5      4$ )	a  
There are two sets of qparams: quantized_block_scale, quantized_block_min and super_block_scale_scale and super_block_min_scale
the relationship is the following:
block_scale = quantized_block_scale * super_block_sclae
block_min = quantized_block_min * super_block_min
quantized_val = (float_val - block_min) / block_scale + quant_min
first we calculate block_scale and block_min
then we calculate super_block_scale_scale and super_block_min_scale
after that we can calculate quantized_block_scale and quantized_min_scale
the returned values are: super_block_scale_scale, super_block_min_scale, quantized_block_scale
and quantized_min_scale
Fr   r[   r   rM   rX   rL   r]   )r   r   r   r   rj   r   r   r   
_GGUF_QK_Kr   r  r   r{   )r   r   r   r   r   r   r   r   r   r   block_scale	block_minsuper_block_sizer   r   block_scale_absmaxblock_min_absmaxqparam_quant_maxqparam_quant_minsuper_block_scale_scalesuper_block_min_scalesuper_block_scale_scale_viewsuper_block_min_scale_viewquantized_block_scalequantized_block_mins                            r9   r   r     sP   " KKE +@JJL+' JJ*+EjjEBGjjEBGIIU9#89A=>K$i.C(DDKI 
2&!+++:B78*?**,+' ""#67K23I/446#$   		+NE zz		).%  05++4  -u++0  $;#?#?@U#V !6!;!;<Q!R "KK224DFV  ++..0@BR 	 ""5)  '  'u%	 r8   r'  r(  r+  r,  c                    U[         R                  :X  d   e[        XR                  5       5      u  pxUR	                  5       n	U H  n
SX'   M	     U R
                  nU R                  U5      n UR                  U	5      nUR                  U	5      nS[        US   -  S4n[        XR                  5       5      u  pUR	                  5       nU H  n
SX'   M	     UR                  U5      nUR                  U5      nUR                  U5      nUR                  U5      nX5-  nXF-  nUR                  U	5      nUR                  U	5      nU U-
  U-  nUR                  U5      nU$ NrL   rX   )rj   uint4r   r   r   r   r   r  )r   r   r   r'  r(  r+  r,  input_shape_for_reductionr   "block_qparam_shape_after_reductionr   r   r"  %super_block_input_shape_for_reduction(super_block_qparam_shape_after_reductionr   r!  int_datas                     r9   r#   r#     s    5;;&&& 1FJJL1- *C)G)G)I&01*- [[NJJ01E166* .223UV :B7;<Q446=9) 	.224 - 6703  266- .22- 6::0 2660 *AK%;I ""#EFKABI	![0H}}^,HOr8   c                    [        XR                  5       5      u  pUR                  5       n
U	 H  nSX'   M	     U R                  nU R	                  U5      n UR	                  U
5      nUR	                  U
5      nS[
        US   -  S4n[        XR                  5       5      u  pUR                  5       nU	 H  nSX'   M	     UR	                  U5      nUR	                  U5      nUR	                  U5      nUR	                  U5      nX5-  nXF-  nUR	                  U
5      nUR	                  U
5      nU U-  U-   nUR	                  U5      nUb  UR                  U5      nU$ r.  )r   r   r   r   r   r  r{   )r   r   r   r'  r(  r+  r,  r   r0  r   r1  r   r   r"  r2  r3  r   r!  r   s                      r9   r)   r)     s    1FJJL1- *C)G)G)I&01*-  [[NJJ01E166* .223UV :B7;<Q446=9) 	.224 - 6703 166- .22- 6::0 2660 *AK%;I ""#EFKABIk!I-Gll>*G**\*Nr8   r  r  c                   ^
^ US:X  d
   SU 35       eU R                   u  mm
USST
4;   d
   SU 35       eUS:X  a  T
nUT
:  aj  U R                  SU45      n SU-  S-
  nUS-   S-  nXR                  5       -  nX-
  R                  5       UR                  SS5      -  nU
U4S jn	U	" U5      nOUSS	U-
  -  -  nU R                  5       U-  nUc!  UR                  [        R
                  5      nU$ UR                  U5      nU$ )
NrO   r  rX   r  r  rM   rL   c                 J   > U R                  TT45      R                  5       n U $ ri   r
  r  s    r9   r  )_dequantize_affine_qqq.<locals>.reshape_wj  r  r8   rS   )r   r  r  r{   rj   r   )r  r  r  r  r  r   r  r  w_dqr  r  r  s             @@r9   r'   r'   Q  s(    q=>3H:>>=WWNFF"c6**S.Fzl,SS*R
FIIr:&'xK!O	!m)
NN,,$$&Q)??	  q8|!45	vvx)#wwu}}% K ww|$Kr8   rf   betalp_normc           
         US:X  aZ  [         R                  " U 5      [         R                  R                  R	                  [         R
                  " U 5      SU-  -
  5      -  $ [         R                  " U 5      [         R                  R                  R	                  [         R
                  " U 5      SU-  [         R                  " [         R
                  " U 5      US-
  5      -  -
  5      -  $ )NrL   r   )rj   signnn
functionalrelur  pow)rf   r:  r;  s      r9   _shrink_lp_oprB    s    !|zz!}uxx2277		!sTz8QRRRzz!}uxx2277IIaLC$J%))EIIaL'A+*NNN 
 
 	
r8   Fgffffff?g      $@g)\(?   )r;  r:  kappaiters
early_stopr   zeromin_maxaxisr   r   verbose
opt_paramsc	                 :   US   US   US   US   US   4u  ppnUc  U R                   O[        R                   " U5      nUc0  UR                  S:X  a  [        R                  O[        R                  nU R                  XVS9nUR                  XVS9nUR                  XVS9nSn[        U5       H  n[        R                  " X-  U-   5      R                  US	   US
   5      nUU-
  U-  n[        UU-
  X5      n[        R                  " UUU-
  U-  -
  USS9nX-  n
[        [        R                  " UU-
  5      R                  5       5      nU(       a'  [        S[        US
-   5      -   S[        U5      -   5        U(       d  M  UU:  a  UnM    O   UR                  U R                   5      nUR                  U R                   5      nAAAA[        R                  R!                  5         [        R                  " X-  U-   5      R                  US	   US
   5      nUX4$ )Nr;  r:  rD  rE  rF  cudar   g     @r   rL   TrI  r   zIter z
 | Error: )r   rj   typer   r   r{   r   rk   r   rB  meanr   r  printstrrM  empty_cache)r   r   rG  rH  rI  r   r   rJ  rK  r;  r:  rD  rE  rF  W_f
best_errorr   W_qW_rW_ecurrent_errors                        r9    optimize_weights_proximal_legacyrZ    s   & 	9677< /+G5  &~V]]ELL4HF}"(++"7emm
))%)
/CHH5H0E777.DJ5\kk#+,-33GAJ
KTzU"C#It5zz#se 33$Meiic	2779:'CAJ&s=7I(IJ:z)*
   HHV]]#E776==!DS#s	JJ
++fnt+
,
2
271:wqz
JCr8   val1val2c                 P    [        U[        R                  " X-  5      -  5      U :H  $ ri   )r   mathceil)r[  r\  s     r9   _is_divisibler`    s"    tdii,,-55r8   rV  nbitsr   c                     SnSU-  S-
  nXe-   S-   S-  nXrR                  5       -
  UR                  5       -  R                  UR                  5      nUn	U R                  U5      n
XU4$ )Nr   rM   rL   )r   r{   r   r   )rV  r   rG  ra  r   r   r   r   zero_aoscale_aoW_q_aos              r9   "_convert_to_affinequantized_formatrf    sm     I51I&*a/IJJL(EKKM9==djjIGHXXe_FW$$r8   @   rM  optimizecompute_dtype
raw_outputoptimize_weightsc
           
         US;   d   S5       eUbK  [        U R                  5       U5      (       d,   S[        U R                  5      -   S-   [        U5      -   5       eU R	                  U[
        R                  S9n
U
R                  nUb,  US:X  a  U
R                  SU/5      OU
R                  US/5      n
U
R                  USS	9S
   nU
R                  USS	9S
   n[        SU-  S-
  5      nS
nX/nXU-
  -  R                  SS9nU* U-  nUS;   a  [        R                  U5      nU(       a  U	" U
UUUUUUS9u  nnnOSUR	                  U5      nUR	                  U5      n[        R                  U
U-  U-   5      R                  US
   US   5      nSU-  nUSL a  [        UUUX5      u  nnnOlUR                  U5      nUS:X  a+  UR                  US
   S5      nUR                  US
   S5      nO*UR                  SUS   5      nUR                  SUS   5      nUR	                  [
        R                  US9nUR	                  XVS9nUR	                  XVS9nA
AA[
        R                   R#                  5         UUUU4$ )a@  Choose quantization parameters and quantize tensor using HQQ (Half-Quadratic Quantization).

Performs quantization using HQQ method with optional weight optimization via proximal solver.

Args:
    tensor: Input tensor to quantize (float32, float16, or bfloat16)
    nbits: Number of bits for quantization (default: 4)
    group_size: Size of quantization groups (default: 64)
    optimize: Whether to optimize weights using proximal solver (default: True)
    axis: Axis along which to perform quantization (0 or 1, default: 1)
    compute_dtype: Target compute dtype (default: torch.float16)
    device: Target device for computation (default: "cuda")
    verbose: Whether to print optimization error information (default: False)
    raw_output: If True, return params in HQQ library format (default: False)
    optimize_weights: Weight optimization function (default: optimize_weights_proximal_legacy)

Returns:
    Tuple of (quantized_weights, scale, zero_point, original_shape)

Note:
    Uses proximal solver to minimize ||W - dequantize(quantize(W))||_p^p for weight optimization.
r_   zaxis should be either 0 or 1zEgroup_size should be divisble by the total tensor dimensions. shape: z, group_size: )r   r   rL   rX   TrN  r   rM   g     @)r   )rO   )r   r   rG  rH  rI  r   rJ  r   Fr   )r`  r   rR  r   r{   rj   r   r  r   r   rk   r   rd   r   rf  r   rM  rS  )r   ra  r  rh  rI  ri  r   rJ  rj  rk  Wr   _min_maxmax_vmin_vrH  r   rG  rV  s                       r9   r   r     s   D 6>999>V\\^Z88 	
S&,,  *o	
8 			u}}	5AGGE ,0AIAIIr:&'AIIzSUFV<W 55dD5)!,D55dD5)!,D!U(Q,EEnG Tk"))c)2E55=D |||D! +
UD ww}%'ll1u9t+,2271:wqzJ %KE U=e
UD kk% 19MM%(B/E<<a"-DMM"eBi0E<<E"I.D
&&u{{6&
2CHH=H8E7776D 	
4	JJtU""r8   gh㈵>)rE  
stochasticearly_stop_tol	hp_tensorqminqmaxrE  rr  rs  c                   U R                   S:X  d   S5       e[        U[        [        45      (       a  [	        U5      S:X  d   S5       eUS   S:X  a	  US   S:  d   S5       eX#:  d   S5       e[
        R                  n[
        R                  " U5      R                  nU R                  u  p[        US   5      nX-  S:X  d   SU
 S	U 35       eS
[
        R                  S[
        R                  4S jnS
[
        R                  S[
        R                  4S jnU(       a  UOUnU R                  U5      R                  5       nX-  nUR                  U	UU5      n[        [!        U5      [!        U5      5      =(       d    SnUR!                  5       R#                  SS9U-  R%                  U5      nUR'                  5       n[)        [        SU5      5       H  nU" UUR+                  S5      -  5      R-                  X#5      nUU-  R/                  S[
        R                  S9nUU-  R/                  S[
        R                  S9n[
        R0                  " US:  UU-  U5      nUR%                  U5      R!                  5       nUU-
  R!                  5       UR%                  U5      -  R                  5       nUU:  a    OUnM     U" UUR+                  S5      -  5      R-                  X#5      nUR                  X5      R                  5       R                  [
        R2                  5      nU R4                  nUR                  U5      nUU4$ )a  
Half-Quadratic Quantization (scale-only, symmetric) for 2D weights with row-wise blocks.
- hp_tensor: [out, in] (bf16/fp16/fp32 accepted; promoted to fp32 internally)
- block_size: must be [1, group_size]; groups along the last dim
- qmin, qmax: integer range (e.g., -8, 7 for signed 4-bit)
Returns:
  qdata: int32, same shape as hp_tensor
  scale: hp_tensor.dtype, shape [out, in // group_size] (one scale per row-wise block)
rM   zhp_tensor must be 2D [out, in]z)block_size must be a 2-element list/tupler   rL   z7block_size must be [1, group_size] with group_size >= 1zqmin must be < qmaxzin_features=z! must be divisible by group_size=rf   rg   c                 "    U R                  5       $ ri   )rk   rf   s    r9   	round_det>_choose_qparams_and_quantize_scale_only_hqq.<locals>.round_dety  s    wwyr8   c                 \    [         R                  " U [         R                  " U 5      -   5      $ ri   )rj   floor	rand_likery  s    r9   round_stoch@_choose_qparams_and_quantize_scale_only_hqq.<locals>.round_stoch}  s    {{1uq1122r8   r   rX   )r   r   )ndim
isinstancelisttupler   rj   r   r   r   r   r   ru   r{   r  r   r   r  r   	clamp_mincloner   	unsqueezer   sumr   r   r   )rt  r   ru  rv  rE  rr  rs  ri  compute_epsnkr  rz  r  _rrm  n_groupsWgqabsr   
prev_scaler   Qgnumdenrelqdata	out_dtypes                               r9   r   r   Q  s   * >>Q@ @@j4-00S_5I 3I a=A*Q-1"4 A4 ;---; MMM++m,00K??DAZ]#J>Q 
qc::,GU\\ ell 3u|| 3 3 #	B 	]#..0AH	
8Z	(B s4y#d)$)DVVX]]q]!D(33K@EJ 3q%=! U__R(()//; Bwmmm7Bwmmm7C!GS3Y
;

#% 	 
"'')J,@,@,MMRRT
# "( 
B$$	%	+	+D	7B GGAM$$&))%++6EIHHYE%<r8   ebitsmbitsc                    [         US-
     nS[         U   U-
  -  [         US-      SU-  -  -  nU R                  nU R                  5       n U R                  5       R	                  S5      R                  SS9U-  nUR                  U5      $ )a  Choose quantization parameters for floatx quantization.

Calculates scale parameter for quantizing to custom floating point format.

Args:
    tensor: Input tensor to quantize (float32, float16, or bfloat16)
    ebits: Number of exponent bits in target floatx format
    mbits: Number of mantissa bits in target floatx format

Returns:
    Scale tensor for floatx quantization

Note:
    Uses global lookup table as workaround for torch.compile() compatibility
    since _n_ones() is not compatible due to << operator.
rL   rM   g-q=r   )_ONES_TABLEr   r   r  r   r   r{   )r   r  r  exp_bias
max_normalr   r   s          r9   r   r     s    2 519%H{5)H45EAI!U(+J LLE\\^FJJLa &&5&1J>E88E?r8   c                 b    U R                  5       n [        XR                  SS5      -  X#5      nU$ )a  Quantizes the float32 high precision floating point tensor to low precision floating point number and
converts the result to unpacked floating point format with the format of 00SEEEMM (for fp6_e3m2) where S means sign bit, e means exponent bit and m means mantissa bit
rX   rL   )r   r   r   )r   r   r  r  tensor_floatxs        r9   r!   r!     s/     \\^F+FZZA5F,FUMr8   c                     [        XU5      n XR                  5       R                  SS5      -  n U R                  US9n U $ )NrX   rL   r   )r   r   r   r{   )r   r   r  r  r   s        r9   r&   r&     s@     %VE:Fkkm((Q//FYY\Y*FMr8   ry   hp_value_lbhp_value_ubc                 >   [         R                  " U5      R                  n[        U5      S:X  a>  U R	                  5       R                  5       nUc  Ub  [         R
                  " XtUS9nXv-  nO[        XR                  5      u  pU R                  U	5      nUR	                  5       R                  U
SS9nUc  Ub  [         R
                  " XtUS9nXv-  n[        U R                  5       VVs/ s H  u  pXU   -  PM     nnnUR                  U5      nU[         R                  LaW  U[         R                  L d   S5       e[         R                  " [        R!                  [         R"                  " U5      5      5      nUR%                  [         R                  S9$ s  snnf )a  
Calculates float8 scaling factor for the given high precision tensor.

Args:
    tensor (torch.Tensor): Input tensor to be quantized.
    float8_dtype (torch.dtype): Data type of the quantized tensor (e.g., torch.float8_e4m3fn, torch.float8_e5m2).
    scale_dtype (torch.dtype): Data type of the scaling factor (e.g., torch.float32).
    block_size (Optional[Tuple[int, ...]]): Block size for block-wise quantization. If None, tensorwise quantization is used.
    hp_value_lb (Optional[float]): the lower bound for high precision floating point value for calculating scale
    hp_value_ub (Optional[float]): the upper bound for high precision floating point value for calculating scale
r   r   r   Tr   z!Only float8_e8m0fnuz is supportedr   )rj   r   r   r   r  r   r   r   r   r   	enumerater  r   float8_e8m0fnuexp2rd   r   log2r{   )r   r   ry   r   r  r  r   max_absr   r   r   tensor_reshapedr   r   output_shapes                  r9   r   r     sd   ( L)--I
:!**,""$"k&=kk'LG#.C/
+ !++&9:!%%',,,N"k&=kk'LG# >Gv||=T
=TMAJQ-'=T 	 
 l+%--'e222W4WW2

6<<

5(9:;88%--8((
s   +Ftarget_shapec                   ^ ^ T R                   T:X  a  T $ T R                  5       S:X  a  T $ [        S [        T R                   T5       5       5      (       a  T $ [	        T R                   5      [	        T5      :w  a-  [        S[	        T R                   5       S[	        T5       35      e[        U U4S j[        [	        T5      5       5       5      n[        [        TT R                   U5      5       H+  u  nu  pEnXEU-  :w  d  M  [        SU SU SU S	XE-   S
3	5      e   T n[        U5       H  u  p6US:  d  M  UR                  XcS9nM     U$ )a~  
Expand a scale tensor to match the target tensor shape for block-wise quantization.
If this is rowwise quantization, however, just return the scale as is.

Args:
    scale (torch.Tensor): Scale tensor with shape corresponding to block structure
    target_shape (torch.Size): Target tensor shape to expand to

Returns:
    torch.Tensor: Scale tensor expanded to match target_shape
rL   c              3   F   #    U  H  u  pX:H  =(       d    US :H  v   M     g7f)rL   Nr.   ).0abs      r9   	<genexpr>6_maybe_expand_scale_to_tensor_shape.<locals>.<genexpr>4	  s"     
G(F16Q!V(Fs   !zScale tensor has z dimensions but target has c              3   N   >#    U  H  nTU   TR                   U   -  v   M     g 7fri   )r   )r  r   r   r  s     r9   r  r  =	  s&      3KaQ5;;q>)3Ks   "%z
Dimension z: target size z' is not evenly divisible by scale size z (block size would be )r  )
r   r   allzipr   r   r  r   r  repeat_interleave)r   r  block_sizesr   
target_dim	scale_dimr   expanded_scales   ``      r9   #_maybe_expand_scale_to_tensor_shaper  	  s`    {{l"{{} 
GEKK(F
GGG 5;;3|,,EKK 011LSQ]M^L_`
 	
  38\9J3K K
 3<L%++{33..J: Z//QC~j\ :!!*+A*BXAYYZ\ 	3 N";/>+==j=PN 0 r8   c                     U R                  [        R                  5      n[        XR                  5      nX4-  n[        R
                  " U5      R                  nUR                  U* US9n[        R                  Xr5      $ )h
Quantizes the high precision floating point tensor to a float8 tensor, using the given scaling factor.
r  )
r{   rj   r   r  r   r   r   r   rw   r   )r   r   ry   tensor_fp32scale_expandedtensor_scaled	max_valuetensor_clampeds           r9   r"   r"   T	  sj     ))EMM*K 9MN0ML)--I"((iZY(GN==r8   c                     U R                  [        R                  5      n[        XR                  5      nX4-  nUR                  U5      $ )9
Dequantizes the float8 tensor to high precision tensor.
)r{   rj   r   r  r   )r   r   r   
fp8_tensorr  rt  s         r9   r(   r(   g	  s=     5==)J 9MN+I<<%%r8   c                     [        U UUS9$ )r  r   r   ry   )r"   r  s      r9   &_quantize_affine_float8_non_decomposedr  x	  s     #! r8   %quantize_affine_float8_non_decomposedc                 *    [         R                  " XS9$ Nr   rj   
empty_liker  s      r9   _quantize_affine_float8_metar  	       F77r8   c                     [        U UUS9$ )r  r   r   r   )r(   r  s      r9   (_dequantize_affine_float8_non_decomposedr  	  s     %! r8   'dequantize_affine_float8_non_decomposedc                 *    [         R                  " XS9$ r  r  r  s      r9   _dequantize_affine_float8_metar  	  r  r8   )NN)NNNNNri   )r^  enumr   r   typingr   r   r   r   r	   r
   rj   !torchao.prototype.custom_fp_utilsr   r   r   torchao.utilsr   r   __all__r   r   r   serializationadd_safe_globalsfloat8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzr   r   r  int16r   rK   r   r   __annotations__rA   rB   rC   rD   rE   rF   rG   rV   rW   r^   uint1uint2uint3r/  uint5uint6uint7updateint1int2int3int4int5int6int7keysr  r   r  libraryLibrary	quant_libregister_custom_opautogradFunctionrd   rw   r   r   no_gradru   r   r   boolr   r   r    r   r   r   r   r   r   r   r   r$   r   r%   r<   r*   r+   r   r   r   r   r   rR  r   r   r   r#   r)   r'   rB  inference_moder  dictr  rZ  r`  Sizerf  r   r   r   r   r!   r&   r   r  r"   r(   r  r  r  r  )r   s   0r9   <module>r     s     ? ?  

@$ 0
d 
4      $ $k?%C D 
					
 
KK	JJ	KK&	KK&	T eEKK$=>c3hOP  qqqqqqq	KK	JJ	KK	KKP T%\ 9:E#s(OKL  RT tE%++|";<eCHoMN S*******Q d5l!:;U38_LM  
KK	KK	KK	KK	KK	KK	KK    QQQQQQQ
   

'

'

'

'

'

'

'
   

A

A

A

A

A

A

A
   4 5   3 4!%<%A%A%CC CC
#(8,8awqz8,MM!!)Z8	(3 U^^$$ U^^,, & R&/R  .2-1,<<,c3h, <<, &	,
 ++, c5j)*, c5j)*, \\, ,^  4837)<<)S	) <<) &	)
 ++) c5$./0) c5$./0) \\) )X=<<=S	= <<= &	=
 S%Z = S%Z = \\=L 4837/<</S	/ <</ &	/
 ++/ c5$./0/ c5$./0/ \\/n .2-1=<<=c3h= <<= &	=
 c5j)*= c5j)*= \\=L 48370<<0S	0 <<0 &	0
 ++0 c5$./00 c5$./00 \\0p .2-1;<<;c3h; <<; &	;
 c5j)*; c5j)*; \\;H .2-1% !&%<<%c3h% <<% &	%
 % c5j)*% c5j)*% ++% \\%P  4837 %.<<.S	. <<. &	.
 . c5$./0. c5$./0. ++. \\. .p !&69<<69S	69 <<69 &	69
 S%Z 69 S%Z 69 ++69 \\69@ !&29<<29S	29 <<29 &	29
 S%Z 29 S%Z 29 ++29 \\29v .2-1/ !&/<</c3h/ <</ &	/
 / c5j)*/ c5j)*/ ++/ \\/r !&*9<<*9S	*9 <<*9 &	*9
 S%Z *9 S%Z *9 ++*9 \\*9f .2-1/ !&/<</c3h/ <</ &	/
 / c5j)*/ c5j)*/ ++/ \\/p .2-1)8)<)<-<<-c3h- <<- &	-
 - c5j)*- c5j)*- '- \\-l .2-1)8)<)<,<<,c3h, <<, &	,
 , c5j)*, c5j)*, ', 5<<%&,j .2-1)8)<)<=<<=c3h= <<= &	=
 = c5j)*= c5j)*= '= 5<<%&=@  .2-1)-.3kk(<<(( c
( ++	(
 c5j)*( c5j)*( 
%( %++&( u{{+( 5<<%&( (X  .2-1)-.2CH<<CHCH c
CH ++	CH
 c5j)*CH c5j)*CH 
%CH %++&CH u{{+CH 5<<%&CH CHX 4837)-.2@<<@@ c
@ ++	@
 c5$./0@ c5$./0@ 
%@ %++&@ u{{+@ 5<<%&@T  $#)-.2)8)<)<tJ\\tJ\\tJ tJ c3h	tJ
 ++tJ }tJ }tJ 
%tJ %++&tJ u{{+tJ tJ 'tJ 5<<%&tJn  4837)-.2OELL!OO S	O ++	O
 c5$./0O c5$./0O 
%O %++&O u{{+O 5<<%&O OdC*||C*C* C* 5<<u||34	C*LPELL!PS	P ++P 5<<u||U\\AB	Pf;<<;S	; ++; #\\	;
 !<<; !<<; ; \\;L +/<<<<S	< ++< #\\	<
 !<<< !<<< < 5;;'< \\<J +/(||(\\( ||( 	(
 ( 5;;'(\
U\\ 
 
 
5<< 
  &*#9LL9<<9 ,,9 	9
 9 d"#9 #t)9 9 9  !9 9z6 63 64 6
%	%<<% ,,% 	%
 uejj()% %& !&!Aq#LLq#q# q# 	q#
 q# ;;q# q# q# q# q# q#h   Y||YS	Y Y 	Y Y Y Y 5<<%&Y Yx!LL!!$!-0!
\\!HLL!&69BE
\\  !&
LL
<<
 
 	

 ++
 \\
  !& 3 3$}}#'#'.)LL.)S	.) ++.) 	.)
 %.) %.) \\.) .)b5<<5',zz5
\\5v !& 3 3>LL><<> ++> \\	>, !&&LL&<<& ++& \\	&" Y& !& 3 3LL<< ++ \\	 ' 9EF !& 3 38LL8<<8 ++8 \\	8 G8 Y& !&LL<< ++ \\	 ' 9GH !&8LL8<<8 ++8 \\	8 I8qF -s   AQ