
    h
                     l    S SK r S SKJr  SSKJrJr  S/r " S S\ R                  R                  5      rg)    N   )$_quant_int8_dynamic_per_token_linear dynamically_quantize_per_channel!DynamicallyPerAxisQuantizedLinearc            	          ^  \ rS rSrSr SS\S\S\SS4U 4S jjjrS	\R                  S\R                  4S
 jr
\S\R                  R                  SS 4S j5       rSrU =r$ )r      z
This class is a replacement for `torch.nn.Linear`. It implements a
quantized matmul using int8 dynamic symmetric per-token activation,
and int8 symmetric per-channel weight quantization
in_featuresout_featuresbiasreturnNc                 &   > [         TU ]  XU5        g )N)super__init__)selfr	   r
   r   	__class__s       \/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/dynamic_quant.pyr   *DynamicallyPerAxisQuantizedLinear.__init__   s     	D9    Xc                 r    [        XR                  U R                  U R                  UR                  5      nU$ )al  
Performs the forward pass of the quantized linear layer which consists
of int8 dynamic symmetric per-token activation and int8 symmetric per-channel weight
quantization

Args:
    X (torch.Tensor): The input floating point tensor to the quantized linear layer.

Returns:
    torch.Tensor: The output floating point tensor after the quantized matmul and rescale.

)r   W_int_repr_tW_scalesr   dtype)r   r   argskwargsYs        r   forward)DynamicallyPerAxisQuantizedLinear.forward!   s0     1  $--AGG
 r   modc                    Su  p#U " UUUR                   SLS9nUR                  Ul        UR                  Ul        [        UR                  SS[
        R                  5      u  pVnUR                  SUR                  5       R                  5       5        [        R                  " U5      Ul        UR                   Ul         U?[        UR                  5       5      R                  nUR!                  U5        U$ )a  
Converts a `mod` of class `torch.nn.Linear` to the
`DynamicallyPerAxisQuantizedLinear` class

Args:
    mod (torch.nn.Linear): The original `torch.nn.Linear` module to convert.

Returns:
    DynamicallyPerAxisQuantizedLinear: The converted quantized linear module.

)   r!   N)r   i   r   )r   r	   r
   r   weighttorchint8register_buffer
contiguoustnn	Parameterr   next
parametersdeviceto)	clsr   fake_in_featuresfake_out_featuresnew_mod
W_int_reprr   _W_zpsdevice_to_uses	            r   
from_float,DynamicallyPerAxisQuantizedLinear.from_float4   s     /3+%

 "oo"//'GJJc5::(
$
f 	
0E0E0G0I0I0KL<<1xxNS^^-.55

=!r    )T)__name__
__module____qualname____firstlineno____doc__intboolr   r$   Tensorr   classmethodr)   Linearr6   __static_attributes____classcell__)r   s   @r   r   r      s     	:: : 	:
 
: : 5<< &  UXX__  1T    r   )	r$   torch.nnr)   utilsr   r   __all__rB   r   r8   r   r   <module>rH      s2     
 /
/C Cr   