
    h-                     &   S SK Jr  S SKrS SKJs  Js  Jr  S SKJr  S SKJ	r	  / SQr
S\R                  S\S\S	\\R                  \R                  4   4S
 jr\R                  \R                  4S\S\S\S	\R                  4S jjrS\R                  S\R                  S\S\S	\\R                  \R                  \R                  4   4
S jrS\R                  S\R                  S\R                  S\R$                  S\S\S	\\R                  \R                  4   4S jrS\R                  S\S\S\S	\\R                  \R                  4   4
S jrS\R                  S\R                  S\S\S\S	\R                  4S jrS\R                  S\S\S\S	\R                  4
S jrS\R                  S\S\S\S	\R                  4
S jrS\R                  S\S\S\S\S	\R                  4S  jrS\R                  S\S\S\S\S	\R                  4S! jrg)"    )TupleN)const)mask_creator)	inject_24marlin_24_workspacepack_to_marlin_24unpack_from_marlin_24wsize_ksize_nreturnc                     U R                   X4:X  d   e[        U R                  5       5      R                  5       R                  5       R	                  5       nX0-  R                  5       UR                  5       4$ )a  Injects 2:4 sparsity into a weight tensor. The sparsity is applied in a 2:4 ratio, where for every
group of 4 weights, 2 will be pruned based on their value. The mask will be created based on the
ranked weight values.

Args:
    w (torch.Tensor): The weight tensor to inject sparsity into.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
Returns:
    Tuple[torch.Tensor, torch.Tensor]: The pruned weight tensor and the mask tensor.
)shaper   tcudabool
contiguous)r
   r   r   masks       Z/home/james-whalen/.local/lib/python3.13/site-packages/torchao/sparsity/marlin/__init__.pyr   r      sa     77v&&&&  "'')..0DH  "DOO$555    out_featuresmin_thread_nmax_parallelc                     X-  S:X  d   SU  SU 35       eX-  U-  n[         R                  " U[         R                  SS9$ )a  Creates a workspace for marlin 2:4 quantization. The workspace is used to coordinate the locks
during the execution of the kernel.

Args:
    out_features (int): The number of output features.
    min_thread_n (int, optional): The minimum number of threads per block. Defaults to `MARLIN_24_MIN_THREAD_N`.
    max_parallel (int, optional): The maximum number of parallel threads. Defaults to `MARLIN_24_MAX_PARALLEL`.
Returns:
    torch.Tensor: The workspace tensor fully initialized with zeros.
r   zout_features = z, min_thread_n = r   dtypedevice)torchzerosint)r   r   r   max_workspace_sizes       r   r   r   $   sU     &!+ 
,'8G+ '6,F;;)6JJr   q_w_24scalesnum_bits
group_sizec                 ~    U R                   u  pE[        XXR5      u  pgUS-  n[        XhXR5      n	[        XXSU5      n
XU4$ )a  Packs the quantized weights and scales into the marlin 2:4 format.

Args:
    q_w_24 (torch.Tensor): The quantized weight tensor with 2:4 sparsity applied.
    scales (torch.Tensor): The scale tensor.
    num_bits (int): The number of bits used for quantization.
    group_size (int): The group size that was applied during quantization.
Returns:
    Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: The packed quantized weights, the packed scales, and the meta tensor.
   )r   _compress_quantized_24_weight_to_marlin_weights_to_marlin_scales)r"   r#   r$   r%   in_featuresr   q_w_24_compmetain_features_compmarlin_24_q_w_compmarlin_24_ss              r   r   r   :   sc      !'K 6\K #a' ,| $\xK D00r   r,   r-   original_shapec                 l    Uu  pg[        U/UQUPUP76 nUS-  n	[        X	Xu5      n
[        XXU5      nX4$ )a  Unpacks the quantized weights and scales from the marlin 2:4 format.
Args:
    q_w_24_comp (torch.Tensor): The packed quantized weights.
    scales (torch.Tensor): The packed scales.
    meta (torch.Tensor): The meta tensor.
    original_shape (torch.Size): The original shape of the weight tensor.
    group_size (int): The group size that was applied during quantization.
    num_bits (int): The number of bits used for quantization.
Returns:
    Tuple[torch.Tensor, torch.Tensor]: The unpacked quantized weights and scales.
r'   )_from_marlin_scale_from_marlin_weights_decompress_quantized_24_weight)r,   r#   r-   r1   r%   r$   r+   r   unpacked_scalesr.   unpacked_q_w_24_compunpacked_q_w_24s               r   r	   r	   _   sc    & !/K )W.W*WhWO"a' 0|
 6$4HO ++r   q_24c                 j   U R                   X4:X  d   eSU-  S-
  nUS-   S-  nX-
  nUR                  5       R                  5       n[        R                  " U5      u  pxUR                  5       R                  5       nXu-   n	UR                  UR                   S   S-  UR                   S   S-  5      nX4$ )a  Compresses the quantized weights to a 2:4 sparse format. Normalizes the weights over 0
before compressing them.

Args:
    q_24 (torch.Tensor): The quantized weight tensor.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
    num_bits (int): The number of bits used for quantization.
Returns:
    Tuple[torch.Tensor, torch.Tensor]: The compressed quantized weight tensor and the meta tensor.
   r'   r   )r   r   r   utils)sparse_semi_structured_from_dense_cutlassresize_)
r9   r   r   r$   	max_q_valzp
q_24_no_zpq_24_no_zp_compr-   	q_24_comps
             r   r(   r(      s     ::&)))) h!#I
a-A	BJ **,J!KKJWO%'')446O  $I <<

1*DJJqMA,=>D?r   rC   c                    U R                   X#4:X  d   eUR                  UR                   S   S-  UR                   S   S-  5      nSU-  S-
  nUS-   S-  nX-
  nUR                  5       R                  5       n[        R
                  " Xq5      nUR                  5       R                  5       nUR                  UR                   S   S-  UR                   S   S-  5      nX-   n	U	$ )a  Decompresses the quantized weights from a 2:4 sparse format and restores the original shape.

Args:
    q_24_comp (torch.Tensor): The compressed quantized weight tensor in 2:4 sparse format.
    meta (torch.Tensor): The meta tensor.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
    num_bits (int): The number of bits used for quantization.
Returns:
    torch.Tensor: The decompressed quantized weight tensor.
r;   r'   r   )r   r>   r   r   r<   'sparse_semi_structured_to_dense_cutlass)
rC   r-   r   r   r$   r?   r@   rB   rA   r9   s
             r   r5   r5      s     ??v.... <<

1*DJJqMA,=>D h!#I
a-A	BnO &'')446O>>UJ**,J <<

1*DJJqMA,=>D ?DKr   q_wc                    [         R                  " U5      u  n  n[         R                  " XX$5      n [         R                  " U5      nU R	                  [
        R                  5      n [
        R                  " U R                  S   U R                  S   U-  4[
        R                  U R                  S9n[        U5       H  nXpSS2USU24   X8-  -  -  nM     UR	                  [
        R                  S9nU$ )a  Converts a quantized and 2:4 sparse format weight tensor to the marlin 2:4 format.

Args:
    q_w (torch.Tensor): The quantized weight tensor in 2:4 sparse format.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
    num_bits (int): The number of bits used for quantization.
Returns:
    torch.Tensor: The weight tensor in the marlin 2:4 format.
r   r;   r   Nr   )r<   get_perms_24marlin_permute_weightsget_pack_factortor   int64r   r   r   rangeint32)	rF   r   r   r$   perm_24_pack_factorq_packedis	            r   r)   r)      s    " &&x0MGQ

&
&sF
DC ''1K &&
C{{	1syy|{23kkzzH
 ;1>k>)*x|<<   {{{-HOr   rS   c                    [         R                  " U5      u  n  n[         R                  " U5      nU R                  [        R
                  5      n [        R                  " U R                  S   U R                  S   U-  4[        R
                  U R                  S9n[        U5       H  nXU-  -	  SU-  S-
  -  USS2USU24'   M     UR                  [        R                  S9n[         R                  " XqX$5      n	U	$ )a  Converts a weight tensor in the marlin 2:4 format to a regular quantized 2:4 sparse format.

Args:
    q_packed (torch.Tensor): The weight tensor in the marlin 2:4 format.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
    num_bits (int): The number of bits used for quantization.
Returns:
    torch.Tensor: The weight tensor in the quantized 2:4 sparse format.
r   r;   r   NrH   )r<   get_reverse_perms_24rK   rL   r   rM   r   r   r   rN   rO   reverse_marlin_permute_weights)
rS   r   r   r$   rP   rQ   rR   q_w_unpackedrT   q_w_comps
             r   r4   r4      s     ..x8MGQ''1K
 {{5;;'H;;		HNN1-;<kkL
 ;+31+E(]a+
Q;&'  
  ???5L33fH Or   c                    [         R                  " U5      u  pVnX1:  a*  US:w  a$  U R                  S[        U5      45      SS2U4   n O#U R                  S[        U5      45      SS2U4   n U R                  SU45      R	                  5       n U $ )a  Converts a scale tensor to the format necessary for marlin.
Args:
    scales (torch.Tensor): The scale tensor.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
    group_size (int): The group size that was applied during quantization.
    num_bits (int): The number of bits used for quantization.

Returns:
    torch.Tensor: The scale tensor in the marlin format.
N)r<   rI   reshapelenr   r#   r   r   r%   r$   rQ   scale_perm_24scale_perm_single_24s           r   r*   r*     s     .3-?-?-I*A*zR/S%7 89!]:JKS)=%> ?@##
 ^^RL)446FMr   c                 "   [         R                  " U5      u  pVnX1:  a>  US:w  a8  U R                  S[        U5      45      SS2U4   n U R                  X-  U45      $ U R                  S[        U5      45      SS2U4   n U R                  S5      $ )a  Converts a scale tensor from the marlin format to their original format.

Args:
    scales (torch.Tensor): The scale tensor in the marlin format.
    size_k (int): The number of input features.
    size_n (int): The number of output features.
    group_size (int): The group size that was applied during quantization.
    num_bits (int): The number of bits used for quantization.
Returns:
    torch.Tensor: The scale tensor in their original format
r[   N)r;   r[   )r<   rV   r\   r]   r^   s           r   r3   r3   5  s     .3-G-G-Q*A*zR/S%7 89!]:JK~~v3V<==S)=%> ?@##
 ~~g&&r   )typingr   r   torchao.sparsity.marlin.utilssparsitymarlinr<   r   torchao.sparsity.utilsr   __all__Tensorr    r   MIN_THREAD_NMAX_PARALLELr   r   Sizer	   r(   r5   r)   r4   r*   r3    r   r   <module>rm      s     - - / /6||6 6*-6
5<<%&6* ****KKK K \\	K,"1LL"1LL"1 "1 	"1
 5<<u||34"1J$,$,LL$, ,,$, JJ	$,
 $, $, 5<<%&$,N 
,,  # -0 <? 
5<<%& F#||##(<<#9<#FI#UX#
\\#L#	## # 	#
 \\#L$ll$$'$14$@C$
\\$NLL"%/2@COR
\\2'LL'"%'/2'@C'OR'
\\'r   