
    hq                     :   S SK JrJr  S SKJrJr  S SKrS SKrS SKJ	r	J
r
  \" SS9 " S S5      5       r\" 5       rS	\S
\4S jr\R                  4S\R                   S\S\S\R                   S\S
\R                   4S jjr\R                  4S\R                   S\S\S\R                   S\S
\R                   4S jjrS	\S\	S
\R                   4S jrS	\S
\\\   \\   4   4S jrS	\S\	S
\R                   4S jrS	\S
\\R                   \R                   4   4S jrg)    )	dataclassfield)ListTupleN)GranularityPerAxisT)frozenc                   |    \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   \	" S S9r
\\   \S	'   \	" S
 S9r\\   \S'   Srg)MarlinQQQConstants      TILE@   MIN_THREAD_NMAX_PARALLELc                      S/$ )N    r       _/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/marlin_qqq/utils.py<lambda>MarlinQQQConstants.<lambda>   s    1#r   )default_factorySUPPORTED_NUM_BITSc                  
    SS/$ )N   r   r   r   r   r   r      s    b#Yr   SUPPORTED_GROUP_SIZESr   N)__name__
__module____qualname____firstlineno__r   int__annotations__r   r   r   r   r   r   __static_attributes__r   r   r   r   r      sH    D#NL#L#$)+$FS	F',=N'O49Or   r   num_bitsreturnc                 2    SU -  S:X  d
   SU  35       eSU -  $ )zCompute the packing factor for a given number of bits.

Args:
    num_bits (int): Number of bits to pack.
Returns:
    int: The packing factor.
    r   zUnsupported num_bits = r   )r&   s    r   get_pack_factorr*      s-     =AC!8
CC>r   q_wsize_ksize_npermtilec                    U R                   X4:X  d   eX-  S:X  d   SU SU 35       eX$-  S:X  d   SU SU 35       eU R                  X-  XBU-  U45      n U R                  S5      n U R                  X-  X$-  45      n U R                  SUR                  5       45      SS2U4   R                  U R                   5      n U $ )av  Permute weights to 16x64 Marlin tiles.

Args:
    q_w (torch.Tensor): Quantized weights.
    size_k (int): Number of input features.
    size_n (int): Number of output features.
    perm (torch.Tensor): The computed permutation tensor to be applied.
    tile (int, optional): Tile size. Defaults to `TILE`.
Returns:
    torch.Tensor: Weight tensor permuted to Marlin tiles.
r   	size_k = 	, tile = r            r   N)shapereshapepermutenumel)r+   r,   r-   r.   r/   s        r   marlin_permute_weightsr;   +   s    $ 99((((=AB6()D6BB=AB6()D6BB ++v~tt^TB
CC
++l
#C
++v~v}5
6C
++r4::<(
)!T'
2
:
:399
ECJr   q_w_unpackedreverse_permc                    U R                   S   U4X-  U R                   S   U-  4:X  d   eX-  S:X  d   SU SU 35       eX$-  S:X  d   SU SU 35       eU R                  SUR                  5       45      SS2U4   R                  U R                   5      nUR                  X-  X$-  XD45      nUR                  S5      nUR                  X45      nU$ )a  Reverse permute weights from 16x64 Marlin tiles.
Args:
    q_w_unpacked (torch.Tensor): Unpacked quantized weights.
    size_k (int): Number of input features.
    size_n (int): Number of output features.
    reverse_perm (torch.Tensor): The computed reverse permutation tensor to be applied.
    tile (int, optional): Tile size. Defaults to `TILE`.
Returns:
    torch.Tensor: Weight tensor reverse permuted from Marlin tiles.
r   r5   r1   r2   r   Nr3   )r7   r8   r:   r9   )r<   r,   r-   r=   r/   q_w_comps         r   reverse_marlin_permute_weightsr@   K   s   $ q!6*1%/    =AB6()D6BB=AB6()D6BB ##R););)=$>?	<gl  !   LMH-H 01HOr   granularityc           
         / n[        S5       H  n/ nUS-  nS HN  nSUS-  -  SUS-  -  S-   SUS-  -  S-   SUS-  -  S-   4 H   nUR                  SU-  U-   SU-  -   5        M"     MP     [        S5       H*  nUR                  U V	s/ s H
  oS	U-  -   PM     sn	5        M,     M     [        R                  " U5      n
U S:X  aG  [        U[        5      (       a  [        R                  " / S
Q5      nO3[        R                  " / SQ5      nO[        SR                  U 5      5      eU
R                  S[        U5      45      SS2U4   R                  5       n
[        R                  " U
5      n
U
$ s  sn	f )zPrecompute permutations for the marlin weight shuffling.

Args:
    num_bits (int): Number of bits to pack.
    granularity (Granularity): The weight quantization granularity.
Returns:
    torch.Tensor: The weight permutation tensor.
r)   r   )r   r5   r5   r4   r6   r         )r   r      r5      r4      r6   )r   r4   r   rF   r5   r6   rE   rG   num_bits must be 4, got {}r   N)rangeappendextendnumpyarray
isinstancer   	Exceptionformatr8   lenraveltorch
from_numpy)r&   rA   	perm_listiperm1colblockrowjpr.   
interleaves               r   get_qqq_weight_permr^   p   sa    I2Y1fEQUQUaQUaQUa	 R#X^a%i78  qA595a#'k59:   ;;y!D1}k7++%=>J%=>J4;;HEFF<<S_-.q*}=CCEDD!DK :s   	E/c           	      \   U S:w  a  [        SR                  U 5      5      e/ n[        S5       H3  nUR                  [        S5       Vs/ s H
  o2SU-  -   PM     sn5        M5     / n[        S5       H+  nUR                  S Vs/ s H  nSU-  U-   PM     sn5        M-     X4$ s  snf s  snf )zPrecompute permutations for the marlin scale shuffling.
Args:
    num_bits (int): Number of bits to pack.
Returns:
    Tuple[List[int], List[int]]: Scale permutation list and
    scale permutation list for a single group.
r   rH   rC   )r   r5   rC   	   r            r4   )rO   rP   rI   rK   )r&   
scale_permrV   r[   scale_perm_singles        r   get_qqq_scale_permsrf      s     1}4;;HEFFJ1XeAh7hq1u9h78 #%1X  5Q!R5Q!a%!)5Q!RS ((	 8 "Ss   	B$
B)
c                 <    [        X5      nUR                  5       nU$ )a  Reverse permutation for Marlin weight shuffling from `get_qqq_weight_perm`.
Args:
    num_bits (int): Number of bits to pack.
    granularity (Granularity): The weight quantization granularity.
Returns:
    torch.Tensor: The reversed weight permutation tensor.
)r^   argsort)r&   rA   r.   s      r   get_qqq_weight_reverse_permri      s     x5D<<>DKr   c                     [        U 5      u  p[        R                  " U5      R                  5       n[        R                  " U5      R                  5       nX4$ )a  Reverse permutation for Marlin scale shuffling from `get_qqq_scale_perms`.
Args:
    num_bits (int): Number of bits to pack.
Returns:
    Tuple[List[int], List[int]]: The reversed scale permutation list and
    the reversed scale permutation list for a single group.
)rf   rS   tensorrh   )r&   rd   re   s      r   get_qqq_scale_reverse_permsrl      sJ     %8$A!Jj)113J%67??A((r   )dataclassesr   r   typingr   r   rL   rS    torchao.quantization.granularityr   r   r   constr#   r*   r   Tensorr;   r@   r^   rf   ri   rl   r   r   r   <module>rr      s   )    $P P P 		c 	c 	" 

	  ,,	
  \\J 

!,,!! ! ,,	!
 ! \\!J$# $K $ELL $N)# )%S	490D*E )& +
\\ )# )%ell8R2S )r   