
    h                     <   S SK r S SKrS SKrS SKJr  S SKJr  S SKJ	r	  \ R                  " \5      r\R                  \ R                  " 5       5         S SKJr  \" \" \R*                  " SS 5      5      5      rS\R.                  S	\R.                  S
\R.                  4S jrS\R.                  S\R.                  S
\R.                  4S jrS\R.                  S\R.                  S\R.                  S
\R.                  4S jrg! \ a    \R%                  S5        Sr Nf = f)    N)is_compiling)	out_dtype)check_cpu_version)intmm_tritonzTWarning: Detected no triton, on systems without Triton certain kernels will not workTORCHAO_AUTOTUNER_ENABLEinputmat2returnc                    [        5       (       d  SU R                  5       ;   a  U R                  R                  S:X  ad  [	        [
        R                  R                  R                  R                  [
        R                  U R                  5       UR                  5       5      $ [	        [
        R                  R                  R                  R                  [
        R                  X5      $ UR                  U R                  :X  d!   SUR                   SU R                   35       eSUR                  R                  U R                  R                  4;   nU R                  S   S-  S:H  =(       a    U R                  S   S:  nUR                  S   S-  S:H  =(       a    UR                  S   S:  nU=(       a    U(       + nU(       d  U(       a  [
        R                  " U R                  5       R                  [
        R                  5      UR                  5       R                  [
        R                  5      5      R                  U R                  R                  5      $ UR!                  5       (       d  UR#                  5       nU R!                  5       (       d&  U R                  S   S-  S:w  a  U R#                  5       n  [	        [
        R                  R                  R                  R                  [
        R                  X5      $ ! [$         aq    [
        R                  " U R                  [
        R&                  5      UR                  [
        R&                  5      5      R                  [
        R                  5      s $ f = f)a  
Performs a safe integer matrix multiplication, considering different paths for
torch.compile, cublas, and fallback cases.

Args:
    input (torch.Tensor): The input tensor of shape [i, j].
    mat2 (torch.Tensor): The matrix to multiply with, of shape [j, k].

Returns:
    torch.Tensor: The result of the matrix multiplication.

Raises:
    AssertionError: If the tensors are not on the same device.

FakeTensorcpuz3need both tensors to be on the same device but got z and       r   )dynamo_is_compiling__repr__devicetyper   torchopsatenmmdefaultint32floatshapematmulr   tois_contiguous
contiguous	Exceptionfloat32)r   r	   
device_cpuj_is_nonzero_multiple_of_8k_is_nonzero_multiple_of_8bad_dimensions_for_cublass         N/home/james-whalen/.local/lib/python3.13/site-packages/torchao/kernel/intmm.pysafe_int_mmr'      s~     0@ @<<%		!!))5;;tzz|  **22EKKMM ;;%,,& 
=dkk]%PUP\P\~^& 4;;++U\\->->??J"'++a.1"4"9!SAQR@R"&**Q-!"3q"8!Qtzz!}q?P"A'A! .||EIIKNN5;;7u{{9STWWLL
 	

  !!Aa  	
**22EKKMM 
 ||EHHU]]3TWWU]]5KLOOKK
 	

s   >AL A8N ?N abc                     [         b4  [        (       a)  [        R                  R                  R                  X5      $ [        X5      $ )a<  
Performs integer matrix multiplication using intmm_triton if available and autotuner is enabled,
otherwise falls back to safe_int_mm.

Args:
    a (torch.Tensor): The first matrix to multiply.
    b (torch.Tensor): The second matrix to multiply.

Returns:
    torch.Tensor: The result of the matrix multiplication.
)r   AUTOTUNER_ENABLEr   r   torchao
int_matmulr'   )r(   r)   s     r&   r-   r-   [   s6     $4$4yy  ++A11q    scales1c                 l   U R                   u  p4UR                   u  pEX2R                  S5      :X  d  UR                  5       S:X  d   eSUR                  S5      :X  d   eUR                  5       (       d   eUR	                  X545      nUR                  5       S:X  d   e[        UR                  5      (       a4  [        R                  " X5      nUR                  UR                  5      U-  $ [        b5  [        (       a*  [        R                  R                  R!                  XU5      $ [#        X5      nXb-  $ )a  
Performs scaled integer matrix multiplication.

Args:
    a (torch.Tensor): The first matrix to multiply.
    b (torch.Tensor): The second matrix to multiply.
    scales1 (torch.Tensor): The scaling factors for the rows of the result.

Returns:
    torch.Tensor: The result of the scaled matrix multiplication.

Raises:
    AssertionError: If the dimensions of the input tensors do not match the expected shapes.
r   r      )r   sizenumelr   expanddimr   r   r   _int_mmr   dtyper   r+   r   r,   int_scaled_matmulr'   )r(   r)   r/   MKNcs          r&   r8   r8   l   s    " 77DA77DAQ7==?a#777Q  """"nnaV$G;;=A(( MM!ttGMM"W,,$4$4yy  221AAAA;r.   )loggingosr   torch._dynamor   r   !torch._higher_order_ops.out_dtyper   torchao.utilsr   	getLogger__name__logger
addHandlerNullHandlertorchao.kernelr   ImportErrorwarningboolintgetenvr+   Tensorr'   r-   r8    r.   r&   <module>rO      s    	  = 7 +			8	$   '%%' (+ BII&@!DEF :
u|| :
5<< :
ELL :
z%,, 5<< ELL "#||##/4||#
\\#q  
NN^ Ls   C? ?DD