
    h%                     :   S SK JrJrJr  S SKrS SKJr  S SKJrJ	r	  S SK
JrJr  S SKJr  \R                  R                   r\" S5      S\R"                  S	\R"                  S
\R"                  S\S\S\R"                  4S j5       r\" S5      S\R"                  S	\R"                  S
\R"                  S\S\S\R"                  4S j5       r\" S5      S\R"                  S	\R"                  S
\R"                  S\R"                  S\R"                  S\R"                  S\R"                  4S j5       r\" S5      S\R"                  S	\R"                  S
\R"                  S\R"                  S\R"                  S\R"                  S\R"                  4S j5       r\" S5      S\R"                  S\R"                  S	\R"                  S
\R"                  S\S\S\R"                  S\R"                  4S j5       r\" S5      S\R"                  S\R"                  S	\R"                  S
\R"                  S\S\S\R"                  S\R"                  4S j5       r " S S\5      r\R4                  r\" \R6                  R8                  5      S 5       r\" \R<                  R8                  5      S 5       r\" \R@                  R"                  5      S 5       r!\" \RD                  RF                  5      S  5       r$\" \RJ                  R8                  5      S! 5       r&\" \RN                  R8                  5      S" 5       r(\" \RR                  R8                  5      S# 5       r*\" \RV                  R8                  5      S$ 5       r,\" \RZ                  R\                  R^                  5      S% 5       r0g)&    )ListOptionalTupleN)return_and_correct_aliasing)broadcast_batch_dimsbsr_dense_addmm)register_custom_opregister_custom_op_impl)TorchAOBaseTensorzblocksparse::bsr_to_densecrow_indicescol_indicesvaluesMKreturnc                 L    [         R                  " XX#U4S9R                  5       $ )N)r   r   r   size)torchsparse_bsr_tensorto_denser   r   r   r   r   s        V/home/james-whalen/.local/lib/python3.13/site-packages/torchao/sparsity/blocksparse.pybsr_to_denser      s)     ""!6TUPVhj    c                 X    [         R                  " X44UR                  UR                  S9$ )Ndtypedevice)r   emptyr   r   r   s        r   bsr_to_dense_abstractr        s!     ;;vV\\&--HHr   zblocksparse::int_addmmA
left_alpharight_alphac                 j   UR                   [        R                  :X  d   eUR                  S   nUR                  S   nUR                  S   n[        R                  " XX&U4S9n	[        [        X5      n
UR                  XU4-   [        R                  S9n[        UU	USSUUUS9R                  5       $ )Nr   )r      r   )alphabetaoutr"   r#   )r   r   int8shaper   r   blocksparse_int_addmm	new_emptybfloat16r   t)r   r   r   r!   r"   r#   r   r   N
weight_bsroriginal_batch_dims_broadcastedr+   s               r   r.   r.   +   s     <<5::%%%A	A	A((FUVQWXJ&:z'# ++5A>enn+
UC		 ac	
r   c                     UR                   S   nUR                   S   n[        R                  " Xv4[        R                  UR                  S9R                  5       $ )Nr%   r   )r-   r   r   r0   r   r1   )r   r   r   r!   r"   r#   r2   r   s           r   blocksparse_int_addmm_abstractr6   I   sG     	
AA;;vU^^AHHEGGIIr   zblocksparse::addmmx_paddedbiasc           	          Ub   e[         R                  " XX4U4S9nU R                  S   nU R                  XH45      n	[	        U	UU SSU	S9  U	$ )Nr'   r(   r   )r)   r*   r+   )r   r   r-   r/   r   )
r7   r   r   r   r   r   r8   bsrN_paddedr+   s
             r   blocksparse_addmmr<   X   se     <<

!
!,Va&
QC~~a H


a]
+C Jr   c                 D    U R                   S   nU R                  XG45      $ )Nr(   )r-   r/   )r7   r   r   r   r   r   r8   r;   s           r   blocksparse_addmm_abstractr>   q   s%     ~~a Hqm,,r   c                      \ rS rSr% \\R                     \S'   \\R                     \S'   \\R                     \S'   \\S'   / SQr	\
 SS\R                  S\S\\R                     S\\R                     S\\R                     S\4S	 jj5       rS
\4S jrS
\\\   \\R                  \\4   4   4S jr\S\\R                  \\4   S
\R                  4S j5       r\S 5       rS rSrg)BlockSparseTensor   bsr_crow_indicesbsr_col_indices
bsr_values	blocksize)rB   rC   rD   r-   requires_gradc                     Uc  [        S5      eUnUR                  UR                  UR                  US.n[        R
                  R                  " X40 UD6n	X)l        X9l        XYl	        XIl
        U	$ )NzCNo values passed to BlockSparseTensor: bsr_values must be provided!)r   r   layoutrF   )
ValueErrorr   r   rH   r   Tensor_make_wrapper_subclassrE   rB   rD   rC   )
clsr-   rE   rB   rC   rD   rF   previous_tensorkwargstensors
             r   __new__BlockSparseTensor.__new__   s     U  )O &,,$**%,,*	
 44SJ6J$"2&!0r   r   c                 t    [        U S5      (       d   eU R                  R                   SU R                   S3$ )Nr-   z(shape=))hasattr	__class____name__r-   )selfs    r   __repr__BlockSparseTensor.__repr__   s7    tW%%%%..))*'$**Q??r   c                    ^  [        [        U 4S jT R                  5      5      nT R                  T R                  T R
                  4nX4$ )Nc                     > [        TU 5      S L$ )N)getattr)xrW   s    r   <lambda>6BlockSparseTensor.__tensor_flatten__.<locals>.<lambda>   s    WT1-T9r   )listfilter	__slots__r-   rF   rE   )rW   inner_tensorstensor_metas   `  r   __tensor_flatten__$BlockSparseTensor.__tensor_flatten__   sA    94>>J
 zz4#5#5t~~F))r   rd   c           
          Uu  pVnU " UUUR                  SS 5      UR                  SS 5      UR                  SS 5      US9$ )NrB   rC   rD   r-   rE   rB   rC   rD   rF   )get)rL   rc   rd   
outer_sizeouter_strider-   rF   rE   s           r   __tensor_unflatten__&BlockSparseTensor.__tensor_unflatten__   sV     +6'i*../A4H)--.?F$((t<'
 	
r   c           	          UR                  U5      nU " UR                  UUR                  5       UR                  5       UR	                  5       SS9$ )NFrh   )to_sparse_bsrr-   r   r   r   )rL   dense_tensorrE   
bsr_tensors       r   
from_denseBlockSparseTensor.from_dense   sR    !//	:
$$'446&224!((*
 	
r   c           	          [        U R                  U R                  U" U R                  5      U" U R                  5      U" U R
                  5      U R                  S9$ )Nrh   )r@   r-   rE   rB   rC   rD   rF   )rW   funcs     r   apply_fn_to_shard#BlockSparseTensor.apply_fn_to_shard   sN     **nn!$"7"78 !5!56DOO,,,
 	
r    N)F)rV   
__module____qualname____firstlineno__r   r   rJ   __annotations__intrb   staticmethodSizeboolrP   strrX   r   r   re   classmethodrl   rr   rv   __static_attributes__rx   r   r   r@   r@      s6   u||,,ell++&&NEI $zz  #5<<0	
 "%,,/ U\\*  :@# @*E$s)U5::tS;P5Q*Q$R * 
 5::tS01
 

 
$ 

 


r   r@   c                 Z    [        XX2S   R                  [        R                  5      5      $ Nr   )r   rv   r   detachru   typesargsrN   s       r   block_sparse_detachr      s'    &FG55ellC r   c           	      t   [        U5      S:X  d   e[        U5      S:X  d   eUS   S:X  d   eUS   nUR                  5       S:X  d   eUR                  (       a   e[        UR                  S-   UR
                  UR                  5       UR                  5       UR                  5       R                  S5      SS9$ )N   r   r%   )r(   F)rF   )
lendimrF   r@   r-   rE   r   r   r   	unsqueeze)ru   r   r   rN   r:   s        r   block_sparse_unsqueezer      s    t9>>v;!8q==
q'C779>>    		D

r" r   c                     [        U5      S:X  d   e[        U5      S:X  d   eUu  pES n[        U[        R                  5      (       a  [        U[        5      (       a  U" XT5      $ U" XE5      $ )Nr   r   c                    [        U [        5      (       d   e[        U[        R                  5      (       d   eU R	                  5       S:X  d   eUR	                  5       S:X  d   eU R
                  (       a   eUR                  S5      S:X  d   eUR                  UR                  S5      UR                  S5      U R                  -  U R                  S5      nUR                  SS5      R                  SU R                  5       5      nU R                  5       U-  n[        U R                  U R                  U R                  5       U R                  5       U5      $ )N   r   r(   )
isinstancer@   r   rJ   r   rF   r   viewrE   	transposeindex_selectr   r   r-   r   )r:   r1   	t_blockedmasked_t
new_valuess        r   my_mul block_sparse_mul.<locals>.my_mul   s   #01111!U\\****wwyA~~uuw!||$$$$vvayA~~FF166!9affQi3==&@#--QRS	&&q!,99!S__=NOZZ\H,
 IIs}}c&6&6&8#//:KZ
 	
r   )r   r   r   rJ   r@   )ru   r   r   rN   r:   r1   r   s          r   block_sparse_mulr      sd    t9>>v;!FC
 #u||$$A7H)I)Ia~#>r   c                    Uu  pE[        U5      [        :X  d   e[        U5      S:X  d   eUS   nUS:X  d   e[        R                  R
                  R                  UR                  5       UR                  5       UR                  S   5      $ )Nr(   r   )
typer`   r   r   opsblocksparsesumr   r   r-   )ru   r   r   rN   r:   r   s         r   block_sparse_sumr     sv    HC9s8q==
a&C!8O899  $$SZZ\33C3C3EsyyQR|TTr   c                 <    US   R                   R                  5       $ r   )rD   r   r   s       r   block_sparse_valuesr     s    7$$&&r   c                 <    US   R                   R                  5       $ r   )rB   r   r   s       r   block_sparse_crow_indicesr   !  s    7##**,,r   c                 <    US   R                   R                  5       $ r   )rC   r   r   s       r   block_sparse_col_indicesr   &  s    7""))++r   c                 :    US   R                   R                  S   $ r   )rD   r-   r   s       r   block_sparse__nnzr   +  s    7##A&&r   c           	         Uu  pEnUR                  SUR                  S5      5      R                  5       nUR                  S   nUR                  S   n	[        R
                  R                  R                  UUR                  5       UR                  5       UR                  5       UU	S 5      n
U
R                  5       nUc  U$ X-   $ )Nr%   r   r(   )reshaper   r1   r-   r   r   r   addmmr   r   r   )ru   r   r   rN   x_origwr8   r]   r   r   r+   out_origs               r   block_sparse_linearr   0  s    OFtr6;;r?+--/A	
A	
A
))


%
%				
		C uuwH|?r   )1typingr   r   r   r   torch.utils._python_dispatchr   torchao.kernel.bsr_triton_opsr   r   torchao.opsr	   r
   torchao.utilsr   r   atenrJ   r}   r   r    r.   r6   r<   r>   r@   
implementsr   defaultr   r   r   mulr   r   dim_IntListr   r   r   r   r   r   r   _nnzr   nn
functionallinearr   rx   r   r   <module>r      sL   ) (  D O C +yy~~ 45	,,		 LL	 		
 	 \\	 6	 /0I,,II LLI 	I
 I \\I 1I 12
,,

 LL
 ||	

 
 
 \\
 3
: ,-J,,JJ LLJ ||	J
 J J \\J .J -.ll,,  LL	
   ,, \\ /0 ()
-ll
-,,
- 
- LL	
-
 
- 
- ,,
- \\
- *
-Y
) Y
z ))
 DKK  ! DNN""# $" DHHOO 0 DHH  !U "U DKK ' !' D%%&- '- D$$%, &, DII' ' EHH&&' (r   