
    h                         S SK r S SKrS SKJr  S SKJs  Jr  SrSrS r	 " S S\R                  R                  5      r " S S\R                  R                  5      r " S S	\R                  5      rg)
    Ng   Ј Bc                     S[        S[        U5      -  U R                  5       S-
  -  5      -   nU R                  S5      R	                  U5      R
                  $ )z,Return the value that is larger than q% of t   g{Gz?)roundfloatnumelviewkthvaluevalues)tqks      T/home/james-whalen/.local/lib/python3.13/site-packages/torchao/sparsity/supermask.py
percentiler      sG    	E$q/QWWY]344A66":q!(((    c                   8    \ rS rSrSr\S 5       r\S 5       rSrg)	GetSubnet   Supermask STE functionc                     UR                  [        [        S9n[        XTS-  5      n[        R
                  " XV:  UR                  UR                  5      UR                  UR                  5      5      $ )N)minmaxd   )clamp
SCORES_MIN
SCORES_MAXr   torchwheretodevice)ctxscoreszerosonessparsityclamped_scoresk_vals          r   forwardGetSubnet.forward   sW    **E>c>:{{"EHHV]]$;TWWV]]=S
 	
r   c                     US S S 4$ N )r!   gs     r   backwardGetSubnet.backward   s    $d""r   r,   N	__name__
__module____qualname____firstlineno____doc__staticmethodr(   r.   __static_attributes__r,   r   r   r   r      s+     
 
 # #r   r   c                   8    \ rS rSrSr\S 5       r\S 5       rSrg)	ApplyMask#   r   c                 
    X-  $ r+   r,   )r!   weightr"   s      r   r(   ApplyMask.forward&   s
    r   c                 f    S =p#U R                   S   (       a  UnU R                   S   (       a  UnX#4$ )Nr   r   )needs_input_grad)r!   grad_outputgrad_weightgrad_scoress       r   r.   ApplyMask.backward*   s9    $(("%K"%K''r   r,   Nr0   r,   r   r   r9   r9   #   s+       ( (r   r9   c                   `   ^  \ rS rSrSrU 4S jrS rS r\  S	S j5       r	\S 5       r
SrU =r$ )
SupermaskLinear4   z Supermask class for Linear layerc                 n  > [         [        U ]
  " U0 UD6  SS[        R                  " U R
                  R                  5        Vs/ s H  n[        R                  " Xr-  5      PM     sn5      -  -
  nXl        U R                  U:  aA  [        SU R                   SU 3SU R
                  R                  5        SU S35        Xl        X l        SU l        [        R                  " [        R                   " U R
                  R                  5        V	s/ s H.  n	[#        S[%        [        R                  " X-  5      5      5      PM0     sn	5      U(       + S9U l        [        R(                  R+                  U R&                  [        R,                  " S	5      S
9  U(       + U R
                  l        g s  snf s  sn	f )Nr   zreducing sparsity from z to z'(maximum sparsity for layer with shape z and tile size )F)requires_grad   )a)superrE   __init__mathprodr<   sizeceilsparsity_levelprintr%   	blocksizesparsify_weightsnn	Parameterr   emptyr   intr"   initkaiming_uniform_sqrtrI   )selfrR   rT   
fixed_maskfixed_weightargskwargsr   max_sparsity_levelwn	__class__s             r   rM   SupermaskLinear.__init__7   sh    	ot-t>v>		T[[=M=M=OP=O499Q]3=OPQQ
 -!33)$--=O<PQ9$++:J:J:L9M_]f\gghi #5" %llKKBF++BRBRBTUBTBQDIIbn567BTU ).	
 	  		! = )5$4!+ Q Vs   "F-5F2c                    [         R                  U R                  [        R                  " U R                  5      [        R
                  " U R                  5      U R                  5      nU R                  S:w  aZ  [        U R                  R                  5       H7  u  p#UR                  U R                  US9n[        R                  " XSU5      nM9     U$ )Nr   )dimr   )r   applyr"   r   
zeros_like	ones_likerR   rT   	enumerater<   shaperepeat_interleavenarrow)r]   subnetir   s       r   get_maskSupermaskLinear.get_maskT   s    KKT[[)OODKK(	
 >>Q!$++"3"3411$..a1HfA6 5 r   c                     U R                  5       n[        R                  U R                  U5      n[        R
                  " XU R                  5      $ r+   )rq   r9   rh   r<   Flinearbias)r]   xro   ws       r   r(   SupermaskLinear.forwardc   s6    OODKK0xxdii((r   c           
      $   [        U[        R                  R                  5      (       d   e[	        UUSSUR
                  UR                  UR                  SLS9R                  UR                  R                  UR                  R                  S9nUR                  R                  R                  UR                  R                  5        UR                  b9  UR                  R                  R                  UR                  R                  5        U$ )zE
Main entrypoint for creating a SupermaskLinear from a Linear layer.
FNrv   r    dtype)
isinstancer   rV   LinearrE   in_featuresout_featuresrv   r   r<   r    r}   datacopy_)clsru   rR   rT   supermask_linears        r   from_linearSupermaskLinear.from_linearh   s     &%((//2222*D(
 "FMM((0C0C"
D 	 	$$**6==+=+=>;;"!!&&,,V[[-=-=>r   c                    Un[         R                  R                  UR                  UR                  UR
                  SLS9R                  UR                  R                  UR                  R                  S9nUR                  5       nUR                  R                  R                  UR                  U-  5        UR
                  b9  UR
                  R                  R                  UR
                  R                  5        U$ )z\
Convert a SupermaskLinear to a Linear layer.
Replaces the old sparsify_offline() function.
Nr{   r|   )r   rV   r   r   r   rv   r   r<   r    r}   rq   r   r   )r   r   r]   ru   masks        r   	to_linearSupermaskLinear.to_linear   s      $& ! 
 "DKK&&dkk.?.?"
@	 	 }}  t!3499 KK""499>>2r   )rT   r"   rU   rR   )g        r   )r1   r2   r3   r4   r5   rM   rq   r(   classmethodr   r   r7   __classcell__)rd   s   @r   rE   rE   4   sE    *5:)
  	   2  r   rE   )rN   r   torch.nnrV   torch.nn.functional
functionalrt   r   r   r   autogradFunctionr   r9   r   rE   r,   r   r   <module>r      sc        

)#'' # ('' ("`bii `r   