
    hZ&                        S SK Jr  S SKrS SKJr  S SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJrJr  S SKJr  S S	KJr   " S
 S\R(                  R*                  5      r " S S\R(                  R*                  5      r " S S\R(                  R*                  5      r " S S\R2                  5      r\" \SS9r\" \SSS9r\" \SS9r " S S\R(                  R*                  5      r " S S\R2                  5      rg)    )partialN)dequantize_rowwise)int8_matmul_mixed_dequantize)int8_matmul_rowwise_dequantize)!quantize_columnwise_and_transpose)quantize_globalquantize_global_transpose)quantize_rowwise)is_triton_availablec                   4    \ rS rSr\S 5       r\S 5       rSrg)_switchback_global   c                    UR                  SUR                  S5      5      n[        U5      u  pV[        U5      u  pxXB4U l        [        XWR                  5       XhU5      R                   " / UR                  5       S S QSP76 $ Nviewsizer
   r   save_for_backwardr   t	ctxX_3DWbiasXX_int8state_XW_int8state_Ws	            ^/home/james-whalen/.local/lib/python3.13/site-packages/bitsandbytes/nn/triton_based_modules.pyforward_switchback_global.forward   s     IIb$))B-( +1-)!, !" ,FHHJRVW\\s^b^g^g^ijmkm^nsprss    c                 4   UR                  SUR                  S5      5      nS =n=pEU R                  u  pgU R                  S   (       aX  [	        U5      u  p[        U5      u  p[        XR                  5       XS 5      R                  " / UR                  5       S S QSP76 nU R                  S   (       a>  [        R                  " UR                  5       UR                  UR                  5      5      nU R                  S   (       a  UR                  SS9nX4U4$ Nr   r         dim)reshaper   r   needs_input_gradr
   r	   r   r   r   torchmatmultodtypesum)r   G_3DGgrad_Xgrad_W	grad_biasr   r   G_int8state_Gr   r    s               r!   backward_switchback_global.backward)   s     LLTYYr]+&***$$" /q1OF7:OF1&((*gX\]bb Sb!F "\\!##%agg7F"!Iy((r$    N__name__
__module____qualname____firstlineno__staticmethodr"   r9   __static_attributes__r;   r$   r!   r   r      s*    t t ) )r$   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)_switchback_vectorrizeD   c                    UR                  SUR                  S5      5      nXB4U l        [        U5      u  pV[        U5      u  px[	        XWR                  5       XhU5      R                   " / UR                  5       S S QSP76 $ r   )r   r   r   r
   r   r   r   s	            r!   r"   _switchback_vectorrize.forwardE   s     IIb$))B-( ! +1-*1- .fhhj'TXY^^u`d`i`i`klomo`purtuur$   c                 4   U R                   u  p#UR                  SUR                  S5      5      nS =n=pgU R                  S   (       aX  [	        U5      u  p[        U5      u  p[        XR                  5       XS 5      R                  " / UR                  5       S S QSP76 nU R                  S   (       a>  [        R                  " UR                  5       UR                  UR                  5      5      nU R                  S   (       a  UR                  SS9nXVU4$ r&   )r   r+   r   r,   r
   r   r   r   r   r-   r.   r/   r0   r1   )r   r2   r   r   r3   r4   r5   r6   r7   r8   r   r    s               r!   r9   _switchback_vectorrize.backwardT   s    $$LLTYYr]+&***" /q1OF?BOF3FHHJZ^_dd Sb!F "\\!##%agg7F"!Iy((r$   r;   Nr<   r;   r$   r!   rD   rD   D   s*    v v ) )r$   rD   c                   4    \ rS rSr\S 5       r\S 5       rSrg) _switchback_global_mem_efficientn   c                    UR                  SUR                  S5      5      nUR                  5       n[        U5      u  pgA[        U5      u  pXgX4U l        [        XhR                  5       XyU5      R                   " / US S QSP76 $ r   r   )
r   r   r   r   r   X_3D_szr   r   r   r    s
             r!   r"   (_switchback_global_mem_efficient.forwardo   s     IIb$))B-())+ +1-)!, !' @ ,FHHJRVW\\o^efigi^jolnoor$   c                 x   UR                  SUR                  S5      5      nUR                  5       nS =n=pVU R                  u  pxpU R                  S   (       aK  [	        Xx5      nA[
        R                  " UR                  5       UR                  UR                  5      5      nAU R                  S   (       a  UR                  SS9nU R                  S   (       a\  [        U5      u  pAU	R                  5       R                  5       n	[        XR                  5       XS 5      R                  " / US S QSP76 nXEU4$ )Nr   r'   r(   r   r)   )r+   r   r   r,   r   r-   r.   r   r/   r0   r1   r
   
contiguousr   r   )r   r2   r3   G_3D_szr4   r5   r6   r   r   r   r    real_Xr7   r8   s                 r!   r9   )_switchback_global_mem_efficient.backward   s    LLTYYr]+))+&***+.+@+@("'8F\\!##%177);<F"!I".q1OFXXZ**,F1&((*gX\]bbudklomodpurtuFy((r$   r;   Nr<   r;   r$   r!   rK   rK   n   s*    p p" ) )r$   rK   c                   Z   ^  \ rS rSr     SS\S\S\S\S\4
U 4S jjjrS rS	 rS
r	U =r
$ )SwitchBackLinear   in_featuresout_featuresr   vector_wise_quantizationmem_efficientc                   > [         TU ]  XX4U5        [        5       (       d  [        S5      eX`l        U R                  (       a*  [
        U l        U(       a  [        S5        [        S5        g g U(       a  [        U l        g [        U l        g )NzCould not import triton. Please install triton to use SwitchBackLinear.
                               Alternatively, you can use bnb.nn.SwitchBackLinearBnb, but it will be slowerz<mem efficient is not supported for vector-wise quantization.r'   )super__init__r   ImportErrorrZ   rD   _fnprintexitrK   r   )	selfrX   rY   r   devicer0   rZ   r[   	__class__s	           r!   r^   SwitchBackLinear.__init__   sx     	D%H"$$ o p p )A%((-DHTUQ  ;-r$   c                     [        S5        U R                  (       a  [        U R                  5      u  pO[	        U R                  5      u  pU R                  SU5        U R                  SU5        U ?g )Nz=> preparing for eval.r   r    )ra   rZ   r
   weightr   register_buffer)rc   r   r    s      r!   prepare_for_eval!SwitchBackLinear.prepare_for_eval   s]     	&'((.t{{;OFG-dkk:OFXv.Y0Kr$   c                    U R                   (       a0  U R                  R                  XR                  U R                  5      $ [        U S5      (       d0  U R                  R                  XR                  U R                  5      $ UR                  SUR                  S5      5      n[        U5      u  p4U R                  (       a\  [        X0R                  R                  5       X@R                  U R                  5      R                  " / UR                  5       S S QSP76 $ [        X0R                  R                  5       X@R                  U R                  5      R                  " / UR                  5       S S QSP76 $ )Nr   r   )trainingr`   applyrh   r   hasattrr   r   r
   rZ   r   r   r   r    r   )rc   xr   r   r   s        r!   r"   SwitchBackLinear.forward   s!   ==88>>![[$))<< 4**xx~~adii@@ r166":&A.q1OF,,5fkkmmowXdXdfjfofopuu VVXcr] 
 4FKKMMOWVbVbdhdmdmnss VVXcr] r$   )r`   rZ   )TNNFF)r=   r>   r?   r@   intboolr^   rj   r"   rB   __classcell__)re   s   @r!   rV   rV      s]    
 ).#.. . 	. #'. . .:& r$   rV   F)rZ   T)rZ   r[   c                   8    \ rS rSr\SS j5       r\S 5       rSrg)StandardLinearFunction   Nc                 :   UR                  SUR                  S5      5      nU R                  XBU5        UR                  UR	                  5       5      nUb"  XSR                  S5      R                  U5      -  nUR                   " / UR                  5       S S QSP76 $ )Nr   r   )r   r   r   r.   r   	unsqueeze	expand_as)r   inputrh   r   r   outputs         r!   r"   StandardLinearFunction.forward   s    JJr5::b>*a.fhhj)nnQ'11&99F{{2EJJL"-2r22r$   c                    U R                   u  p#nUR                  SUR                  S5      5      nS =n=pxU R                  S   (       aM  UR	                  UR                  UR                  5      5      R                  " / UR                  5       S S QSP76 nU R                  S   (       a8  UR                  5       R	                  UR                  UR                  5      5      nUb%  U R                  S   (       a  UR                  S5      nXgU4$ )Nr   r   r'   r(   )
saved_tensorsr+   r   r,   r.   r/   r0   r   r   r1   )	r   grad_output_3Dr{   rh   r   grad_output
grad_inputgrad_weightr6   s	            r!   r9   StandardLinearFunction.backward   s    !//t$,,R1D1DR1HI/33
3["$++FIIk6G6G,HINNoP^PcPcPefigiPjolnoJ"%--/00+:K:K1LMK 4 4Q 7#*I	11r$   r;   Nr<   r;   r$   r!   rv   rv      s(    3 3 2 2r$   rv   c                       \ rS rSrS rSrg)StandardLineari  c                 V    [         R                  XR                  U R                  5      $ r   )rv   rn   rh   r   )rc   rp   s     r!   r"   StandardLinear.forward  s    %++A{{DIIFFr$   r;   N)r=   r>   r?   r@   r"   rB   r;   r$   r!   r   r     s    Gr$   r   ) 	functoolsr   r-   torch.nnnn&bitsandbytes.triton.dequantize_rowwiser   0bitsandbytes.triton.int8_matmul_mixed_dequantizer   2bitsandbytes.triton.int8_matmul_rowwise_dequantizer   5bitsandbytes.triton.quantize_columnwise_and_transposer   #bitsandbytes.triton.quantize_globalr   r	   $bitsandbytes.triton.quantize_rowwiser
    bitsandbytes.triton.triton_utilsr   autogradFunctionr   rD   rK   LinearrV   SwitchBackLinearGlobal"SwitchBackLinearGlobalMemEfficientSwitchBackLinearVectorwiserv   r   r;   r$   r!   <module>r      s       E B @))00 ))X')U^^44 ')T))u~~'>'> ))XFryy FR !!1ER %,-=X]mq%r "$%5PTU 2U^^44 2:GRYY Gr$   