
    h7                        S SK Jr  S SKrS SKJr  S SKrS SKrS SKJrJ	r	  S SK
Jr  S r " S S\R                  R                  5      r " S S	\R                  R                  5      r " S
 S\R                  R                  5      rS r   SS\R(                  S\R(                  S\R(                  S\R(                  S\\R(                     S\S\4S jjr   SS\R(                  S\R(                  S\R(                  S\R(                  S\\R(                     S\S\4S jjr    SS\R(                  S\R(                  S\\R(                     S\\	   4S jjrg)    )reduceN)Optional)GlobalOutlierPoolerMatmulLtStatec                 8    [        [        R                  U S5      $ )N   )r   operatormul)iterables    c/home/james-whalen/.local/lib/python3.13/site-packages/bitsandbytes/research/autograd/_functions.pyprodr      s    (,,!,,    c                   8    \ rS rSr\SS j5       r\S 5       rSrg)MatMulFP8Mixed   Nc                    SU l         [        UR                  5      S:X  a  SU l         Xl        X l        UR                  nUR                  S   US   :X  a=  [
        R                  " UR                  S S USS  -   UR                  UR                  S9$ [
        R                  " UR                  S S US S -   UR                  UR                  S9$ [        R                  " XUS9u  p[        R                  " XUS9R                  UR                  5      n[        R                  " UR                  5       US	9u  p[        R                  " X5      R                  UR                  5      n[
        R                   " X5      nX@l        XPl        X`l        Xpl        UR                  UR                  sU l        U l        [/        U R0                  S S
 5      (       a
  X4U l        U$ SU l        U$ )NFr   Tr   dtypedevicecode	blocksizer   r      NN)is_emptyr   shapeABtorchemptyr   r   Fquantize_blockwisedequantize_blockwisetoquantizefloat
dequantizematmulfw_codebw_codebszbsz2dtype_Adtype_Banyneeds_input_gradtensorsctxr    r!   outr,   r-   r.   r/   B_shapecAstatefp8AcBfp8Boutputs                  r   forwardMatMulFP8Mixed.forward   s    =ACLEEggGwwr{gaj({{1773B<'!"+#=QWWUVU]U]^^{{1773B<'"1+#=QWWUVU]U]^^ ((CH	%%b3?BB177KJJqwwyw7	||B&))!''2d)
 #$77AGG S[s##BQ'(()CK  'CKr   c                    U R                   (       aE  [        R                  " U R                  5      [        R                  " U R                  5      S S S S S 4$ U R
                  u  p#        nU R                  u  pVSu  px[        R                  " XR                  U R                  S9u  p[        R                  " XU R                  S9R                  UR                  5      nU(       aV  [        R                  " XR                  5       R                  UR                  5      5      R                  UR                  5      nU(       a  [!        UR"                  5      S:X  a!  UR%                  SS5      R'                  5       nO UR%                  SS5      R'                  5       n[        R                  " UR                  UR                  5      U5      R                  UR                  5      nXxS S S S S 4$ )Nr   r   r      r   r   r   )r   r"   
zeros_liker    r!   r3   r4   r$   r%   r-   r/   r&   r'   r   r+   tlenr   	transpose
contiguous)r6   grad_output	req_gradA	req_gradB_r    r!   grad_Agrad_B	cgrad_outr:   fp8outAts                r   backwardMatMulFP8Mixed.backward?   sp   <<##CEE*E,<,<SUU,CT4QUW[]aaa.1.B.B+	aAq!{{# //++Y\YaYab	''	CHHMPPQ\QbQbc \\&##%((6<<*@ADDQWWMF177|q [[A&113[[A&113 \\"%%(9(9":KHKKAGGTFtT4t;;r    NNN   rU   __name__
__module____qualname____firstlineno__staticmethodr?   rQ   __static_attributes__rS   r   r   r   r      +     ' 'R "< "<r   r   c                   8    \ rS rSr\SS j5       r\S 5       rSrg)MatMulFP8Globale   Nc                    SU l         [        UR                  5      S:X  a  SU l         Xl        X l        UR                  nUR                  S   US   :X  a=  [
        R                  " UR                  S S USS  -   UR                  UR                  S9$ [
        R                  " UR                  S S US S -   UR                  UR                  S9$ [        R                  " UR                  5       US9u  p[        R                  " X5      R                  UR                  5      n[        R                  " UR                  5       US9u  p[        R                  " X5      R                  UR                  5      n[
        R                  " X5      nX@l        XPl        X`l        Xpl        UR                  UR                  sU l        U l        [+        U R,                  S S 5      (       a
  X4U l        U$ S	U l        U$ )
NFr   Tr   r   r   r   r   r   )r   r   r   r    r!   r"   r#   r   r   r$   r(   r)   r*   r'   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   s                  r   r?   MatMulFP8Global.forwardi   s    =ACLEEggGwwr{gaj({{1773B<'!"+#=QWWUVU]U]^^{{1773B<'"1+#=QWWUVU]U]^^ JJqwwyw7	||B&))!''2JJqwwyw7	||B&))!''2d)
 #$77AGG S[s##BQ'(()CK  'CKr   c                    U R                   (       aE  [        R                  " U R                  5      [        R                  " U R                  5      S S S S S 4$ U R
                  u  p#        nU R                  u  pVSu  px[        R                  " UR                  5       U R                  S9u  p[        R                  " X5      R                  UR                  5      nU(       aV  [        R                  " XR                  5       R                  UR                  5      5      R                  UR                  5      nU(       Ga  [!        UR"                  5      S:X  a!  UR%                  SS5      R'                  5       nO UR%                  SS5      R'                  5       n[        R                  " UR                  5       U R(                  S9u  p[        R                  " X5      R                  UR                  5      n[        R                  " UR                  UR                  5      U5      R                  UR                  5      nXxS S S S S 4$ )Nr   r   rB   r   r   r   )r   r"   rC   r    r!   r3   r4   r$   r(   r)   r-   r*   r'   r   r+   rD   rE   r   rF   rG   r,   )r6   rH   rI   rJ   rK   r    r!   rL   rM   rN   r:   rO   rP   r9   fp8Ats                  r   rQ   MatMulFP8Global.backward   s   <<##CEE*E,<,<SUU,CT4QUW[]aaa.1.B.B+	aAq!{{# ::k&7&7&9L	i/22;3D3DE \\&##%((6<<*@ADDQWWMF177|q [[A&113[[A&113

288:CKK@IBLL+..qww7E\\%((6<<"8&ADDQWWMFtT4t;;r   rS   rT   rV   rS   r   r   r_   r_   e   r]   r   r_   c                   F    \ rS rSr\SS\\   4S jj5       r\S 5       rSr	g)SwitchBackBnb   Nr:   c                    U=(       d
    [        5       nSU l        [        UR                  5      S:X  a  SU l        Xl        X l        X@l        UR                  S   UR                  S   :X  aG  [        R                  " UR                  S S UR                  SS  -   UR                  UR                  S9$ [        R                  " UR                  S S UR                  S S -   UR                  UR                  S9$ UR                  nUR                  c  [        R                  " 5       Ul        UR                  [        R                  :w  a$  [        R                   " SUR                   S35        [#        UR                  5      S	:X  a-  UR%                  SUR                  S   5      R'                  5       n[(        R*                  " UR-                  [        R                  5      UR.                  S
9u  pxpnUR.                  S:  a  Ub  UR0                  (       aE  UnSUS S 2U4'   US S 2U4   nUS S 2U4   R3                  5       R'                  5       Ul        Xl        OhUR8                  c  UR:                  R                  S4Ul        O=UR0                  (       d*  UR8                  c  UR:                  R                  S4Ul        S nUR0                  (       Ga  [=        USS 5      S LnUR?                  5       (       + =(       a!    UR                  S   URA                  S5      :H  nU(       a  UR'                  5       nURB                  (       a  U(       a  UR8                  cz  URE                  5         [(        R*                  " UR-                  [        R                  5      5      u  Ul        Ul#        Ul$        Ul%        nUR:                  R                  S4Ul        OSnUb  UR0                  (       d  Xl        UR:                  S S 2UR6                  RM                  5       4   RO                  5       nUURH                  R%                  SS5      -  S-  R3                  5       R'                  5       R-                  UR                  5      Ul        SUS S 2UR6                  RM                  5       4'   US S 2UR6                  RM                  5       4   nUR8                  S   n[#        U5      S	:X  a  US   US   US   4nO
US   US   4n[(        RP                  " XuR:                  5      nUb  UR                  [        R                  :X  a:  [(        RR                  " UXRH                  US9R-                  UR                  5      nOJ[(        RR                  " UXRH                  S S9R-                  UR                  5      nURU                  U5        Ub&  Wb#  U[        RV                  " XR4                  5      -  nXPl,        X`l-        UR                  UR                  Uc  S OUR                  sU l.        U l/        U l0        [c        U Rd                  S S 5      (       a  UWU4U l3        XR6                  4U l4        O"/ SQU l3        SU l4        U Rk                  S S 5        [#        U5      S	:X  a  [        RN                  OS nU" UR%                  U5      5      $ )NFr   Tr   r   r   z'MatMul8bitLt: inputs will be cast from z to float16 during quantizationrB   )	threshold        rowgradg     _@)biasr   NNNr   c                     U $ )NrS   )xs    r   <lambda>'SwitchBackBnb.forward.<locals>.<lambda>,  s    !r   )6r   r   r   r   r    r!   rn   r"   r#   r   r   outlier_poolr   get_instancefloat16warningswarnrE   viewrG   r$   int8_double_quantr'   rj   has_fp16_weightsrD   subBidxSBCBgetattris_contiguousstrideis_trainingreset_gradsCBtSCBSCBtlongcloneint8_linear_matmulint8_mm_dequantadd_r+   r:   
grad_shaper0   r1   
dtype_biasr2   r3   r4   tensor_statessave_for_backward)r6   r    r!   r7   rn   r:   input_shapeCACAtSCASCAtoutlier_colsr}   subAhas_gradis_transposedrK   outliersshapeBoutput_shapeout32r>   
clone_funcs                          r   r?   SwitchBackBnb.forward   s>   ( =ACLEEHwwr{aggaj({{1773B<!''!"+#=QWWUVU]U]^^{{1773B<!''"1+#=QWWUVU]U]^^ gg%!4!A!A!CE 77emm#MMCAGG9Lklm qww<1r1772;'224A+,+>+>qttEMM?R^c^m^m+n(L??S \%=%%"1c6
Cyq#vY[[]557
	88# %6EH))ehh.>!HHNNE2D !!!q&$/t;H ! 11OaggajAHHQK6OMLLN!!(uxx7G!!# ''U]](;<HIIJ!HHNNE2H#E,B,B$Ixx599>>#3 34::<H"UYY^^B%::UBEEGRRTWWXYX_X_`EJ&'Bq%))..""#Q		(()D!{q 'NKNF1IFL'NF1I6L $$R2 <4::6&&uc994HKKAGGTF&&uc994HKKAGGTFKK #(8ell444F 	$3477AGGT\T_c_i_i0S[#.s##BQ'((a.CK!%yy 1C,CK ,C!!$-$'$5$:U[[
&++l344r   c                    U R                   (       ar  U R                  c  S O[        R                  " U R                  5      n[        R                  " U R                  5      [        R                  " U R
                  5      S US 4$ U R                  u  p4pVnU R                  u  pxn	U R                  u  pU R                  nS =n=pU(       a  UR                  SU R                  S9n[        UR                  5      S:X  a-  UR                  SUR                  S   5      R                  5       n[         R"                  " UR%                  [        R&                  5      5      u  nnnnnU(       a%  [        R(                  " UR+                  5       U	5      nU(       a  UR,                  b  UR,                  R%                  U R.                  SS9R1                  UR2                  R5                  S5      R7                  S5      5      n[        R(                  " UU5      R9                  U R:                  5      R%                  U R.                  5      nO[=        S	5      eXS US 4$ )
Nr   )r   rB   r   T)copyr   g@ ?z7State must contain either CBt or CB matrix for backward)r   rn   r"   rC   r    r!   r3   r4   r   r:   sumr   rE   r   reshaperG   r$   rz   r'   rv   r+   rD   r   r0   mul_r   	unsqueezer
   ry   r   	Exception)r6   rH   	bias_gradrI   rJ   rK   req_gradBiasr   r   r    r   r}   r:   rL   rM   	grad_biasCgradCgradtSCgradSCgradtr   r   s                         r   rQ   SwitchBackBnb.backward/  s   << # 0e6F6Fsxx6PI##CEE*E,<,<SUU,CT9VZZZ363G3G0	aq{{1%%			&***#@I {  !Q&%--b+2C2CB2GHSSUK787J7J;>>Z_ZgZgKh7i4vvw \\+--/15Fxx#XX[[4[8==eii>Q>QRS>T>X>XYd>efk26;;CNNKNNs{{[ YZZtY44r   rS   ro   )
rW   rX   rY   rZ   r[   r   r   r?   rQ   r\   rS   r   r   rg   rg      s8    r5x7N r5 r5h !5 !5r   rg   c                 (   U R                   S   nUR                   S   U:X  a  UR                   S   OUR                   S   n/ SQnSu  pV[        U5       H  u  pxX$US-      :  d  M  Un  O   [        U5       H  u  pxX4US-      :  d  M  Un  XV4$    XV4$ )Nr   r   r   )i   i   rU   i         @   r   )rU   rU   )r   	enumerate)	input_matrixweight_matrixinput_featuresoutput_featuresarrayr.   r/   iks	            r   get_block_sizesr   T  s    !''+N0=0C0CA0F.0Xm))!,^k^q^qrs^tO4EIC% !a%L(C ! % 1q5\)D9 !
 9r   r    r!   r,   r-   r7   r.   r/   c           	      d    US:X  d  US:X  a  [        X5      u  pV[        R                  XXBX5U5      $ Nr   )r   r_   applyr    r!   r,   r-   r7   r.   r/   s          r   matmul_fp8_globalr   e  s4     byDBJ#A)	  sW4HHr   c           	      d    US:X  d  US:X  a  [        X5      u  pV[        R                  XXBX5U5      $ r   )r   r   r   r   s          r   matmul_fp8_mixedr   s  s4     byDBJ#A)	cG$GGr   r:   c                 n    U=(       d
    [        5       nUS:  a  XCl        [        R                  XX%U5      $ )Nrk   )r   rj   rg   r   )r    r!   r7   r:   rj   rn   s         r   switchback_bnbr     s1     $]_E3#qS66r   )Nr   r   )NNrk   N)	functoolsr   r	   typingr   rw   r"    bitsandbytes.autograd._functionsr   r   bitsandbytes.functional
functionalr$   r   autogradFunctionr   r_   rg   r   Tensorintr   r   r   rS   r   r   <module>r      s        O #-Q<U^^,, Q<hQ<enn-- Q<hX5ENN++ X5v, #'I||I||I \\I \\	I
 
%,,	I 
I I& #'H||H||H \\H \\	H
 
%,,	H 
H H" #'%)	7||7||7 
%,,	7 M"	7r   