
    oiwP                        S SK r SSKJrJrJrJrJrJrJrJ	r	   " S S\ R                  R                  5      rSSKJrJr  SS jrSSKJrJr  SS	 jrSS
KJrJr  S r " S S\ R                  R                  5      rSS jr " S S\ R                  R                  5      rS r\ R8                  R:                  r\ R>                  S\ R@                  S\ R@                  4S j5       r!g)    N   ) _maybe_fake_quantize_activationsfast_dequantizeQUANT_STATEget_lora_parametersget_lora_parameters_biasmatmul_loratorch_amp_custom_fwdtorch_amp_custom_bwdc                       \ rS rSrSr\\ S	S\R                  4S jj5       5       r	\\
S\R                  4S j5       5       rSrg)
LoRA_MLP   a  
### LoRA weights
G = G + Ag @ Bg
U = U + Au @ Bu
W = W + Aw @ Bw

### SwiGLU(X)
e = X @ G
f = e * sigmoid(e)
g = X @ U
h = f * g
i = h @ W

### Backpropagation chain rule
See our blog post for more details

df = sigmoid(e) * (1 - f) + f
dC/dW = h.T @ dY
dC/dU = X.T @ (D @ W.T * f)
dC/dG = X.T @ (D @ W.T * df * g)

### Down projection LoRA weights
dC/dAw = dC/dW @ B.T
dC/dBw = A.T @ dC/dW
dC/dAw =       h.T @ dY @ B.T
dC/dBw = A.T @ h.T @ dY

### Up projection LoRA weights
dC/dAu =       X.T @ (D @ W.T * f) @ B.T
dC/dBu = A.T @ X.T @ (D @ W.T * f)

### Gate projection LoRA weights
dC/dAg =       X.T @ (D @ W.T * df * g) @ B.T
dC/dBg = A.T @ X.T @ (D @ W.T * df * g)

Don't forget to see our blog post for more details!
Xc                     UR                   n[        XX4XV5      n[        XXX5      nU" UU5      n[        UXXU5      nUUUUUUUUUU4
U l        U R                  XEXXUUU5	        UU l        U$ N)dtyper	   custom_saved_tensorssave_for_backwardinplace)ctxr   gateWgateW_quantgateAgateBgateSupW	upW_quantupAupBupSdownWdownW_quantdownAdownBdownS_forward_function_backward_functionr   r   eghis                            S/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/kernels/fast_lora.pyforwardLoRA_MLP.forwardC   s    0 +eC	9a#5uUC $
  	eCeAq!L    dYc                 	   U R                   u
  nnnnnnnn	n
nU R                  u	  ppnnnnnUR                  u  nnnUR                  SUR                  S   5      nUR                  SUR                  S   5      nUR                  SUR                  S   5      nUR                  SUR                  S   5      nUR                  nUR                  U5      UR                  U5      UR                  U5      UR                  U5      UR                  U5      UR                  U5      4u  ppnnUR                  5       UR                  5       UR                  5       UR                  5       UR                  5       UR                  5       4u  ppnn[        XR                  5       U	UUU
5      nU" UUU5      u  nnnUUUnnn[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n [        R                  " U5      n![        R                  " U5      n"UR                  UR                  5       UUR                  5       -  U
SS9  UR                  UR                  5       UR                  5       -  XSS9  U!R                  UR                  5       UUR                  5       -  USS9  U"R                  UR                  5       UR                  5       -  UUSS9  UR                  UR                  5       UUR                  5       -  USS9  U R                  UR                  5       UR                  5       -  UUSS9  [        UR                  5       U5      n[        R                  " UUR                  5       U R                  (       a  UOS S9n#AU#R                  UUR                  5       -  UR                  5       US9  [        UR                  5       U5      nU#R                  UUR                  5       5        AU#R                  UUR                  5       -  UR                  5       US9  U#R                  UUU5      S S UR                  5       U R                  5       S S S U!R                  5       U"R                  5       S S S UR                  5       UR                  5       S S S S 4$ Nr   alphabeta)outr5   )r   saved_tensorsshapeviewr   totr	   torch
empty_likeaddmm_r   matmulr   )$r   r0   r   r   r   r   r   r    r!   r"   r%   r'   r   r   r   r   r#   r$   r   r(   r)   batchseq_lenhdr   DWr*   dfded_downAd_downBd_gateAd_gateBd_upAd_upBdXs$                                       r,   backwardLoRA_MLP.backwardr   s    $$	
8;8I8I5cuaAWWwWWR"&FF2qwwr{#FF2qwwr{#FF2qwwr{# HHUOHHUOFF5MFF5MHHUOHHUO0
,cu GGIGGIEEGEEGGGIGGI0
,cu WWYUE5I%b!Q/Aq1r2""5)""5)""5)""5)  %  % 	qssub5779neAFuwwy1335("AF 	QSSUBL#a@SUUWqssu_b#a@ 	qssub5779neAFuwwy1335("eAF ceegy1\\"ceeg#++Q4H
		"suuw,	5	;7
		"eggi 
		"uwwy.%'')U	; GGE7B'IIKIIKGGIGGIIIKIIK'
 	
r/    NT__name__
__module____qualname____firstlineno____doc__staticmethodr
   r>   Tensorr-   r   rO   __static_attributes__rQ   r/   r,   r   r      s`    $L * )+<<+  +Z q
%,, q
  q
r/   r   )swiglu_fg_kernelswiglu_DWf_DW_dfg_kernelc                 &   [        XR                  5      n[        U R                  5      u  p4pVn[        U R                  5      u  ppn[        U R                  5      u  pnnn[
        R                  UUUUUUUU	U
UUUUUUU[        [        U5      nU$ r   )	r   	gate_projr   up_proj	down_projr   applyr\   r]   selfr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r7   s                      r,   apply_lora_mlp_swiglure      s    (NN;A.A$...Q+Ee$7$E!CCc.A$...Q+Eue
..	 'C* Jr/   )geglu_exact_forward_kernelgeglu_exact_backward_kernelc                 &   [        XR                  5      n[        U R                  5      u  p4pVn[        U R                  5      u  ppn[        U R                  5      u  pnnn[
        R                  UUUUUUUU	U
UUUUUUU[        [        U5      nU$ r   )	r   r_   r   r`   ra   r   rb   rf   rg   rc   s                      r,   apply_lora_mlp_geglu_exactri     s    (NN;A.A$...Q+Ee$7$E!CCc.A$...Q+Eue
..	"#'C* Jr/   )geglu_approx_forward_kernelgeglu_approx_backward_kernelc                 "   [        XR                  5      n[        U R                  5      u  p#pEn[        U R                  5      u  pxpn[        U R                  5      u  ppn[
        R                  UUUUUUUUU	U
UUUUUU[        [        5      nU$ r   )	r   r_   r   r`   ra   r   rb   rj   rk   )rd   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r7   s                     r,   apply_lora_mlp_geglu_approxrm   +  s    (NN;A.A$...Q+Ee$7$E!CCc.A$...Q+Ee
..	#$%C( Jr/   c                   n    \ rS rSrSr\\ SS\R                  4S jj5       5       r	\\
S 5       5       rSrg)	LoRA_QKViG  ab  
### LoRA weights
Wq = Wq + Aq @ Bq
Wk = Wk + Ak @ Bk
Wv = Wv + Av @ Bv
Q = X @ Wq = X @ Wq + X @ Aq @ Bq
K = X @ Wk = X @ Wk + X @ Ak @ Bk
V = X @ Wv = X @ Wv + X @ Av @ Bv

### Backpropagation chain rule
See our blogpost for more details.

dC/dWq = X.T @ D(Wq)
dC/dWk = X.T @ D(Wk)
dC/dWv = X.T @ D(Wv)
We then sum them all find dC/dX

### Q projection LoRA weights
dC/dAq =       X.T @ D(Wq) @ B.T
dC/dBq = A.T @ X.T @ D(Wq)

### K projection LoRA weights
dC/dAk =       X.T @ D(Wk) @ B.T
dC/dBk = A.T @ X.T @ D(Wk)

### V projection LoRA weights
dC/dAv =       X.T @ D(Wv) @ B.T
dC/dBv = A.T @ X.T @ D(Wv)
r   c           	      
   UR                   nUR                  nUnUR                  5       S:X  a  UR                  SUR                  S   5      n[	        UX#XEU5      n[	        UXxXU5      n[	        UXXU5      n[        U5      S:X  aK  UR                  US   US   S5      nUR                  US   US   S5      nUR                  US   US   S5      nUUUUUUUUU4	U l        U R                  UUUU	U
UU5        UU l        UUU4$ )N   r3   r   r   )	r   r:   dimr;   r	   lenr   r   r   )r   r   QWQW_quantQAQBQSKWKW_quantKAKBKSVWVW_quantVAVBVSr   r   
orig_shapeX_for_matmulQKVs                           r,   r-   LoRA_QKV.forwardf  s.   , 
 WW
557a<66"aggbk2LbBB?bBB?bBB? z?az!}jmR8Az!}jmR8Az!}jmR8A 
$
  		
 !Qwr/   c                 	   U R                   u	  pEpgppnU R                  u  nnnnnnnUR                  u  nnnUR                  SUR                  S   5      nUR	                  SUR                  S   5      nUR                  SUR                  S   5      nUR                  SUR                  S   5      nUR
                  nUR                  U5      UR                  U5      UR                  U5      UR                  U5      UR                  U5      UR                  U5      4u  pnnnnUR                  5       UR                  5       UR                  5       UR                  5       UR                  5       UR                  5       4u  pnnnn[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[        R                  " U5      nUR                  UR                  5       XR                  5       -  USS9  UR                  UR                  5       UR                  5       -  XSS9  UR                  UR                  5       UUR                  5       -  U	SS9  UR                  UR                  5       UR                  5       -  X)SS9  UR                  UR                  5       UUR                  5       -  USS9  UR                  UR                  5       UR                  5       -  X<SS9  [        UR                  5       U5      n[        R                  " XR                  5       U R                  (       a  UOS S9nAUR                  XR                  5       -  UR                  5       US9  [        UR                  5       U5      nUR                  X'R                  5       5        AUR                  UUR                  5       -  UR                  5       U	S9  [        U
R                  5       U5      n
UR                  X:R                  5       5        A
UR                  UUR                  5       -  UR                  5       US9  UR                  UUU5      S S UR                  5       UR                  5       S S S UR                  5       UR                  5       S S S UR                  5       UR                  5       S S 4$ r2   )r   r9   r:   r;   reshaper   r<   r=   r>   r?   r@   r   rA   r   )r   dQdKdVrt   ru   rx   ry   rz   r}   r~   r   r   r   rv   rw   r{   r|   r   r   rB   rC   rD   r   d_QAd_QBd_KAd_KBd_VAd_VBrN   s                                  r,   rO   LoRA_QKV.backward  s    @C?W?W<bhB" 	
 WWwWWR"&ZZBHHRL)WWR"&FF2qwwr{# EE%LEE%LEE%LEE%LEE%LEE%L"
BB "$!OBB ###### 	ACCE2;1=BDDFQSSUNB1= 	ACCE2;1=BDDFQSSUNB1= 	ACCE2;1=BDDFQSSUNB1= RTTVX.\\"ddfA$G
		"ttv+rttvr	2 RTTVX.
		"ddf
		"rttv+rttvr	2 RTTVX.
		"ddf
		"rttv+rttvr	2 GGE7B'FFHFFHFFHFFHFFHFFH#
 	
r/   rQ   NrR   rS   rQ   r/   r,   ro   ro   G  sS    < & %<<<<  <| l
  l
r/   ro   c                     [        XR                  5      n[        U R                  5      u  p4pVn[        U R                  5      u  ppn[        U R                  5      u  pnnn[
        R                  UUUUUUUU	U
UUUUUUUU5      u  nnnUUU4$ r   )r   q_projr   k_projv_projro   rb   )rd   r   r   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   s                        r,   apply_lora_qkvr     s    (KK8A24;;?B""24;;?B""24;;?B"b"nn	











#GAq!& a7Nr/   c                       \ rS rSrSr\\S\R                  4S j5       5       r	\\
S\R                  4S j5       5       rSrg)	LoRA_Wi2  a  
### LoRA weights
Wq = Wq + Aq @ Bq
Wk = Wk + Ak @ Bk
Wv = Wv + Av @ Bv
Q = X @ Wq = X @ Wq + X @ Aq @ Bq
K = X @ Wk = X @ Wk + X @ Ak @ Bk
V = X @ Wv = X @ Wv + X @ Av @ Bv

### Backpropagation chain rule
dC/dWq = X.T @ D(Wq)
dC/dWk = X.T @ D(Wk)
dC/dWv = X.T @ D(Wv)

### Q projection LoRA weights
dC/dAq =       X.T @ D(Wq) @ B.T
dC/dBq = A.T @ X.T @ D(Wq)

### K projection LoRA weights
dC/dAk =       X.T @ D(Wk) @ B.T
dC/dBk = A.T @ X.T @ D(Wk)

### V projection LoRA weights
dC/dAv =       X.T @ D(Wv) @ B.T
dC/dBv = A.T @ X.T @ D(Wv)
r   c                 p    UR                   n[        XX4XV5      nUUU4U l        U R                  XEU5        U$ r   )r   r	   r   r   )	r   r   WW_quantABSr   XWs	            r,   r-   LoRA_W.forwardN  sE     w10$
 
 	aA&	r/   r0   c                    U R                   u  p#nU R                  u  pVnUR                  u  pn
UR                  SUR                  S   5      nUR                  SUR                  S   5      nUR                  nUR                  U5      UR                  U5      peUR                  5       UR                  5       pe[        R                  " U5      n[        R                  " U5      nUR                  UR                  5       XR                  5       -  USS9  UR                  UR                  5       UR                  5       -  XSS9  [        UR                  5       U5      nXR                  5       -  nAUR                  XR                  5       -  UR                  5       US9  UR                  XU
5      S S UR                  5       UR                  5       S 4$ )Nr3   r   r4   r8   )r   r9   r:   r   r   r<   r=   r>   r?   r@   r   r;   )r   r0   r   r   r   r   r   r   rB   rC   rD   r   d_Ad_BrN   s                  r,   rO   LoRA_W.backward[  si    00A##aWWZZBHHRL)IIb!''"+&ttE{ADDK1ssuacce1q!q! 	

1335"ssu*a
:

13351335="
: ACCE7+##%Z
		"ssu*acceQ	/ wwur*D$$NNr/   rQ   NrS   rQ   r/   r,   r   r   2  sX    6 	 	  	 !O%,, !O  !Or/   r   c                     [        XR                  5      n[        U R                  5      u  p#pEn[        R	                  XX4XV5      nU$ r   )r   o_projr   r   rb   )rd   r   OWOW_quantOAOBOSOs           r,   apply_lora_or     s<    (KK8A24;;?B""QH"1AHr/   xreturnc                     [        S5      e)NzAUnsloth: Currently not supported yet - reshaping done incorrectly) NotImplementedError_check_forward_argspopdisable_adaptersmergedunmerge
base_layer_mixed_batch_forwardrs   active_adapterslora_Akeyslora_dropout
isinstanceIDENTITY_DROPOUTuse_doraweightlora_Bscalingr   rb   r   cloner>   is_autocast_enabledr   r<   nnIdentitytraininglora_magnitude_vectorget_base_layer)rd   r   argskwargsadapter_namesresultactive_adapterdropoutr   r   r   r   requires_conversionexpected_dtypebase_results                  r,   fast_lora_forwardr     s    
K r/   rR   )"r>   utilsr   r   r   r   r   r	   r
   r   autogradFunctionr   swiglur\   r]   re   geglurf   rg   ri   rj   rk   rm   ro   r   r   r   r   r   r   _disable_dynamorZ   r   rQ   r/   r,   <module>r      s    	 	 	I
u~~&& I
X ?: K: M8M
u~~&& M
`6LOU^^$$ LO^ 88$$  Nu|| N N Nr/   