
    oi                     ,   S SK r S SKJr  S SKJr  S SKrSSS\R                  R                  SS5      S:H  SS	.r S S
K	J
rJr  \ R                  " \S\S9rSr\(       d  \ R                  " SS\S9S 5       rSrSrOVS SKrS SKrS rS r\R                  SS j5       r\R                  SS j5       rSS jr SS jr\R                  S 5       r
S r\ R.                  r\ R2                  r\ R6                  R8                  R:                  rS rg!   Sr N= f)    N)	lru_cache)loggerTUNSLOTH_COMPILE_DEBUG01F)epilogue_fusionmax_autotuneshape_paddingztrace.enabledztriton.cudagraphs)flex_attentioncreate_block_mask)dynamicoptions)	fullgraphr   r   c                    UR                   R                  nUR                  nUR                   R                  n	UR                  n
US S 2S S 2S S S 2S S 24   R                  XYXU5      nUS S 2S S 2S S S 2S S 24   R                  XYXU5      nUR                  XWXh5      nUR                  XWXh5      nUR                   R                  nUR                   R                  nU [        R                  " US-  U R                  S9-  n [        R                  " XR                  SS5      5      nU[        R                  " X-  5      -  nXS U2S U24   -  n[        R                  R                   R#                  US[        R$                  S9R'                  U R                  5      n[        R                  " X5      nUR                  SS5      R)                  5       nUR                  XVXx-  5      nU$ )N      ࿩dtype      dimr      )confignum_attention_headshead_dimnum_key_value_headsnum_key_value_groupsexpandreshapequery_pre_attn_scalarattn_logit_softcappingtorchtensorr   matmul	transposetanhnn
functionalsoftmaxfloat32to
contiguousQKVcausal_maskselfbszq_lenn_headsr   
n_kv_headsn_groupsstAs                 X/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/kernels/flex_attention.pyslow_attention_softcappingr=   -   s   ++11==[[44
,, aD!Q&&sRaD!Q&&sRIIcE4IIcE4 KK--KK..QWagg66LLKK1-.

15!!	%%(( HH''U]]'KNNqwwWLLKK1((*IIc'"45    c                    ^  U 4S jnU$ )Nc                 <   > T[         R                  " U T-  5      -  $ N)r#   r'   )xbhq_idxkv_idxr:   s        r<   tanh_softcap+generate_tanh_softcap.<locals>.tanh_softcapV   s    uzz!a%(((r>    )r:   rG   s   ` r<   generate_tanh_softcaprJ   U   s    	) r>   c                 
    X#:  $ rA   rI   )rC   rD   rE   rF   s       r<   causal_maskerrL   [   s
    r>   c                    ^  U 4S jnU$ )Nc                 "   > X#:  nX#-
  T:*  nXE-  $ rA   rI   )rC   rD   rE   rF   r2   window_masksizes         r<   sliding_window-sliding_window_masker.<locals>.sliding_window`   s    /K.D0K,,r>   rI   )rP   rQ   s   ` r<   sliding_window_maskerrS   ^   s    	-
 r>   c           
           [        U SSUUSSS9$ )Nr      T)
BLOCK_SIZE_compile)_create_block_mask)maskns     r<   r   r   g   s$    !
 	
r>   c                 &    [        [        U 5      nU$ rA   )r   rL   )max_seq_lengthr2   s     r<   !create_flex_attention_causal_maskr]   s   s    '~Fr>   c                 2    [        U5      n[        X 5      nU$ rA   )rS   r   )r\   rQ   sliding_maskerr2   s       r<   )create_flex_attention_sliding_window_maskr`   w   s     /~>'Gr>   c                     S[         R                  " U 5      -  n[        U5      n[        R                  " [
        UUSS9$ )Ng      ?T)	score_modscale
enable_gqa)mathsqrtrJ   	functoolspartial_flex_attention)r9   r:   rc   rb   s       r<   r   r   ~   s=    diil")!,	  !	
 	
r>   c                 .   UR                   R                  nUR                  nUR                   R                  n	UR                   R                  n
[        X5      nU" XX#S9nUR                  SS5      R                  5       nUR                  XVXx-  5      nU$ )N)querykeyvalue
block_maskr   r   )	r   r   r   r!   r"   r   r&   r-   r    )r/   r0   r1   r2   r3   r4   r5   r6   r   r9   r:   fxr;   s                r<   r=   r=      s    ++11==KK--KK..A!q1GKK1((*IIc'"45r>   c                 X   UR                   R                  nUR                  nUR                   R                  n	UR                  n
US S 2S S 2S S S 2S S 24   R                  XYXU5      nUS S 2S S 2S S S 2S S 24   R                  XYXU5      nUR                  XWXh5      nUR                  XWXh5      nUR                   R                  nUR                   R                  nU [        R                  " US-  U R                  S9-  n [        XR                  SS5      5      nX-  n[        XS9  X-  nXS U2S U24   -  n[        US[        R                   S9R#                  U R                  5      n[        X5      nUR                  SS5      R%                  5       nUR                  XVXx-  5      nU$ )	Nr   r   r   r   )outr   r   r   )r   r   r   r   r   r   r    r!   r"   r#   r$   r   torch_matmulr&   
torch_tanhtorch_nn_functional_softmaxr+   r,   r-   r.   s                 r<   $slow_inference_attention_softcappingru      s   kk--G}}H00J((H 	
!Qa
""3HXNA	!Qa
""3HXNA			#0A			#0A 	))A**A	ELLD!''22AQAq)*A FAqFAVeVVeV^	$$A 	$ARGJJ177SAQA	Aq$$&A			#g01AHr>   )   )rU   )    )rw   rv   ) r#   rg   r   (transformers.models.llama.modeling_llamar   osenvirongettorch_compile_options!torch.nn.attention.flex_attentionr   ri   r   rX   compileHAS_FLEX_ATTENTIONr=   r]   r`   re   rJ   rL   rS   r%   rr   r'   rs   r(   r)   r*   rt   ru   rI   r>   r<   <module>r      sM     ; 	 ZZ^^$;SASH 
 mm43HO 
 
]]tt?TU V> )-%04-
    	
 	

 15 
 
	 ||ZZ
#hh1199 !gs   D D