
    oi9                     @   / S Qr SSKrSSKrSSKrSSKJs  Jr  SSKJ	r	J
r
JrJrJrJrJrJrJrJrJrJr  S r \R.                  S\4S j5       r \R.                  S\R4                  4S	 j5       r     SS
 jr S r      SS jr  SS jr S r      SS jr!g))flex_attention_with_sinkold_flex_attention_with_sinkis_flex_attention_decoding!flex_attention_with_sink_decodingflex_attention_add_sinksflash_attention_left_padded    N   )create_block_mask_cachedcreate_block_maskcompiled_create_block_mask_flex_attentionflex_attentionFlexAttentionCachecausal_mask!generate_causal_mask_with_padding*generate_decoding_causal_mask_with_paddinggenerate_sliding_window_mask)generate_sliding_window_mask_with_padding2generate_decoding_sliding_window_mask_with_paddingc                 $    US-   U:  nUS:H  nXE-  $ )zS
  0 1 2 3     0 1 2 3
0 X X       1   X
1 X X X     2   X X
2 X X X X   3   X X X
r	   r    )batchheadq_idxkv_idxr   sink_first_columns         c/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/flex_attention/attention_sink.pycausal_mask_with_sinkr   /   s%     19'K!**    window_sizec                 8   ^  U 4S jnST  S3=Ul         Ul        U$ )Nc                 B   > US-   U:  nUS-   U-
  T:  nUS:H  nXE-  U-  $ )Nr	   r   r   )r   r   r   r   r   windowed_maskr   r    s          r   sliding_window9generate_sliding_window_with_sink.<locals>.sliding_window>   s?    qyV+ f,{:"aK+/@@@r   sliding_window__sink)__name____doc__)r    r$   s   ` r   !generate_sliding_window_with_sinkr*   <   s,    A :IUZ7[[Nn4r   sink_weightsc                    ^  U 4S jnU$ )Nc                 x   > [         R                  " US:H  TU   R                  U R                  5      S-   U 5      $ )Nr           )torchwheretodtype)scorer   r   r   r   r+   s        r   sink_score_mod/generate_sink_score_mod.<locals>.sink_score_modL   s:    {{aK!!%++.4
 	
r   r   )r+   r4   s   ` r   generate_sink_score_modr6   J   s    
 r   c           
         U R                   (       d  [        S5      e[        U SS5      c   S5       eU R                  n[        U SS5      S:g  n	[        U SS5      =(       d    [        U SS5      =(       d    UnUR                  u  ppUR                  u  pnn[
        R                  " UR                  XSU5      U/S	S
9n[
        R                  " UR                  XSU5      U/S	S
9nU=(       d    [        U SS5      n[        U5      [        L a  US:w  a  [        U5      O[        n[        U5      n[        UUUS-   UR                  S9nU(       a  [        O[         " UUUUUU	US9nUR#                  SS	5      R%                  5       nU$ )a  
Allows one sink token to be attended to for full/sliding window attention
Similar to Efficient Streaming Language Models with Attention Sinks
Primarily for GPT-OSS 2025

[WARNING] This only works for training. Inference fails since KV cache's
absolute positioning will fail.
z?Unsloth: This version of flex attention only works for trainingsinksN"Unsloth: self_attn must have sinksnum_key_value_groupsr	   scalingscale   dimr$   r   device)
block_mask	score_mod
enable_gqar<   )trainingNotImplementedErrorgetattrr8   shaper/   cat	new_zerostypeintr*   r   r6   r   rA   r   uncompiled_flex_attention	transpose
contiguous)	self_attnquerykeyvalueattention_maskr<   r$   compiler+   rD   bszheads_Qqlen_Qr?   _heads_KVqlen_KV
key_paddedvalue_paddedmask_modrC   rB   attn_outputs                          r   r   r   W   s   $ !"cdd9gt,8^:^^8??L$:A>!CJIy$/]79gt3T]X]E %C&!iiA! 99coocQDcJTUVJ99eoocQDeLTUVL $Qwy:JD'QN 3&>Q+> 	*.9  (5I+HfgaiRUR\R\]J%,>2KKK ''1-88:Kr   c                     UR                  5       S:X  a  UR                  u  p#pEOUR                  u  p$nU R                  n[        U S5      nU(       d  U(       d  U(       a  US:w  a  gg)N   _flex_attention_cacher	   FT)r?   rH   rE   hasattr)rP   rQ   rV   rW   rX   r?   is_traininghas_flex_caches           r   r   r      sZ    yy{a$)KK!fc ;;S$$KY(?@N6Q;r   c	                    [        U SS5      c   S5       eU R                  n	[        U SS5      S:g  n
[        U SS5      =(       d    [        U SS5      =(       d    UnUR                  u  ppUR                  u  nnnnU=(       d    [        U SS5      nU R                  nSnSn[	        U S	5      nUGb  U(       Ga  U(       d  U(       Gd  U(       a  US:w  Ga  U(       a  U(       a  U ?GOUc   eUR                  5       S
:X  d   SUR                  5        35       eUR                  S5      R                  UR                  5      n[        R                  " [        UU5      UR                  S9R                  US45      UR                  S5      R                  :  nSUR!                  S
S5      USS2SU24   '   [        R"                  * UR!                  S
S5      USS2SU24   '   SUR!                  S
S5      USS2SU24   '   [%        U5      [&        L a  US:w  a  [)        UU5      O
[+        U5      =nn[%        U5      [&        L a  US:w  a  [-        UU5      O
[/        U5      n[1        UUU5      U l        OU R                  U5      n  Uc)  [%        U5      [&        L a  US:w  a  [3        U5      O[4        nUc  [7        UXUUUR                  S9nU(       a  [8        O[:        " UUUUSU
USS9u  nn[        R<                  " UU R                  R                  S5      -
  5      nUUR                  S5      R                  UR>                  5      -  nUR!                  SS
5      RA                  5       nU$ )a  
Allows one sink token to be attended to for full/sliding window attention
Similar to Efficient Streaming Language Models with Attention Sinks
Primarily for GPT-OSS 2025

[WARNING] has higher error than old_flex_attention_with_sink, but works for inference
r8   Nr9   r:   r	   r;   r<   r$   rb   r=   z"Unsloth: Attention_mask has dim = r@   r   TrB   rC   rD   r<   
return_lse)!rG   r8   rH   rE   rc   rb   r?   argmaxr1   rA   r/   arangemaxrepeat	unsqueezeTrN   infrK   rL   r   r   r   r   r   r   r   r   r   rM   sigmoidr2   rO   )rP   rQ   rR   rS   rT   r<   r$   rU   has_static_cacher+   rD   rV   rW   rX   r?   rY   rZ   r[   rd   r^   rB   re   padding_start_idx
do_paddingprefill_mask_moddecoding_mask_modr_   	logsumexp
sink_scales                                r   r   r      s   $ 9gt,8^:^^8??L$:A>!CJIy$/]79gt3T]X]E %C&!iiAx! $Qwy:JD'QN$$KHJY(?@N!&6^v{!!7 &111%))+q0m4VWeWiWiWkVl2mm0$2$9$9!$<$?$?$M!"\\#fg*>V]]_bde^fgj{  kF  kF  GH  kI  kK  kK  K
 BC1%jGVG&=>BG))1%jHWH&=>AB1%jHWH&=> N+s2~7J >nN_`56GHI+ N+s2~7J G~Whi>?PQ " 3ESJ[]k2l	/"88=J N#s*~/B )8 	 /#PWbeblblm
07n=VV	K, y9??+D+DQ+GGHJ 
 4 4R 8 ; ;K<M<M NNK ''1-88:Kr   c                     [        U SS 5      c   S5       e[        U SS5      S:g  n[        U SS 5      =(       d    [        U SS 5      =(       d    UnU R                  U5      n[        UUUUS UUSS9u  pxXx4$ )	Nr8   r9   r:   r	   r;   r<   Trg   )rG   rb   r   )	rP   rQ   rR   rS   r<   rD   rB   r_   rw   s	            r   r   r   
  s     9gt,8^:^^8$:A>!CJIy$/]79gt3T]X]E005J+	K !!r   c                     X R                   R                  S5      -  n[        R                  " X"S9nXR                  S5      R	                  UR
                  5      -  nUR                  SS5      R                  5       nU$ )Nr	   )outri   r=   )r8   rn   r/   rq   r1   r2   rN   rO   )rP   r_   rw   rx   s       r   r   r   "  sn    
 **1--Iy:J''+..{/@/@AAK''1-88:Kr   c	                    UR                   [        R                  [        R                  [        R                  4;   d   eUR
                  n	UR                  u  pU R                  R                  n[        U R                  SU5      nU R                  nUR                  u  pnnUR                  u  nnnnUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  [        R                  U	S9R                  SS9n[        R                  " UR!                  S[        R                  S9S5      n[#        UR%                  5       R'                  5       5      nUR)                  S	5      R                  U	S
9nUR+                  SS9R-                  S	5      nUR)                  U
U-  X5      nUR)                  U
U-  X5      nUR)                  U
U-  X5      nUR/                  SU5      R1                  5       n UR/                  SU5      R1                  5       n!UR/                  SU5      R1                  5       n"Uc  S[2        R4                  " U5      -  n[7        US9n#Ub  [#        U5      U#S'   SU#S'    [        R8                  R:                  R<                  " SU U!U"UUUU[?        U5      [	        U5      SS.
U#D6u  n$n%n&  n[        R@                  " U%U RB                  RE                  S5      -
  5      n'U$U'RE                  S	5      R                  SS5      R                  U$R                   5      -  n$URG                  U
U-  X45      n(U$U(U'   U(RI                  U
UX5      n$U$R1                  5       n$U$$ )Nnum_key_value_headsr	   r=   )r2   rA   r>   r   )r2   )r	   r   ri   r@   F)as_tupleg      ?)r<   window_size_leftwindow_size_right)
rQ   rR   rS   	cum_seq_q	cum_seq_kmax_qmax_k	dropout_p	is_causalreturn_debug_maskr   )%r2   r/   int32int64boolrA   rH   confignum_attention_headsrG   head_dimrN   r1   sumFpadcumsumrL   rl   itemreshapenonzerosqueezeindex_selectrO   mathsqrtdictopsaten_flash_attention_forwardfloatrq   r8   rn   rJ   view))rP   query_states
key_statesvalue_statesrT   r   r   r   r<   rA   rV   qlenn_heads
n_kv_headsr   rW   rX   r?   rY   rZ   r[   QKVseqlens
cu_seqlens
max_seqlen	flat_maskkeepQ_flatK_flatV_flatQ_unpadK_unpadV_unpadkwargsr_   rw   	rng_staterx   out_flats)                                            r   r   r   .  sC    EKKejj#IIII  F$$IC22G))+@'JJ!!H , 2 2C&#(..Ax!q!$AQ"Aq!$A ekk&AEE!ELGw~~au{{~;VDJW[[]'')*J &&r*--V-<Ie,44R8DYYsV|g;FYYsW}j;FYYsW}j;F!!!T*557G!!!T*557G!!!T*557G }dii))5"F#%()9%:!"&'"#
 /4iinn.U.U /)$O!/ /+KIq! y9??+D+DQ+GGHJ
 4 4R 8 B B1a H K KKL]L] ^^Kvw ABH HTN--VW?K((*Kr   )NNNT)NNNTT)N)TNr.   N)"__all__r/   	functoolsr   torch.nn.functionalnn
functionalr   utilsr
   r   r   r   rM   r   r   r   r   r   r   r   r   r   	lru_cacherL   r*   Tensorr6   r   r   r   r   r   r   r   r   r   <module>r      s   "        "
+ 

3 
 
 
5<<    2f   iT  ", 	  Vn r   