
"""Shared helpers for attention backend selection and execution."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Optional, Tuple

from torch import Tensor
from torch.nn.functional import scaled_dot_product_attention

from ..models._utils import *
from ..utils.packing import (
    build_sdpa_packed_attention_mask,
    build_xformers_block_causal_mask,
)

if HAS_FLASH_ATTENTION:
    from flash_attn import flash_attn_func, flash_attn_varlen_func
else:
    flash_attn_func = None
    flash_attn_varlen_func = None

# Newer PyTorch releases support grouped-query attention natively in SDPA via
# the `enable_gqa` flag; feature-detect it once at import time.
SDPA_HAS_GQA = "enable_gqa" in (scaled_dot_product_attention.__doc__ or "")

# Backend identifiers, in priority order.
FLASH_VARLEN = "flash_varlen"
FLASH_DENSE = "flash_dense"
XFORMERS = "xformers"
SDPA = "sdpa"

# Block-diagonal causal mask type used for packed batches, when xformers is
# available.
XFORMERS_BLOCK_DIAG_CLS = (
    xformers.attn_bias.BlockDiagonalCausalMask if HAS_XFORMERS else None
)


@dataclass
class AttentionConfig:
    """
    Per-layer attention metadata.

    NOTE(djsaunde): I had originally intended this to be populated once per layer, but
        we're currently constructing it on every forward pass since it can possibly be
        invalid from one forward pass to the next (e.g., switching from training to
        inference). For now, I'm keeping separate from AttentionContext for the sake of
        better grouping of params.
    """

    backend: str
    n_kv_heads: int
    n_groups: int
    flash_dense_kwargs: Optional[dict[str, Any]] = None
    flash_varlen_kwargs: Optional[dict[str, Any]] = None
    sdpa_kwargs: Optional[dict[str, Any]] = None
    xformers_kwargs: Optional[dict[str, Any]] = None
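# Illustrative sketch only (the concrete numbers are assumptions, not part of
# this module): for a hypothetical layer with 32 query heads and 8 KV heads,
# the grouped-query fields would be populated as
#
#     config = AttentionConfig(
#         backend = select_attention_backend(use_varlen = False),
#         n_kv_heads = 8,
#         n_groups = 32 // 8,  # query heads served by each KV head
#     )
#
# with the per-backend kwarg dicts left as None unless the caller needs to
# override defaults (e.g., sdpa_kwargs = {"scale": ...}).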
@dataclass
class AttentionContext:
    """Per-call info required to run attention."""

    bsz: int
    q_len: int
    kv_seq_len: int
    n_heads: int
    head_dim: int
    requires_grad: bool
    seq_info: Optional[Tuple[Tensor, Tensor, int]]
    attention_mask: Optional[Tensor]
    causal_mask: Optional[Any]
    sliding_window: Optional[int] = None


def select_attention_backend(use_varlen: bool = False) -> str:
    """Return attention backend based on availability / priority order."""
    if HAS_FLASH_ATTENTION:
        return FLASH_VARLEN if use_varlen else FLASH_DENSE
    if HAS_XFORMERS:
        return XFORMERS
    return SDPA
def run_attention(
    config: AttentionConfig,
    context: AttentionContext,
    Q: Tensor,
    K: Tensor,
    V: Tensor,
) -> Tensor:
    """
    Run attention using config / context info.

    Backend choice is prioritized for speed: FlashAttention when installed
    (`flash_varlen` for packed/variable-length inputs with `seq_info`, otherwise dense
    flash), then xFormers if flash is unavailable, with PyTorch SDPA as the final
    fallback (e.g., CPU or no fused kernels).

    Varlen flash is preferred when packing metadata is present because it avoids padding
    and keeps peak memory low. xFormers and SDPA can also handle packed batches (we
    pass a block-diagonal mask into each).
    """
    backend = config.backend
    if backend == FLASH_VARLEN and context.seq_info is None:
        # No packing metadata: fall back to dense flash (or SDPA without flash).
        backend = FLASH_DENSE if HAS_FLASH_ATTENTION else SDPA

    flash_dense_kwargs = config.flash_dense_kwargs or {}
    flash_varlen_kwargs = config.flash_varlen_kwargs or {}
    sdpa_kwargs = config.sdpa_kwargs or {}
    xformers_kwargs = config.xformers_kwargs or {}

    bsz = context.bsz
    q_len = context.q_len
    kv_seq_len = context.kv_seq_len
    n_heads = context.n_heads
    head_dim = context.head_dim
    requires_grad = context.requires_grad
    sliding_window = context.sliding_window

    if backend == FLASH_VARLEN:
        # Varlen flash consumes one flattened (total_tokens, heads, head_dim)
        # stream plus cumulative sequence lengths instead of a padded batch.
        Q_f = Q.transpose(1, 2).reshape(bsz * q_len, n_heads, head_dim)
        K_f = K.transpose(1, 2).reshape(bsz * q_len, config.n_kv_heads, head_dim)
        V_f = V.transpose(1, 2).reshape(bsz * q_len, config.n_kv_heads, head_dim)
        _, cu_seqlens, max_seqlen = context.seq_info
        return flash_attn_varlen_func(
            Q_f,
            K_f,
            V_f,
            cu_seqlens,
            cu_seqlens,
            max_seqlen,
            max_seqlen,
            **flash_varlen_kwargs,
        ).view(bsz, q_len, n_heads, head_dim)

    if backend == FLASH_DENSE:
        # Dense flash expects (bsz, seq, heads, head_dim).
        Q_t = Q.transpose(1, 2)
        K_t = K.transpose(1, 2)
        V_t = V.transpose(1, 2)
        return flash_attn_func(Q_t, K_t, V_t, **flash_dense_kwargs).reshape(
            bsz, q_len, n_heads, head_dim
        )

    if backend == XFORMERS:
        # Packed batches get a block-causal bias built from the packing mask;
        # otherwise the model's causal mask is used as-is.
        if context.attention_mask is not None:
            attn_bias = build_xformers_block_causal_mask(
                context.attention_mask,
                sliding_window = sliding_window,
                base_mask = context.causal_mask,
            )
        else:
            attn_bias = context.causal_mask

        # xformers expects (bsz, seq, heads, head_dim).
        Q_t = Q.transpose(1, 2)
        K_t = K.transpose(1, 2)
        V_t = V.transpose(1, 2)

        Q_mod, K_mod, V_mod = Q_t, K_t, V_t
        if config.n_groups != 1:
            # Grouped-query attention: broadcast each KV head over its group of
            # query heads.
            K_mod = K_t.view(bsz, kv_seq_len, config.n_kv_heads, 1, head_dim)
            V_mod = V_t.view(bsz, kv_seq_len, config.n_kv_heads, 1, head_dim)
            K_mod = K_mod.expand(
                bsz, kv_seq_len, config.n_kv_heads, config.n_groups, head_dim
            )
            V_mod = V_mod.expand(
                bsz, kv_seq_len, config.n_kv_heads, config.n_groups, head_dim
            )
            if requires_grad:
                # Materialize the broadcast so autograd sees ordinary 4D tensors.
                K_mod = K_mod.reshape(bsz, kv_seq_len, n_heads, head_dim)
                V_mod = V_mod.reshape(bsz, kv_seq_len, n_heads, head_dim)
            else:
                # Inference can keep the cheaper 5D grouped layout.
                Q_mod = Q_t.view(
                    bsz, q_len, config.n_kv_heads, config.n_groups, head_dim
                )

        has_block = XFORMERS_BLOCK_DIAG_CLS is not None and isinstance(
            attn_bias, XFORMERS_BLOCK_DIAG_CLS
        )
        if has_block:
            # Block-diagonal biases index one flattened token stream, so
            # collapse the batch dimension before calling the kernel.
            Q_mod = Q_mod.reshape(1, bsz * q_len, *Q_mod.shape[2:])
            K_mod = K_mod.reshape(1, bsz * kv_seq_len, *K_mod.shape[2:])
            V_mod = V_mod.reshape(1, bsz * kv_seq_len, *V_mod.shape[2:])

        out = xformers_attention(
            Q_mod, K_mod, V_mod, attn_bias = attn_bias, **xformers_kwargs
        )
        return out.reshape(bsz, q_len, n_heads, head_dim)

    # SDPA fallback (works everywhere, including CPU).
    local_mask = context.causal_mask
    is_causal_local = False
    if context.attention_mask is not None and local_mask is None:
        # Packed batch: materialize a block-causal additive mask.
        local_mask = build_sdpa_packed_attention_mask(
            context.attention_mask,
            dtype = Q.dtype,
            device = Q.device,
            sliding_window = sliding_window,
        )
    else:
        q_len_local = Q.shape[-2]
        k_len_local = K.shape[-2]
        is_causal_local = local_mask is None and q_len_local == k_len_local

    kwargs = dict(sdpa_kwargs)
    kwargs.setdefault("attn_mask", local_mask)
    kwargs.setdefault("is_causal", is_causal_local)

    if SDPA_HAS_GQA:
        # Let PyTorch broadcast KV heads across query groups natively.
        kwargs.setdefault("enable_gqa", config.n_groups != 1)
        out = scaled_dot_product_attention(Q, K, V, **kwargs)
        return out.transpose(1, 2)

    K_e, V_e = K, V
    if config.n_groups != 1:
        # Older PyTorch: expand KV heads manually to match the query head count.
        K_e = (
            K[:, :, None, :, :]
            .expand(bsz, config.n_kv_heads, config.n_groups, kv_seq_len, head_dim)
            .reshape(bsz, n_heads, kv_seq_len, head_dim)
        )
        V_e = (
            V[:, :, None, :, :]
            .expand(bsz, config.n_kv_heads, config.n_groups, kv_seq_len, head_dim)
            .reshape(bsz, n_heads, kv_seq_len, head_dim)
        )
    out = scaled_dot_product_attention(
        Q.contiguous(), K_e.contiguous(), V_e.contiguous(), **kwargs
    )
    return out.transpose(1, 2).contiguous()


__all__ = [
    "AttentionConfig",
    "AttentionContext",
    "select_attention_backend",
    "run_attention",
]