
    bCi]                         S SK r S SKrS SKrSSKJr  \" 5       (       a  S SKJr  SrSr\	" \R                  " S\S95      r\\\4;  a  \" S5      e0 rS	 rS
 r   SS jr     SS jrg)    N   )is_torch_npu_available)npu_fusion_attention   NPU_FA2_SPARSE_MODE)defaultzEnvironment variable `NPU_FA2_SPARSE_MODE` can only be set as 2 (top-left aligned causal mask) or 3 (down-right aligned causal mask).c                     U [         ;  a?  [        R                  " [        R                  " SS/U S9SS9R	                  5       [         U '   [         U    $ )z6Get or create attention mask for the specified device.i   device   )diagonal)ATTN_MASK_NPU_CACHEtorchtriuonesboolr
   s    g/home/james-whalen/.local/lib/python3.13/site-packages/transformers/integrations/npu_flash_attention.pyget_attn_mask_npur   (   sF    ((&+jjT4LQW1Xcd&e&j&j&lF#v&&    c                  >    [        5       (       a  [        [        :H  $ S$ )NF)r   SPARSE_MODE!TOP_LEFT_ALIGNED_CAUSAL_MASK_MODE r   r   'is_npu_fa2_top_left_aligned_causal_maskr   /   s    ?U?W?W;;;b]bbr   c                 (   SU-
  nUc&  S[         R                  " U R                  S   5      -  nU(       d   U R                  S   n[        XX(SXtS9S   n	U	$ [	        U R
                  5      n
U R                  S   n[        U UUUSUUU
[        S9	S   n	U	$ )N      ?r   BSND)	keep_probscaler   )r   r    
atten_masksparse_mode)mathsqrtshaper   r   r   r   )qkv	dropout_psoftmax_scalecausalkwargsr   head_numoutputattn_mask_npus              r   npu_flash_attn_funcr0   3   s     iIdii44771:%aA9jklm  M *!((3771:%$#

 
 Mr   c
                    SU-
  nUc&  S[         R                  " U R                  S   5      -  nU	(       d  U R                  S   n[        U UUUS S UUS[	        USS  R                  5       R                  5       R                  5       5      [	        USS  R                  5       R                  5       R                  5       5      S9S   nU$ [        U R                  5      nU R                  S   n[        U UUUS S UUUS[	        USS  R                  5       R                  5       R                  5       5      [	        USS  R                  5       R                  5       R                  5       5      [        S9S   nU$ )Nr   r   r   TND)pser!   r    r   input_layoutactual_seq_qlenactual_seq_kvlenr   )	r3   padding_maskr!   r    r   r4   r5   r6   r"   )r#   r$   r%   r   tuplecpunumpytolistr   r   r   )r&   r'   r(   cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kr)   r*   r+   r,   r   r-   r.   r/   s                  r   npu_flash_attn_varlen_funcr@   V   sv    iIdii44771:%!,qr"2"6"6"8">">"@"G"G"IJ"<#3#7#7#9#?#?#A#H#H#JK
 @ M% *!((3771:%$!,qr"2"6"6"8">">"@"G"G"IJ"<#3#7#7#9#?#?#A#H#H#JK#
   Mr   )        NF)NNrA   NF)r#   osr   utils.import_utilsr   	torch_npur   r   #DOWN_RIGHT_ALIGNED_CAUSAL_MASK_MODEintgetenvr   
ValueErrorr   r   r   r0   r@   r   r   r   <module>rI      s     	  7 .
 %& !&' #"))1;^_`8:]^^
	1 
  'c  R 4r   