
    oiA                        S SK 7  SSKrS SKJr  SSKJrJr  SSKJr  SSK	J
r
JrJrJr  S S	K JrJrJr   SS
KJrJrJrJr  SSKJr   SSKJrJr          SS\R:                  S\\   S\\R:                     S\\R@                     S\\!\R:                        S\"S\"S\\R@                     S\\!\R:                  \R:                  4      S\!\R:                  \\R:                     \\!\R:                        4   4S jjr#\RH                  r%  SS\R:                  S\\!\R:                        4S jjr& " S S\'5      r(g!   \" \5      r\\" S5      :  d  \" S\ S35      e GN#= f!   \r\r GN = f)    )*    N)__version__)Version
_get_dtype   )get_packed_info_from_kwargs)AttentionConfigAttentionContextrun_attentionselect_attention_backend)LlamaRotaryEmbedding!LlamaLinearScalingRotaryEmbedding"_LlamaModel_fast_forward_inference)Qwen3AttentionQwen3DecoderLayer
Qwen3ModelQwen3ForCausalLMz4.50.3z&Unsloth: Your transformers version of z does not support Qwen3 and Qwen3Moe.
The minimum required version is 4.50.3.
Try `pip install --upgrade "transformers>=4.50.3"`
to obtain the latest transformers build, then restart this session.)*_prepare_4d_causal_attention_mask_for_sdpa)Qwen3SdpaAttentionQwen3FlashAttention2hidden_statescausal_maskattention_maskposition_idspast_key_valueoutput_attentions	use_cachepadding_maskposition_embeddingsreturnc
                    [        U S5      (       a  U ?U ?U ?U ?U ?U ?U ?UR                  5       u  pnU R                  R                  nU R                  nU R                  R                  nU R                  nUU-  U:X  d   eU R                  X5      u  nnnUR                  XUU5      nUR                  XUU5      nUR                  XUU5      R!                  SS5      n[#        XR$                  5      n['        U R(                  U5      n['        U R*                  U5      nUR!                  SS5      nUR!                  SS5      nUR,                  S   nUb  UUS   R,                  S   -  nU	(       a  UU	S   R,                  S   ::  a  U	u  nnOEU R.                  nUR1                  UUS9  UR3                  UUR$                  R4                  5      u  nnUb  UOUR7                  S5      n[9        UUUUU5      u  nnUb2  [:        R=                  US   U/SS9n[:        R=                  US   U/SS9nU(       a  UU4OS nUS L=(       a    US L n[?        U5      n[A        UUUS	S
0SS
[C        U SS 5      S.S9n[E        UUUUUURF                  UUUS9	n[I        UUUUUS9n U RK                  XUU-  5      n!U RM                  U U!5      n!S n"U!U"U4$ )Npaged_attentionr   r   r   )seq_lenr   )dimcausalTg        softmax_scale)	dropout_pr'   r(   )backend
n_kv_headsn_groupsflash_dense_kwargsflash_varlen_kwargs)	bszq_len
kv_seq_lenn_headshead_dimrequires_gradseq_infor   r   )configcontextQKV)'hasattrpaged_attention_Kpaged_attention_Vr#   temp_QAtemp_KVRH_Q	attentionsizer6   num_attention_headsnum_key_value_groupsnum_key_value_headsr3   	apply_qkvview	transposer	   devicefast_rms_layernormq_normk_normshape
rotary_embextend_rope_embedding
get_cachedindexgetfast_rope_embeddingtorchcatr   r
   getattrr   r4   r   reshapeapply_o)#selfr   r   r   r   r   r   r   r   r    argskwargsr/   r0   _r2   r,   r+   r3   r8   r9   r:   r5   r1   cossinrN   rope_position_ids
use_varlenr*   attention_configr7   Aattn_outputattn_weightss#                                      N/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/models/qwen3.pyQwen3Attention_fast_forwardrf   A   s    t&''"" LLIN!&&(MCkk--G((H00J}}H G+++nnT1GAq!	GX	A 	
J	A 	
s:x0::1a@A*63G3GHH 	4;;*A4;;*A	AqA	AqAJ!nQ'--b11
 z-@-C-I-I!-LL&S__
((j(A((QXX^^DS %0fjj6P  q!S#/@ADAq!II~a(!,AI6II~a(!,AI6(aVdN %@.D*@J&z2G&&-$T?DA

 %33'!
G 	/7qVWXA))C((:;K,,t[1KLn44    c           
         UnUR                  5       u  pxn	Uu  pUR                  nU R                  R                  nU R                  nU R                  R
                  nU R                  nU R                  R                  nUU-  nU
R                  S   nUS-   nUR                  nU(       Ga  [        R                  [        U-   S-   SXU4UUS9U l        U R                  SS2S4   U l        U R                  SS2S4   U l        U
R!                  SSSS5      U R                  SU& UR!                  SSSS5      U R                  SU& [        R                  SUSU4UUS9U l        [        R                  SUSUU-  4UUS9U l        [        R                  X}SU4UUS9U l        UU:w  a  [        R                  SUU4UUS9U l        O U R"                  S   SS2SS2SU24   U l        [        R                  X}S[        U-   4UUS9U l        S[-        U R                  5      -  U l        US-  U l        OUU R                  R                  S   :  a  U R                  R3                  U R                  R                  S   [        -   SUUU45        U R                  SS2S4   U l        U R                  SS2S4   U l        U R*                  R3                  X}SU R*                  R                  S	   [        -   45        [5        U R6                  X`R"                  S   S
9n[5        U R8                  X`R$                  S   S
9n[5        U R:                  X`R$                  S   S
9nUR=                  USUU5      nUR=                  USUU5      nUR=                  USUU5      R?                  SS5      n[A        U RB                  U5      n[A        U RD                  U5      nUR?                  SS5      nUR?                  SS5      nU RF                  RI                  UUS-   5        U RF                  RK                  UUR                  RL                  5      u  nnUU   RO                  S5      nUU   RO                  S5      nU R0                  nU R&                  nUSS2SS2SS2US24   USS2SS2SS2SU24'   USS2SS2SS2SU24   USS2SS2SS2US24'   USS2SS2SS2SU24   RQ                  5         UU-  nURS                  UU5        USS2SU2SS2SS24   nUSS2SS2SS2US24   USS2SS2SS2SU24'   USS2SS2SS2SU24   USS2SS2SS2US24'   USS2SS2SS2SU24   RQ                  5         UU-  nURS                  UU5        UR!                  SSSS5      U R                  U'   UR!                  SSSS5      U R                  U'   U R                  SU R!                  SSSS5      nU R                  SU R!                  SSSS5      n[U        U R                  SS5      nUb.  UU:  a(  SU-
  nUSS2SS2US2SS24   n USS2SS2US2SS24   n!OUUn!n UR                  S   n"U R                  S   n#Uc	  U"U#:X  a  Sn$OSn$U R                  u    nn%nUS:X  d  [V        (       dt  US:w  an  U SS2SS2SSS2SS24   RY                  XUU%U5      n U!SS2SS2SSS2SS24   RY                  XUU%U5      n!U R[                  X}U%U5      n U!R[                  X}U%U5      n!US:X  ak  UU R.                  -  n[]        UU R?                  SS5      U R*                  SS2SS2SS2SU%24   S
9n&[_        U&S	[        R`                  S9U&SS& []        U&U!US
9n&O'[V        (       a  [c        UU U!UU$SS9n&O[c        UU U!UU$S9n&U&R?                  SS5      n&U&R[                  USU5      n&[5        U Rd                  U&U R(                  S
9n&U&UU44$ )a  
https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L406
Fast inference using KV cache.
QK^T can be computed in 4 chunks

[Q, q] @ [K, k].T where q, k are the new tokens.
[QK^T, Qk^T]
[qK^T, qk^T]

Since the attention mask wipes Qk^T, we just get
[QK^T,    0]
[qK^T, qk^T]

Since softmax is row-wise, we get
softmax([QK^T,    0])
softmax([qK^T, qk^T])

We then multiply by   [V]
                      [v]
softmax([QK^T,    0]) [softmax(QK^T)V] *
softmax([qK^T, qk^T]) [softmax([qK^T, qk^T]) @ [V, v]]

But notice * [softmax(QK^T)V] is just the last attention.
We just need to compute the last final row.

This means we can pass in a row of Q, but we need to
remember K and V, which are called the KV cache.
r$   r   r   )dtyperI   Nr      g      ?)outsliding_windowTF)r&   ri   )	attn_mask	is_causal
enable_gqa)rn   ro   )3rB   ri   r6   rC   rD   rE   r3   hidden_sizerM   rI   rT   emptyKV_CACHE_INCREMENTr#   r<   r=   permuter>   r?   r@   temp_OrA   	math_sqrtscalarhalf_head_dimresize_fast_linear_forwardq_projk_projv_projrG   rH   fast_rms_layernorm_inferencerK   rL   rN   rO   rP   rQ   	unsqueezeneg_addcmul_rV   SDPA_HAS_GQAexpandrW   torch_matmultorch_nn_functional_softmaxfloat32scaled_dot_product_attentiono_proj)'rY   r   r   r   
do_prefillr   Xnr/   r\   hdK1V1ri   r2   r,   r+   r3   rq   attention_sizer%   r1   rI   QnKnVnr]   r^   hr@   RH_Krm   slicing_tokensKnnVnnQ_lenK_lenro   
cached_lenrb   s'                                          re   %Qwen3Attention_fast_forward_inferencer      s   H 
B##%JCBFBHHEkk--G((H00J}}H ++))Kx'NhhrlG1J !!F${{')A-q#8L  +  

 "&!5!5ad!;!%!5!5ad!;+-::aAq+Ax(+-::aAq+Ax({{Q' # 
 {{Q
X-. # 
 KKq( ;UU[K\	 [(++q#{&;UU[+\DK,,q/!Q*<=DK107:;UU[ % 
 Idmm44%]	t++11!4	4$$$$**1-0BB	
 "&!5!5ad!;!%!5!5ad!;1dnn22269KKL	
 
T[["LLO	DB	T[["LLO	DB	T[["LLO	DB	Q
B 
Q
H
B 
aX	.	8	8A	>B	%dkk2	6B	%dkk2	6B	a	B	a	B 	OO))"gk:))*biiooFHC
l

%
%a
(C
l

%
%a
(CA99D1aAB;DAq"1"1aBQB;DAq!"Aq"1"#IBKKc	;J;1D 1aAB;DAq"1"1aBQB;DAq!"Aq"1"#IBKKc
 ')jjAq!&<D7#&(jjAq!&<D7#				,	4	4Q1a	@B				,	4	4Q1a	@B T[[*:DAN!j>&A^+A~)*A~)*rS HHRLEIIbME%5.		 ))Aq*a
ax||A!Qa"#**Xz8
 !Qa"#**Xz8
 kk#
H=kk#
H= ax
dkka#4>>!Q;J;:N+O
 +R
! Cr*<,*%!A -C.iA 	
AqA			#q.)ADKK$++>Ar2h;rg   c                   N    \ rS rSr\S 5       r\           SS j5       rSrg)FastQwen3Modeli  c                     [        S[        [        [        S9u  pU b(  [	        U[        5       5        [        U 5      [        l        [        [        l	        [        [        l	        [        [        l	        [        [        l	        [        [        l	        [!        [#        [$        5      5      [&        l	        [(        [*        l	        [-        [&        5        SS Kn[        UR0                  R2                  R4                  l        g )NQwen3)
model_namerope_modulescaled_rope_moduleattention_moduler   )patch_linear_scalingr   r   r   execglobalseval__init__rf   forwardr   r   LlamaDecoderLayer_fast_forwardr   LlamaModel_fast_forwardr   CausalLM_fast_forwardr   r   r   PeftModel_fast_forwardPeftModelForCausalLM!fix_prepare_inputs_for_generation(transformers.models.qwen3.modeling_qwen3modelsqwen3modeling_qwen3Qwen3RotaryEmbedding)	init_namefunctiontransformerss      re   	pre_patchFastQwen3Model.pre_patch  s    2 .!B-	
	  79%&*9oN#!<%@"'B$$B!4
#8./TU$
  (>$)*:; 	8 ! 	!!00E 	rg   Nc                 N    [         R                  " SU UUUUUUU[        U	U
S.UD6$ )N)r   max_seq_lengthri   load_in_4bittoken
device_maprope_scalingfix_tokenizermodel_patchertokenizer_nametrust_remote_code )FastLlamaModelfrom_pretrainedr   )r   r   ri   r   r   r   r   r   r   r   r   r[   s               re   r   FastQwen3Model.from_pretrained  sG     -- 
#+'#')*+ 1
 
 	
rg   r   )zQwen/Qwen3-7Bi   NTN
sequentialNTNNF)__name__
__module____qualname____firstlineno__staticmethodr   r   __static_attributes__r   rg   re   r   r     sJ     B $!!
 
rg   r   )NNNNFFNN)FN))llamaos_utilsr   unsloth_zoo.utilsr   r   utils.packingr	   utils.attention_dispatchr
   r   r   r   r   r   r   r   r   r   r   r   transformers_versionImportError%transformers.modeling_attn_mask_utilsr   r   r   rT   TensorOptionalBlockDiagonalCausalMask
LongTensorTupleboolrf   matmulr   r   r   r   r   rg   re   <module>r      s    	  1 7  
 "
* 6:-1/348#/3GKj5<<j5 12j5 U\\*	j5
 5++,j5 U5<<01j5 j5 j5 5++,j5 "%ell(B"CDj5 5<<%,,/%:M1NNOj5Z || V<<V U5<<01Vr?
^ ?
G

"#787$  45I4J KR S
 	
&*')s   D? E' ?"E$'E0