
    oi(                         S SK JrJrJrJrJrJrJrJr  S SK	r	S SK
r
S SKJr  S SKJs  Jr  S SKrSSKJrJrJrJrJr  SSKJrJrJrJrJrJrJrJr  S r  \RB                  " \ 5        g)    )AnyListOptionalTupleUnionDictSetCallableN   )TEMPORARY_PATCHEStorch_compile_torch_compileget_torch_compile_optionsUNSLOTH_ENABLE_LOGGING)patch_functionpatch_function_past_key_valuesdedentKWARGS_TYPEraise_errorloggerCacheprocess_returnc                    ^  SS K n U R                  R                  R                  R                    Sn SS K n U R                  R                  R                  R                    SnU(       a^  S[        R                  S[        R                  4S jn[        SSS9S	 5       mS[        R                  S[        R                  4U4S
 jjnOES[        R                  S[        R                  S[        R                  S[        R                  4S jn[        U R                  R                  R                  R                  SU5        g ! [
         a  n[        SU5      s S nA$ S nAff = f! [
         a  nSn S nAGNS nAff = f)Nr   zGtransformers.models.qwen3_moe.modeling_qwen3_moe.Qwen3MoeSparseMoeBlockTFhidden_statesreturnc                    UR                   u  p#nUR                  SU5      nU R                  U5      n[        R                  " US[
        R                  S9n[
        R                  " X`R                  SS9u  pgU R                  (       a  XfR                  SSS9-  nUR                  UR                  5      n[
        R                  " X#-  U4UR                  UR                  S9n[
        R                  R                   R#                  XpR$                  S9R'                  S	SS
5      n	[
        R(                  " U	R                  SS9S
5      R+                  5       n
U
 H  nU R,                  U   n[
        R.                  " X   R1                  S
5      5      u  pUSU4   R3                  SU5      nU" U5      XnUS4   -  nUR5                  S
UUR                  UR                  5      5        M     UR3                  X#U5      nX4$ ) r   dimdtyper    Tr    keepdimr!   devicenum_classes   r   r   N)shapeviewgateFsoftmaxtorchfloattopktop_knorm_topk_probsumtor!   zerosr&   nn
functionalone_hotnum_expertspermutegreaternonzeroexpertswheresqueezereshape
index_add_)selfr   
batch_sizesequence_length
hidden_dimrouter_logitsrouting_weightsselected_expertsfinal_hidden_statesexpert_mask
expert_hit
expert_idxexpert_layeridxtop_xcurrent_statecurrent_hidden_statess                    a/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/temporary_patches/qwen3_moe.pyold_forward$patch_qwen3_moe.<locals>.old_forward<   s   6C6I6I3J)..r:>M IIm4Mii1EKKPO05

?JJ\^0_-O""#6#62t#6#LL-001D1DEO"'++-z:-BUBU^k^r^r#  ((--556FTdTd5emmnoqrtuvK {8'DaHPPRJ(
#||J7"[[)@)H)H)KL

 !.dEk : B B2z R(4](Co]`bfVfFg(g% $..q%9N9Q9QR_ReRe9fg ) #6"="=j[e"f&55    )dynamic	fullgraphc                    U R                  U5      n[        R                  " US[        R                  S9n[        R
                  " X0R                  SS9u  p4U R                  (       a  X3R                  SSS9-  nUR                  UR                  5      n[        R                  " X!R                  S9R                  SXC5      nXTU4$ )Nr   r   r   r"   Tr#   )r!   )r.   r/   r0   r1   float32r3   r4   r5   r6   r7   r!   
zeros_likescatter_)rE   r   rI   rJ   rK   router_scoress         rU   router_forward'patch_qwen3_moe.<locals>.router_forwardd   s     IIm4Mii1EMMRO05

?JJ\^0_-O"""14G4GBX\4G4]"]-001D1DEO!,,]DWDWXaabceu  HM MAArX   c                 N  > UR                   u  p#nUR                  SU5      nT" X5      u  pVn[        R                  " X#-  U4[        R                  UR
                  S9n[        U R                  5       H  n	U R                  U	   n
[        R                  " Xi:H  5      u  pX   R                  SU5      nU
" U5      X[U	S4   -  nUR                  SXR                  [        R                  5      5        M     UR                  X#U5      nUR                  UR                  5      U4$ )r   r   r%   Nr   )r,   r-   r1   r8   r\   r&   ranger<   r@   rA   rC   rD   r7   r!   )rE   r   rF   rG   rH   r_   rK   rI   rL   rO   rP   	token_idx_rS   rT   r`   s                  rU   forward patch_qwen3_moe.<locals>.forwardq   s"   6C6I6I3J)..r:>M >LD=`:M]"'++-z:%--XeXlXl# $D$4$45
#||J7${{+;+IJ	 !. 8 @ @Z P(4](Cm_ikoToFp(p%
 $..q)=U=UV[VcVc=de! 6" #6"="=j[e"f&))-*=*=>MMrX   top_k_indextop_k_weightsc                 T   [         R                  " U5      nUR                  S   n[         R                  " 5          [         R                  R
                  R                  X%S-   S9nUR                  SSS5      n[         R                  " UR                  SS9S5      R                  5       nS S S 5        W H  nUS   nX:X  a  M  [         R                  " WU   5      u  pX   n[        R
                  R                  XR                  U   5      R                  SSS9u  pU R                  U5      U-  n[        R
                  R                  XR                   U   5      nXXS 4   -  nUR#                  SXR%                  UR&                  5      5        M     U$ ! , (       d  f       N= f)Nr   r'   r)   r   r*   r"   r   )r1   r]   r,   no_gradr9   r:   r;   r=   r>   r6   r?   rA   lineargate_up_projchunkact_fn	down_projrD   r7   r!   )rE   r   rh   ri   rL   r<   rM   rN   rO   re   rd   rS   r.   uprT   s                  rU   rf   rg      sy    #("2"2="A'--a0K#hh1199+abSb9c)11!Q:"]];??x?+H!LTTV
 !
 )
']
,${{;z+BC - 8==//?P?PQ[?\]ccdekmcn(,D(9B(>%(*(<(<=RTbTbcmTn(o%(=ieiNi@j(j%#..q)=U=UViVoVo=pq ) '&# !s   A0F
F'rf   )0transformers.models.qwen3_moe.modeling_qwen3_moemodels	qwen3_moemodeling_qwen3_moeQwen3MoeSparseMoeBlock	Exceptionr   Qwen3MoeExpertsr1   Tensorr   r   )transformerseold_transformersrV   rf   r`   s        @rU   patch_qwen3_moer}   *   sY   i?%%88OO  ?%%88HH  &	6U\\ &	6ell &	6P 
4	8
	B 
9
	B,	N ,	N%,, ,	N ,	N\	' <<	' 	' !<<		'
 \\	'4 <&&00CCZZ\egnoQ  idfghhi    s4   .E 0E' 
E$EE$E$'
E>1E99E>)"typingr   r   r   r   r   r   r	   r
   osr1   torch.nnr9   torch.nn.functionalr:   r/   inspectcommonr   r   r   r   r   utilsr   r   r   r   r   r   r   r   r}   append rX   rU   <module>r      sW   " J I I 	      	 	 	Op`     )rX   