
    oi'                     B   S SK r S SKJr  S SKJrJr  S SKrS SKJrJ	r	J
r
JrJrJrJr  S.S jr\S 5       rS/S jr\" S	S
9 " S S5      5       r\" S	S
9 " S S5      5       r\" S	S
9 " S S5      5       r\R*                  S\R,                  S\R.                  S0rS rS0S jrS r     S1S\\   4S jjr    S2S\\   S\S\S\S\4
S jjrS S/r SS!/r!/ S"Qr"\ RF                  " \ \!\"5       V VVs/ s H  u  pn\" U UUS    US    SSS#9PM     snnn r$\" S S!S$S%S	SS#9r%\" S&S'S(S)SSS#9r&S'S*/r'\R*                  /r(\ RF                  " \'\(5       VVs/ s H  u  p4\" X4S+9PM     snnr)\" 5       u  r*r+r,\-S,:X  a  \." \+S    R_                  SSS-95        ggs  snnn f s  snnf )3    N)contextmanager)	dataclassfield)KernelConfigKernelConfigBackward_dWKernelConfigBackward_dXKernelConfigForward prune_kernel_configs_backward_dW prune_kernel_configs_backward_dXprune_kernel_configs_fwdc                     [        X-  5        g N)print)charlengths     Z/home/james-whalen/.local/lib/python3.13/site-packages/unsloth/kernels/moe/tests/common.pyprint_delimiterr      s    	$-    c               #   <   #    [        5         S v   [        5         g 7fr   )r    r   r   delimiter_contextr      s     	s   Fc                    [         R                  " X4SXVS9S-  n[         R                  " X-  U4SXVS9S-  n[         R                  " USU-  U4SXVS9S-  n	[         R                  " X2U4SXVS9S-  n
[         R                  " X4SXVS9nU(       aP  UR                  5         UR                  5         U	R                  5         U
R                  5         UR                  5         XxXU4$ )Ncuda)devicedtyperequires_grad
      )torchrandnretain_grad)MNKEtopkr   r   X1X2W1W2scores               r   make_inputsr,       s    QFVUZ
	 
 	XqMFE	
 	  	AqMFE	
 	  	Q1I]
	  KK^E



25  r   T)kw_onlyc                   Z    \ rS rSr% \\S'   \R                  \S'   Sr\	\S'   Sr
\\S'   Srg	)

DataConfig?   seq_lenr   r   r      bsr   N)__name__
__module____qualname____firstlineno__int__annotations__r   r   r   strr3   __static_attributes__r   r   r   r/   r/   ?   s$    L;;FCBKr   r/   c                   |    \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   \\S'   Sr\\S	'   \" SS
9r	\\S'   S r
Srg)ModelConfigG   hidden_sizeintermediate_sizenum_expertsr&   use_sigmoidrenormalizeFpre_mul)initpost_mulc                 0    U R                   (       + U l        g r   )rD   rF   )selfs    r   __post_init__ModelConfig.__post_init__R   s     LL(r   )rF   N)r4   r5   r6   r7   r8   r9   boolrD   r   rF   rI   r;   r   r   r   r=   r=   G   sC    
IGT%(Hd()r   r=   c                   8    \ rS rSr% Sr\\S'   \\S'   \\S'   Sr	g)GroupedGEMMTestConfigV   testnamedata_configmodel_configr   N)
r4   r5   r6   r7   rP   r:   r9   r/   r=   r;   r   r   r   rM   rM   V   s    D#r   rM   )MbP?rS   )-C6?rT   )h㈵>rU   c                     [        U [        R                  5      (       a+  [        R                  " X:H  5      (       d   SU  SU 35       eg X:X  d   SU  SU 35       eg )Nztensors not equal z != zref not equal to tri )
isinstancer   Tensorallreftris     r   assert_equalr]   e   sZ    #u||$$yy$$I(:3%tC5&II$zA23%tC5AAzr   c                 ~   UR                   R                  S:X  aW  U R                  UR                   5      nU R                   UR                   :X  a   [        R                  " Xa:H  5      (       d   eg Un Uc  SnUc  Sn U R                  [        R
                  5      R                  5       n UR                  [        R
                  5      R                  5       nU R                  UR                  :X  d#   SU R                  < SUR                  < 35       e[        R                  " U 5      n[        R                  " U5      n[        R                  " Xx5      (       d   S5       e[        R                  " USU 5      n	[        R                  " USU5      n
SnS	[        R                  " [        R                  " U	5      5      U-   -  nX-  n	X-  n
[        R                  " [        R                  " U	5      R                  5       5      U-   n[        R                  " X-
  5      [        R                   " U[        R                  " U	5      5      -  n[        R                  " U5      R#                  5       n[        R                  " [        R                  " U5      R                  5       5      R#                  5       nU(       a.  [%        U< S
U< SU< S35        [%        U< SU< SU< S35        X:  a  [        R&                  " X:  5      nUR)                  S5      nUS S n[%        SUUR+                  5       [-        UR                  5      UR/                  5       4-  5        UR1                  S5      n[%        SU / UQ7   R3                  5       5        [%        SU/ UQ7   R3                  5       5        X::  d   eUU::  d   eg )Nr2   g{Gz?gMbp?z(Tensors must have same size ref.shape = z tri.shape = z'Tensor must have same infinite elementsr   gKH9g      ?z maximum relative error = z (threshold = )z RMS relative error = i  z5%d / %d mismatched elements (shape = %s) at coords %szref values: ztri values: )r   itemsizetor   rY   float32detachshapeisinfequalwheremaxabssqrtsquaremeanmaximumitemr   nonzerosizenumeltupletolistunbindcpu)r[   r\   maxtolrmstoldescriptionverboseref_as_typeinf_mask_refinf_mask_trirefntrineps
multiplierref_rmsrel_errmax_errrms_errbad_idxsnum_nonzeros                      r   assert_closer   l   s   
yyQffSYY'99		!99[/0000~~
 &&

&
&
(C
&&

&
&
(C		SYYD	2cii^>SYYNCD ;;s#L;;s#L;;  101  ;;|Q,D;;|Q,D C		%))D/2S89JDDjjd+0023c9Gii$u}}Weiio'NNGii %%'Gjjg.3356;;=GGV-	
 	GV-	

 ==!12mmA&ET?CGMMOU7==-A8??CTUV	

 ??2&nc)8)n0023nc)8)n0023fr   c                     [        XS [        U 5       5        [        R                  " U[        U 5      S  S:H  5      (       d   eg )Nr`   )r]   lenr   rY   rZ   s     r   assert_indx_equalr      s8    *CHo&99SS_*++++r      returnc                 x   / n/ n/ nS H  nS H  n	S Hx  n
S Ho  nS Hf  nUR                  [        U UUUUU
UUUU	S9
5        UR                  [        U UUUUUU
UU	US9
5        UR                  [        U UUUUU
UUU	US9
5        Mh     Mq     Mz     M     M     [	        U5      n[        U5      n[        U5      nXVU4$ )N)FT)TF)
BLOCK_SIZE_MBLOCK_SIZE_NBLOCK_SIZE_K	num_warps
num_stagesuse_tma_load_wuse_tma_load_xuse_tma_store	permute_x	permute_y)
r   r   r   r   r   use_tma_load_dyr   r   r   r   )
r   r   r   r   r   r   r   r   r   r   )appendr	   r   r   r   r   r
   )r   r   r   r   r   configs_fwdconfigs_bwd_dXconfigs_bwd_dWr   r   r   r   r   s                r   get_kernel_test_configsr      s    KNN"	&I"/&3N)6#**//;/;/;,5-71?1?0=,5,5 '--3/;/;/;,5-72@1?,5,50= '--3/;/;/;,5-72@1?,5,50=; *7 '4 #0 ' #^ +;7K5nEN5nEN66r   kernel_configsr   r   	tma_loads	tma_storec                 D   / nU  GH  nU(       a  UR                   (       a  M  U(       a  UR                  (       a  M8  U(       a  [        U[        5      (       a$  UR                  (       d  UR
                  (       a  Mx  [        U[        5      (       a$  UR                  (       d  UR                  (       a  M  [        U[        5      (       a$  UR                  (       d  UR
                  (       a  M  U(       a  UR                  (       a  GM  UR                  U5        GM     U$ r   )r   r   rW   r	   r   r   r   r   r   r   r   )r   r   r   r   r   pruned_configsconfigs          r   remove_feature_flagsr      s     N ))))&"566((F,A,A&"9::))V-B-B&"9::))V-B-B##f%' !( r   r2      ))    r   )   r   )   r   )r&   rA   r?   r@   rB   rC   i   i       r   i   i   i   )r1   r   __main__)include_tuning_paramsinclude_tma)-P   )F)NNz--T)r   r   r   r   r   )TTTT)0	itertools
contextlibr   dataclassesr   r   r   grouped_gemm.kernels.tuningr   r   r   r	   r
   r   r   r   r   r,   r/   r=   rM   bfloat16float16rc   	TOLERANCEr]   r   r   listr   rK   r   TOPKNUM_EXPERTSTEST_MODEL_SIZESproductSMALL_MODEL_CONFIGSLLAMA_MODEL_CONFIGQWEN_MODEL_CONFIGSEQLENSDTYPEDATA_CONFIGSKERNEL_CONFIGS_FWDKERNEL_CONFIGS_BWD_dXKERNEL_CONFIGS_BWD_dWr4   r   	to_string)r&   rA   
model_sizer1   r   s   00000r   <module>r      s    % (     !> T   T) ) ) T   
NNL	MM<	MM<	BDN, =7 
,=7D &  	
 B 	
1v"g  *3):):k+**%: ! m&qM*  !     +	 $++GU;; 0;
  A )+@ z	a **$) 	+ 	
 SBs   2FF