
    ȅi                     "   S SK r S SKJrJr  S SKrS SKJs  Jr  S SK	J
r
  SSKJrJr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJrJrJrJrJrJrJr  SSKJrJrJ r   SSKJ!r!J"r"  SSK#J$r$J%r%J&r&  S\RN                  S\R(                  S\R(                  S\R(                  S\R(                  S\(\)\R(                  \\R(                     4   4S jr*S\)S\%S\%S\\%   S\\%   S\\%   S\\%   S\%4S jr+    S#S\S\,\   S\,\   4S  jjr-S!\-l.        S" r/g)$    N)OptionalUnion)mm_args   )configir)CppGemmTemplate)CppGroupedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_gemm_template)opsOpsValueVW_tensorpacked_weightx_scalex_zpw_scalereturnc                 >   S n[        S X#U4 5       5      nU(       Ga  [        R                  R                  UR	                  5          [        R                  R                  UR	                  5          -  n[        R                  R                  UUR	                  5       S-   S9n[        R                  " U R                  [        R                  5      SS9n[        R                  R                  UR	                  5          n	X-  U	-  n[        R                  R                  UUR	                  5       S-   S9n
Oa[        R                  " U R                  [        R                  5      SS9n[        R                  R                  UUR	                  5       S-   S9n
UU
U4$ )Nc              3   ^  #    U  H  n[        U[        R                  5      =(       a}    UR                  5       [        R
                  R                  ;   =(       aK    [        UR                  S 5      =(       a.    [        UR                  R                  [        R                  5      v   M     g7f)dataN)

isinstancer   r   get_namer   graph	constantshasattrr%   ConstantBuffer).0items     Z/home/james-whalen/.local/lib/python3.13/site-packages/torch/_inductor/mkldnn_lowerings.py	<genexpr>+create_int8_compensation.<locals>.<genexpr>,   sz      *
 -D	 	4& 	:MMOqww000	:DIIv&	: tyy~~r'8'89	: -s   B+B-_x_w_compensnamer   )dim_BMatrixCompens)
allr   r(   r)   r'   add_tensor_constanttorchsumtofloat)r   r   r   r    r!   	x_w_scaleuse_int8_fast_compensation_pathx_w_scale_tensorweight_compens_tensorx_zp_tensorweight_compenss              r.   create_int8_compensationrB       su    )-I&) *
 G,* '# 'GGg..01gg 0 0 234 	 GG//'')N: 0 
	 !&		(++ekk*B Jgg''8 5 H; V44!''),== 5 

 !&		(++ekk*B J44!''),== 5 

 	(     r=   input_weight_compo_x_scale_x_zp_w_scale
_x_w_scalec                 t   U (       a.  [         R                  " [         R                  " UU5      U5      nU$ [         R                  " [         R                  " UU5      U5      n[         R                  " U[         R                  " [         R                  " [         R                  " UU5      U5      U5      5      nU$ N)r   submul)r=   rD   rE   rF   rG   rH   rI   temps           r.   'codegen_int8_gemm_template_compensationrO   P   s     'wwGG 
H K9 wwGG 
 wwGGGG    	
 KrC   xwbc           	         U R                  5       n[        U5      S:  a  [        U SUS   /5      n [        U5      n[        R                  (       d  [        R
                  (       d   eU V	s/ s H&  oc  U	O[        R                  R                  U	5      PM(     nn	/ n
[        U [        US   SS/5      US9Gt ppU V	s/ s H  oS LPM     sn	SS [        R                  [        U5      U 5      S.nU /UQnUR                  U V	s/ s H	  oc  M  U	PM     sn	5        [        R                   " U
UU40 UD6  [        U
5      S:w  d   e[#        SU
UU5      nUR$                  R$                  n[        U5       Vs/ s H!  n[        R&                  " Xo[(        U4/5      PM#     nn[        R*                  " US   R-                  5       S	9Ul        UUl        [        U5       Vs/ s H%  n[        R2                  R5                  UU   5      PM'     nn[        U5      S:  a@  [        U5       H1  n[        UU   / US S QUU   R                  5       S   P75      UU'   M3     U$ s  sn	f s  sn	f s  sn	f s  snf s  snf )
N   r   r   layoutT)has_biastrans_wepilogue_creatoract_mappinggrouped_gemm)device)get_sizelenr   r   max_autotunemax_autotune_gemmr   ExternKernelrealize_inputr   r   dictfromkeysrangeextendr
   add_choicesr   r%   MultiOutputlistMultiOutputLayout
get_devicerW   outputsr   create)rP   rQ   rR   attrscalars	algorithmrW   x_sizenum_gemmbiaschoices_kwargsinput_nodesresulttemplate_bufgemm_idxreturn_bufsreturn_tensorss                      r.   grouped_gemm_loweringr~      sq    ZZ\F
6{QR$%1vH&":":::STUST42??#@#@#F	FSTAU"$Gq'!A$A"7GQ 344!$%!4 }}U8_a8	F 'q'K??@&& 	 w<1&	F ;;##L h'H 	vtX.>-?@'  
 ..k!n6O6O6QRL&LCH?CRxK12?   6{QhH'+x(G&"+G~h7@@B2FG(N8$ (
 ] 	V 5 @"s$   ,-I>I"I'I'6(I,,I1Tc            !        ^^^^^^ [         R                  R                  (       GaN  SSKJm  [        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                   R                  S9m[         R                  R                  R"                  [         R                  R                  R$                  [         R                  R                  R&                  [         R                  R                  R                  [(        R*                  R,                  [         R                  R                  R.                  /n [1        [         R                  R                  R"                  5      S[2        S[2        S	[2        4U4S
 jj5       n[1        [         R                  R                  R"                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R$                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R                  5       S3S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R                  R                  5       S3S[2        S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R&                  5      S[2        S[2        S	[2        4U4S jj5       n[1        [(        R*                  R,                  5      S[2        S[2        S[2        S[2        S[2        S[2        S[2        S[4        S[6        [8           S[8        S[8        S[8        S[4        S [4        S![4        S"[4        4 U4S# jj5       n[1        [         R                  R                  R.                  S S$9S[2        S%[2        S&[2        S	[2        4U4S' jj5       n[1        [         R                  R                  R:                  R                  S S$9[1        [         R                  R                  R:                  R<                  S S$9S[2        S%[2        S&[2        S([2        S	[2        4
U4S) jj5       5       n	[1        [         R                  R                  R                  S S$9 S3S[2        S%[2        S&[2        S*[2        S	[2        4
U4S+ jjj5       n
[1        [         R                  R                  R                  R                  S S$9[1        [         R                  R                  R                  R<                  S S$9 S3S[2        S%[2        S&[2        S*[2        S,[2        S	[2        4U4S- jjj5       5       n[         R                  R>                  (       a  [        [         R                  R@                  RB                  S.STRD                  R                  S9mU RG                  [         R                  R@                  RB                  5        [1        [         R                  R@                  RB                  5      S S/.S[2        S0[2        S1[2        S[H        [2           4U4S2 jjj5       n[K        U 5        g g )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwiserP   weightrt   c
                 t   > [         R                  " T
R                  R                  U UUUUUUUUU	5
      5      $ rK   )r   rn   ConvolutionUnary)rP   r   rt   paddingstridedilationgroupsro   rp   rq   r   s             r.   convolution_unary5register_onednn_fusion_ops.<locals>.convolution_unary   sJ     ##**11 rC   otherc                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rK   )r   rn   ConvolutionBinaryrP   r   r   rt   r   r   r   r   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr   s                r.   convolution_binary6register_onednn_fusion_ops.<locals>.convolution_binary	  sS      ##++22 !# rC   c                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rK   )r   rn   ConvolutionBinaryInplacer   s                r.   convolution_binary_inplace>register_onednn_fusion_ops.<locals>.convolution_binary_inplace+  sS      ##2299 !# rC   rQ   rR   c                   >^^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n Ub  [        R                  R                  U5      n/ n[        R                  (       d  [        R                  (       ao  [        USS/5      n	[        X	US9Gt pp	[        X`U	5      (       aC  UUU4S jnUS LSTS:X  a  S OUS	.nUb  / S
QUS'   [        R                  " UUUc  X/OXU/40 UD6  [        U5      S:X  d  [        5       (       a>  [        TTTS9nUc  S US'   UR!                  TR"                  " Uc  X/OXU/U40 UD65        UR%                  5       [&        R(                  R*                  ;   d   eSS 0n[-        SUUc  X/OXU/UUS9n[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrT   rU   r   r   rV   c                    > [        U TTTS9$ )Nrp   rq   r   )bufrq   ro   rp   s    r.   rZ   Jregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creatorc  s    8w)  rC   TnonerX   rY   rZ   )rT   r   r   input_indices)ro   rp   rq   Bc                 X    [         R                  R                  U R                  5          $ rK   r   r(   r)   r'   rP   s    r.   <lambda>Bregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>      QWW..qzz|<rC   linear_unaryinput_gen_fnsr^   r_   r   r   rb   rc   r   r`   ra   r   r   r   r	   rh   r   rd   appendbindr'   r   r(   r)   r   )rP   rQ   rR   ro   rp   rq   rW   rr   ru   transposed_wrv   rZ   rw   r   ry   aten_mkldnn_linear_unarys      ```         r.   r   0register_onednn_fusion_ops.<locals>.linear_unaryM  s    ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&1.5af.U+A(LAA %&TM#'$(FND8H	F }2;/#//"#)! !	 7|q $9$;$;4IN9"&F3K,11"#)! ! ::<177#4#4444<M /)!+F 6{Qf&Ks&KV__5Fr5J&KLMrC   yc                   >^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n TR                  5       n[        U5      S:  a  [        TSUS   /5      mUb  [        R                  R                  U5      n/ n[        R                  (       d  [        R                  (       ao  [        USS/5      n	[        X	TUS9Gt pp	m[        XPU	5      (       aA  UU4S jnUS LSUS.nUc  / S	QO/ S
QUS'   [        R                  " UUUc  U TU/OU TX#/40 UD6  [        U5      S:X  d  [        5       (       a?  [        TS9nUc  S US'   UR!                  TR"                  " Uc  U TU/OU TX#/U40 UD65        UR%                  5       [&        R(                  R*                  ;   d   eSS 0n[-        SUUc  U TU/OU TX#/UUS9n[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrT   rU   r   r   rV   c                    > [        U TTS9$ )N)r   r   )r   ro   r   s    r.   rZ   Kregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator  s    8d!LLrC   Tr   )r   rT   r   )   r   rT   r   r   )ro   r   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   Cregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>  r   rC   linear_binaryr   r   )rP   r   rQ   rR   ro   rW   rr   y_sizeru   r   rv   rZ   rw   r   ry   aten_mkldnn_linear_binarys    `  `          r.   r   1register_onednn_fusion_ops.<locals>.linear_binary  s!    ZZ\F6{QR,-ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&118Qv2.AQ )LAAM %&TM#',<F <=9i,F?+#//%&YAq	Q1L !	 7|q $9$;$;49"&F3K-22%&YAq	Q1L ! ::<177#4#4444<M /YAq	Q1L+F 6{Qf&Ks&KV__5Fr5J&KLMrC   c                 v   > [         R                  " TR                  R                  U UUUUUUUUU	U
5      5      $ rK   )r   rn   ConvolutionTransposeUnary)rP   r   rt   r   output_paddingr   r   r   ro   rp   rq   r   s              r.   convolution_transpose_unary?register_onednn_fusion_ops.<locals>.convolution_transpose_unary  sM     ##33::" rC   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    > [         R                  " [        R                  TR                  R                  U UUUUUUUUU	U
UUUUU5      5      $ rK   )pytreetree_mapr   rn   MkldnnRnnLayer)rP   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                   r.   mkldnn_rnn_layer4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer  sc    & ??  ((//!! rC   )type_promotion_kindr   r!   c                 R  > [        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9n[        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9n[        R                  " TR                  R                  U UUUUUUUUU	U
UUUUUU5      5      $ )Ndtyper   r2   r   r    w_zp)r&   r   r   typer;   r   r(   r7   r8   tensorfloat32int32intrn   QConvPointWisePT2E)rP   r   r    r   r!   r   rt   r   r   r   r   o_inv_scaleo_zero_pointoutput_dtypero   rp   rq   r   s                    r.   qconvolution_unary6register_onednn_fusion_ops.<locals>.qconvolution_unary  sk   ( gr||44G}---''55LL>Y 6  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  |ww22LL%++6V 3  ##,,33!  # rC   accumc                 R  > [        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9n[        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9nUS:X  au  U[        R                  [        R                  4;   aQ  UR                  5       [        R                  [        R                  4;   a  UR                  5       U:w  a  [        Xn5      n[        R                   " TR"                  R!                  U UUUUUUUUU	U
UUUUUUUUUUU5      5      $ )Nr   r   r2   r   r    r   r9   )r&   r   r   r   r;   r   r(   r7   r8   r   r   r   r   bfloat16	get_dtyper   rn   QConvPointWiseBinaryPT2E)rP   r   r    r   r!   r   r   rt   r   r   r   r   r   r   r   accum_scaleaccum_zpr   alphar   r   r   r   s                         r.   qconvolution_binary7register_onednn_fusion_ops.<locals>.qconvolution_binaryW  s   < gr||44G}---''55LL>Y 6  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  |ww22LL%++6V 3 
 u$ U]]ENN$CCOO%%--)HHOO%5 !5##2299!  !#- rC   r   c                   >^^^^^^^	^
^^^^^^ UR                  5       [        R                  [        R                  4;   d   S5       eU R	                  5       n[        U5      S:  a  [        U SUS   /5      n [        T[        R                  5      (       dT  [        T5      [        L d   e[        R                  R                  [        R                  " T[        R                   S9SS9mOeTR#                  5         [%        S TR	                  5        5       5      (       a  [        T/ 5      m[        TR	                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R&                  S9SS9m[        T[        R                  5      (       dT  [        T5      [(        L d   e[        R                  R                  [        R                  " T[        R&                  S9SS9mOTR#                  5         TR+                  5       S:X  d   S5       eUc?  [        R                  R                  [        R                  " S
[        R&                  S9SS9nTR#                  5         UR#                  5         UR                  5       [        R&                  :w  a  [        [        R,                  R/                  U5      [        R0                  5      (       a  [        R                  R2                  UR5                  5          R7                  [        R&                  5      n[        R                  R                  [        R                  " U[        R&                  S9UR5                  5       S9nTc  S OTR                  5       m/ n[8        R:                  (       d  [8        R<                  (       Ga  [?        XUT	S9Gt npn[        [        R,                  R/                  U5      [        R0                  5      (       Ga\  [        R@                  " [        RB                  " [        R                  R2                  UR5                  5          5      [        R                  R2                  UR5                  5          5      (       a  [E        XU5      (       a  [        R                  R2                  UR5                  5          RG                  5       n[I        UUTTT5      u  mmmUU
UUUUU	UUUUUUU4S jnU R                  5       [        RJ                  [        R                  4;   d   e[L        RN                  " UUTc  U TTUTU/OU TTUTUT/TS LUTc  / SQO/ SQS9  [        U5      S
:X  d  [Q        5       (       aK  [S        TTT	T
TTS9nTc  S US'   URU                  TRV                  " Tc  U TTUTU4OU TTUTUT4U40 UD65        UR5                  5       [        R                  R2                  ;   d   eS S S S S.n[        [        R,                  R/                  T5      [        R0                  5      (       a  S US'   [        [        R,                  R/                  T5      [        R0                  5      (       a  S US'   [Y        SUTc  U TTUTU/OU TTUTUT/UUS9n[        U5      S:  a%  [        U/ US S QUR	                  5       S   P75      nU$ )Nz=Only int8 and e4m3fn weights are supported by oneDNN qlinear.rT   rU   r   r   r2   c              3   *   #    U  H	  oS :H  v   M     g7fr   N r,   r4   s     r.   r/   Dregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<genexpr>       >+=Cax+=   r   r   x_scale must be 0D or 1Dr   r    r   z(x_zp is incompatible with oneDNN qlinearr   rW   	out_dtypec                 d  >^^^^^	^
^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mS mT(       a  Tc   eTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUUU
UUUU4
S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   aw  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       T[        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc           	        >
 T" U 5      n[         R                  " U[        R                  5      nU S   4nS nS nS nT(       d  T" S5      nT" S5      nT" U5      nT" U5      nS nT(       a  Tc   eT" U5      n[	        TUUUUUU5      nT
b}  T" U5      n	T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U	[        R                  5      n	[         R                  " X5      nU$ NrU   r   r   r   r8   r   rO   r   r   )indexrD   weight_compens_indexrF   rG   rH   rE   rI   rN   _biasrt   
bias_dtypebias_loaderinput_loaderr=   w_scale_loaderweight_compens_loaderx_scale_loaderx_w_scale_loaderx_zp_loaders             r.   inner_fn]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn6  s	   $0$7E %(LL$FE49"I<0'+H$(E'+H#B+9"+=(3B+9:N+O,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt';#'KrC   r]   r   r  rangesr   r   c                 B   > T" U 5      n[         R                  " UT5      $ rK   r   r   r  rD   output_cast_loaderr   s     r.   inner_fn_cast_output_to_bf16qregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16q      (:5(A'*||E<'H HrC   r   _create_constantsc                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " UT
5      $ Ng      ?r   r      i   r8   r   r   rounduint8minimummaximumr   r  scale
zero_pointrD   	inv_scalevalqminqmaxclampedr  r   requant_input_loaders            r.   inner_fn_requanteregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||G\'J JrC   r$  r%  r8   r   r   r   int8make_loaderr   	Pointwiserl   r^   r   get_device_or_errorloweringr  	functoolspartialr;   r   )input_bufferr  
output_bufr  r,  r  r  r  r  r+  r  r	  r
  r  r  rq   ro   rt   r  o_scaler   r   rp   r=   r!   rA   r   r<   r    s        @@@@@@@@@@r.   rZ   Kregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator   s   +!MM!NN!KK!JJ	0      (4'?'?'A0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K(( ((T &(\\#/#:#:#<"'--%-#/#8#8#:	&
  6>)B *D'Y*J
 (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0K" *,'1'E'E'G&2)2):):$4*/./2</@*"
 (2':':'<	*J  *)rC   )r   r   r   rT         )   r   r   r   rT   r;  r<  rX   rZ   r   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmrt   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   Cregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   rE    r   rC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   rE    r   rC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   rE    r   rC   )r   r;  r<  r=  c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   rE        QWW->->qzz|-LrC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   rE    rJ  rC   qlinear_unaryr   )-r   r8   r0  float8_e4m3fnr^   r_   r   r&   r   r   r   r;   r   r(   r7   r   r   realizer6   r   r   	get_numelInputsKernelunwrap_storage_for_inputr+   r)   r'   r:   r   r`   ra   r   equal
zeros_liker   to_denserB   r   r	   rh   r   rd   r   r   r   )rP   r   r    r   r!   r   rt   r9  r   r   ro   rp   rq   rW   rr   w_zp_tensorru   rv   r   rZ   rw   r   ry   r  r=   rA   r<   aten_mkldnn_qlinear_unarys    `` ` ```````          @@@@r.   rL  1register_onednn_fusion_ops.<locals>.qlinear_unary  s   " !**,U=P=P0QQ OQ ZZ\F6{QR,-gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|
 ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  >>#q(T*TT(
 |
 ww22LL%++6V 3  OOLLN~~5;;.:88>!!4 4
  gg//@CCEKKPww22LLEKK@t}} 3  "&4>>3CJ*,G""f&>&>&>/6V|0,F} @@F))  (():):4==?)KL))$--/:  ,F}EE ww001G1G1IJSSUH 1 %	7&!|* |* |*| ;;=U[[%**,EEEE#//< GT='4H$wdS!%T!1)9< '92 7|q $9$;$;!(&2!-!%!(&/ <%)F6N-22< GT='4H$wdS	
 ! !))+qww/@/@@@@<<<<	M 88A!! 
 $Ma 88>!!  $Ma .< GT='4@$wdK+F 6{Qf&Ks&KV__5Fr5J&KLMrC   x2c                 D  >^^^^^^^	^
^^^^^^^ ^! U R                  5       nTR                  5       n[        U5      [        U5      :X  d   e[        U5      S:  a(  US;   a"  [        U SUS   /5      n [        TSUS   /5      m[        T[        R
                  5      (       dT  [        T5      [        L d   e[        R                  R                  [        R                  " T[        R                  S9SS9mOeTR                  5         [        S TR                  5        5       5      (       a  [        T/ 5      m[        TR                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R                   S9SS9mUc?  [        R                  R                  [        R                  " S
[        R                   S9SS9n[        T[        R
                  5      (       dT  [        T5      ["        L d   e[        R                  R                  [        R                  " T[        R                   S9SS9mOTR                  5         TR                  5         UR                  5         UR%                  5       [        R                   :w  a  [        [        R&                  R)                  U5      [        R*                  5      (       a  [        R                  R,                  UR/                  5          R1                  [        R                   5      n[        R                  R                  [        R                  " U[        R                   S9UR/                  5       S9nUS:X  a  T
[        R                  [        R2                  4;   aS  TR%                  5       [        R                  [        R2                  4;   a!  TR%                  5       T
:w  a  [5        TT
5      mOTR%                  5       T
:X  d   S5       eTR%                  5       m Tb  TR%                  5       OS m/ n[6        R8                  (       d  [6        R:                  (       Ga  US;   Ga  [=        XTUT
S9Gt nnpm[        [        R&                  R)                  T5      [        R*                  5      (       Ga  [        TR?                  5       R@                  5      S
:X  Gak  [        [        R&                  R)                  U5      [        R*                  5      (       Ga.  [        RB                  " [        RD                  " [        R                  R,                  UR/                  5          5      [        R                  R,                  UR/                  5          5      (       a  [G        UX5      (       a  [        R                  R,                  UR/                  5          nURI                  5       n[K        UUTTT5      u  mmm!UUUU	U
UUUUUUUU UU!U4S jn[L        RN                  " UUTc	  U TTUTUT/O	U TTUTUTT/TS LUTc  / SQO/ SQS9  [        U5      S
:X  d  [Q        5       (       aQ  [S        TT	T
UUUUTTTS9
nTc  S US'   URU                  T"RV                  " Tc	  U TTUTUT4O	U TTUTUTT4U40 UD65        UR/                  5       [        R                  R,                  ;   d   eS S S S.nTb  S US'   [Y        SUTc	  U TTUTUT/O	U TTUTUTT/UUS9n[        URZ                  RZ                  [        R\                  5      (       Ga  US:X  Ga  URZ                  RZ                  R^                  TR?                  5       :X  a  [        R
                  Ra                  [        R\                  " [        Rb                  " [        Rd                  " TTR?                  5       S95      URZ                  RZ                  Rf                  URZ                  RZ                  Rh                  URZ                  RZ                  Rj                  URZ                  RZ                  Rl                  S95      n[        U5      S:  a+  US;   a%  [        U/ US S QUR                  5       S   P75      nU$ ) NrT   )r   r9   rU   r   r   r2   c              3   *   #    U  H	  oS :H  v   M     g7fr   r   r   s     r.   r/   Eregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<genexpr>  r   r   r   r   r   r    r   r9   zCdtype of accum for qlinear post op sum should be the same as outputr   c                   >^^^^^	^
^^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mTR                  5       mS mT(       a  Tc   eTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUUU
UUUUUU4S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   a  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       [         R                  [        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc           	        > T" U 5      nT" U 5      nS nS nS nU S   4nT(       d  T" S5      nT" S5      nT" U5      n[         R                  " U[        R                  5      nT" U5      nS nT(       a  Tc   eT" U5      n[	        TUUUUUU5      n	Tb}  T" U5      n
T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U
[        R                  5      n
[         R                  " X5      n	T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U[        R                  5      n[         R                  " X5      n	U	$ r   r  )r  rD   _x2rF   rG   rH   r  rE   rI   rN   r  rt   r  r  r  r=   r  r	  x2_dtype	x2_loaderr
  r  r  s              r.   r  ^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn}  s\   $0$7E"+E"2C'+H$(E'+H49"I<0#B+9"+=(3B+9:N+O$'LL$FE,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt'; $,u~~/N#NN#N'5>>9&)ll3&F#&774#5D#'KrC   r  r   r   c                 B   > T" U 5      n[         R                  " UT5      $ rK   r  r  s     r.   r  rregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16  r  rC   r   r  c                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " U[         R                  5      $ r  r  r#  s            r.   r,  fregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||GU[['I IrC   r.  r/  ) r7  r  r8  r  r,  r  r  r  r  r+  r  r	  r`  r
  r  r  rt   r  r9  r   r   r   r   r   r=   r!   rA   rX  r_  r   r<   r    s         @@@@@@@@@@@r.   rZ   Lregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creatore  s   +!MM!NN!KK!JJ	0      (4'?'?'A$&NN$4	0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K.( .( .(` &(\\#/#:#:#<"'--%-#/#8#8#:	&
 &/)B * *(5*9	*J (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0J" *,'1'E'E'G&+kk)2):):$4*/./2</@*"
 (2':':'<	*J  *)rC   )r   r   r   rT   r;  r<  r=  )   r   r   r   rT   r;  r<  r=  r>  )
r?  r@  r   other_scaleother_zpbinary_post_opr   unary_post_opunary_post_op_argsunary_post_op_algorithmrt   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   Dregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   rC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   ro    r   rC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   ro    r   rC   )r   r;  r<  c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   ro    rJ  rC   rg  qlinear_binaryr   )r%   rW   )rW   inputsmake_kernel_rendertemplatechoice)7r^   r_   r   r&   r   r   r   r;   r   r(   r7   r8   r   r   rN  r6   r   r   r   rP  rQ  r+   r)   r'   r:   r   r   r   r`   ra   r   
get_layoutsizerR  rS  r   rT  rB   r	   rh   r   rd   r   r   r   r%   CppTemplateBufferrW   rn   NonOwningLayoutReinterpretViewrt  ru  rv  rw  )#rP   r   r    r   r!   r   rX  rt   r9  r   r   x2_scalex2_zpr   r   r   r   r   rW   rr   x2_sizerU  ru   rv   r   rZ   rw   r   ry   r  r=   rA   r_  r<   aten_mkldnn_qlinear_binarys#    `` ` `````    ```           @@@@@r.   rs  2register_onednn_fusion_ops.<locals>.qlinear_binary  s>   6 ZZ\FkkmGv;#g,...6{Q;.#@R,-"r72;/0gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|ww22LL%++6V 3  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4 4  gg//@CCEKKPww22LLEKK@t}} 3  e#MMNN$  lln(GG||~5
 &b,7<<>\9 ]9 ||~H-1-=)4J*,G##v'?'?'?[ U F 4;b<40FAb @@F))  DOO-223q8"@@F))  (():):4==?)KL))$--/:  .faGG ww001G1G1IJH'002H
 1 %	7&!G* G* G*R $//< GT='4L$wbRVW!%T!1)9  < '<5 7|q $9$;$;!(&2!- ("#.!&",'4,; <%)F6N.33< GT='4L$wbRVW	
 ! !))+qww/@/@@@@<<<M
 #La . < GT='4D$wb$O+F 6;;++R-A-ABB5(KK$$++r}}> ,,((!11..Br}}O   &{{//66+1;;+;+;+N+N!'!1!1!:!:%{{//66
 6{Q;.#@f&Ks&KV__5Fr5J&KLMrC   zmkl::_mkl_linearrV   packed_worig_wc          	      r  > / n[         R                  (       d  [         R                  (       aH  [        USS/5      n[	        XUS9Gt pp[        XPU5      (       a  [        R                  " UUXU/SSS/S9  [        U5      S:X  d  [        5       (       a#  UR                  TR                  XU4US US95        UR                  5       [        R                  R                  ;   d   eUR                  5       [        R                  R                  ;   d   eS S	 S
.n	[!        SUXU/UU	S9n
Ub  [#        X5      n
U
$ )Nr   r   rV   TrT   )rY   r   )r   
batch_sizec                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   Gregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>i      !2!21::<!@rC   c                 X    [         R                  R                  U R                  5          $ rK   r   r   s    r.   r   r  j  r  rC   )r   rT   packed_linearr   )r   r`   ra   r   r   r   r	   rh   r_   r   r   r   r'   r   r(   r)   r   r   )rP   r  r  rR   r  rW   ru   r   rv   r   ry   aten_mkl_linears              r.   mkl_packed_linear5register_onednn_fusion_ops.<locals>.mkl_packed_linearD  s?    /1&&&*B*B#*6Aq6#:L293/Q -VEE'33#"&1$(+,a& w<1$(=(?(?NN',,&16Tj -   ((*agg.?.????(AGG,=,==== A@! %>#&)"/% = ^FrC   rK   )&r8   _C_has_mkldnn r   r   r   mkldnn_linear_pointwiseLinearUnaryrn   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv_pointwiser   r   boolrj   r   qconv2d_pointwisebinary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr   r   r   )cpu_needs_realized_inputsr   r   r   r   r   r   r   r   r   rL  rs  r  r  r   r   r  rV  r   s                @@@@@@r.   register_onednn_fusion_opsr     s   xx#5II..'!$0077	$
  %7II..55'!$1188	%
! %7II..'!$99@@	%
! &8II..55'!$??FF	&
" II33II44II==II..!!))II,,
 	" 
599++BB	C			 	 
D	6 
599++BBII	J			 	 		 
K	B 
599++CCJJ	K			 	 		 
L	B 
599++==	> A	A	A	 A	 
?A	F 
599++==DD	EQU=	=	&=	+4=	9B=	 
F=	~ 
599++LL	M			 	 
N	: 
40088	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 
:&	P 
599++;;QU	V<	<	 %	<	
 <	 <	 
W<	| 
II..554

 
II..<<RV

Q	Q	 %	Q	
 Q	 Q	 Q	



Q	f 
599++==SW	X l	l	 %	l	
 l	 l	 l	 
Yl	\	 
II..554

 
II..<<RV

, 'T	T	 %	T	
 T	 T	 T	 T	



T	l
 880		))" %(88??	O &,,UYY]]-F-FGuyy}}889 00#0 "0 I&	0 :0d 	"";<a% rC   )NNNN)0r5  typingr   r   r8   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   r  r   r   codegen.cpp_gemm_templater	   !codegen.cpp_grouped_gemm_templater
   codegen.cpp_utilsr   r   r4  r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   virtualizedr   r   r   Tensortupler  rB   rO   rj   r~   _inductor_lowering_functionr  r   rC   r.   <module>r     sj    "  $ $ 4  6 E 8    
 @ ) )-ll-<<- \\- ,,	-
 \\- LLR\\-`.%).. . x 	.
 H. x . ". .j 
??I? I?D 59  1q=rC   