
    ȅi                        S SK Jr  S SKJrJr  S SKrS SKrS SKJrJ	r	  S SK
Jr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJrJr  SS	KJ r       S4S
SSSSSS\\!   S\\!   S\\!   S\!S\"S\\\!      S\\#S      S\S   4S jjr$   S5S
SSSSSS\\#S      S\S   S\"4S jjr%S r& " S S\5      r' " S S\5      r( " S S\5      r) " S  S!\5      r* " S" S#\5      r+ " S$ S%\5      r, " S& S'\5      r- " S( S)\5      r. " S* S+\5      r/ " S, S-\5      r0 " S. S/\5      r1 " S0 S1\5      r2 " S2 S3\5      r3g)6    )Sequence)AnyOptionalN)make_channels_last_strides_for
StrideType
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutget_device_typeir_node_to_tensorIRNode is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayout	TensorBox)convert_shape_to_inductorpad_listlikeSUPPORTED_MKLDNN_DEVICES)Vxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingquantize_argsotherc                    S nSS jnS nUR                  5         UR                  5         Ub  UR                  5         [        R                  R                     [	        USS9n[	        USS9n[        UR                  5       5      S-
  nS[        U5      s=:  a  U::  d   e   eS[        U5      s=:  a  U::  d   e   eS[        U5      s=:  a  U::  d   e   e[        UU5      n[        UU5      n[        UU5      nU	c  [        S/U5      n	O'S[        U	5      s=:  a  U::  d   e   e[        U	U5      n	[        U[        [        R                  R                  R                  45      (       d   eU(       a(  U" UU5      nUR                  5       nU" UUUU	UUU5      nO[        UR                  5      n[        UR                  5      n[        U5      [        U5      :w  a1  [        U5      S	:X  a  [        U5      S
:X  d   eUR!                  S5        U" UUUUU5      nS/[        [#        [%        S[        U5      S-   5      5      5      -   n[        U5      /U-   nSSS5        U R'                  UW5      n[)        S W 5       5      (       + nU(       d  [+        U5      S:X  a'  [-        U5      (       a  [.        R0                  " U5      nOH[+        U5      S:X  a.  UR3                  5       S   S:X  a  [.        R0                  " U5      nO[5        U5      n[+        U5      [+        U5      :X  d   e[+        U5      [6        ;   d   eU/nU
bX  U
u  nnnnUR                  5         UR                  5         UR                  5         UR                  5         UUU/-   U/-   UU/-   nOUU/-  nUb/  U R'                  UU5      n[        U[8        5      (       d   eUU/-  n[;        UR=                  5       UR?                  5       [A        U5      [A        U5      5      nXEXg/n U(       a  U RC                  SU	5        Ub  URE                  U5        OU RC                  SU5        UU UUU4$ ! , (       d  f       GN	= f)ae  
This function is a helper function to prepare inputs, layout and constant args
for convolution post-op fusion's create function, including deciding the output
layout (channels first or channels last), realizing inputs and make them etc. The
function only supports the CPU/XPU device since conv post-op fusion kernel is only
supported on CPU/XPU right now.
c                    [        U 5      [        U5      :X  d   S5       e[        U 5      nUS:  d   S5       eSnSn	/ n
U
R                  X   5        U
R                  X   U-  5        [        SU5       HK  nX   S-
  X[S-
     -  S-   nX   S-
  XKS-
     -  X+S-
     S-  -
  U-   X;S-
     -   nU
R                  U5        MM     [        [	        [
        U
5      5      $ )NzExpect input dim == weight dim   zExpect input dim > 2r   r
   )lenappendrangelistmapint)output_sizeweight_sizer    r%   r!   r"   r#   dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_ds                 S/home/james-whalen/.local/lib/python3.13/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_size<_prepare_convolution_fusion_create.<locals>._conv_input_size6   s    ;3{#33U5UU3+Qw...w	$%!
+01+@6IJq#A!nq(HUO;a?F!#v!e}4q5>A%' !Q'(  l+  CZ())    Nc                 6   US Ln[        U 5      n/ nUR                  U S   5        UR                  US   5        [        SU5       HO  nU(       a  XHS-
     OSn	XU   S-
  -  S-   n
X   SX(S-
     -  -   U
-
  UUS-
     -  S-   nUR                  U5        MQ     U$ )Nr   r*   r
   )r+   r,   r-   )r6   r2   r    r!   r"   has_dilationr3   r1   r7   	dilation_r8   output_size_ds               r:   _conv_output_size=_prepare_convolution_fusion_create.<locals>._conv_output_sizeN   s    t+*o:a=);q>*q#A+7QQIa.1"459F']a'a%..@AFJvAP M }-  r=   c                 R  ^ U R                  5       m[        T5      nUS:  d   S5       eUS:  aW  / nUR                  TS   U-  5        UR                  TS   U-  5        UR                  U4S j[	        SU5       5       5        U$ U R                  SS5      R                  5       nU$ )Nr*   zExpect weight dim > 2r
   r   c              3   .   >#    U  H
  nTU   v   M     g 7fN ).0r7   prepacked_weight_sizes     r:   	<genexpr>[_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size.<locals>.<genexpr>m   s     OA4Q7s   )sizer+   r,   extendr-   	transpose)prepacked_weightr#   r3   r2   rI   s       @r:   _original_deconv_weight_sizeH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_sizeb   s     !1 5 5 7'(Qw///wA:K4Q7&@A4Q7&@AOq#OO  +44Q:??AKr=   T)guard_shaper*   r         r
   c              3   B   #    U  H  n[        U[        5      v   M     g 7frF   )
isinstancer0   )rH   is     r:   rJ   5_prepare_convolution_fusion_create.<locals>.<genexpr>   s     G+AZ3//+s   xpurF   )#realizer   graph	fake_moder   r+   rL   r   rV   r0   sympycorenumbersIntegerr.   shapepopreversedr-   require_stride_orderallr   r   r   contiguous_strides
get_strider   r   r   r   get_device_or_error	get_dtyper   insertr,   )!clsr   r   r   r    r!   r"   r#   r$   r%   r&   r'   r;   rB   rP   x_fakeweight_fakedimsr2   r6   r1   x_shapeweight_shapereq_stride_orderdynamic_shapesoutput_strideinputsx_scalex_zero_pointw_scalew_zero_pointkernel_layoutconstant_argss!                                    r:   "_prepare_convolution_fusion_creater{      s7   .*0(  IIK
NN	
		"1$7'DA6;;=!A%3w<'4'''''3x=(D(((((3v;&$&&&&&w-$/fd+!)1#t4Ns>*2d22222).$?N&3

(:(:(B(B"CDDDD 7{FKKJ*K 6<<(G 1 12L7|s<007|q(S->!-CCC  #+K 3huQFa/H&I!JJ 0125EE_ 
b 	  $45A G+GGGN/!,5
*1
-
-$2$E$Ek$R 
	u	$):a)?&99+F6{C1!88881!9999SF 7D4w7L11VH<?VV6(((0@A%++++5'		!+.!-0	M h7MQ/dQ%=-1A5HHW 
	s   HQ
Q.
binary_sumc           
         UR                  5         UR                  5         Ub  UR                  5         UR                  5       Gt pxUR                  5       u  p[        U5      U	/-   n
[        [        [	        [        UR                  5       5      5      5      5      nU R                  X5      n[        U5      [        U5      :X  d   e[        U5      [        ;   d   eU/nUbV  Uu  pnnUR                  5         UR                  5         UR                  5         UR                  5         XU/-   U/-   UU/-   nOX/-  nUb  U(       a  U R                  X[5      nX/-   n[        R                  " U
5      n[        UR                  5       UR                  5       U
U5      n/ nUb  UR                  U5        OUR                  SU5        UUUX4$ )z
This function is a helper function to prepare inputs, layout and constant args
for linear post-op fusion's create function. The function only supports the CPU device
since linear post-op fusion kernel is only supported on CPU right now.
r   )rZ   get_sizer.   rc   r-   r+   rd   r   r   r   rf   r   
get_deviceri   r,   rj   )rk   r   r   r   r&   r'   r|   m_ocr1   rq   rt   ru   rv   rw   rx   rs   ry   rz   s                       r:   _prepare_linear_fusion_creater      s    IIK
NNJJLEQ OOEAq'RD.KHU3qzz|+<%=>?  5A1!88881!9999SF 7D4wL11VH<?VV(,,UEE'!"55kBM			M  "MdQ%=-1AHHr=   c                     [        U R                  5       U / 5      n[        U R                  5       S9U l        U/U l        U$ )Ndevice)r   
get_layoutr   r   layoutoutputs)packed	output_irs     r:   _create_output_noder   #  sD    
I
 &V->->-@AFM[FNr=   c                      ^  \ rS rSr S SU 4S jjjrU 4S jr\SSSSSSS\\   S	\\   S
\\   S\S\	\\
      4S j5       rSrU =r$ )ConvolutionUnaryi.  c           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  g )Nr   aoti_torch__mkldnn__convolution_pointwiseop_overloadcpp_kernel_name)	r   device_typesuper__init__torchopsmkldnn_convolution_pointwisedefaultselfr   rt   rz   	__class__s       r:   r   ConvolutionUnary.__init__/  sa     +6!95		((??GG)$*:*:);;YZ 	 	
r=   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g Nz&torch/csrc/inductor/aoti_torch/c/shim_z.hinclude_extra_headerr   r   codegenr   wrapperr   s     r:   r   ConvolutionUnary.codegen?  2    $$4T5E5E4FbI	
 	 r=   r   r   r   r   padding_stride_r@   r#   scalarsc           
      x    [        XX#XEXg5      u  nnnnnUU[        U	5      U
/-   n[        UUUS9n[        U5      $ )Nr   rt   rz   )r{   r   r   r   )rk   r   r   r   r   r   r@   r#   attrr   	algorithmrt   rz   ry   r   r   s                   r:   createConvolutionUnary.createE  sj    ( /F(Y
	
 &#G,)
 

 " '

 #6**r=   r   rG   returnN__name__
__module____qualname____firstlineno__r   r   classmethodr.   r0   r   r   r   __static_attributes____classcell__r   s   @r:   r   r   .  s    
 	

 

 
 !  + +  + 	 +
 s) + c + 9 +  + $s)$ +  +r=   r   c                      ^  \ rS rSr  S SU 4S jjjrU 4S jr\SSSSSSSSS	\\   S
\\   S\\   S\S\	S\
\   S\
\	   S\
\\      S\
\	   4S j5       rSrU =r$ )ConvolutionBinaryii  c           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  X@l	        g )Nr   r   %_mkldnn__convolution_pointwise_binaryr   )
r   r   r   r   r   r   r   r   binarycpp_constant_args)r   r   rt   rz   r   r   s        r:   r   ConvolutionBinary.__init__j  si     +6!95		((??FF)$*:*:);;`a 	 	
 "3r=   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r:   r   ConvolutionBinary.codegen|  r   r=   r   r   r'   r   r   r   r   r@   r#   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc           
          [        XX4XVXx5      u  nnnnnU R                  UU5      nUR                  SU5        UU	U
U[        U5      U/-   n[	        UUUS9n[        U5      $ )Nr
   r   )r{   rd   rj   r   r   r   )rk   r   r'   r   r   r   r   r@   r#   r   r   r   r   r   rt   rz   ry   rq   r   r   s                       r:   r   ConvolutionBinary.create  s    . /F(Y
	

 ((0@Aa%#M2)
 
 # '

 #6**r=   )r   r   )rG   rG   r   )r   r   r   r   r   r   r   r.   r0   strr   floatr   r   r   r   r   s   @r:   r   r   i  s    
 3 
3 3$! (+(+ (+ 	(+
 (+ s)(+ c(+ 9(+ (+ (+ uo(+ SM(+  S	*(+ "#(+ (+r=   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jrS\\R                     4S jr	\
SSSSS	SS
SS\\   S\\   S\\   S\S\S\\   S\\   S\\\      S\\   4S j5       rSrU =r$ )ConvolutionBinaryInplacei  r   c           
        > [        US   5      U l        US   US   /USS  -   n[        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  [        [        US   R                  5       S9US   U 5      [        [        US   R                  5       S9US   U 5      /U l        g )Nr   r
   r*   r   &_mkldnn__convolution_pointwise_binary_r   r   )r   r   r   r   r   r   r   _convolution_pointwise_r   r   r   r   mutation_outputs)r   ry   rt   rz   reordered_inputsr   s        r:   r   !ConvolutionBinaryInplace.__init__  s     +6!95"1Ivay1F12J>		((@@GG)$*:*:);;ab 	 	
 :VAY-A-A-CDfQiQUV:VAY-A-A-CDfQiQUV!
r=   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r:   r    ConvolutionBinaryInplace.codegen  r   r=   c                     [        5       $ rF   r   r   s    r:   get_unbacked_symbol_defs1ConvolutionBinaryInplace.get_unbacked_symbol_defs  
    |r=   r   r   r'   r   r   r   r   r@   r#   r   r   r   r   r   c           
          [        XX4XVXx5      u  nnnnnU R                  UU5      nUR                  SU5        UU	U
U[        U5      U/-   n[	        [        US   R                  5       S9UUS9nUR                  S   $ )Nr
   r   )ry   rt   rz   r   )r{   rd   rj   r   r   r   r   rt   )rk   r   r'   r   r   r   r   r@   r#   r   r   r   r   r   rt   rz   r   rq   r   s                      r:   r   ConvolutionBinaryInplace.create  s    . /F(Y
	

 ((0@Aa%#M2)
 
 *$F1I,@,@,BC'
 }}Qr=   )r   r   r   r   )r   r   r   r   r   r   r	   r]   Symbolr   r   r.   r0   r   r   r   r   r   r   r   r   s   @r:   r   r     s    
 	

 

 
0!*U\\*B  + +  +  	+ 
 +  s)+  c+  9+  +  +  uo+  SM+   S	*+  "#+  + r=   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jr\SSSSSSS\\   S	\\   S
\\   S\\   S\S\	\\
      4S j5       rSrU =r$ )ConvolutionTransposeUnaryi  c           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  g )Nr   r   (_mkldnn__convolution_transpose_pointwiser   )	r   r   r   r   r   r   r    _convolution_transpose_pointwiser   r   s       r:   r   "ConvolutionTransposeUnary.__init__   sa     +6!95		((IIQQ)$*:*:);;cd 	 	
r=   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r:   r   !ConvolutionTransposeUnary.codegen  r   r=   r   r   r   r   r   output_padding_r   r@   groups_r   c                     Sn[        U UUUUUUUUU5
      u  nnnnnUU	[        U
5      U/-   n[        UUUS9n[        U5      $ )NTr   )r{   r   r   r   )rk   r   r   r   r   r   r   r@   r   r   r   r   r$   rt   rz   ry   r   r   s                     r:   r    ConvolutionTransposeUnary.create  s     
 /
	
 &#G,)
 

 + '

 #6**r=   r   r   r   r   r   s   @r:   r   r     s    
 	

 

 
 ! ++++ ++ 	++
 s)++ c++ c++ 9++ ++ $s)$++ ++r=   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jr\SSSSSSSSS	SS
SS\\   S\\   S\\   S\S\	S\4S j5       r
SrU =r$ )QConvPointWisePT2EiE  c           
         > [        US   5      U l        [        U5      S:H  U l        [        TU ]  UUUS[        R                  R                  R                  R                  SU R                   S3S9  g)a  
if bias is not None
    - inputs = [x, w, b, weight_scale, weight_zp]
    - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, w, weight_scale, weight_zp]
    - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
r      Nr   __qconv_pointwise_tensorr   )r   r   r+   has_biasr   r   r   r   onednnqconv_pointwisetensorr   s       r:   r   QConvPointWisePT2E.__init__F  sq      +6!95Fq(		((88??)$*:*:);;ST 	 	
r=   c                    > UR                  SU R                   S35        [        TU ]  U5        [	        U R
                  [        5      (       a  U R                  U5        g g r   r   r   r   r   rV   r   r   codegen_size_assertsr   s     r:   r   QConvPointWisePT2E.codegena  V    $$4T5E5E4FbI	
 	 dkk6**%%g. +r=   qxr   ru   rv   qwrw   r   r!   r    r"   r#   output_scaleoutput_zero_pointc                 .   SnS n[        U UUUU	UU
UUUX#XV/5      u  nnnnnUc  US   US   sUS'   US'   OUS   US   sUS'   US'   UUUUU[        U5      U/-   nUc   eU[        R                  [        R                  4;   a  UUl        [        UUUS9$ )NFr*   r
   r   r   )r{   r   r   float32bfloat16dtyper   )rk   r   ru   rv   r   rw   rx   r   r!   r    r"   r#   r  r  output_dtyper   r   r   r$   r%   rt   rz   ry   r   s                           r:   r   QConvPointWisePT2E.createi  s   * 
 /G:
	
 <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#G,)
 
 '''EMM5>>:: #/M! '
 	
r=   r   r   r   r   )r   r   r   r   r   r   r   r.   r0   r   r   r   r   r   s   @r:   r   r   E  s    
 	

 

 
6/ B
B
 B
 "	B

 B
 B
 B
 S	B
 cB
 s)B
 B
 B
 B
 B
r=   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jrS\\   4S jrS\	\
R                     4S jr\SSS	SS
SSSSSSSS\\   S\\   S\\   S\SSSS4S j5       rSrU =r$ )QConvPointWiseBinaryPT2Ei  r   c           
         > [        US   5      U l        [        U5      S:H  U l        SU l        [
        TU ]  UUUS[        R                  R                  R                  R                  SU R                   S3S9  g)a  
Needs input/weight/output qparams
if bias is not None
    - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum, b]
    - const_args = [stride, padding, dilation, groups, o_scale, o_zp,
    output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum]
    - const_args [b, stride, padding, dilation, groups, o_scale, o_zp,
     output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
r         Nr   !__qconv2d_pointwise_binary_tensorr   )r   r   r+   r   idx_for_inplace_sumr   r   r   r   r   qconv2d_pointwisebinary_tensorr   s       r:   r   !QConvPointWiseBinaryPT2E.__init__  sy    " +6!95Fq(#$ 		((::HHd..//PQ 	 		
r=   c                    > UR                  SU R                   S35        [        TU ]  U5        [	        U R
                  [        5      (       a  U R                  U5        g g r   r   r   s     r:   r    QConvPointWiseBinaryPT2E.codegen  r   r=   c                 :    U R                  U R                  5      /$ rF   )
input_namer  r   s    r:   get_mutation_names+QConvPointWiseBinaryPT2E.get_mutation_names  s     8 89::r=   c                     [        5       $ rF   r   r   s    r:   r   1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defs  r   r=   r   r   ru   rv   r   qaccumr   r!   r    r"   r#   r  r  c                    SnS n[        U UUUU
U	UUUUX#XV/U5      u  nnnnnUc  US   US   sUS'   US'   OUS   US   sUS'   US'   UUUUUUUUU[        U5      U/
-   nUS:X  d   S5       e[        R                  R	                  UR                  5       5        [        [        UR                  5       S9UUS9nUR                  UR                     $ )	NFr*   r
   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.r   r   )r{   r   r   r[   mark_buffer_mutatedget_namer  r   r   rt   r  )rk   r   ru   rv   r   rw   rx   r  r   r!   r    r"   r#   r  r  r  accum_scaleaccum_zero_pointr   alphar   r   r   r$   r%   rt   rz   _kernel_layoutrq   r   s                                 r:   r   QConvPointWiseBinaryPT2E.create  s=   4 
 /G:
	
" <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#M2)
 
 e# 	
Q	
# 	
##FOO$56)V%6%6%89'
 }}V7788r=   r   r   r  r   r   )r   r   r   r   r   r   r   r   r  r	   r]   r   r   r   r.   r0   r   r   r   r   s   @r:   r  r    s    
 	

 

 
>/;HSM ;*U\\*B  O9O9 O9 "	O9
 O9 O9 O9 S	O9 cO9 s)O9 O9 "O9 'O9 O9r=   r  c                   P   ^  \ rS rSr S SU 4S jjjrU 4S jr\S 5       rSrU =r	$ )MKLPackedLineari0  rG   c                    > [         TU ]  UUUS [        R                  R                  R
                  R                  S9  g N)r   )r   r   r   r   mkl_mkl_linearr   r   s       r:   r   MKLPackedLinear.__init__1  s:     			1199 	 	
r=   c                 F   > UR                  S5        [        TU ]	  U5        g Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hr   r   r   r   s     r:   r   MKLPackedLinear.codegen?  s    $$%RS r=   c                    U R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5       Gt pgUR                  5       u  p[        U5      U/-   n	[        R
                  " U	5      n
XU/nU/nUb  X/-  nOUR                  SS 5        UR                  5       nUc   e[        [        XR                  5       X5      UUS9$ )Nr   r   )require_stride1realize_inputr~   r.   r   rf   rj   r   r(  r   ri   )rk   r   packed_worig_wB
batch_sizer   r   r   r1   rs   rt   rz   r   s                 r:   r   MKLPackedLinear.createC  s     1 1! 45$$S%6%6v%>?

!1gn&99+Fv&#=cMF  D)!!!v{{}kQ'
 	
r=   r   r   
r   r   r   r   r   r   r   r   r   r   r   s   @r:   r(  r(  0  s5    
 	

 

 
! 
 
r=   r(  c                   V   ^  \ rS rSr S SU 4S jjjrU 4S jr\S 5       rS rSr	U =r
$ )	LinearUnaryi[  c           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  g )Nr   r   __linear_pointwiser   )	r   r   r   r   r   r   r   _linear_pointwiser   r   s       r:   r   LinearUnary.__init__\  sa     +6!95		((::BB)$*:*:);;MN 	 	
r=   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r:   r   LinearUnary.codegenl  r   r=   c                    U R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5       Gt pxUR                  5       u  p[        U5      U	/-   n
X/nXE(       a  UOS/U/nUb2  U R                  U R                  U5      5      nUR	                  U5        OUR                  SS 5        UR                  5       nUc   e[        [        UUR                  5       U
S9UUS9n[        U5      $ )Nr   r   r  rL   r   )require_contiguousr4  r~   r.   r,   rj   r   r<  r   ri   r   )rk   r   wr7  r   r   r   r   _icr   r1   rt   rz   r   r   s                  r:   r   LinearUnary.creater  s   ""3#4#4Q#78""3#4#4Q#78**,**,1gn'wtYG=&&s'8'8';<AMM!  D)!!!kkm 
 '
 #6**r=   c                     g rF   rG   r   s    r:   apply_constraintLinearUnary.apply_constraint      r=   r   r   r   )r   r   r   r   r   r   r   r   rK  r   r   r   s   @r:   r<  r<  [  s?    
 	

 

 
 ! + +: r=   r<  c                   Z   ^  \ rS rSrSr S S	U 4S jjjrU 4S jr\S 5       rS r	Sr
U =r$ )
LinearBinaryi  z)torch.ops.mkldnn._linear_pointwise.binaryc           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  g )Nr   r   __linear_pointwise_binaryr   )	r   r   r   r   r   r   r   r?  r   r   s       r:   r   LinearBinary.__init__  sa     +6!95		((::AA)$*:*:);;TU 	 	
r=   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r:   r   LinearBinary.codegen  r   r=   c                 H   U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5       Gt pgUR                  5       u  p[        U5      U/-   n	XU/n
U/nUb2  U R                  U R                  U5      5      nU
R	                  U5        OUR                  SU5        UR                  5       nUc   e[        [        UUR                  5       U	S9U
US9n[        U5      $ )Nr   rE  r   )rF  r4  r~   r.   r,   rj   r   rO  r   ri   r   )rk   r   yrG  r7  r   r   rH  r   r1   rt   rz   r   r   s                 r:   r   LinearBinary.create  s   ""3#4#4Q#78""3#4#4Q#78""3#4#4Q#78**,**,1gn=&&s'8'8';<AMM!  A&!!!kkm 
 '
 #6**r=   c                     g rF   rG   r   s    r:   rK  LinearBinary.apply_constraint  rM  r=   r   r   r   )r   r   r   r   r8   r   r   r   r   rK  r   r   r   s   @r:   rO  rO    sD    8F 	

 

 
 ! + +: r=   rO  c                   z   ^  \ rS rSr  S SU 4S jjjrU 4S jr\SSSSSSSSS	SS
SSSS\S\4S j5       r	Sr
U =r$ )QLinearPointwisePT2Ei  c           
         > [        US   5      U l        X@l        [        TU ]  UUUS[
        R                  R                  R                  R                  SU R                   S3S9  g)ag  
if bias is not None
    - inputs = [x, w, b, weight_scale, weight_zp]
    - const_args is: [x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, w, weight_scale, weight_zp]
    - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
r   Nr   __qlinear_pointwise_tensorr   )
r   r   r   r   r   r   r   r   qlinear_pointwiser   r   r   rt   rz   r   r   s        r:   r   QLinearPointwisePT2E.__init__  sf    " +6!95 ));;BBd..//IJ 	 		
r=   c                    > UR                  SU R                   S35        [        TU ]  U5        [	        U R
                  [        5      (       a  U R                  U5        g g r   r   r   s     r:   r   QLinearPointwisePT2E.codegen  sV    $$4T5E5E4FbI	
 	 dkk6**%%g. +r=   r   r   ru   rv   r   rw   rx   r   r  r  c           
          [        U UUUX#XV/5      u  pn  nUUU	U
U[        U5      U/-   nU
c   eU
[        R                  [        R                  4;   a  U
Ul        [        UUUUS LS9$ )Nr   rt   rz   r   )r   r   r   r  r  r  r[  )rk   r   ru   rv   r   rw   rx   r   r  r  r  post_op_namepost_op_argspost_op_algorithmrt   rz   ry   r   s                     r:   r   QLinearPointwisePT2E.create  s    " 8UG:8
4q! &#L1)
 
 '''EMM5>>:: #/M# '$&	
 	
r=   r	  rG   Tr   )r   r   r   r   r   r   r   r   r0   r   r   r   r   s   @r:   r[  r[    s    
 
 

 
</ ,
,
 ,
 "	,

 ,
 ,
 ",
 ,
 ,
 ,
 ,
r=   r[  c                      ^  \ rS rSr  S SU 4S jjjrU 4S jrS\\   4S jr\	SSSSS	SS
SSSSSSSSSS\
S\4S j5       rSrU =r$ )QLinearPointwiseBinaryPT2Ei'  r   c           
         > [        US   5      U l        X@l        SU l        [        TU ]  UUUS[        R                  R                  R                  R                  SU R                   S3S9  g)a  
if bias is not None
    - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2, bias]
    - const_args is: [o_scale, o_zp,
      fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2]
    - const_args is: [bias, o_scale, o_zp,
      fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
r   r  Nr   !__qlinear_pointwise_binary_tensorr   )r   r   r   r  r   r   r   r   r   r^  r  r_  s        r:   r   #QLinearPointwiseBinaryPT2E.__init__(  sn    " +6!95 #$ ));;II)$*:*:);;\] 	 	
r=   c                    > UR                  SU R                   S35        [        TU ]  U5        [	        U R
                  [        5      (       a  U R                  U5        g g r   r   r   s     r:   r   "QLinearPointwiseBinaryPT2E.codegenE  r   r=   c                     U R                   S   nUS:X  aA  U R                  U R                     n[        U[        5      (       d   eUR                  5       /$ / $ )Nr  )rz   rt   r  rV   r   r   )r   binary_post_opinputs      r:   r  -QLinearPointwiseBinaryPT2E.get_mutation_namesM  sV    ++B/U"KK 8 89EeV,,,,NN$%%Ir=   r   r   ru   rv   r   rw   rx   r'   r   r  r  c                    [        U UUUX#XV/UUS:H  5      u  nnnnnUU	U
UUUUUU[        U5      U/
-   nUS:X  ai  [        R                  R	                  UR                  5       5        [        [        UR                  5       S9UUUS LS9nUR                  UR                     $ Uc   eU[        R                  [        R                  4;   a  UUl        [        UUUUS LS9$ )Nr  r   rd  )r   r   r   r[   r  r   rk  r   r   rt   r  r   r  r  r  )rk   r   ru   rv   r   rw   rx   r'   r   r  r  r  other_scaleother_zprs  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrt   rz   ry   rq   r   s                           r:   r   !QLinearPointwiseBinaryPT2E.createV  s    8 *G:e#
	
 &#$67#)
 
 U"GG''(89/!)9)9);<+d*	F ==!;!;<<'''EMM5>>:: #/M) '$&	
 	
r=   r&  ri  r   )r   r   r   r   r   r   r   r   r  r   r   r0   r   r   r   r   s   @r:   rk  rk  '  s    
 
 

 
:/HSM  H
H
 H
 "	H

 H
 H
 "H
 H
 H
 H
 H
 H
r=   rk  c            !          ^  \ rS rSr S SU 4S jjjr\SSSSSSSSS	SS
SSSS\S\\   S\S\S\S\S\S\S\4 S j5       r	U 4S jr
SrU =r$ )MkldnnRnnLayeri  rG   c                    > [         TU ]  UUUS [        R                  R                  R
                  R                  S9  g r*  )r   r   r   r   atenmkldnn_rnn_layerr   r   s       r:   r   MkldnnRnnLayer.__init__  s:     			77?? 	 	
r=   r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    U R                  U R                  U5      5      nUR                  5         U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5         U R                  U R                  U5      5      nUR                  5         UR                  5       n[	        U5      S:X  d   S5       eUu  nnnUUU/nUR                  5       nUR                  5       nXX4XVU/nUU	U
UUUUUU/	nUR                  5       nUc   e[        [        US9UUS9nS nUUUS//nU" UU5      [        R                  " U5      [        R                  " U5      S//n[        [        UU5      5       VVV s/ s HE  u  nu  nn [        [        UR                  5       UR                  5       UU 5      U[        U4/5      PMG     n!nnn U!Ul        U!$ s  sn nnf )NrS   zExpect lstm input to be 3Dr   )rt   rz   c                 Z    [        U 5      S:X  d   S5       e[        R                  " U 5      $ )NrS   zExpect output_shape to be 3D)r+   r   rf   )output_shaper  s     r:   get_strides_of_lstm_output9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_output  s,    |$)I+II)!44\BBr=   r
   )r3  r4  freeze_layoutr~   r+   r   r~  r   r   rf   	enumeratezipr   r   ri   tupler   )"rk   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r6   
seq_length
mini_batchr  hy_shapecy_shapert   rz   r   r   r  output_sizesoutput_stridesrW   r1   rs   r   s"                                     r:   r   MkldnnRnnLayer.create  si   *  1 1! 45 	
  !2!22!67  !2!22!67  !2!22!67  !2!22!67  !2!22!67
  !2!22!67
ZZ\
:!#A%AA# .8*
J
"J<;;=;;=,

 !!!V,'
	C %h1#>&|[A--h7--h7C	
" 4=L.14
4//K LLNKKM!	 	4 	 
 ##
s   3AIc                 D   > UR                  S5        [        TU ]	  U5      $ r/  r0  r   s     r:   r   MkldnnRnnLayer.codegen  s!    $$%RSww''r=   r   r   )r   r   r   r   r   r   boolr.   r0   r   r   r   r   r   s   @r:   r~  r~    s    
 	

 

 
 dd d 	d
 d d d d d #Yd d d d d d  !d" #d dL( (r=   r~  c                   d   ^  \ rS rSr S SU 4S jjjrU 4S jr\        SS j5       rSrU =r	$ )	WeightInt4PackMatmuli  rG   c           	         > [        U5      S:X  d   e[        U5      S:X  d   e[        TU ]	  UUUS[        R                  R
                  R                  R                  SS9  g)zA
inputs = [x, w, qGroupSize, qScalesAndZeros]
constant_args = ()
rT   r   N-aoti_torch_cpu__weight_int4pack_mm_cpu_tensorr   )r+   r   r   r   r   	quantizedint4mm_packed_weight_cpur   r   s       r:   r   WeightInt4PackMatmul.__init__  sd     6{a=!Q&&&,,EEMML 	 	
r=   c                    > UR                  S5        [        TU ]	  U5        [        U R                  [
        5      (       a  U R                  U5        g g r/  )r   r   r   rV   r   r   r   r   s     r:   r   WeightInt4PackMatmul.codegen4  sB    $$%RS dkk6**%%g. +r=   c                    XX4/nUR                  5       Gt pgUR                  5       u  p[        U5      U/-   n	[        R                  " U	5      n
[	        UR                  5       UR                  5       U	U
5      n[        UUS9$ )N)r   rt   )r~   r.   r   rf   r   r   ri   r  )rk   r   rG  
qGroupSizeqScalesAndZerosrt   r   r   nr1   rs   ry   s               r:   r   WeightInt4PackMatmul.create;  s~     
4

zz|1gm&99+F#LLNKKM	
 $ 
 	
r=   r   r   )r   r   rG  r   r  r   r  r   r:  r   s   @r:   r  r    s[    
 	

 

 
*/ 

 
  	

 %
 
r=   r  )FNNN)NNF)4collections.abcr   typingr   r   r]   r   torch._prims_commonr   r   torch.utils._ordered_setr	   irr   r   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   virtualizedr   r0   r  r.   r{   r   r   r   r   r   r   r   r  r(  r<  rO  r[  rk  r~  r  rG   r=   r:   <module>r     s   $     J /     U T  .215#'BIBI BI 	BI
 c]BI SMBI smBI BI BI Xc]+BI D-.BI K BIT 26#'<I<I <I 	<I
 D-.<I K <I <I~8+( 8+vB+) B+JN 0 N bC+ 1 C+Lg
* g
T~90 ~9B(
' (
V6# 6r8$ 8vU
, U
px
!2 x
vx(& x(x3
, 3
r=   