
    ȅi\9                       % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
JrJrJr  S SKJr  S SKJr  S SKJr  \(       a  S S	KJr  S S
KJr  S SKJr  S qS qS q/ qS\S'   / rS\S'   S q S q!\R                   " S S5      5       r"/ q#S\S'   S q$S q%S q&S q'S\S'   S q(S q)S<S jr*\ " S S5      5       r+S=S jr, " S S5      r-0 r.S\S'   \ " S S5      5       r/\/Ra                  S / S!Q5        \/Ra                  S"/ S#Q5        \/Ra                  S$/ S%Q5        \/Ra                  S&/ S'Q5        \/Ra                  S(/ S)Q5        S>S* jr1S?S+ jr2S@S, jr3      SAS- jr4SBS. jr5S?S/ jr6SCS0 jr7SDS1 jr8      SES2 jr9        SFS3 jr:S<S4 jr;SGS5 jr<\	SHS6 j5       r=SIS7 jr>SJS8 jr?\/Ra                  S9/ S:Q5                  SKS; jr@g)L    )annotationsN)	dataclass)	lru_cache)OptionalTYPE_CHECKINGUnion)config)get_benchmark_name)
OrderedSet)Callable)Config)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  .    \ rS rSr% S\S'   SrS\S'   Srg)CppOuterLoopFusedCount+   intinner_kernel_numberr   local_buffer_number N)__name__
__module____qualname____firstlineno____annotations__r   __static_attributes__r       Q/home/james-whalen/.local/lib/python3.13/site-packages/torch/_inductor/metrics.pyr   r   +   s      r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsr   num_auto_chunkingc                     Sq SqSq[        R	                  5         [
        R	                  5         SqSq[        R	                  5         Sq	Sq
SqSqSqSqg )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr    num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countcodegen_mix_order_reductionr!   r   r   r   resetr.   A   sj     %&"%++- !01- "#r   c                  V    \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S'   S\S	'   S
rg)CachedMetricsDeltas_   zQ
The subset of metrics we want update across cache hits, e.g., the
FxGraphCache.
r   r#   r$   r'   r(   r%   r*   r   N)r   r   r   r   __doc__r   r   r   r   r   r0   r0   _   s-    
  $''/22r   r0   c                 t    [         R                  " [        5       V s/ s H  o R                  PM     sn $ s  sn f N)dataclassesfieldsr0   name)fields    r   get_metric_fieldsr9   n   s*    $/$6$67J$KL$K5JJ$KLLLs   5c                  @    \ rS rSrSrSS jrS	S jr\S
S j5       rSr	g)CachedMetricsHelperr   z
A helper class to help calculate and apply counter deltas for those
metrics we want to save with cache entries (e.g., FxGraphCache) and
apply on a cache hit.
c                h    0 U l         [        5        H  n[        5       U   U R                   U'   M     g r4   )cached_metricsr9   globals)selfmetrics     r   __init__CachedMetricsHelper.__init__y   s.     ')F*1)F*;D' *r   c                |    0 n[        5        H"  n[        5       U   U R                  U   -
  X'   M$     [        S0 UD6$ )Nr   )r9   r?   r>   r0   )r@   delta_metricsrA   s      r   
get_deltasCachedMetricsHelper.get_deltas~   sC    ')F$+If$58K8KF8S$SM! * #3]33r   c                b    [        5        H!  n[        5       U==   [        X5      -  ss'   M#     g r4   )r9   r?   getattr)deltarA   s     r   apply_deltas CachedMetricsHelper.apply_deltas   s&    ')FIf!77 *r   )r>   NreturnNone)rN   r0   )rJ   r0   rN   rO   )
r   r   r   r   r2   rB   rF   staticmethodrK   r   r   r   r   r;   r;   r   s%    <
4 8 8r   r;   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  |    \ rS rSr% S\S'   S\S'   SrS\S'       SS	 jrSS
 jrSS jrSS jr	\
SS j5       rSrg)MetricTable   str
table_name	list[str]column_namesr   r   num_rows_addedc                \   U R                   [        5       ;  a  g U" 5       n[        U R                  5      [        U5      :X  d(   [        U R                  5       S[        U5       35       e[	        U R                  5      [	        UR                  5       5      :X  d6   [	        U R                  5       S[	        UR                  5       5       35       e[        5       nU/U R                   Vs/ s H  oBU   PM	     sn-   n[        S U 5       5      (       d   eU R                  U5        g s  snf )Nz v.s. c           	   3  b   #    U  H%  n[        U[        [        [        S 5      45      v   M'     g 7fr4   )
isinstancerU   floattype).0is     r   	<genexpr>&MetricTable.add_row.<locals>.<genexpr>   s%     HCq:a#ud4j!9::Cs   -/)	rV   enabled_metric_tableslenrX   r   keysr
   all
_write_row)r@   row_fnrow_dictbncolumn_namerows         r   add_rowMetricTable.add_row   s    ??"7"9984$$%X6 	
4$$%&fS]O<	
6 $++,
8==?0KK 	
$++,-VJx}}4O3PQ	
K  !dt?P?PQ?P{+?PQQHCHHHHH Rs   +D)c                "    SU R                    S3$ )Nmetric_table_z.csv)rV   )r@   s    r   output_filenameMetricTable.output_filename   s    t/t44r   c                    U R                  5       n[        US5       n[        R                  " USS9nUR	                  S/U R
                  -   5        S S S 5        g ! , (       d  f       g = f)Nw
lineterminator
model_name)rq   opencsvwriterwriterowrX   )r@   filenamefdr{   s       r   write_headerMetricTable.write_header   sQ    '')(C BZZ48FOO\NT->->>? !  s   5A
A)c                   U R                  5       nU R                  S:X  a4  [        R                  R	                  U5      (       d  U R                  5         U =R                  S-  sl        [        U5       H+  u  p4[        U[        5      (       a  US nOUc  SnOUnXQU'   M-     [        US5       n[        R                  " USS9nUR                  U5        S S S 5        g ! , (       d  f       g = f)Nr      z.6f aru   rv   )rq   rY   ospathexistsr   	enumerater\   r]   ry   rz   r{   r|   )r@   rl   r}   idxorig_valnew_valr~   r{   s           r   rg   MetricTable._write_row   s    '')!#BGGNN8,D,Dq &s^MC(E**%cN!"H , (C BZZ48FOOC  !  s   0'C  
C.c                ,    [        X5      nU[        U '   g r4   )rS   rQ   )r7   rX   tables      r   register_tableMetricTable.register_table   s    D/). &r   r   N)rh   z4Callable[[], dict[str, Optional[Union[str, float]]]]rN   rO   )rN   rU   rM   )rl   zlist[str | float | None]rN   rO   )r7   rU   rX   rW   rN   rO   )r   r   r   r   r   rY   rm   rq   r   rg   rP   r   r   r   r   r   rS   rS      sP    ONCJ	(5@!( / /r   rS   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    SSK Jn  SSKJn  UR	                  U 5      nU" U5      n[
        R                  " UR                  R                  5      $ )z
The kernel_module_code is the python module that contains kernel function code.
kernel function is the proper triton kernel function annotated with
@triton.jit
r   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder      s@     '4


-
.Cs#F VYY\\**r   c                4    [        U R                  5       5      $ )zB
Return the line of code for the kernel excluding the decorators.
)rd   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder   0  s     $//122r   c                |    US:X  a  g [         R                  " SU 5      nU(       d   S5       eUR                  S5      $ )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!r   researchgroup)r   r   ms      r   _parse_size_hintsr   7  s;    )#
		/1CDA###1771:r   c                |    U S;  a  g [         R                  " SU5      nU(       d   S5       eUR                  S5      $ )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!r   r   )r   r   r   s      r   _parse_reduction_hintr   @  s>     CC
		9;MNA???1771:r   c                $    U R                  U5      $ r4   )count)r   patterns     r   _count_patternr   J  s     &&w//r   c                    U R                  5       S   nUR                  S5      (       d   eUR                  S5      nUR                  S5      nXS-   U nUR                  S5      n[	        U5      $ )Nr   def (z):r   ,)r   
startswithindexsplitrd   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   N  sn    $//1!4Hv&&&&s#InnT"GA0HNN3Eu:r   c                ,    U R                  S5      nXS $ )z
Skip decorators.
r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   X  s     $$V,I*%%r   c                z    [         R                  " U S3U 5      nU(       a  [        UR                  S5      5      $ g )Nz
 = ([\d]+)r   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   `  s3    
		^$K02GHA1771:r   c                v    [         R                  " SU 5      nU(       a  [        UR                  S5      5      $  g)zu
inductor meta looks like:
    inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
z.kernel_num_gb.:\s*([0-9.]+)r   N)r   r   r]   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   h  s5     			1>BAQWWQZ  	 r   c           
        ^ ^^^^^^^	 SSK Jn  U" U5      m[        TU5      m[        UT5      m	[	        U5      m[        T5      m[        T5      m[        S5      R                  UUUU UUUU	4S j5        g)z
An utility to log kernel metadata. We may parse metadata from kernel source code here.

It's fine to parse the generated kernel code here since the logging is
disabled by default. It would hurt compilation time.
r   )"get_kernel_category_by_source_coder   c                    > TTT TTT[        TS5      [        TS5      [        TS5      [        TS5      [        T5      [        TS5      [        TS5      [        TS5      [        TT 5      S.$ )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>%log_kernel_metadata.<locals>.<lambda>  s}    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
r   N)	r   r   r   r   r   r   r   get_metric_tablerm   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   }  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                    [         R                  5        Ho  u  pU [        5       ;   d  M  UR                  5       n[        R
                  R                  U5      (       a  [        R                  " U5        UR                  5         Mq     g)z
Purge the old log file at the beginning when the benchmark script runs.
Should do it in the parent process rather than the child processes running
each individual model.
N)	rQ   itemsrc   rq   r   r   r   unlinkr   )r7   r   r}   s      r   purge_old_log_filesr     s^     0557(**,,.Hww~~h''		(#  8r   c                 4    [        [        R                  5      $ r4   )enabled_metric_tables_implr	   rc   r   r   r   rc   rc     s    %f&B&BCCr   c                    [        5       nU R                  S5       HB  nUR                  5       nU(       d  M  U[        ;   d   SU S35       eUR	                  U5        MD     U$ )Nr   zMetric table name z is not registered)r   r   striprQ   add)
config_strenabledr7   s      r   r   r     sg    )|G  %zz|// 	
 &89	
/ 	D & Nr   c                    U [        5       ;   $ r4   )rc   r7   s    r   is_metric_table_enabledr    s    (***r   c                >    U [         ;   d   SU  S35       e[         U    $ )NzMetric table z is not defined)rQ   r  s    r   r   r     s*    ++R}TF/-RR+#D))r   kernel_autotuner   r   triton_config
latency_msc                N   ^ ^^^ [        S5      R                  UUU U4S j5        g )Nr
  c                 $   > TT[        T 5      TS.$ )Nr  )rU   )r	   r   r   latencys   r   r   ,log_kernel_autotune_result.<locals>.<lambda>  s    && [!	
r   )r   rm   )r   r   r	   r  s   ````r   log_kernel_autotune_resultr    s     &'//	
r   rM   )rN   rW   )r   rU   rN   rU   )r   rU   rN   r   )r   rU   r   rU   rN   Optional[str])r   rU   r   rU   rN   r  )r   rU   r   rU   rN   r   )r   rU   rN   rU   )r   rU   r   rU   rN   zOptional[int])r   rU   r   rU   rN   zOptional[float])r   rU   r   rU   r   rU   rN   rO   )rN   OrderedSet[str])r  rU   rN   r  )r7   rU   rN   bool)r7   rU   rN   rS   )
r   rU   r   rU   r	   r   r  r]   rN   rO   )A
__future__r   rz   r5   r   r   r   r   	functoolsr   typingr   r   r   torch._inductorr	   torch._inductor.utilsr
   torch.utils._ordered_setr   collections.abcr   %torch._inductor.runtime.triton_compatr   torch._inductor.schedulerr   r#   r$   r%   r   r   r   r'   r(   r   r    r)   r*   r+   r!   r,   r-   r.   r0   r9   r;   rQ   rS   r   r   r   r   r   r   r   r   r   r   r   r   rc   r   r  r   r  r   r   r   <module>r     s   " 
   	 	 !  1 1 " 4 / (<;  !"       8:4 9     ! ! ! CE !#? D ,- )  3    < 3 3 3M8 82 46 0 5 :/ :/ :/z         "   -	   6+ 3.10&*-*))#&)<?)	)X!D 
 
+*
   

#&
06
AF
	
r   