
    ΅iM                        S SK r S SKJrJr  S SKJrJrJr  S SKJ	r	  SSK
Jr  \ R                  R                  r\	" \ R                  \ R                  \ R                   \ R"                  /5      r\	" / \R&                  P\R(                  P\R*                  P\R,                  P\R.                  P\R0                  P\R2                  P\R4                  P\R6                  P\R8                  P\R:                  P\R<                  P\R>                  P\R@                  P\RB                  P\RD                  P\RF                  P\RH                  P\RJ                  P\RL                  P\RN                  P\RP                  P\RR                  P\RT                  P\RV                  P\RX                  P\RZ                  P\R\                  P\R^                  P\R`                  P\Rb                  P5      r2\	" \Rf                  \Rh                  \Rj                  \Rl                  \Rn                  \Rp                  \Rr                  \Rt                  \Rv                  /	5      r<\2\<-  r=S\>4S jr?S	\ R                  S\A4S
 jrBS\>4S jrCg)    N)get_device_tflopsget_gpu_dram_gbps)has_hint	size_hintstatically_known_true)
OrderedSet   )flop_registryreturnc                     U [         ;   ae  [        U5      S:w  a  [        SU SU  35      eUR                  5       n[	        U5      S-  nSnXv-  n[         U    n	U	" U0 UDSU0D6S-  n
X-  S-  nU$ g	)
aO  
Estimates the compute time of an aten operator.

Args:
    func_packet: The operator overload packet.
    args: The arguments to the operator.
    kwargs: The keyword arguments to the operator.
    out: The output of the operator.
    out_dtypes: The output data types.

Returns:
    float: The estimated compute time in nanoseconds.
r	   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r
   lenAssertionErrorpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_times               Y/home/james-whalen/.local/lib/python3.13/site-packages/torch/utils/_runtime_estimation.pyget_compute_timer   N   s     m#z?a 4ZLk]S   *51D8%6'4$dBfBcBQF
"9S@    tc                    Sn[        U R                  U R                  5       5       HJ  u  p#[        U5      (       a  [        U5      (       d    g[	        US:H  5      (       a  M<  U[        U5      -  nML     XR                  5       -  $ )z
Calculates the memory consumption of a tensor.

Args:
    t (torch.Tensor): The input tensor.

Returns:
    int: The memory consumption of the tensor in bytes.
r	   r   )zipshapestrider   r   r   element_size)r!   
real_numelsizer%   s       r   get_num_bytesr)   p   sm     JAGGQXXZ0~~Xf%5%5 %Vq[11)D/)J 1 (((r    c                 r    [        5       n[        S U  5       5      n[        S U 5       5      nX4-   nXR-  nU$ )a+  
Estimates the memory transfer time of input and output tensors.

Args:
    flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
    flat_outs (List[torch.Tensor]): The flat list of outputs.

Returns:
    float: The estimated memory transfer time in nanoseconds.
c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fN
isinstancetorchTensorr)   .0r!   s     r   	<genexpr>$get_transfer_time.<locals>.<genexpr>   s,      "2QjELL6Qa"2   #;;c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fr,   r-   r1   s     r   r3   r4      s+      "+Qz!U\\/Ja)r5   )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_times          r   get_transfer_timer?      sU     -. "2 J  "+ K ,M!8Mr    )Dr/   torch._inductor.utilsr   r   %torch.fx.experimental.symbolic_shapesr   r   r   torch.utils._ordered_setr   flop_counterr
   opsatenfloat16bfloat16float32float64_FLOAT_TYPES
lift_freshr!   	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r0   intr)   r?    r    r   <module>rx      s    F 
 0 ' yy~~	     	  				 
 	  	  	

  	  	  	  	  	  	  	  	   	! " 	# $ 	% & 			' ( 			) * 	+ , 	- . 	/ 0 	1 2 	3 4 	5 6 	7 8 	9 : 	; < 	= > 	

? "	H 

		
 +%E D)U\\ )c ),e r    