
    hʧ                     N   S SK r S SKrS SKrS SKrS SKrS SKrS SK Jr  S SKJr  S SK	J
r
  S SKJrJrJrJr  S SKrS SKJs  Js  Jr  S SKJr  / SQr\ R0                  S\R$                  R2                  S	\4S
 j5       rSSS jrS rS rS rS\R>                  S\R>                  S	\R>                  4S jr S r!S\"S\"S	\"4S jr#STS jr$S r%SUS jr& " S S\R$                  R2                  5      r'SVS jr(S\RR                  S	\*4S jr+S r,S r-S  r.S!\/S	\/4S" jr0S!\/S	\/4S# jr1 " S$ S%5      r2\0" S&5      r3\0" S'5      r4\0" S(5      r5\0" S)5      r6\0" S*5      r7\0" S+5      r8\0" S,5      r9\1" S-5      r:\1" S.5      r;\1" S/5      r<\1" S05      r= " S1 S25      r> S3 r?S4 r@S5 rASWS6 jrBS7 rCS8\S9\4S: jrDS8\S9\S	\4S; jrES< rF " S= S>\R>                  5      rGS? rHS@ rISA rJSB rKSC rLSD\"SE\"S	\*4SF jrMSG rNSH rOSI rPSXSJ jrQSYSK jrRSL rSSM\/SN\/4SO jrTSP rU " SQ SR\R$                  R2                  5      rVg)Z    Nreduce)versiongcd)AnyCallableOptionalType)return_and_correct_aliasing)benchmark_modelprofiler_runnerget_available_devicesget_compute_capability(benchmark_torch_function_in_microsecondsfind_multiple_register_custom_opget_model_size_in_bytesunwrap_tensor_subclassTorchAOBaseTensoris_MI300is_sm_at_least_89is_sm_at_least_90is_package_at_leastDummyModuleTORCH_VERSION_AT_LEAST_2_2TORCH_VERSION_AT_LEAST_2_3TORCH_VERSION_AT_LEAST_2_4TORCH_VERSION_AT_LEAST_2_5TORCH_VERSION_AT_LEAST_2_6TORCH_VERSION_AT_LEAST_2_7TORCH_VERSION_AFTER_2_2TORCH_VERSION_AFTER_2_3TORCH_VERSION_AFTER_2_4TORCH_VERSION_AFTER_2_5modulereturnc                 D   U R                  5        Vs1 s H  oR                  iM     snU R                  5        Vs1 s H  oR                  iM     sn-  n[        U5      S::  d
   SU 35       e[        U5      S:  a  [	        [        U5      5      nU$ SnU$ s  snf s  snf )zz
Returns the unique device for a module, or None if no device is found.
Throws an error if multiple devices are detected.
   zKprepare only works with cpu or single-device CUDA modules, but got devices r   N)
parametersdevicebufferslennextiter)r&   pdevicesr+   s       G/home/james-whalen/.local/lib/python3.13/site-packages/torchao/utils.py_assert_and_get_unique_devicer3   4   s     "(!2!2!45!4Axx!45 ..*9*Q*9 G w<1 	")	% %(L1$4T$w- FM ;?FM 6 9s
   BBc                    Uc  0 nUcE  [        U [        R                  R                  5      (       d   S5       e[	        U 5      R
                  nUS:X  a  [        R                  R                  5         [        R                  R                  SS9n[        R                  R                  SS9nUR                  5         [        U5       H=  n[        R                  R                  R                  S5         U " U0 UD6  SSS5        M?     UR                  5         [        R                  R                  5         UR                  U5      U-  $ US:X  Ga
  [        R                  R                  5         [        R                  R                   R                  SS9n[        R                  R                   R                  SS9nUR                  5         [        U5       H=  n[        R                  R                  R                  S5         U " U0 UD6  SSS5        M?     UR                  5         [        R                  R                  5         UR                  U5      U-  $ US:X  a  [        R"                  R                  5         [$        R$                  " 5       n[        U5       H=  n[        R                  R                  R                  S5         U " U0 UD6  SSS5        M?     [$        R$                  " 5       n	[        R"                  R                  5         X-
  U-  n
U
$ g! , (       d  f       GMf  = f! , (       d  f       GMh  = f! , (       d  f       M  = f)	z?Benchmark model runs with `args` and `kwargs` both are optionalNzFExpecting `model` to be torch.nn.Module if device_type is not providedcudaT)enable_timingztimed regionmpscpu)
isinstancetorchnnModuler3   typer5   synchronizeEventrecordrangeautogradprofilerrecord_functionelapsed_timer7   eventr8   time)modelnum_runsargskwargsdevice_typestart_event	end_event_
start_timeend_timeaverage_time_per_runs              r2   r   r   F   s   ~%11 	
T	
1 4E:??f

 jj&&T&:JJ$$4$8	 xA((88Ht&v& IH ! 	

 ''	2X==				iioo++$+?IIOO)))=	 xA((88Ht&v& IH ! 			''	2X==				YY[
 xA((88Ht&v& IH ! 99;		 ( 5A## 
- IH IH IHs$   3	L	L0	M
L-	0
M 	
M	c                 4   [         R                  R                  [         R                  R                  R                  [         R                  R                  R
                  /SS9 nU" U0 UD6nS S S 5        WR                  U 5        W$ ! , (       d  f       N!= f)NT)
activitiesrecord_shapes)r:   rC   profileProfilerActivityCPUCUDAexport_chrome_trace)pathfnrJ   rK   profresults         r2   r   r   ~   s    			NN++//NN++00
  
  
 
T$V$
 	T"M
 
s   %	B		
Bc                  F   S/n [         R                  R                  5       (       a  U R                  S5        O4[         R                  R                  5       (       a  U R                  S5        [         R
                  R                  5       (       a  U R                  S5        U $ )Nr8   r5   xpur7   )r:   r5   is_availableappendr`   r7   )r1   s    r2   r   r      sk    gGzz  v				!	!uyyuN    c                      [         R                  R                  5       (       a4  [         R                  R                  5       n [	        U S    SU S    35      $ g)Nr   .r)   g        )r:   r5   ra   get_device_capabilityfloat)
capabilitys    r2   r   r      sH    zz  ZZ557

1a
1788rc   output
output_refc                     [         R                  " [         R                  " X-
  5      5      [         R                  " [         R                  " U5      5      -  $ N)r:   meanabs)ri   rj   s     r2   compute_max_diffro      s;    ::eii 345

		*9  rc   c                     SS K Js  Jn  U " U0 UD6  U " U0 UD6  UR                  SXU S.S9nUR	                  5       nUR
                  S-  $ )Nr   zf(*args, **kwargs))rJ   rK   f)stmtglobalsg    .A)torch.utils.benchmarkutils	benchmarkTimerblocked_autorangerm   )rq   rJ   rK   rv   t0measurements         r2   r   r      sc    -- tvtv	!a8 
 
B &&(Kc!!rc   nrJ   c                 H    [        S US-   5      nX-  S:X  a  U $ X-   X-  -
  $ )Nc                 "    X-  [        X5      -  $ rl   r   )xys     r2   <lambda>find_multiple.<locals>.<lambda>   s    #a)!3rc   )r)   r   r   )r{   rJ   ks      r2   r   r      s/    3TD[AAuz5AE?rc   c                 B   ^ ^^^ SSK Jm  T(       a  SOSmUUU U4S jnU$ )a  This decorator is used to preserve some high level operators for torch.export.export
while still allow them to be decomposed for inductor path

requirement: make sure `fn.__name__[1:]` is the operator name you want to register

NOTE: This should be applied at the top, after all other decorators have been applied
NOTE: We haven't tested the case when `fn` accepts tensor subclass instance as input,
e.g. uint4 tensor subclass instance, and we'll probably need to figure out what would make
sense for downstream system (like executorch) to accept as well

Example:
    lib = torch.library.Library("my_namespace', "FRAGMENT")

    register_custom_op = _register_custom_op(lib)

    @register_custom_op
    def _the_op_that_needs_to_be_preserved(...)
        ...

    # after this, `_the_op_that_needs_to_be_preserved` will be preserved as
    # torch.ops.my_namespace.the_op_that_needs_to_be_preserved operator after
    # torch.export.export

r   )register_decompositionCompositeImplicitAutogradCompositeExplicitAutogradc                   >^  SSK Jn  [        U 4S jS 5       5      (       a   ST R                   35       eT R                  nUS   S:X  a  USS  nX!" T 0 S9-   nTR	                  U5        TR                  UT T5        TR                  n[        [        [        R                  U5      U5      nT(       a  T	" U/5      " T 5        U$ )	Nr   )infer_schemac              3   @   >#    U  H  oTR                   ;   v   M     g 7frl   )__name__).0cr\   s     r2   	<genexpr>9_register_custom_op.<locals>.decorator.<locals>.<genexpr>   s     7ABKK's   z.<>zEExpecting op to be defined in normal functions, not lambda or local: rO   r)   )mutates_args)
torch._library.infer_schemar   anyr   defineimplnsgetattrr:   ops)
r\   r   op_nameschemalib_namespaceopdispatch_keyinductor_decomposedlibr   s
   `     r2   	decorator&_register_custom_op.<locals>.decorator   s    <7777 	
STVT_T_S`a	
7 ++1:abkG<<<

6"l+WUYY6@"B4(,	rc   )torch._inductor.decompositionr   )r   r   r   r   r   s   `` @@r2   r   r      s-    2 E  	$(  & rc   c                    ^ ^ U U4S jnU$ )Nc                 .   > TR                  TU S5      nU$ )NMeta)r   )r\   r   r   r   s     r2   r   $_register_meta_op.<locals>.decorator   s    XXgr6*	rc    )r   r   r   s   `` r2   _register_meta_opr      s     rc   c                 P  ^ U4S jmSnU R                  5        H  u  p4[        U[        R                  R                  5      (       a	  U(       a  M7  [
        R                  " UR                  SS9UR                  SS95       H  nUT" U5      -  nM     U[        XA5      -  nM     U$ )z
Returns the model size in bytes. The option to ignore embeddings
is useful for models with disproportionately large embeddings compared
to other model parameters that get quantized/sparsified.
c                    > [        U S5      (       a4  SnU R                  5       S    H  n[        X5      nUT" U5      -  nM     U$ U R                  5       U R	                  5       -  $ )N__tensor_flatten__r   )hasattrr   r   numelelement_size)tensorsize	attr_name
sub_tensor	flat_sizes       r2   r   *get_model_size_in_bytes.<locals>.flat_size   sj    6/00D $668;	$V7
	*-- < K<<>F$7$7$999rc   r   F)recurse)
named_childrenr9   r:   r;   	Embedding	itertoolschainr*   r,   r   )rH   ignore_embeddings
model_sizenamechildr0   r   s         @r2   r   r      s    
: J++-5%(("4"455:K:K__   /u1M il*
 1%KKJ . rc   c                        \ rS rSrS rS rSrg)UnwrapTensorSubclassi  c                 :   [        U5      n[        U R                  5       H\  u  p4n[        U5      n[	        XRU* S  5       VVs0 s H  u  pxXx_M	     nnnX&S  nUR                  XTS S 5      n	UR                  U	5        M^     [        U5      S:X  d   eUS   $ s  snnf )Nr)   r   )listreversedrebuild_stackr-   zip__tensor_unflatten__rb   )
selftensorstodotpmetainner_tensors	nb_tensorabrebuilts
             r2   forwardUnwrapTensorSubclass.forward  s    G}'/0B0B'C#BmM*I.1-yjkAR.ST.SdaQT.SMT
#D--m4NGKK  (D 4yA~~Aw Us   Bc                    [        U5      [        R                  Ld   e/ n/ nU/nU(       a  UR                  5       nUR	                  5       u  pgUR                  [        U5      Xv45        U Ho  n[        XX5      n	[        U	5      [        R                  L a  UR                  U	5        M=  [        U	[        R                  5      (       d   eUR                  U	5        Mq     U(       a  M  X l        U$ rl   )	r=   r:   Tensorpopr   rb   r   r9   r   )
r   r   r   plain_tensorsr   objr   metadatar   vals
             r2   right_inverse"UnwrapTensorSubclass.right_inverse#  s    F|5<<///x((*C&)&<&<&>#M  $s)X!EF*	c-9,!((-%c5<<8888KK$ +	 d +rc   )r   N)r   
__module____qualname____firstlineno__r   r   __static_attributes__r   rc   r2   r   r     s    
rc   r   c                 2   U R                  5        GH  u  p#[        U[        R                  R                  5      (       d*  [        U[        R                  R
                  5      (       Ga  [        US5      (       Ga  [        UR                  5      [        R                  La  [        UR                  5      [        R                  R                  La  [        UR                  [        R                  5      (       a  [        [        UR                  5      [        R                  5      (       aZ  [        UR                  [        5      (       a;  [        R                  " U5      (       d   [        R                  " US[!        5       5        [#        U5        GM     U $ )a%  Unwraps (nested) tensor subclass in the model to plain tensors
This is a workaround to make a model with tensor subclass to work with `torch.export.export`
and `torch.aot_compile`, we hope this can be integrated into compile stack soon
tracking issue: https://github.com/pytorch/ao/issues/345
weight)r   r9   r:   r;   Linearr   r   r=   r   r   	Parameter
issubclassr   parametrizeis_parametrizedregister_parametrizationr   r   )rH   	filter_fnr   r   s       r2   r   r   9  s     ++- 5%((//22eUXX%7%788x((U\\"%,,6U\\"%((*<*<<5<<664-u||<<5<<):;;//6600x!5!7 	u%% .& Lrc   dtypec                     [         R                  [         R                  [         R                  [         R                  1nX;   $ rl   )r:   float8_e4m3fnfloat8_e4m3fnuzfloat8_e5m2float8_e5m2fnuz)r   	fp8_typess     r2   _is_float8_typer   U  s8    	I rc   c                     [        [        R                  " SU 5      5      n[        R                  " SU 5      nU(       a.  [	        [
        UR                  5       5      u  p4nU(       a  SnX4U/$ [        SU  35      e)z
Parse version string representing pre-release with -1

Examples: "2.5.0.dev20240708+cu121" -> [2, 5, -1], "2.5.0" -> [2, 5, 0]
z	(git|dev)z(\d+)\.(\d+)\.(\d+)zInvalid version string format: )boolresearchmatchmapintgroups
ValueError)version_stringis_prereleaser   majorminorpatchs         r2   parse_versionr   _  sk     <@AMHH+^<E!#u||~6eEe$$:>:JKLLrc   c                  @    [        [        R                  S5      (       + $ )Ngit_version)r   r:   r   r   rc   r2   	is_fbcoder   q  s    u}}m444rc   c                 l    [        5       (       a  g[        [        R                  5      [        U 5      :  $ )NT)r   r   r:   __version__)min_versions    r2   torch_version_at_leastr  u  s)    {{ **+}[/IIIrc   version_strc                 ~    SR                  U R                  S5      SS 5      nSU S3n[        [        U 5      U5      $ )zu
Wrapper for existing TORCH_VERSION_AT_LEAST* variables that will log
a deprecation warning if the variable is used.
rO   re   N   TORCH_VERSION_AT_LEAST_4 is deprecated and will be removed in torchao 0.14.0)joinsplit_BoolDeprecationWrapperr  )r  version_str_var_namedeprecation_msgs      r2   "_deprecated_torch_version_at_leastr  }  sN    
 88K$5$5c$:2A$>?/0D/EEyzO"{+ rc   c                     [        5       =(       d    [        S5      U :  nSR                  U R                  S5      SS 5      nSU S3n[	        X5      $ )zr
Wrapper for existing TORCH_VERSION_AFTER* variables that will log
a deprecation warning if the variable is used.
r:   rO   re   Nr  TORCH_VERSION_AFTER_r	  )r   r   r
  r  r  )r  
bool_valuer  r  s       r2   _deprecated_torch_version_afterr    sW    
 ? 0K ?J88K$5$5c$:2A$>?,-A,BBvwO":??rc   c                   6    \ rS rSrSrS\S\4S jrS rS r	Sr
g	)
r  i  zR
A deprecation wrapper that logs a warning when the given bool value is accessed.
r  msgc                     Xl         X l        g rl   r  r  )r   r  r  s      r2   __init__ _BoolDeprecationWrapper.__init__  s    $rc   c                 Z    [         R                  " U R                  5        U R                  $ rl   )warningswarnr  r  r   s    r2   __bool__ _BoolDeprecationWrapper.__bool__  s    dhhrc   c                 0    [        U 5      [        U5      :H  $ rl   )r   )r   others     r2   __eq___BoolDeprecationWrapper.__eq__  s    DzT%[((rc   r  N)r   r   r   r   __doc__r   strr  r  r"  r   r   rc   r2   r  r    s$    4 c )rc   r  2.8.0z2.7.02.6.0z2.5.0z2.4.0z2.3.0z2.2.0z	2.5.0.devz	2.4.0.devz	2.3.0.devz	2.2.0.devc                   0    \ rS rSrSrS\S\4S jrS rSr	g)	_ConfigDeprecationWrapperi  z
A deprecation wrapper that directs users from a deprecated "config function"
(e.g. `int4_weight_only`) to the replacement config class.
deprecated_name
config_clsc                     Xl         X l        g rl   )r*  r+  )r   r*  r+  s      r2   r  "_ConfigDeprecationWrapper.__init__  s    .$rc   c           	          [         R                  " SU R                   SU R                  R                   SU R                  R                   S35        U R                  " U0 UD6$ )N`zE` is deprecated and will be removed in a future release. Please use `z/` instead. Example usage:
    quantize_(model, z(...)))r  r  r*  r+  r   r   rJ   rK   s      r2   __call__"_ConfigDeprecationWrapper.__call__  sd    $$% &??334 5$$(OO$<$<#=VE	

 ///rc   )r+  r*  N)
r   r   r   r   r$  r%  r   r  r1  r   r   rc   r2   r)  r)    s    
% % %0rc   r)  c                    ^ ^ [        T S5      (       d  0 T l        T T R                  ;  a  0 T R                  T '   [        T[        [        45      (       d  T/mUU 4S jnU$ )a  Use this decorator to implement a function for an aten ops in __torch_dispatch__
(if user passed in a list of ops)
or torch function in __torch_function__ (if user passed in a single object)

class MyTensor(torch.Tensor):
    ...
    implements = classmethod(_implements)

implements = MyTensor.implements

@implements(torch.nn.functional.linear):
def _(func, types, args, kwargs):
    ...

_ATEN_OP_OR_TORCH_FN_TABLEc                 ~   >^  T H4  n[         R                  " U5      U 4S j5       nUTR                  T   U'   M6     T $ )Nc                    > T" XX#5      $ rl   r   )rq   typesrJ   rK   funcs       r2   wrapper/_implements.<locals>.decorator.<locals>.wrapper  s    Ad33rc   )	functoolswrapsr4  )r8  r   r9  aten_ops_or_torch_fnsclss   `  r2   r   _implements.<locals>.decorator  sF    'B__R 4 !4 7>C**3/3 ( rc   )r   r4  r9   r   tuple)r>  r=  r   s   `` r2   _implementsrA    sa      3455)+&
#000.0&&s++dE];;!6 7 rc   c                    ^ U R                   n[        R                  R                  nU" [        R                  R
                  /5      S 5       nU" UR                  R                  UR                  R                  UR                  R                  UR
                  R                  /5      S 5       nS[        S[        S[        4S jmU" UR                  R                  5      U4S j5       nU" UR                  R                  5      S 5       ng )	Nc                 <   ^ ^^ TS   R                  UU U4S j5      $ )Nr   c                 "   > T" U /TSS  Q70 TD6$ Nr)   r   r~   rJ   r8  rK   s    r2   r   :_implements_common_tensor_ops.<locals>._.<locals>.<lambda>  s    43ODH3O3Orc   )_apply_fn_to_datar8  r7  rJ   rK   s   ` ``r2   rO   (_implements_common_tensor_ops.<locals>._  s     Aw(()OPPrc   c           
      T   ^ ^^ [        T TTTS   R                  UU U4S j5      5      $ )Nr   c                 "   > T" U /TSS  Q70 TD6$ rE  r   rF  s    r2   r   rG    s    Q0Lab0LV0Lrc   )r   rH  rI  s   ` ``r2   rO   rJ    s.     +G%%&LM	
 	
rc   r   srcr'   c                   ^ ^ [        U U4S jT R                   5       5      nSn[        T S5      (       a   [        U U4S jT R                   5       5      n[        U U4S jT R                   5       5      nSn[        T S5      (       a   [        U U4S jT R
                   5       5      n[        T 5      [        T5      :H  =(       a=    T R                  TR                  :H  =(       a    U=(       a    U=(       a    U=(       a    U$ )Nc              3   ~   >#    U  H2  n[        TU5      R                  [        TU5      R                  :H  v   M4     g 7frl   r   shaper   t_namer   rM  s     r2   r   H_implements_common_tensor_ops.<locals>._same_metadata.<locals>.<genexpr>  s7      "
0 D&!''73+?+E+EE0s   :=Toptional_tensor_data_namesc              3      >#    U  HM  n[        TU5      b-  [        TU5      R                  [        TU5      R                  :H  O[        TU5      S L v   MO     g 7frl   rP  rR  s     r2   r   rT    s^      / >F 4(4 f%++wsF/C/I/IIS&)T12 >s   AAc              3   V   >#    U  H  n[        TU5      [        TU5      :H  v   M      g 7frl   r   r   a_namer   rM  s     r2   r   rT    s+      
5 D&!WS&%995   &)optional_tensor_attribute_namesc              3   V   >#    U  H  n[        TU5      [        TU5      :H  v   M      g 7frl   rX  rY  s     r2   r   rT  %  s+      'BF f%f)==Br[  )alltensor_data_namesr   rU  tensor_attribute_namesr\  r=   rQ  )r   rM  _tensor_shape_match_optional_tensor_shape_match_attr_match_optional_attr_matchs   ``    r2   _same_metadata5_implements_common_tensor_ops.<locals>._same_metadata  s    ! "
00"
 
 (,$4566+. / #==	/ ,(  
55
 

  $4:;;#& '"BB' $  J$s)# %

cii'%#% -% 	%
 %	
rc   c                    > US   nUS   nT" XE5      (       a@  UR                  5       S   nU H&  n[        XG5      R                  [        XW5      5        M(     g [        SUS   US   4 35      e)Nr   r)   z7Not supported args for copy_ due to metadata mismatch: )r   r   copy_r   )	r8  r7  rJ   rK   r   rM  self_tensorstensor_namere  s	           r2   rO   rJ  3  s~    Aw1g$$$224Q7L+*001JK  ,Ed1gtTUwFVEWX
 	
rc   c                 ,   US   n[        US5      (       Gad  [        US5      (       GaR  UR                  " USS  0 UD6nUR                  S5      nUR                   Vs/ s H  n[	        XF5      R                  U5      PM     nn/ n[        US5      (       aT  UR                   HD  n	[	        XI5      n
U
b"  UR                  U
R                  U5      5        M3  UR                  S 5        MF     UR                   Vs/ s H  nUS:w  a  [	        XK5      OUPM     nn/ n[        US5      (       a-  UR                   Vs/ s H  nUS:w  a  [	        XK5      OUPM     nnUR                  " / UQUQUQUQ76 n[        XX>5      $ [        S5      es  snf s  snf s  snf )	Nr   r_  r`  r)   r+   rU  r\  zSubclasses must implement `aten._to_copy.default` or specify `tensor_data_names` and `tensor_attribute_names` for tensor class or tensor instance before using it)r   _get_to_kwargsr   r_  r   torU  rb   r`  r\  	__class__r   NotImplementedError)r8  r7  rJ   rK   r   r+   r   r   optional_tensorstensor_data_namemaybe_tensorr   tensor_attributesoptional_tensor_attributests                  r2   rO   rJ  @  s   Aw4,--'*3
 3
 (($qr(=f=FZZ)F;?;Q;Q;Q4#&&v.;Q    "t9::(,(G(G$#*4#BL#/(//0GH(//5 )H "&!<!<!!<I -6,A(vM!<  ! *,&t>?? &*%I%I.%I	 1:X0EGD,6Q%I + .
  " " ,	A /t6EE! p
 	
A!.s   $F9F9F)
implementsr:   r   atenr   
contiguousdetachdefaultclonealiasr   r   rh  _to_copy)r>  rv  rw  rO   re  s       @r2   _implements_common_tensor_opsr~    s    J99>>DLL##	

Q
Q KKJJJJOO##		


"
. "
5F "
4 "
H 

""#

 $

 %%&)
 ')
rc   c                    [        U S5      (       a  [        U S5      (       d   e[        R                  R                  X5        [	        U S/ 5       H2  nX R
                  ;  d  M  [        X5      (       a  M&  [        XS 5        M4     [	        U S/ 5       H2  nX0R
                  ;  d  M  [        X5      (       a  M&  [        XS 5        M4     g )Nr_  r`  rU  r\  )r   r:   _utils_set_obj_stater   __dict__setattr)r   stateoptional_tensor_data_nameoptional_tensor_attribute_names       r2    _torchao_base_tensor__setstate__r  m  s    4,--'&3 3   
LL,%,T3OQS%T!$MM9'C
 C
 DT:	 &U +2/+& *>wH
 H
 D$?+rc   c                 (   Uc  0 OUn[        U S5      (       a:  X R                  ;   a+  XR                  U    ;   a  U R                  U    U   " XX45      $ [        R                  R	                  5          U" U0 UD6sSSS5        $ ! , (       d  f       g= f)zUse this util function for a common `__torch_function__` implementation
that dispatches to ops/functions registered with `_implements`

class MyTensor(torch.Tensor):
    ...
    __torch_function__ = classmethod(_dispatch__torch_function__)
Nr4  )r   r4  r:   _CDisableTorchFunctionSubclass)r>  r8  r7  rJ   rK   s        r2   _dispatch__torch_function__r    s     >RvF12211122377--c248dSS		.	.	0T$V$ 
1	0	0s   1B
Bc                 r   [        U S5      (       a:  X R                  ;   a+  XR                  U    ;   a  U R                  U    U   " XX45      $ [        S U 5       5      nUR                  5        VVs0 s H  u  pgU[	        U5      _M     nnn[        U R                   SU< SU< SU< SU< 3	5      es  snnf )zUse this util function for a common `__torch_dispatch__` implementation
that dispatches to ops/functions registered with `_implements`

class MyTensor(torch.Tensor):
    ...
    __torch_dispatch__ = classmethod(_dispatch__torch_dispatch__)
r4  c              3   8   #    U  H  n[        U5      v   M     g 7frl   )r=   r   args     r2   r   ._dispatch__torch_dispatch__.<locals>.<genexpr>  s     04Cd3ii4s   zC dispatch: attempting to run unimplemented operator/function: func=z, types=z, arg_types=z, kwarg_types=)r   r4  r@  itemsr=   ro  r   )	r>  r8  r7  rJ   rK   	arg_typesr   r  kwarg_typess	            r2   _dispatch__torch_dispatch__r    s     	12211122377--c248dSS0400I.4lln=nFA1d3i<nK=
<<.\W[V]]f`e_ggtjsiu  vE  yD  xF  	G  >s   1B3tensor_classlayout_classc                 H   ^ ^ [        T S5      (       d  0 T l        UU 4S jnU$ )ab  Helper function for layout registrations, this is used to implement
register_layout decorator for each tensor subclass, see aqt.py for example usage

Args:
    tensor_class: Tensor subclass type
    layout_class: the class type of subclass of `Layout`, e.g. `PlainLayout`

Returns:
    a decorator that registers the tensor impl constructor in the table
_LAYOUT_CONSTRUCTOR_TABLEc                 |   > U R                   TR                  T'   [        R                  R	                  TU /5        U $ rl   )
from_plainr  r:   serializationadd_safe_globals)tensor_impl_classr  r  s    r2   r   #_register_layout.<locals>.decorator  s>    (( 	..|< 	,,l<M-NO  rc   )r   r  )r  r  r   s   `` r2   _register_layoutr    s(     <!<==13.! rc   c                     [        U S5      (       d  [        SU  35      eXR                  ;  a  [        SU SU  35      eU R                  U   $ )ax  Get TensorImpl class constructor (TensorImplClass.from_plain) for `tensor_class` based on `layout_class`
`layout_class` means the class type of subclass of `Layout`, e.g. `PlainLayout`

Args:
    tensor_class: Tensor subclass type
    layout_class: the class type of subclass of `Layout`, e.g. `PlainLayout`

Returns:
    tensor impl subclass constructor for the layout_class
r  z1no registered tensor_impl class constructor for: zlayout_name: z is not supported yet for )r   r   r  )r  r  s     r2   _get_tensor_impl_constructorr    sh     <!<==?~N
 	
 AAAL>)CL>R
 	
 11,??rc   c                    [        S U 5       5      nSU;   a  UR                  S5        [        R                  R                  R
                  " U0 UD6u  p4  nUc  U R                  OUnUc  U R                  OUnUUS.nU$ )Nc              3   h   #    U  H(  n[        U[        R                  5      (       a  M$  Uv   M*     g 7frl   )r9   r:   layoutr  s     r2   r   !_get_to_kwargs.<locals>.<genexpr>  s     JJsELL,Is   #2	2r  )r+   r   )r@  r   r:   r  _nn	_parse_tor+   r   )r   rJ   rK   r+   r   rO   s         r2   rl  rl    s~    JJJD6

8
  ((,,00$A&AF1a"NT[[F-DJJUEF Mrc   c                       \ rS rSrSr\S 5       r\" \5      r\" \	5      r	\" \
5      r\" \5      r\" \5      r\" \5      r\rS rS r\S 5       rS rS rS	 rS
rg)r   i  a  A util tensor subclass that provides commonly used functions
   new tensor subclass can inherit it to get all the utility functions

   class MyTensor(TorchAOBaseTensor):
       pass

This includes:
   `_get_to_kwargs` that can get the kwargs for `to`
        class MyTensor(TorchAOBaseTensor):
            def to(self, *args, **kwargs):
                kwargs = _get_to_kwargs(*args, **kwargs)
                ...
    `implements`:
        implements = MyTensor.implements

        @implements(torch.nn.functional.linear):
        def _(func, types, args, kwargs):
            ...

    `register_layout`:
        register_layout = MyTensor.register_layout

        @register_layout(PlainLayout)
        class PlainAQTTensorImpl(...):
            ...

     `get_tensor_impl_constructor`:
        get_tensor_impl_constructor = MyTensor.get_tensor_impl_constructor
        # in constructor of MyTensor:
        tensor_impl_ctr = get_tensor_impl_constructor(type(_layout))
        tensor_impl = tensor_impl_ctr(data, scale, zero_point, _layout)

class variables to define to simplify implmentation of tensor subclasses:
   `tensor_data_names` (List[str]): list of names of all requires tensor_data, order should match
      the `__init__` list of tensor subclass
   `tensor_attribute_names` (List[str]): list of names of non-Tensor attributes,
        order should match the `__init__` list of tensor subclass, following all the `tensor_data_names` arguments
   `optional_tensor_data_names` (List[str]): it's optional to define this field to have the additional boilerplate functions been implemented for you, but this will be need if there are some optional Tensor data attributes, when defined, this will be a list of names of Tensors that can be optional
   `optional_tensor_attribute_names` (List[str]): it's optional to define this field to have the additional boilerplate functions been implemented for you, but this will be need if there are some optional non-Tensor attributes, when defined, this will be a list of names of attributes that can be optional
   Note: Argument order in __init__ and __new__ should match exaclty with tensor_data_names + tensor_attribute_names + optional_tensor_data_names (if present) + optional_tensor_attribute_names (if present)


If `tensor_data_names` (torch.Tensor data attribute names) and `tensor_attribute_names` (non-torch.Tensor attribute names) are defined, there are some additional
functions that will be added, this includes:
`__tensor_flatten__`: flattens a subclassed tensor instance, returns a tuple, first element is tensor data names for valid tensor data,
    second element is a dict from attribute_name to non-Tensor attributes
`__tensor_unflatten__`: takes a tensor_data_dict (a map from tensor name to Tensor), and list of non-tensor attributes, returns a new instance of the subclassed tensor
`_apply_fn_to_data`: takes a function (Tensor -> Tensor),  applies function to all tensor data and
    recreate a new subclassed Tensor with the transformed tensor data
`__repr__`: the string representation of the subclassed tensor instance
`_same_metadata`: returns whether the metadata is the same between two instances of cls
`__setstate__`: when loading a serialized tensor subclass checkpoints, it sets the new
optional tensor and tensor attribute that is saved in the old checkpoint to None,
to maintain BC of old checkpoints when we add new optional tensor data or attributes to
the tensor subclass
torch ops: torch.Tensor.contiguous
aten ops: aten.detach.default, aten.clone.default, aten.alias,default, aten.contiguous.default, aten.copy_.default, aten._to_copy.default (enables t.to)

Example:
    class MyTensor(torch.Tensor):
        tensor_data_names = ["a", "b"]
        tensor_attribute_names = ["c", "d"]
        optional_tensor_data_names = ["e", "f"]
        optional_tensor_attribute_names = ["g", "h"]


        def __new__(
            cls,
            a: Tensor,
            b: Tensor,
            c: int,
            d: str,
            e: Optional[Tensor] = None,
            f: Optional[Tensor] = None,
            g: Optional[int] = None,
            h: Optional[int] = None,
        ):
            pass

        def __init__(
            self,
            a: Tensor,
            b: Tensor,
            c: int,
            d: str
            e: Optional[Tensor] = None,
            f: Optional[Tensor] = None,
            g: Optional[int] = None,
            h: Optional[int] = None,
        ):
            pass

c                    [        U S5      (       d  0 U l        X R                  ;  a  0 U R                  U '   [        U S5      (       a,  [        U S5      (       a  U R                  5         [        U l        U R
                   H?  nX R                  ;   d  M  U R                  U    R                  U R                  U   5        MA     g )Nr4  r_  r`  )r   r4  r~  r  __setstate__	__bases__update)r>  rK   parents      r2   __init_subclass__#TorchAOBaseTensor.__init_subclass__S  s    s899-/C*44424C**3/ 3+,,>V1W1W--/?C mmF777..s3::226: $rc   c                 f    [         R                  R                  [        [	        U 5      5      5        g rl   )r:   r  _log_api_usage_oncer%  r=   r0  s      r2   r  TorchAOBaseTensor.__init__q  s    $$Sd_5rc   c           	         [        U S5      (       a  [        U S5      (       a  U R                  R                  5       n[        U S5      (       a4  U R                   H$  n[	        X5      nUc  M  UR                  U5        M&     U R                   Vs0 s H  oD[	        X5      _M     nn[        U S5      (       a*  UU R                   Vs0 s H  nU[	        X5      _M     sn-  nX4$ [        S5      es  snf s  snf )Nr_  r`  rU  r\  zSubclasses should implement __tensor_flatten__ or specify `tensor_data_names` and `tensor_attribute_names` for tensor class before using it)	r   r_  copyrU  r   rb   r`  r\  ro  )r   r_  rq  rr  attr	attr_dicts         r2   r   $TorchAOBaseTensor.__tensor_flatten__t  s   4,--'*3
 3
 !% 6 6 ; ; =t9::(,(G(G$#*4#BL#/)001AB )H 7;6Q6Q6Qdgd))6Q   t>??% $ D D) D '$-- D) 	
 %//! Z
 	
)s   C0C5c                    [        U S5      (       a  [        U S5      (       a  U R                   Vs/ s H  oQU   PM	     nn0 n[        U S5      (       a,  U R                   Vs0 s H  nXR                  US 5      _M     nnU R                   Vs/ s H  oRU   PM	     n	n0 n
[        U S5      (       a  U R
                   Vs0 s H  nXRU   _M
     n
nU " / UQU	Q70 UDU
D6$ [        S5      es  snf s  snf s  snf s  snf )Nr_  r`  rU  r\  zSubclasses should implement __tensor_unflatten__ or specify `tensor_data_names` and `tensor_attribute_names` for tensor class before using it)r   r_  rU  getr`  r\  ro  )r>  tensor_data_dictrs  
outer_sizeouter_strider   required_tensorsoptional_tensor_dictrq  required_attributesoptional_attribute_dicts              r2   r   &TorchAOBaseTensor.__tensor_unflatten__  s_    3+,,>V1W1W363H3H 3H4&3H    $& s899 -0,J,J(,J( %&:&:;KT&RR,J % ( 584N4N#4ND$'4N   # ')#s=>> !$ C C+ C D11 C ( +
  !$ ' *	  " \
 	
7 
(
#
+s   C*"C/C4?C9c           	      D   [        U S5      (       a  [        U S5      (       a  U R                   Vs/ s H  o!" [        X5      5      PM     nn0 n[        U S5      (       a1  U R                   H!  n[        X5      nUb  U" U5      XE'   M  S XE'   M#     U R                   Vs/ s H  n[        X5      PM     nn0 n[        U S5      (       a&  U R
                   V	s0 s H  n	U	[        X	5      _M     nn	U R                  " / UQUQ70 UDUD6$ [        S5      es  snf s  snf s  sn	f )Nr_  r`  rU  r\  zSubclasses should implement _apply_fn_to_data or specify `tensor_data_names` and `tensor_attribute_names` for tensor class or tensor instance before using it)r   r_  r   rU  r`  r\  rn  ro  )
r   r\   r  r  r  rq  rr  r  r  r   s
             r2   rH  #TorchAOBaseTensor._apply_fn_to_data  sm   4,--'*3
 3
 594J4J 4JD74&'4J    $& t9::(,(G(G$#*4#BL#/ACLAQ,>AE,> )H 150K0K#0K#0K   # ')#t>?? &*%I%I+%I	 wt77%I ( +
 >> !$ ' *	  " l
 	
; #
+s   DDDc           	      v   [        U S5      (       Ga  [        U S5      (       Ga  SnXR                  S    S[        X R                  S   5       3-  nU R                  SS   H  nUSU S[        X5       3-  nM     U R                   H  nUSU S[        X5       3-  nM     [        U S5      (       a'  U R                   H  nUSU S[        X5       3-  nM     [        U S	5      (       a'  U R
                   H  nUSU S[        X5       3-  nM     U R                  R                   S
U S3$ [        S5      e)Nr_  r`   r   =r)   z, rU  r\  ()zSubclasses must implement __repr__ or specify `tensor_data_names` and `tensor_attribute_names` for tensor class or tensor instance before using it)	r   r_  r   r`  rU  r\  rn  r   ro  )r   repr_strrq  tensor_attribute_names       r2   __repr__TorchAOBaseTensor.__repr__  s   4,--'*3
 3
 H11!45QwtE[E[\]E^7_6`aaH$($:$:12$> b!1 2!GD4S3TUU %? *.)D)D%./q1U0VW *E t9::(,(G(G$-.a0O/PQH )H t>??-1-Q-Q)"%:$;1WT=a<b ccH .R nn--.az;;! a
 	
rc   c                 >    [        U S5      (       d  g U R                  $ )N_layout)r   r  r  s    r2   
get_layoutTorchAOBaseTensor.get_layout  s    tY''||rc   r   N)r   r   r   r   r$  classmethodr  rA  rv  r~  r  __torch_dispatch__r  __torch_function__r  register_layoutr  get_tensor_impl_constructorrl  r  r   r   rH  r  r  r   r   rc   r2   r   r     s    \|  * [)J$/0M$N!$%@A$%@A!"23O"-.J"K#N6
4 !
 !
F#
J 
Drc   r   c                     U[        U5      -
  [        U 5      :  a  [        S5      e[        U 5      n[        [        U 5      U5       H%  nUR	                  X$U-
  [        U5      -      5        M'     U$ )ai  
__torch_dispatch__ doesn't guarantee the number of arguments you are
passed (e.g., defaulted arguments are not passed); but usually it is
convenient to pad out the arguments list with defaults.  This function
helps you do that.
Args:
    args: the list of positional arguments passed to __torch_dispatch__
    n: the number of arguments you are expecting to get
    defaults_tail: default values for the arguments, starting from the
        end of the list
Example:
    >>> fill_defaults([1, 2, 3], 5, [3, 4, 5])
    [1, 2, 3, 4, 5]
    >>> fill_defaults([1, 2, 3], 5, [None, None, None])
    [1, 2, 3, None, None]]
z%not enough defaults to fill arguments)r-   RuntimeErrorr   rA   rb   )rJ   r{   defaults_tailris        r2   fill_defaultsr    sf    " 	3}D	)BCCT
A3t9a 	1us='99:; !Hrc   c                      [         R                  R                  5       =(       a    [         R                  R                  $ rl   )r:   r5   ra   r   hipr   rc   r2   is_ROCMr  !  s$    ::""$:):)::rc   c                      [        5       (       a=  / SQn [        R                  R                  S5      R                  nU  H
  nX!;   d  M
    g   g)N)gfx940gfx941gfx942r   TFr  r:   r5   get_device_propertiesgcnArchName)
mxArchNamearchNamearchs      r2   r   r   %  s@    yy3
::33A6BBD  rc   c                      [        5       (       a0  [        R                  R                  S5      R                  n SU ;   a  gg)Nr   gfx950TFr  r  s    r2   is_MI350r  /  s1    yy::33A6BBxrc   c                  x    [        5       (       a+  [        R                  R                  S5      R                  n  gg)Nr   TFr  r  s    r2   is_Navi4r  7  s,    yy::33A6BBrc   r   r   c                     [         R                  R                  5       =(       a    [         R                  R                  nU(       a"  [         R                  R	                  5       X4:H  $ S$ )z0Check if the CUDA version is exactly major.minorFr:   r5   ra   r   rf   )r   r   is_cudas      r2   is_sm_versionr  ?  sE    jj%%'>EMM,>,>GCJ5::++-%?UPUUrc   c                      [         R                  R                  5       =(       aB    [         R                  R                  =(       a!    [         R                  R	                  5       S:  $ )N)   	   r  r   rc   r2   r   r   E  C    

! 	9MM	9JJ,,.&8rc   c                      [         R                  R                  5       =(       aB    [         R                  R                  =(       a!    [         R                  R	                  5       S:  $ )N)r  r   r  r   rc   r2   r   r   M  r  rc   c                      [         R                  R                  5       =(       aB    [         R                  R                  =(       a!    [         R                  R	                  5       S:  $ )N)
   r   r  r   rc   r2   is_sm_at_least_100r  V  sC    

! 	:MM	:JJ,,.'9rc   c                     [        U [        R                  5      (       a  U R                  n U S:H  =(       a    [	        U5      $ )Nr8   r9   r:   r+   r=   r  r+   r   s     r2   check_cpu_versionr  ^  1    &%,,''U?>5g>>rc   c                     [        U [        R                  5      (       a  U R                  n U S:H  =(       a    [	        U5      $ )Nr`   r  r  s     r2   check_xpu_versionr  d  r  rc   c                     X-   S-
  U-  $ rE  r   )r   r   s     r2   ceil_divr  j  s    EAI!rc   package_namer  c                 p    [         R                  R                  U 5      S LnU(       d  g[        U 5      U:  $ )NF)	importlibutil	find_specr   )r  r  package_existss      r2   r   r   n  s1    ^^--l;4GN< K//rc   c                      [         R                  R                  S5      c  gSS Kn [	        5       (       d  U R
                  S:  a  gg)N
fbgemm_gpuFr   z1.2.0T)r  r  r  fbgemm_gpu.experimental.gen_air   r  )r  s    r2   _is_fbgemm_gpu_genai_availabler  v  s9     ~~-5);;:11G;rc   c                   j   ^  \ rS rSrSrSS\R                  S\\R                     4U 4S jjjrSr	U =r
$ )r   i  a  This is used because the TorchAO quantization functions tend to operate on modules so to apply the transform to a tensor, we can load a
DummyModule with the target tensor and then apply the transformation to the module and then extract the transformed tensor.
r   biasc                 :   > [         TU ]  5         Xl        X l        g rl   )superr  r   r  )r   r   r  rn  s      r2   r  DummyModule.__init__  s    	rc   )r  r   rl   )r   r   r   r   r$  r:   r   r
   r  r   __classcell__)rn  s   @r2   r   r     s-    u|| 8ELL3I  rc   r   )r   NN)T)Frl   )r   N)r'  )r&  )Wr;  r  r   r   rG   r  r   importlib.metadatar   mathr   typingr   r	   r
   r   r:   torch.nn.utils.parametrizer;   ru   r   torch.utils._python_dispatchr   __all__cacher<   r3   r   r   r   r   r   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r%  r  r  r  TORCH_VERSION_AT_LEAST_2_8r!   r    r   r   r   r   r%   r$   r#   r"   r)  rA  r~  r  r  r  r  r  rl  r   r  r  r   r  r  r  r   r   r  r  r  r  r   r  r   r   rc   r2   <module>r     s      	    &  0 0  0 0 D> %((// c  "5$p
U\\ u||  "S   4n< 588??  F85;; 4 M$5J
C 
C 
@ @ @) )$ @H ?H ?H ?H ?H ?H ?H 9+F 9+F 9+F 9+F 0 0&#Lw
t@(%(,8 8 :@@*2@@2&H HVD;V VS VT V??0c 0 0%((// rc   