
    ȅivs                        S SK r S SKrS SKrS SKJr  S SKJr  S SKrS SKJ	r
  S SKJ	s  Js  Jr  S SKJ	s  Js  Jr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJrJr  S S
K J!r!J"r"J#r#J$r$  S SKJ%r%  S SK&J'r'  S SK(J)r)  S SK*J+r+J,r,  S SK-J.r.J/r/J0r0  S SK1J2r2J3r3J4r4  S SK5J6r6  S SK7J8r8   S SK9J:r;  \R|                  R~                  r?\R                  " \A5      rBS\R                  R                  S\E\FS4   S\G\H\F4   4S jrIS\R                  R                  S\E\FS4   S\G\H\F4   S\J4S jrKS\R                  R                  S\E\FS4   S\G\H\F4   SS4S jrL " S S5      rMg! \< a	    S SK9J=r;   Nf = f)    N)Sequence)cast)fill_defaults)
LazyString)_are_we_tracing)
DeviceMesh)DTensorSpec
TensorMeta)argminmax_handlerminmax_dim_handler)OpInfoOpSchemaOutputShardingOutputSpecType)is_rng_supported_mesh)redistribute_local_tensor)ShardingPropagator)convolution_backward_handlerconvolution_handler)#_format_implicit_redistribution_msgExplicitRedistributionContexttry_find_mesh_from_args)Partial	Placement	Replicate)get_active_debug_mode)return_and_correct_aliasing)_cxx_pytree)_pytreeop_callargs.kwargsc                 l   [        U R                  X5      u  pU(       a   eUu  p4pVUR                  5       [        U5      :X  ag  UR	                  5       [        U5      :X  aJ  Ub  UR                  5       U:X  a3  [        R                  R                  R                  R                  U5      $ [        S5      e)Nz%as_strided not supported with DTensor)r   _schemasizetuplestridestorage_offsettorchopsatenaliasdefaultRuntimeError)r    r!   r"   tensorr%   r'   r(   s          \/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/tensor/_dispatch.pyas_strided_handlerr1   5   s    
 !$?LD:+/(F&t$MMOuV},#v'<'<'>.'Pyy~~##++F33
>
??    returnc                     [        [        R                  US   5      n[        [        R                  US   5      nUR                  UR                  :H  $ )Nr      )r   r)   Tensorshape)r    r!   r"   lhsrhss        r0   is_same_size_handlerr:   F   s?    
 u||T!W
%C
u||T!W
%C99		!!r2   c           
      t   [         R                  R                  R                  XU5      n[        R
                  " [        [        [           UR                  5      UR                  5      n[        [        [        S4   U5      nU " U0 UR                  D6  [        [        [         R                     US   5      S   nUR                  nUR                  n/ nU HE  n	[        U	[         5      (       a  UR#                  U	5        M+  UR#                  [%        S5      5        MG     [        [&        R(                  US   5      n
[+        U[        U5      [-        U
R/                  5       U
R1                  5       U
R2                  S9S9n[         R                  " U
USS9nUR5                  5       nU
R7                  U5        g )	N.r   maxr5   r7   r'   dtype)mesh
placementstensor_metaF)local_tensorspecrequires_grad)dtensorDTensor_op_dispatcherunwrap_to_op_infopytreetree_unflattenr   listobject
local_argsargs_tree_specr&   local_kwargsr@   device_mesh
isinstancer   appendr   r)   r6   r	   r
   r%   r'   r>   full_tensorcopy_)r    r!   r"   op_infolocal_tensor_argsgrad_dtensorgrad_placementsr?   found_inf_placements	placementtarget_tensorrC   found_inf_dtensor	found_infs                 r0   found_inf_reduce_handlerr^   P   sy   
 oo,,>>wfUG--T&\7--. U63;/1BC7'"6"67W__-tAw7:L"--O##D,.$	i++ ''	2 ''7	 % tAw/M-.$$& '')%%
D  "
 "--/I	"r2   c                      \ rS rSrSrS$S jr\S\4S j5       r\R                  S\SS4S j5       rS	\
R                  R                  S
\\S4   S\\\4   S\S\S\4S jrS	\
R                  R                  S
\\S4   S\S\4S jrS	\
R                  R                  S
\\S4   S\\\4   S\S\S\S\S\S\S\4S jr\S\S\S\SS4S j5       rS	\
R                  R                  S
\\S4   S\\\4   S\4S jrS	\
R                  R                  S
\\S4   S\\\4   S\S\4
S jr\S\S\S\4S  j5       rS	\
R                  R                  S!\
R<                  S\S\4S" jr S#r!g)%OpDispatcher|   a  
Op dispatching class instance to handle args/kwargs pre-processing (un-wrapping), sharding
propagation, redistribute local args, local compute, and post-processing (re-wrapping). It
also handles any op specific logic if necessary.

NOTE: Given the runtime overhead of Tensor subclass (__torch_dispatch__), the OpDispatcher
is designed to minimize the CPU overhead by using the tricks of proper unflattening, faster
pytree if needed, and leveraging various caching mechanisms implemented in the sharding
propagation and redistribute modules. The CPU overhead is critical to eager mode performance,
one need to carefully measure the CPU overhead when making significant changes to the
OpDispatcher and ShardingPropagator.
r3   Nc                    [        5       U l        [        R                  R                  [        R
                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                   1U l        [        R$                  R                  [&        [        R(                  R                  [*        [        R,                  R                  [.        [        R0                  R                  [2        [        R4                  R                  [6        [        R8                  R                  [:        [        R<                  R                  [:        [        R>                  R@                  [B        [        RD                  R@                  [B        0	U l#        g N)$r   sharding_propagatorr+   native_dropoutr-   normal_rand	rand_likerandn
randn_likerandint_like	low_dtypelow_dtype_outuniform_	bernoulli
bernoulli_float_random_opsis_same_sizer:   convolutionr   convolution_backwardr   *_amp_foreach_non_finite_check_and_unscale_r^   
as_stridedr1   argminr   argmaxr<   dimr   min_custom_op_handlersselfs    r0   __init__OpDispatcher.__init__   s]   #5#7  ''LL  IINN""JJOO##%%''++MM!!NN""OO!!
 %%';$$&9%%--/K;;CCE]OO##%7KK!2KK!2HHLL,HHLL,
$
 r2   c                 >    [         R                  R                  5       $ rc   )r)   _C'_get_dtensor_allow_implicit_replicationr}   s    r0   _allow_implicit_replication(OpDispatcher._allow_implicit_replication   s    xx??AAr2   valuec                 @    [         R                  R                  U5      $ rc   )r)   r   '_set_dtensor_allow_implicit_replication)r~   r   s     r0   r   r      s    xx??FFr2   r    r!   .r"   rU   	try_cachec                 X   UR                   c   S5       e U(       a4  [        5       (       d%  U R                  R                  UR                   5      $ U R                  R	                  UR                   5      $ ! [
         ax    [        R                  R                  UR                  5       [        R                  R                  R                  5      (       a!  UR                  " U0 UD6nU[        Ld   eUs $ e [         a)  n[        U SUR                   =(       d    U 35      UeS nAff = f)Nzwop_info.schema should not be None in sharding propagation. This function should only be called after unwrap_to_op_info.z"

Sharding propagation failed for )schemar   rd   propagate_op_sharding propagate_op_sharding_non_cachedNotImplementedErrorr)   r   %_dispatch_has_kernel_for_dispatch_keynameDispatchKeyCompositeImplicitAutograd	decomposeNotImplemented	Exceptionr.   )r~   r    r!   r"   rU   r   outes           r0   )_propagate_op_sharding_dispatch_slow_path6OpDispatcher._propagate_op_sharding_dispatch_slow_path   s   6 ~~) 	
K	
)	 !2!2//EEgnnUU//PPNN  # 
	xx== 4 4 N N 
 ''88.000
 	#9'..:SG9TU	s$   :A6 $A6 6A>D)6
D) $D$$D)c                    UR                   nUc   S5       eUc   S5       eUR                  nUR                  5       nS nU(       Ga!  UR                  (       a6  UR                  c   eU R                  UUR                  UR                  5        UR                  (       a@  [        R                  " [        [        [           UR                  5      UR                  5      OUR                  n[        [        [        S4   U5      nXR                  ;   Ga?  [         R"                  (       d/  [%        U5      (       a  [         R&                  " U5      [         l        [        [(        R*                  US   5      [        [,        R.                  US   5      pUR0                  R3                  SS 5      nUb!  [5        U[,        R6                  5      (       d   e[         R"                  (       a9  U
R8                  (       d(  [         R"                  R;                  U	R<                  US9O[>        R@                  " 5       nU   U" U0 UR0                  D6nS S S 5        U$ U" U0 UR0                  D6n U$ URB                  nURD                  RF                  nUc  S nU$ S[H        S[,        R.                  4S	 jn[5        U[H        5      (       a
  U" U5      nU$ [5        U[J        5      (       aa  U Vs/ s H  nUb  U" U5      OS PM     nn[5        U[        5      (       d   eS U;   a'  [M        US   RN                  5      n[Q        S
U S35      eU$ ! , (       d  f       U$ = fs  snf )Nz"output sharding should not be Nonezop_info should never be None.r   	generator)r   rC   r3   c                    U R                   be  U R                   R                  nU R                   R                  n[        U5      S:X  a  [        R
                  " SUS9$ [        R                  " / US9$ [        U  S35      e)Nr    )r>   z has no tensor metadata.)rA   r7   r>   lenr)   zerosr/   r.   )rC   r7   r>   s      r0   default_tensorJOpDispatcher._dispatch_get_local_results_slow_path.<locals>.default_tensorM  sr    ''3 $ 0 0 6 6 $ 0 0 6 6u:?#(;;r#?? $)<<%#@@*dV3K+LMMr2   zreturn type z in DTensor op is not supported))output_shardingcompute_mesh_is_current_rank_part_of_meshneeds_redistributeredistribute_schemaredistribute_local_args use_val_from_redistribute_schemarN   rI   rJ   r   rK   rL   rM   r&   rr   random_rng_trackerr   OffsetBasedRNGTrackerrE   rF   r)   r6   rO   poprQ   	Generatoris_meta_distribute_region_spec
contextlibnullcontextoutput_specr$   returnsr	   r   strtyper   )r~   r    r!   rU   r   r?   participatinglocal_resultsrV   	first_argfirst_local_argmaybe_user_generatorrng_contextrC   ret_listr   sret_types                     r0   %_dispatch_get_local_results_slow_path2OpDispatcher._dispatch_get_local_results_slow_path   s?    "11*P,PP*"B$BB"##::<11 '::FFF,,#77#DD )) %%fw'9'9:** ''  !%U63;%79J K*****/DT/J/J +1*F*Ft*LF' $q'2'8';< + (/';';'?'?T'R$+3z(%//8 8   **?3J3J ''::!3G ;  $//1  !$+->$W'BVBV$WM !` Y !(): Sg>R>R SX I #..D..H| !%< 7N N N dK00$24$8M   h// OS%NRQ]q)Dd " % &mT::::},#&x{'7'7#81*8*4ST  a ![` %s   6M$M
Mr   r   r   r   is_inplace_opis_out_variant_opc
                 8   UR                   c  U[        R                  R                  :X  a  Ub  [	        U[
        5      (       d   e[        R                  " Ub  [        U5      OSUR                  S9n
[        R                  " U
[        R                  R                  S9  [        U
R                  5       5      nU(       a  UR                   b  UR                   n[	        U[        5      (       d   e[	        US   [         R"                  5      (       d   eU[        R$                  R&                  :X  a  XS   l        [+        XX2S   5      $ US   R(                  R,                  UR,                  :w  a5  [/        U SUS   R(                  R,                   SUR,                   S35      eUS   $ gU	(       a  [	        UR                   [0        5      (       d  UR                   4OUR                   n/ nSnUR2                  R4                   Hi  nUR6                  (       d  M  [9        [         R"                  X?R:                     5      n[9        [        X   5      Ul        UR=                  U5        US-  nMk     [?        U5      S:  d   S	5       e[?        U5      S:  a  [1        U5      $ US   $ U[        R                  R                  :X  d   U5       eU RA                  XeR                   5      nU(       a,  UR2                  RC                  5       (       a  [+        XUU5      $ U$ )
z<
Tail of main dispatching logic, called from C++ fast path.
Nr5   )device)opr   zr: in-place operations that require placement changes are not supported. The operation would change placement from z to z}, which requires redistribution and breaks aliasing semantics. Please use the out-of-place version of this operation instead.z,out variant should have at least one out arg)"r   r+   equalr-   rQ   boolr)   r/   intdevice_typedist
all_reduceReduceOpMINitemr	   rE   rF   squeeze_rz   r   r   r@   r.   r&   r$   	argumentsis_outr   r   rR   r   wrap_is_view_op)r~   r    r!   r"   r   r   r   r   r   r   rr   output_specsout_dtsspec_idxargumentout_dtrets                     r0   _dispatch_fast_path_python_tail,OpDispatcher._dispatch_fast_path_python_tailj  s     &&.$**,,, %,
=$0O0OOOLL*7*CC&'33 dmm&7&78 $QVVX**6-99!+{;;;;!$q'7??;;;; dmm///$/GM 7wfSTgVV Aw}}//;3I3II*&i (\#Aw}}778[=S=S<T U]^   7N "/"="=uEE !,,.$00 
 GH#OO55???!'//6--3HIF#'\5K#LFLNN6*MH 6 w<1$T&TT$%(\A%55>E71:Edjj0009'90))M+F+FGC!<!<!>!>27&#NN
r2   suggested_input_schemar   c           
      \   [        5       nU R                  b*  [        [        R                  " UR
                  5      5      nOUR
                  n/ n[        U R                  5       GH"  u  pgXF   n[        U[        5      (       a  [        [        R                  U R                  U   5      n	Xx:w  a  Ub  UR                  XgU5      O[        R                   " 5       n
["        R$                  " UU['        [(        U R*                  =(       d    UR,                  5      5        U
   [/        U	UU5      nS S S 5        UR1                  W5        M  UR1                  U	5        M  U(       a  UR1                  U5        GM  UR1                  U5        GM%     [        U5      U l        g ! , (       d  f       Nv= frc   )r   rN   r&   rI   tree_leavesargs_schema	enumerateflat_args_schemarQ   r	   r   r)   r6   rM   record_redistribute_callsr   r   r   observe_redistributionr   r   r   r   r   rR   )rU   r   r   
debug_modeflatten_args_schema_to_reshardnew_local_argsiarg_specreshard_arg_specrB   redistribute_contextresharded_local_tensors               r0   r   $OpDispatcher.redistribute_local_args  sz    +,
 !!--2""#9#E#EF.* .D-O-O*')$W%=%=>KA=@(K00#ELL'2D2DQ2GH/
 &1 #<<)9 (335 ) 2HH ("?#NNG.D.G.G	 .1J($,	2. . #))*@A")),73 #))*:;"))(3M ?P #>2% .-s   F
F+	c                 (    U R                  XUS5      $ )NT)_unwrap_to_op_info_impl)r~   r    r!   r"   s       r0   rH   OpDispatcher.unwrap_to_op_info  s     ++G64HHr2   create_schemac           
         U R                   R                  R                  US 5      nUb,  UR                  (       a  [        R
                  " U5      u  pgUnOUS px/ n	0 n
/ n0 nS nU H  n[        U[        R                  5      (       aI  UR                  UR                  5        U	R                  UR                  5        Uc  UR                  nMi  Mk  [        U[        R                  5      (       aH  U=(       d    [        X5      nU	R                  U R!                  XU5      5        UR                  U5        M  U	R                  U5        UR                  U5        M     UR#                  5        H  u  nn[        U[        R                  5      (       a  UR                  X'   UR                  X'   MC  [        U[        R                  5      (       a/  U=(       d    [        X5      nU R!                  UUU5      X'   UX'   M  UX'   UX'   M     Uc   SU S35       e[%        UU(       a2  ['        UU(       a  [        R(                  " X5      O
[+        U	5      U
US9OS U	[+        U5      UU5      nU$ )Nz*found no DeviceMesh from dtensor args for !)schema_info)rd   op_to_schema_infogetneeds_pytreerI   tree_flattenrQ   rE   rF   rR   _local_tensorr   rP   r)   r6   r   %_try_replicate_spec_for_scalar_tensoritemsr   r   rJ   r&   )r~   r    r!   r"   r   runtime_schema_info	tree_args	args_spec	args_listr   kwargs_schemarM   rO   r   argkvrU   s                     r0   r   $OpDispatcher._unwrap_to_op_info_impl  s[    #66HHLLT
 */B/O/O#)#6#6t#< I*3I#'y$&+-#%
*,*.C#w//!!#"3"34""399-'#&??L ( C..+  /F0 "">>l
 !!#& ""3'!!#&) , LLNDAq!W__--"#//#$77 Au||,,+  /F0 $(#M#M $ 
 #$ $% "## #& ' 	
8	C	
'    ! ))+A{+/
 *%
( r2   resrC   c                 :   [        U [        R                  5      (       a[  Ub?  [        U[        5      (       d   SU S35       e[        R
                  " XU R                  S9$ U R                  S:X  d   S5       eU $ [        U [        [        45      (       a  Ub  [        U[        [        45      (       d   SU S35       e/ n[        X5       H)  u  p4UR                  [        R                  X45      5        M+     [        U [        5      (       a  [        U5      $ U$ U $ )NzBoutput spec does not match with output! Expected DTensorSpec, got .)rD   r   zoutput tensor should be scalar!zAoutput spec does not match with output! Expected list/tuple, got )rQ   r)   r6   r	   rE   rF   rD   ndimrK   r&   ziprR   r`   r   )r  rC   res_listr   r   s        r0   r   OpDispatcher.wrapa  s   c5<<((!$44 XY]X^^_`4 s@Q@QRR xx1}G&GG}
dE]++#
4$(G(G STXSYYZ[G HC 1 1! 78 ' '1e&<&<5?J(J Jr2   
tensor_argc           
      v   UR                  5       S:X  a%  UR                  S:X  a  [        R                  " SSS9  UR                  5       S:X  d  U R                  (       aN  [        U[        5       4UR                  -  [        UR                  UR                  5       UR                  S9S9nU$ [        U S35      e)Nr5   zFound a non-scalar tensor with numel=1 and ndim!=0, we are implicitly creating a replicated DTensor for it. However, please consider changing it to a scalar tensor or explicitly create a DTensor under distributed environment.   )
stacklevelr=   )rA   z: got mixed torch.Tensor and DTensor, need to convert all torch.Tensor to DTensor before calling distributed operators! Please see https://docs.pytorch.org/docs/main/distributed.tensor.html#mixed-tensor-and-dtensor-operations for more details.)numelr	  warningswarnr   r	   r   r
   r7   r'   r>   r.   )r~   r    r  r   replication_specs        r0   r   2OpDispatcher._try_replicate_spec_for_scalar_tensor}  s     "z!';MMP  "d&F&F*!2!22&$**%,,.$**     ) % % r2   )r|   rr   rd   )r3   N)"__name__
__module____qualname____firstlineno____doc__r   propertyr   r   setterr)   _ops
OpOverloadr&   rL   dictr   r   r   r   r   r   r   staticmethodr   r   rH   r   r   r   r6   r	   r   __static_attributes__r   r2   r0   r`   r`   |   s   
Z BT B B !''G G$ G (G6&&6 FCK 6 S&[!	6
 6, -6. 
/6pq&&q FCK q 	q
 
qfW&&W FCK W S&[!	W
 !W (W W W W  W 
Wr 8383 (83 +/83 
	83 83tI&&I FCK I S&[!	I
 
IY&&Y FCK Y S&[!	Y
 Y 
Yv &  6  6" &&"  LL"  !	" 
 
" r2   r`   )Nr   loggingr  collections.abcr   typingr   r)   torch.distributeddistributedr   torch.distributed.tensor._apir/   _apirE    torch.distributed.tensor._random_randomr   torch._library.utilsr   torch._loggingr   )torch.distributed._functional_collectivesr   torch.distributed.device_meshr   &torch.distributed.tensor._dtensor_specr	   r
   )torch.distributed.tensor._nonlinear_reduxr   r   #torch.distributed.tensor._op_schemar   r   r   r   r   &torch.distributed.tensor._redistributer   'torch.distributed.tensor._sharding_propr   !torch.distributed.tensor._tp_convr   r   torch.distributed.tensor._utilsr   r   r   (torch.distributed.tensor.placement_typesr   r   r   torch.utils._debug_moder   torch.utils._python_dispatchr   torch.utilsr   rI   ImportErrorr   r*   r+   	getLoggerr  loggerr  r  r&   rL   r  r   r1   r   r:   r^   r`   r   r2   r0   <module>r=     s      $     / / 1 1 . % E 4 J  C L F 
 S R 9 D.1 yy~~			8	$@ZZ""@

@ f@""ZZ"""

" f" 
	")#ZZ"")#

)# f)# 
	)#Xc  c ]  .-.s   *E" "E10E1