
    ȅiL                         S SK r S SKJr  S SKrS SKJr  S SKJrJr  S SKJ	r	  S/r
  SS S.S\R                  S\S-  S	\	\\\	4   -  S-  S
\S-  S\R                  4
S jjjrg)    Nfnmatch)_mesh_resources
DeviceMesh)ParallelStyleparallelize_modulesrc_data_rankmoduledevice_meshparallelize_planr
   returnc          
      *  ^ [         R                  R                  S5        U=(       d    [        R                  " 5       nUc  [
        R                  " SSS9  U $ [        U[        5      (       a  X2l	        UR                  X5      $ [        U[        5      (       a  UR                  5        H  u  pEUS:X  a  [        XU5        M  UR                  S5      nUS   m[        [!        U4S jU R#                  5       5      5      nU(       d!  [
        R                  " S	U S
T SU  S3SS9  M~  UR%                  S5        U H8  u  pU(       a   SR'                  U5      n
[        U	UX0US9  M,  [        U	UUUS9  M:     M     U $ [)        S[+        U5       S35      e)a,
  
Apply Tensor Parallelism in PyTorch by parallelizing modules or sub-modules based on a user-specified plan.

We parallelize module or sub_modules based on a parallelize_plan. The parallelize_plan contains
:class:`ParallelStyle`, which indicates how user wants the module or sub_module
to be parallelized.

User can also specify different parallel style per module fully qualified name (FQN).

Note that ``parallelize_module`` only accepts a 1-D :class:`DeviceMesh`, if you have a 2-D or N-D :class:`DeviceMesh`,
slice the DeviceMesh to a 1-D sub DeviceMesh first then pass to this API(i.e. ``device_mesh["tp"]``)

Args:
    module (:class:`nn.Module`):
        Module to be parallelized.
    device_mesh (:class:`DeviceMesh`, optional):
        Object which describes the mesh topology of devices for the DTensor.
        If not specified, the call must be under a DeviceMesh context.
    parallelize_plan (Union[:class:`ParallelStyle`, Dict[str, :class:`ParallelStyle`]], optional):
        The plan used to parallelize the module. It can be either a
        :class:`ParallelStyle` object which contains how we prepare
        input/output for Tensor Parallelism or it can be a dict of module
        FQN and its corresponding :class:`ParallelStyle` object. If not
        specified, the call will do nothing at the moment.
Keyword args:
    src_data_rank (int, optional): the rank of the source data for the logical/global tensor, it is used by
        :meth:`distribute_tensor` to scatter/broadcast the shards/replicas to other ranks. By default,
        we use ``group_rank=0`` on each DeviceMesh dimension as the source data to preserve the single-device
        semantic. If passing ``None`` explicitly, :meth:`parallelize_module` simply uses its local data instead
        of trying to preserve the single-device semantic via scatter/broadcast. Default: 0
Return:
    A :class:`nn.Module` object parallelized.

Example::
    >>> # xdoctest: +SKIP("distributed")
    >>> from torch.distributed.tensor.parallel import parallelize_module, ColwiseParallel
    >>> from torch.distributed.device_mesh import init_device_mesh
    >>>
    >>> # Define the module.
    >>> m = Model(...)
    >>> tp_mesh = init_device_mesh("cuda", (8,))
    >>> m = parallelize_module(m, tp_mesh, {"w1": ColwiseParallel(), "w2": RowwiseParallel()})
    >>>

.. note:: For complex module architecture like Attention, MLP layers, we recommend composing
    different ParallelStyles together (i.e. ``ColwiseParallel`` and ``RowwiseParallel``) and pass
    as a parallelize_plan, to achieves the desired sharding computation.
z4torch.distributed.tensor.parallel.parallelize_modulezNo parallelize_plan is provided and auto-parallel is not supported at the moment, so this parallelize_module call will do nothing.   )
stacklevel .r   c                 "   > [        U S   T5      $ )Nr   r   )ttokens    _/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/tensor/parallel/api.py<lambda>$parallelize_module.<locals>.<lambda>f   s    gadE2    zParallelize plan key 'z6' could not be resolved: no submodule matching token 'z' in module z, skipping this plan entry.r	   zLExpect Union[ParallelStyle, Dict[str, ParallelStyle]] for parallelize_plan, z found!)torch_C_log_api_usage_oncer   get_current_meshwarningswarn
isinstancer   r
   _applydictitemsr   splitlistfilternamed_childrenpopjoin	TypeErrortype)r   r   r   r
   module_pathparallelize_stylepath_splitsmatched_children_	submodule	leaf_pathr   s              @r   r   r      s   n 
HH  !WXC!A!A!CKN	

 
 "M22)6&&&v;;	$d	+	+.>.D.D.F*Kb "68IJ%++C0K  NE#2))+  $,[M :4497,vh O01  !	  OOA 0 # 5I&!#"6&3	 '!#)&3	 !1? /Gb ""&'7"8!9B
 	
r   )NN)r   r   r   torch.nnnntorch.distributed.device_meshr   r   'torch.distributed.tensor.parallel.styler   __all__Moduler#   strintr    r   r   <module>r=      s        E A  
 
 &*HL@

 !"@
II@
d"@
 $d3+=&>>E@

 :@
 YY@
r   