
    ȅi#                     X   S SK r S SKJr  S SKJrJr  S SKrS SKJr  S SK	J
r
  S SKJr  SSKJrJr  Sr " S	 S
\
5      rS\S\S\4S jr " S S5      r\" \S9 SS\R&                  S\\R                  R&                     S-  S\R&                  4S jj5       rS\R&                  S\4S jrg)    N)Iterable)AnyNoReturn)_State)DistributedDataParallel   )_get_registrycontract c                     ^  \ rS rSr% \R
                  \S'   SU 4S jjr\4S\	R                  S\\	R                     S\\	R                     S	\SS4
S
 jjrSS jrS\	R                  S\\	R                     SS4S jrSS jrSS jrS\	R                  S\\S4   S\\\4   S\4S jrS\	R                  S\\R0                     S\R0                  S\R0                  4S jrSrU =r$ )_ReplicateState   _ddp_weakrefreturnNc                    > [         TU ]  5         [        R                  " 5       U l        SU l        [        R                  " 5       U l        U R                  U l        / U l        SU l	        S U l
        0 U l        / U l        g )NF)super__init__nnParameterListmodulehas_initialized_param_list_orig_module_param_names_no_sync
_init_args_init_kwargs_comm_hook_args)self	__class__s    a/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/_composable/replicate.pyr   _ReplicateState.__init__   sk    !#!1!1!3%*-/-=-=-? !KK')#26,.*,    r   ignored_modulesignored_paramsprefixc                 l   [        U5      (       a  g X;   a  g U[        :w  a  U S3O[        nUR                  SS9 HF  u  pgXs;  d  M  U R                  R	                  U5        U R
                  R	                  U U 35        MH     UR                  5        H  u  pU R                  U	UUU U 3S9  M     g )N.F)recurse)r&   )_is_fully_sharded_ROOT_MODULE_PREFIXnamed_parametersr   appendr   named_children_collect_params)
r   r   r$   r%   r&   recurse_prefixnpnamechild_modules
             r!   r/   _ReplicateState._collect_params"   s     V$$ $ #&99vhaL?R 	 ++E+:DA&  ''*!!((N+;A3)?@ ;
 #)"7"7"9D  ()$0	 !  #:r#   c                 N   ^  [         R                  " SS9U 4S j5       nU" 5         g )NT)	recursivec                     > T R                   c   eT R                  " T R                   0 T R                  D6  T R                  5         ST l         0 T l        g )N )r   initr   register_comm_hook)r   s   r!   
_lazy_init-_ReplicateState.lazy_init.<locals>._lazy_initC   sI    ??...IIt<$*;*;<##% DO "Dr#   )torch_disable_dynamo)r   r<   s   ` r!   	lazy_init_ReplicateState.lazy_initB   s'    				.	# 
/	# 	r#   c                    U R                   (       a  g SU l         Xl        U VVs1 s H  oDR                  5         H  oUiM     M     nnnUR                  5        H4  n[	        U5      (       d  M  UR                  UR                  5       5        M6     SSKJn  U" XS9  U R                  XU5        SU;   a]  US   bA  US   n	[        U	[        R                  5      (       a  U	R                  S:X  a  S US'   OU	/US'   OS US'   UR                  S5        [        U R                  40 UD6U l        ["        R$                  " U R                   5      [&        R)                  U R                  5      l        g s  snnf )NTr   )_localize_dtensor)r%   	device_idcpu
device_ids)r   r   
parametersmodulesr*   update%torch.distributed.tensor.parallel.ddprC   r/   
isinstancer>   devicetypepopr   r   _ddpweakrefref	replicatestater   )
r   r   r$   kwargsmr2   r%   	submodulerC   rD   s
             r!   r:   _ReplicateState.initM   s6    #%4M_lln!n!_M)I ++%%i&:&:&<= * 	L&@VnE& 
 k".";/	i669>>U;R+/F<( -6;F<('+|$JJ{#+D,<,<GG	4;KK		4J	$1; Ns   "E1c                     U R                    H!  u  pU R                  R                  " U0 UD6  M#     U R                   R                  5         g N)r   rO   r;   clear)r   	comm_argscomm_kwargss      r!   r;   "_ReplicateState.register_comm_hookw   s>    &*&:&:"III(()C{C ';""$r#   c                     Xl         X l        g rY   )r   r   r   argsrT   s      r!   record_init_args _ReplicateState.record_init_args|   s    "r#   r`   .rT   c                     U R                   (       d  U R                  (       a  U R                  5         U R                  (       + U R                  l        U R                  R                  " U0 UD6$ rY   )r   r   r@   r   rO   require_backward_grad_sync_pre_forward)r   r   r`   rT   s       r!   forward_pre_hook _ReplicateState.forward_pre_hook   sJ     ??d//NN37==/@		,yy%%t6v66r#   inputoutputc                 8    U R                   R                  U5      $ rY   )rO   _post_forward)r   r   rh   ri   s       r!   forward_post_hook!_ReplicateState.forward_post_hook   s     yy&&v..r#   )
r   rO   r   r   r   r   r   r   r   r   r   N)__name__
__module____qualname____firstlineno__rP   rQ   __annotations__r   r+   r   Moduleset	Parameterstrr/   r@   r:   r;   ra   tupler   dictrf   r>   Tensorrl   __static_attributes____classcell__)r    s   @r!   r   r      s!   ++-& *		 RYY BLL)	
  
@	(K		(K RYY(K
 
(KT%
#7ii7',S#X7@DS#X7	7/		/ U\\"/ 	/
 
/ /r#   r   r`   rT   r   c                      [        S5      e)NzGDDP does not support deepcopy. Please use state dict for serialization.)AssertionError)r`   rT   s     r!   unimplemented_deepcopyr      s    
Q r#   c                   6    \ rS rSrS rS\SS4S jrS	S jrSrg)
DDP   c                 L    U R                   S   nUR                  " U/UQ70 UD6$ )z
Override ``__new__`` to remove the DDP class and directly construct
the original class for cases like indexing into a container module.
   )__mro____new__)clsr`   rT   orig_clss       r!   r   DDP.__new__   s,     ;;q>:4:6::r#   requires_gradient_syncr   Nc                 B    U(       + [         R                  U 5      l        g)z
Sets if the module should sync gradients. This can be used to implement
gradient accumulation without communication.

Args:
    requires_gradient_sync (bool): Whether to reduce gradients for the
        module's parameters.
N)rR   rS   r   )r   r   s     r!   set_requires_gradient_syncDDP.set_requires_gradient_sync   s     .D)C	&r#   c                 b    [         R                  U 5      R                  R                  X45        g rY   )rR   rS   r   r-   r_   s      r!   r;   DDP.register_comm_hook   s!    --44d^Dr#   r9   rn   )	ro   rp   rq   rr   r   boolr   r;   r{   r9   r#   r!   r   r      s#    ;	D 	D$ 	DEr#   r   )	state_clsr   r$   c                     [         R                  R                  S5        SU;   aB  [        US   [        [         R
                  45      (       d  [        S[        US   5       35      e[        U 5      (       a  [        S5      eUc  0 nO[        U5      n[        R                  U 5      nU R                  UR                  SS9  UR                  S5      nUb?  UR                  5       nXT:w  a*  SS	KJnJn  U R                  U5        U R'                  U5        U R'                  UR(                  5        UR*                  " X40 UD6  U R,                  nS
[.        0n	[        SUR0                   3[2        U4U	5      n
Xl        U $ )zReplicates a module

Args:
    module (torch.nn.Module): module to replicate

Example::
    >>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
    >>> module = nn.Linear(3, 3)
    >>> replicate(module)
ztorch.distributed.replicaterD   z6Expected device_id to be int or torch.device, but got zGCannot apply `replicate()` on a Module already managed by `fully_shard`T)with_kwargsdevice_meshr   )rC   _reconstruct_dtensor__deepcopy__r   )r>   _C_log_api_usage_oncerK   intrL   RuntimeErrorrM   r*   ru   rR   rS   register_forward_pre_hookrf   get_get_root_meshrJ   rC   r   register_forward_hookrl   ra   r    r   ro   r   )r   r$   rT   rS   r   	root_meshrC   r   r   dctnew_clss              r!   rR   rR      st     
HH  !>? f&-U\\/BCC{ 3457 
   U
 	
 o.OOF#E
$$U%;%;$N**]+K..0	 #
 ,,-AB(():;
  !8!89	6=f= 

C1
2CS'#sS9GMr#   c                 *    [        U 5      nUc  gSU;   $ )z+Check if module is marked with fully_shard.Ffully_shard)r	   )r   registrys     r!   r*   r*      s     V$HH$$r#   rY   )rP   collections.abcr   typingr   r   r>   torch.nnr   #torch.distributed._composable_stater   torch.nn.parallelr   r
   r	   r+   r   r   r   rt   rR   r   r*   r9   r#   r!   <module>r      s     $     6 5 -  }/f }/@#   E E4 
O$ 9=CIICehhoo.5C YY	C %CL%bii %D %r#   