
    ȅiFZ                        S SK r S SKrS SKJrJr  S SKJrJrJrJ	r	  S SK
JrJr  S SKJr  / SQrS\R                   S\\R                   /\R                   S-  4   4S jrS\R                   S	\\R                   \\\4   4   S
\S\\R                   /\R                   S-  4   4S jrS\R                   S\\\R                         S\\R                      S\\\4   S	\\R                   \\\4   4   4
S jrS\4S jr " S S\5      rS\R                   S\S\S\\\R                         S\4
S jr " S S\5      r " S S\5      rS\R                   S\S\S\S\4
S jrS\R                   S\S\S\\\R                         S\4
S jr S\R                   S\	\   S\4S  jr!S\R                   S\S\S\4S! jr"\" S"5      SS4S\R                   S\S\S#\S$\\\R                         S-  S%\\\R                         S-  S\4S& jjr#\RH                  \RJ                  1\#l&        \RN                  1\#l(        \ RR                  S'\S(\S\S)   4S* j5       r*S\R                   S+\S\R                   4S, jr+S\R                   S'\S\R                   4S- jr, S5S\R                   S.\S'\S\\R                      S/\\RZ                     S0\S1\S\.\R                   \4   4S2 jjr/ " S3 S45      r0g)6    N)ABCabstractmethod)Callable	GeneratorIterableSequence)Anycast)always_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                    ^^^ U 1mS[         R                  S[        S[         R                  S-  4UUU4S jjmT" U SS5        g)a9  
This applies ``fn`` to every module in the module tree of ``root_module``
following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
then this replaces the original module with the newly returned one in the
tree. Otherwise, ``fn`` should return ``None``, in which case the module is
not changed.
modulemodule_nameparent_moduleNc                   > U R                  5        H'  u  p4UT;  d  M  TR                  U5        T" XCU 5        M)     T" U 5      nUb  [        U[        R                  5      (       d  [        SU SU  35      eU(       d  [        SU  35      e[        U[        R                  5      (       d  [        SU 35      e[        X!U5        g g )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModuleAssertionErrorsetattr)	r   r   r   child_module_namechild_moduleoptional_module_post_order_apply_innerr   visited_moduless	         U/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/fsdp/wrap.pyr$   2_post_order_apply.<locals>._post_order_apply_inner-   s    
 06/D/D/F+?2##L1'P 0G V*&mRYY77$S$oU6(4  $006x9  oryy99$D_DUV  M@ '     )r   r   str)r   r   r$   r%   s    `@@r&   _post_order_applyr+      sP     (3mOA		AA yy4'A A6 KT2r(   target_module_to_kwargsfsdp_fnreturnc                 f   ^ ^^ S[         R                  S[         R                  S-  4UU U4S jjnU$ )z
This constructs the "wrap" function to pass to :func:`_post_order_apply`
based on ``target_module_to_kwargs``, which should be constructed from the
wrapping policy.
r   r.   Nc                 8   > U T;   a  U TLa  TU    nT" U 40 UD6$ g N )r   kwargsr-   r   r,   s     r&   r   _construct_wrap_fn.<locals>.fnV   s2     ,,{1J,V4F6,V,,r(   )r   r   )r   r,   r-   r   s   ``` r&   _construct_wrap_fnr5   K   s/    299 T!1   Ir(   module_classesignored_modulesroot_kwargsc                     [        [        U5      5      nU R                  5        H,  nXb;   a  M
  [        Xe5      (       d  M  Xd;  a  X4U'   S XF   S'   M.     U$ )Nmixed_precision)tuplesetmodulesr   )r   r6   r7   r8   r,   module_classes_tupler   s          r&   $_run_mixed_precision_override_policyr?   a   s`     !^!45%%'$5542=/AE#+,=> ( #"r(   c                      g)z
A simple recursive wrap policy that always returns ``True``. This means
that every submodule is wrapped by the wrapper class in
:func:`_recursive_wrap`.
Tr2   )argsr3   s     r&   r   r   u   s     r(   c                       \ rS rSrSr\S\R                  S\\R                     S\	\
\4   S\	\R                  \	\
\4   4   4S j5       rSrg	)
_Policy~   z_
This defines an abstract base class that represents a policy for applying
a module-level API.
r   r7   r8   r.   c                     g)zp
This should return a dict ``target_module_to_kwargs`` that maps from
each target module to wrap to its kwargs.
Nr2   )selfr   r7   r8   s       r&   _run_policy_Policy._run_policy   s     	r(   r2   N)__name__
__module____qualname____firstlineno____doc__r   r   r   r<   dictr*   r	   rG   __static_attributes__r2   r(   r&   rC   rC   ~   sk    
 
YY
 RYY
 #s(^	

 
biic3h'	(
 
r(   rC   r   recursenonwrapped_numelc                 <    U(       a  g[        U [        U5      5      $ )a  
This auto wrap policy wraps every module that is an instance of any type in
``module_classes`` as its own FSDP instance. The root module given by
``module`` is always wrapped as an FSDP instance regardless. Since the
wrapping proceeds bottom up, each FSDP instance manages the parameters in
its subtree excluding any already managed by a child FSDP instance.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.
    module_classes (Set[Type[nn.Module]]): Set of module classes that are
        wrapped as FSDP instances.

Returns:
    ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
    if ``recurse=False``.
Tr   r;   )r   rP   rQ   r6   s       r&   _module_wrap_policyrT      s    6 feN344r(   c                      ^  \ rS rSrSrS\\\R                        4S jr	S\R                  S\
\R                     S\\\4   S\\R                  \\\4   4   4S	 jrS
 rS\4U 4S jjrSrU =r$ )r      zo
This policy applies to every module of the specified module classes,
passing in the kwargs given to the root.
r6   c                 F    [        U5      nX l        [        U5      U l        g r1   )r<   _module_classesr*   _module_classes_str)rF   r6   module_classes_sets      r&   __init__ModuleWrapPolicy.__init__   s!     01#&'9#: r(   r   r7   r8   r.   c                     [        U R                  5      n0 nUR                  5        H4  nXb;   a  M
  [        Xd5      (       d  M  [        R                  " U5      XV'   M6     U$ r1   )r;   rX   r=   r   copy)rF   r   r7   r8   r6   r,   r   s          r&   rG   ModuleWrapPolicy._run_policy   s[     t334CE!))+F(F3326))K2H'/ , '&r(   c                 ,    [        XSU R                  S9$ )N)rQ   r6   )rT   rX   )rF   r   rP   rA   r3   s        r&   __call__ModuleWrapPolicy.__call__   s    "bAUAU
 	
r(   c                 B   > [         TU ]  5       SU R                   S3-   $ )N())super__repr__rY   )rF   	__class__s    r&   rh   ModuleWrapPolicy.__repr__   s&    w!a(@(@'A$CCCr(   )rX   rY   )rI   rJ   rK   rL   rM   r   typer   r   r[   r<   rN   r*   r	   rG   rb   rh   rO   __classcell__)ri   s   @r&   r   r      s    
;xRYY'@ ;
'YY' RYY' #s(^	'
 
biic3h'	(' 
D# D Dr(   r   c                       \ rS rSrSrS\\R                  /\\	\
\4   -  4   4S jrS\R                  S\\R                     S\	\
\4   S\	\R                  \	\
\4   4   4S	 jrS
rg)r      a  
This policy takes in a lambda function that maps a given ``nn.Module`` to
either ``False``, ``True``, or a kwarg dictionary.
- If the function returns ``False`` or an empty dictionary, then the module
  does not have the API applied.
- If the function returns ``True``, then the module has the API applied
  with the root's kwargs.
- If the function returns a non-empty dictionary, then the module has the
  API applied, and the dictionary overrides the root's kwargs.

Example::

    >>> # xdoctest: +SKIP("undefined variables")
    >>> model = init_transformer_model(...)
    >>> def lambda_fn(module: nn.Module):
    >>>     if module is model.lm_head:
    >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
    >>>     elif isinstance(module, TransformerBlock):
    >>>         return True
    >>>     return False
    >>> policy = CustomPolicy(lambda_fn)
    >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
	lambda_fnc                     Xl         g r1   
_lambda_fn)rF   ro   s     r&   r[   CustomPolicy.__init__   s    #r(   r   r7   r8   r.   c                 L   0 nUR                  5        H  nXR;   a  M
  U R                  U5      n[        U[        [        45      (       d  [        SU 35      eU(       d  MM  [        R                  " U5      n[        U[        5      (       a  UR                  U5        XtU'   M     U$ )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )r=   rr   r   rN   bool
ValueErrorr^   update)rF   r   r7   r8   r,   r   resr3   s           r&   rG   CustomPolicy._run_policy   s     DF!))+F(//&)CcD$<00 CCF%I  YY{+F#t$$ c".4F+! ," '&r(   rq   N)rI   rJ   rK   rL   rM   r   r   r   ru   rN   r*   r	   r[   r<   rG   rO   r2   r(   r&   r   r      s    0$(BII;tCH~8M+M"N $'YY' RYY' #s(^	'
 
biic3h'	('r(   r   ro   c                 "    U(       a  gU" U 5      $ )a  
A convenient auto wrap policy to wrap submodules based on an arbitrary user
function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
a `wrapper_cls` unit.

Return if a module should be wrapped during auto wrapping.

The first three parameters are required by :func:`_recursive_wrap`.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.

    lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
        this module will be wrapped.
Tr2   )r   rP   rQ   ro   s       r&   r   r     s    . Vr(   transformer_layer_clsc                     [        XX#5      $ )a  
See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
same as ``module_classes``. Note that shared parameters must be wrapped in
the same FSDP instance, so this auto wrap policy can help wrap shared
embeddings into the same FSDP instance for transformer models.
)rT   )r   rP   rQ   r{   s       r&   r   r   *  s     v0@XXr(   c                 <    U(       a  g[        U [        U5      5      $ )NTrS   )r   r6   rP   rA   r3   s        r&   _wrap_module_cls_individuallyr~   9  s      &%"788r(   c                 6   ^ ^^ [        U UU4S jU 5       5      $ )zj
A policy that wraps ``module`` if any policy in the passed in iterable of
``policies`` returns ``True``.
c              3   4   >#    U  H  nU" TTTS 9v   M     g7f)r   rP   rQ   Nr2   ).0policyr   rQ   rP   s     r&   	<genexpr>_or_policy.<locals>.<genexpr>O  s"      F 	fg@PQs   )any)r   rP   rQ   policiess   ``` r&   
_or_policyr   E  s        r(   g    חAmin_num_paramsforce_leaf_modulesexclude_wrap_modulesc                     Uc  [         R                  OUnUc  [         R                  OUnUnX&:  nU(       a#  U=(       a    [        U [	        U5      5      (       + $ U=(       a    [        U [	        U5      5      (       + $ )a{  
A size-based auto wrap policy.

Args:
    module (nn.Module): Current module being considered.
    recurse (bool): If ``False``, then this function must decide whether
        ``module`` should be wrapped as an FSDP instance or not. If
        ``True``, then the function is still recursing down the module
        tree as a part of the DFS.
    nonwrapped_numel (int): Parameter numel not yet wrapped.

    min_num_params (int): Customizable policy input that controls the size
        threshold over which a module is ready to be wrapped. This is in
        units of numel.
    force_leaf_modules (Optional[set[type[nn.Module]]]): Set of module types to keep
        as leaves, i.e. their children will never be wrapped.
    exclude_wrap_modules (Optional[set[type[nn.Module]]]): Set of module types to be
        excluded in wrapping.

Returns:
    Whether ``module`` should be wrapped.
)r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr   r;   )r   rP   rQ   r   r   r   min_nonwrapped_numelis_larges           r&   r   r   U  s    B % 	$66   ' 	$88!  *7HM
659K3L MMM O
659M3N OOOr(   wrapper_clswrapper_kwargs)NNNc              +   j   #    SU 0UEn[        S0 UD6   Sv   SSS5        g! , (       d  f       g= f7f)a  
Context manager to wrap modules using a wrapper.

Useful for when you'd like to apply the same configuration arguments to all
child modules that you wrap. A particularly important use case is wrapping
large layers so that they get sharded (in-place) during initialization, to
avoid running out of system memory. Large layers can indicate that they
should be sharded via the ``wrap`` annotation and this context manager can
provide the exact configuration for these nested instances.

Usage::

    with enable_wrap(wrapper_cls, **params):
        # Wraps layer in FSDP by default if within context
        self.l1 = wrap(torch.nn.Linear(5, 5))

Args:
    wrapper_cls:
        Class that `wrap` annotation will `wrap` modules with, such as
        `FullyShardedDataParallel`.
    **wrapper_kwargs:
        Configuration settings that will be passed to all ``wrap``
        instances inside the context
r   Nr2   )_ConfigAutoWrap)r   r   r3   s      r&   r   r     s6     : 	{
F 
	"6	" 
#	"	"s   3"	3
03wrap_overridesc                     [         R                  (       aK  [         R                  c  [        S5      e0 [         R                  EUEn[        U [         R                  40 UD6$ U $ )a  
Annotate that a module should be wrapped. Annotated modules will only be
wrapped if inside of an :func:`enable_wrap` context manager. This allows
a module to be initialized both with and without a wrapper without code
change.

The class that this function wraps the passed in ``nn.Module`` with is the
passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
the ``wrapper_cls`` instance. In the case of duplicate kwargs in
``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
respected.

Usage::

    with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
        # Wraps layer in FSDP by default if within context
        self.l1 = wrap(torch.nn.Linear(5, 5))

Args:
    module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
    **wrap_overrides: configuration overrides that will take priority over
        the values provided by the :func:`enable_wrap` context
z.Expected _ConfigAutoWrap.wrapper_cls to be set)r   in_autowrap_contextr   r   r3   _wrap)r   r   s     r&   r   r     sd    2 **&&. !QRREO22EnE''
 
 	

 Mr(   c                     Uc  [        S5      e[        U S5      (       a  0 UEU R                  EnU" U 40 UD6$ U" U 40 UD6$ )NzExpected wrapper_cls to be set_wrap_overrides)r   hasattrr   )r   r   r3   	overridess       r&   r   r     sY    =>>v())
 9v8!7!78	6/Y//v(((r(   auto_wrap_policyignored_paramsonly_wrap_childrenr3   c           
      `  ^ Uc  [        S5      eUc  [        S5      eU R                  5        H=  u  pxX;   a  M   [        U[        [        U5      5      (       a  [        SU SU 35      eM?     [        U4S jU R                  5        5       5      n	Uc  [        S5      eU" U SU	S9(       an  S	n
U R                  5        H/  u  pX;   a  M  [        SUUUUTS
.UD6u  p[        XU5        X-  n
M1     X-
  nU(       d  U" U SUS9(       a  [        X40 UD6U	4$ X
4$ U S	4$ ! [
         a     M  f = f)a  
Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
``True`` with ``wrapper_cls``.

Args:
    module (nn.Module): Module to recursively wrap.
    auto_wrap_policy (Callable): A callable representing a policy that
        determines which modules to recursively wrap with ``wrapper_cls``.
    ignored_modules (set[torch.nn.Module]): Modules to ignore when
        wrapping.
    ignored_params (set[torch.nn.Parameter]): Parameters to ignore when
        wrapping; these should be the parameters contained in the modules
        in ``ignored_modules``.
Returns:
    (nn.Module, int):
        ``module`` after wrapping and the numel recursively wrapped.
zMust specify auto_wrap_policy.zMust specify wrapper_clszChild module z is already wrapped by c              3   R   >#    U  H  oT;  d  M
  UR                  5       v   M     g 7fr1   )numel)r   pr   s     r&   r   "_recursive_wrap.<locals>.<genexpr>  s$      .a>2I			.s   	''z#Expected auto_wrap_policy to be setTr   r   )r   r   r   r7   r   Fr2   )r   named_modulesr   r
   rk   	TypeErrorsum
parametersr   _recursive_wrapr    r   )r   r   r   r7   r   r   r3   _childrQ   total_wrapped_numelnamewrapped_childnum_wrapped_params	remainders       `          r&   r   r     s   4 =>>788((*#	%dK!899$#E7*A+O  :	 +  !,,.  BCCvtFVW!002KD'0? 1!1' /-1 1-M F-05 3  %:	!&659'
 779III..19M  		s   0D
D-,D-c                       \ rS rSr% SrSr\\S'   Sr\	S-  \S'   0 r
\\\4   \S'   S\\\4   4S jr\S\S	S4S
 j5       r\SS j5       rSS jrS\S\S\S	S4S jrSrg)r   i8  z
Helper class to wrap modules based on default config args via a context manager.
See :func:`enable_wrap` for more information.
Fr   Nr   r3   c                     Xl         g r1   r3   )rF   r3   s     r&   r[   _ConfigAutoWrap.__init__B  s    r(   r.   c                     [         R                  (       a  [        S5      eS[         l        SU ;  a  [        S5      e[	        [
        U S   5      [         l        U S	 U [         l        g )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Tr   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)r   r   NotImplementedErrorr   r
   r   r   r3   r   s    r&   enable_autowrap_context'_ConfigAutoWrap.enable_autowrap_contextE  sc    ..%o  /3+& K  '+8VM5J&K#=!!'r(   c                  F    S[         l        S [         l        0 [         l        g )NF)r   r   r   r3   r2   r(   r&   disable_autowrap_context(_ConfigAutoWrap.disable_autowrap_contextV  s    .3+&*#!#r(   c                 :    U R                  U R                  5        g r1   )r   r3   )rF   s    r&   	__enter___ConfigAutoWrap.__enter__\  s    $$T[[1r(   exc_typeexc_valexc_tbc                 $    U R                  5         g r1   )r   )rF   r   r   r   s       r&   __exit___ConfigAutoWrap.__exit___  s    %%'r(   r   )r.   N)rI   rJ   rK   rL   rM   r   ru   __annotations__r   r   r3   rN   r*   r	   r[   staticmethodr   r   r   r   rO   r2   r(   r&   r   r   8  s    
 !&%#'KD'FDcNc3h  ( ( ( (  $ $
2( (s (C (D (r(   r   )F)1
contextlibr^   abcr   r   collections.abcr   r   r   r   typingr	   r
   torch.nnr   __all__r   r+   rN   r*   r5   rk   r<   r?   ru   r   rC   intrT   r   r   r   r   r~   r   r   
ModuleList
ModuleDictr   MultiheadAttentionr   contextmanagerr   r   r   	Parameterr;   r   r   r2   r(   r&   <module>r      s     # C C  	)3)3"))bii$../)3X!"))T#s(^";<  ryyk299t++,	,##T"))_-# ^# c3h	#
 ""))T#s(^";<#(4 c (5II55 5 RYY(	5
 
5@"Dw "DJ4'7 4'nII $8;HP	8YIIYY Y tBII/	Y
 
Y	9II	9'/~	9@D	9II 
 
* c(6:8<3PII3P3P 3P
 3P DO,t33P d299o.53P 
3Pn 57MM2==3Q  0242G2G1H  .   +.     F# #c #bii #L)")) )( ) )(  %MIIMM M ^	M
 %M M M 299c>M`(( ((r(   