
    ȅia                     d   % S SK r S SKrS SKrS SKrS SKJrJrJr  S SKJ	r	J
r
Jr  S SKJr  S SKJrJrJrJr  S SKrS SKJr  S SKJr  S SKJr  S SKJrJrJrJrJ r J!r!  S SK"J#r#  S S	K$J%r%J&r&J'r(J)r)J*r*J+r+J,r,J-r-  S S
K.J/r/J0r0  S SK1J2r2  S SK3J4r4  S SK5J6r7  S SK8J9r9  / SQr:Sr;Sr<Sr=Sr>\?\@   rA\\2\\R                  \C\D\@4   rE\\E\F\E   \G\E   \H\@S4   4   rI\H\@\I4   rJ\F\J   rK\H\@\J\K-  4   rL\?" 5       rM\?\   \NS'   \ R                  S 5       rP\
 " S S5      5       rQ\
 " S S\Q5      5       rR   SIS\R                  S\@S\@S\TS\TS \A4S! jjrU " S" S#5      rVSJS$ jrWSSS%.S\R                  S&\G\R                  R                  S'4   S(\TS)\?\R                     S-  S*\QS-  S \R4S+ jjrZS,\H\@\I4   S-\LS.\RS S4S/ jr[S0\R                  \R                  R                  -  S1\@S \4S2 jr\S3\H\@\4   S.\RS \H\@\4   4S4 jr]\R                  " 5       S\R                  S.\RS \H\@\I4   4S5 j5       r_\R                  " 5       S\R                  S3\H\@\I4   S.\RS \44S6 j5       r`S7\R                  R                  S S4S8 jraS3\LS \H\@\I4   4S9 jrbS7\R                  R                  S3\H\@\I4   S.\RS \L4S: jrc\R                  " 5       S\R                  S;\G\R                  R                  S'4   S.\RS \L4S< j5       rdS\R                  S7\R                  R                  S-\LS.\RS \L4
S= jre\R                  " 5       S\R                  S;\G\R                  R                  S'4   S3\LS.\RS S4
S> j5       rfSSS%.S\R                  S)\?\R                     S-  S*\QS-  S \H\@\I4   4S? jjrgSSS%.S\R                  S;\R                  R                  \\R                  R                     -  S)\?\R                     S-  S*\QS-  S \L4
S@ jjrhSSS%.S\R                  S;\R                  R                  \\R                  R                     -  S)\?\R                     S-  S*\QS-  S \G\H\@\I4   \L4   4
SA jjriS\R                  S3\H\R                  \H\@\I4   4   \H\@\I4   -  S \H\@\I4   4SB jrjSSC.S\R                  S,\H\@\I4   S*\QS-  S \44SD jjrkSSC.S\R                  S;\R                  R                  \\R                  R                     -  S-\LS*\QS-  S S4
SE jjrlSSC.S\R                  S;\R                  R                  \\R                  R                     -  S,\H\@\I4   S-\LS*\QS-  S \44SF jjrm\SSC.S\R                  S*\QS-  S S4SG jj5       rn\SSC.S\R                  S;\G\R                  R                  S'4   S*\QS-  S S4SH jj5       rog)K    N)Callable	GeneratorIterable)asdict	dataclassfield)chain)Anycastno_type_checkUnion)ShardedTensor)_broadcast_state_dict_distribute_state_dict_flatten_state_dict_gather_state_dict_offload_state_dict_to_cpu_unflatten_state_dict)_CHECKPOINT_PREFIX)FullOptimStateDictConfigFullStateDictConfigFullyShardedDataParallelOptimStateDictConfigShardedOptimStateDictConfigShardedStateDictConfigStateDictConfigStateDictType)._get_module_fsdp_state_if_fully_sharded_moduleFSDP_WRAPPED_MODULE)DTensor)_IncompatibleKeys)DistributedDataParallel)tree_map_only)FQNS_TPrimitiveType	ValueTypeDictValueTypeListDictValueTypeOptimizerStateTypeStateDictOptionsget_model_state_dictget_optimizer_state_dictget_state_dictset_model_state_dictset_optimizer_state_dictset_state_dict_flat_paramparam_groupsparamsstater&   _patched_state_dictc               #      #    [         R                  " 5       n [         R                  " 5          S v   U (       a  [         R                  " 5         g g ! U (       a  [         R                  " 5         f f = f7fN)gc	isenableddisableenable)
is_enableds    a/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/checkpoint/state_dict.py_gc_contextr>   Q   sC     JJJLIIK :IIK s   +A2A A2A//A2c                       \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\\S'   Sr
\\S'   Sr\\S	'   Sr\\S
'   Sr\\S'   Sr\\S'   Srg)r*   \   a   
This dataclass specifies how get_state_dict/set_state_dict will work.

- ``full_state_dict``: if this is set to True, all the tensors in the
  returned state_dict will be gathered. No ShardedTensor and DTensor
  will be in the returned state_dict.

- ``cpu_offload``: offload all the tensors to cpu. To prevent CPU OOM, if
  ``full_state_dict`` is also true, then only the rank0 will get the
  state_dict and all other ranks will get empty state_dict.

- ``ignore_frozen_params``: if the value is True, the returned state_dict
  won't contain any frozen parameters -- the ``requires_grad`` is False.
  The default value is False.

- ``keep_submodule_prefixes`` (deprecated): when ``submodules`` is not None, this option
  indicates whether to keep the submodule prefixes from the state_dict keys.
  or example, if the submodule is ``module.pretrain`` and the full FQN of
  the parameter is ``pretrain.layer1.weight`` of the param. When this option
  is True, the parameter's key in the returned state_dict will be
  ``pretrain.layer1.weight``. If the options is False, the key will be
  ``layer1.weight``.
  Note that if ``keep_submodule_prefixes`` is False, there may be conflicted
  FQNs, hence there should be only one submodule in ``submodules``.

- ``strict``: the ``strict`` option when ``set_state_dict`` calls
  model.load_state_dict().

- ``broadcast_from_rank0``: when the option is True, rank0 should receive a
   full state_dict and will broadcast the tensors in the state_dict/
   optim_state_dict one by one to other ranks. Other ranks will receive
   the tensors and shard according to the local shards in the model and
   optimizer. ``full_state_dict`` must be set to True when using this option.
   This option currently only supports DTensor, not the legacy ShardedTensor.
Ffull_state_dictcpu_offloadignore_frozen_paramsTkeep_submodule_prefixesstrictbroadcast_from_rank0flatten_optimizer_state_dict_fqn_modifiersdsd_fqn_modifiers N)__name__
__module____qualname____firstlineno____doc__rA   bool__annotations__rB   rC   rD   rE   rF   rG   rI   str__static_attributes__rJ       r=   r*   r*   \   s_    "H "OT!K!&$&$(T(FD!&$&). $.-s-rT   r*   c                   H   \ rS rSr% \" \S9r\\\R                  -  \
\R                  -  4   \S'   \" \S9r\\\R                  -  \
\R                  -  4   \S'   \" \S9r\\   \S'   Sr\\S'   Sr\\S'   \R&                  r\\S	'   \" \S9r\\R2                     \S
'   Srg)_StateDictInfo   )default_factoryfqn_param_mappingshared_params_mappingsubmodule_prefixesThandle_modelhandle_optimfsdp_contextfsdp_modulesrJ   N)rK   rL   rM   rN   r   dictrY   rR   torchTensorr$   rQ   rZ   setr[   r\   rP   r]   
contextlibnullcontextr^   r   listr_   nnModulerS   rJ   rT   r=   rV   rV      s    
 	d# tell	 $ 	d# 4ell	 $ $)#=C=L$L$'33L(3$)$$?L$ryy/?rT   rV   modelnamerI   skip_ddp_prefixskip_compiler_prefixreturnc                 `   UR                  [        S5      nSU;  a  U1$ UR                  S5      n/ nU n[        U5       GH@  u  p[	        U[
        5      (       a=  U	S:w  a  [        SU	 S35      eUR                  nU(       d  UR                  U	5        MV  MX  [	        U[        5      (       a  U[        U5      S-
  :  a^  XXS-      [        :X  aO  SR                  U5      n
[        U[        5      nU
(       a  U
 S3n
UR                   Vs1 s H  o U 3iM
     sns  $ [        U[        5      nU	[        :w  a  UR                  U	5        [        Xy5      nGM  GM  [	        U[         R"                  R$                  R&                  5      (       a?  U	S:w  a  [        SU	 S35      eUR(                  nU(       d  UR                  U	5        GM  GM  [+        Xr5      (       aA  [        Xr5      " 5       R-                  U	5      =n(       a  [+        X}5      (       a  [        X}5      nUR                  U	5        U	[.        R0                  R                  R2                  :X  a   U[        U5      S-
  :w  a  [5        S	5      eGM5  [        Xy5      nGMC     SR                  U5      R                  [        S5      1$ s  snf )
a  
This API is used to convert the name of a parameter to the FQNs. For FSDP
without `use_orig_params`, the name of FlatParameter can be mapped to
multiple original parameters. As a result, the return type of this function
is `set[str]`.

Args:
    module (nn.Module): the root model.
    name (str): the name
    skip_ddp_prefix (bool): whether to skip DDP's `module` prefix

Returns:
    The canonical FQNs based on the model traversal.
 .modulezExpected 'module', got ''   	_orig_modzExpected '_orig_mod', got 'z-Expect `_extra_state` to be the last obj name)replacer   split	enumerate
isinstanceDDPAssertionErrorrq   appendFSDPlen_FLAT_PARAMjoingetattr_fqnsr   ra   _dynamo
eval_frameOptimizedModulert   hasattrgetrg   modules_EXTRA_STATE_KEY_SUFFIXRuntimeError)ri   rj   rI   rk   rl   	obj_namesfqn_obj_namescurr_objicurr_obj_nameprefix
flat_paramfqnremoved_fqns                 r=   	_get_fqnsr      se   . <<*B/D
$v

3IMH%i0h$$($'?a%PQQH"$$]3 #$''3y>A%%)E*:k*I-0$X{;
 &xq\F4>4D4DE4DS(3%(4DEEx)<=H 33$$]3"8; 4 %--":":"J"JKK+$'B=/QR%STT))H'$$]3 (
 x33")("F"H"L"L!# ;  x55#*8#A  /

 1 1 I III**&'VWW + #8;Q 1T HH]#++,>CDD; Fs   	J+c                       \ rS rSrSrg)_EXTRA_STATE   rJ   N)rK   rL   rM   rN   rS   rJ   rT   r=   r   r      s    rT   r   c              #      ^^^#    [        5       mS[        R                  S[        S[        4UUU4S jjmT" U S5       S h  vN   g  N7f)Nrq   curr_fqnrm   c              3     >#    TR                  U 5        U(       a  U S3OSnU R                  5        H]  u  p#UT;   a  M  [        U T5      (       a)  U[        U T5      " 5       R	                  5       ;   a  US S nOU U 3nT" X45       S h  vN   M_     [        U R                  SS9U R                  SS95       H!  u  p%X R                  ;   a  M  U U 3nXE4v   M#     [        U R                  S[        R                  R                  5      [        R                  R                  :w  a7  U [        R                  R                  R                   3nU[!        5       4v   g g  N7f)Nrp   ro   F)recurseget_extra_state)addnamed_childrenr   r   valuesr	   named_buffersnamed_parameters_non_persistent_buffers_set	__class__rg   rh   r   r   rq   r   r   )	rq   r   rj   	submodulenew_fqnobjrI   r   visited_moduless	         r=   r   +_iterate_valid_model_state.<locals>.recurse   s]    F#%-hZq>2%446ODO+  122GF,=>@GGII #3B-%Jtf-y222  7    /1H1HQV1H1W
ID 999!
4&)G,
 F$$&79R9RSyy(() "
2::#4#4#L#L"MNG<>))	) 3s   BE$E"CE$ro   )rc   rg   rh   rR   r   )ri   rI   r   r   s    `@@r=   _iterate_valid_model_stater      sC     &)eO *		  *S  *Y  *  *D ub!!!s   =A
AA
)
submodulesoptionsoptims.
optim_onlyr   r   c                ~   U(       a  [         R                  " S[        SS9  U(       a  U(       d  [        S5      eU=(       d
    [	        5       n0 n0 n[        U 5       H  u  px[        U[        5      (       a  M  [        X5      n	UR                  U5      n
U
b/  [        [        [           XX   5      R                  U	5        XX   Xh'   OU	R                  5       XX'   U	 H  n
[        U[        5      (       a  M  XU
'   M      M     [        UR!                  5       5       H*  u  pU H  n
[        ["        R$                  U5      Xj'   M!     M,     [        5       nU(       ah  [        U5      nU R'                  5        HI  u  p~X;  a  M  [        X5      n	[)        U	5      S:w  a  [+        S5      eUR                  S U	 5       5        MK     UR,                  (       a  UR.                  (       d  [1        S5      e[2        R4                  " U 5      nU(       a  UR.                  (       a`  [7        UR8                  UR8                  S	9n[;        UR8                  UR8                  =(       d    UR,                  S	9n[<        R>                  nO6[A        UR8                  S
9n[C        UR8                  S
9n[<        RD                  n[F        RH                  S 5       n[J        RL                  " UU UUUS9nO[F        RN                  n[Q        S0 [S        U5      DUUUU[        [        [T        RV                     U5      U(       + [)        U5      S:  S.D6$ )zO
Verify the model and options passed by the user and generates _StateDictInfo.
zGetting submodules only model/optim state_dict is deprecated and will be removed in 2.5. This feature can be achieved by manually filtering out the state_dict returned from get_state_dict.   
stacklevelz;Optimizers are not passed in but optim_only is set to True.rs   z)Submodule FQN should only have 1 instancec              3   *   #    U  H	  o S 3v   M     g7f)rp   NrJ   ).0r   s     r=   	<genexpr>"_verify_options.<locals>.<genexpr>M  s     %@4CQi4s   z?full_state_dict must be True when broadcast_from_rank0 is True.)offload_to_cpu
rank0_only)r   c              3     #    [         R                  " 5          [         R                  " SS[        S9  [        R
                  " U UUUS9   S v   S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f7f)NignorezFSDP.state_dict_type)messagecategoryrq   state_dict_typestate_dict_configoptim_state_dict_config)warningscatch_warningsfilterwarningsFutureWarningr|   r   r   s       r=   $fsdp_state_dict_type_without_warning=_verify_options.<locals>.fsdp_state_dict_type_without_warningk  sj      ((*''&<} ))!$3&7,C	 	 +* 	 +*s4   B2A1
A A1	B 
A.	*A11
A?;Br   r   )rY   rZ   r[   r^   r_   r\   r]   rJ   ),r   warnr   r   r*   r   rx   r   r   r   r   rc   rR   updatecopyrf   itemsra   rb   named_modulesr}   rz   rF   rA   
ValueErrorr|   r_   r   rB   r   r   FULL_STATE_DICTr   r   SHARDED_STATE_DICTrd   contextmanager	functoolspartialre   rV   r   rg   rh   )ri   r   r   r   r   rY   rZ   rj   paramfqnsr   param_fqns_r[   rq   r_   r   r   r   r   r^   s                        r=   _verify_optionsr     s    I 	
 &I
 	
 +)+GKMOQ1%8e\**%##E*?S,34;;DA+<+C!( (,yy{$Ce\22).#&  9  399;<C)-ellF)C!&  = $'5_
!//1LD'U)D4yA~$%PQQ%%%@4%@@ 2 ##G,C,CM
 	
 $$U+L "" 3&22w?R?R! '?&22#//O73O3O'# ,;;O 6&22! 'B&22'# ,>>O		"	"	 
#	$ !((0+/$;
 "-- 	
/	+3-!$ryy/<8#^&kAo	 	rT   model_state_dictoptim_state_dictinfoc                    UR                    H  n[        U5      nUb  M  [        S5      e   UR                  (       a  U (       d  UR                  (       dx  UR
                  (       dg  UR                  (       a  UR                  (       dE  UR                  (       a4  UR                  (       d#  [        S[        R                  " 5       < S35      eUR                  (       aH  U(       dA  UR                  (       a  UR                  (       d  UR                  (       d  [        SU 35      eU  H"  n[        U;   d  M  [        U S[         S35      e   g )Nz)Expected a fsdp_state with a fsdp module.z}The option indicates that model state_dict is required to save or load, but model state_dict is empty.rank = dist.get_rank()=rp   zgThe option indicates that model state_dict is required to save, or load but optim state_dict is empty. z
 contains z6. This can happen if the model is not the root module.)r_   r   rz   r\   r[   rC   rB   rA   rE   rF   r   distget_rankr]   r~   )r   r   r   rq   
fsdp_statekeys         r=   _verify_state_dictr     s   
 ##CFK
 !LMM $ 	 ''))!!d&:&:KK))'mmo'q*
 	
  %%$*>*>..::J9KM 
  #%z+ /* *   rT   r   apic                     [        X5      nU[        ;   a)  [        R                  " [        U R                  U5      U S9nU$ )N)self)r   r5   r   r   r   )r   r   calls      r=   _state_dict_fnr     s7    3D""  !<3GKrT   
state_dictc                     UR                   (       aL  UR                  (       a#  [        R                  R	                  5       (       d  SOSn[        XR                  US9$ UR                  (       a  [        U 5      $ U $ )NrJ   )r   )rB   
ranks_only)rA   rB   ra   distributedis_initializedr   r   )r   r   r   s      r=   _maybe_full_or_cpu_state_dictr     so      $$E,=,=,L,L,N,N  	
 "$4$4
 	
 
		)*55rT   c           	         UR                   (       d  0 $ UR                  5          [        U S5      " 5       nS S S 5        [        WR	                  5       5       H  n[        X5      n[        U5      S:w  a  [        SU S[        U5       SU 35      e[        [        U5      5      nXS:w  d  MU  S[        4S jnU" X55      (       d  [        SU S	U 35      eUR                  U5      X%'   M     UR                  (       ae  0 nU H[  nUR                   HH  nUR                  U5      (       d  M  UR                  (       a  X%   Xu'   M4  U[        U5      S  n	X%   Xy'   MJ     M]     UnUR                   (       aQ  U R#                  5        H=  u  p:U
R$                  (       a  M  [        X5      nU H  nUR                  U5        M     M?     ['        X!5      $ ! , (       d  f       GN= f)
Nr   rs   Expected 1 FQN for key '', got z: rm   c                    [        U5      [        U 5      :  a  gUR                  S5      nU R                  S5      nSn[        U5       H>  u  pVXbU   :X  a)  US-  nU[        U5      :X  a  U[        U5      S-
  :H  s  $ M6  US;   a  M>    g   g)NFrp   r   rs   )rq   rt   T)r}   rv   rw   )r   r   	fqn_split	key_splitfqn_idxkey_idxkey_names          r=   verify%_get_model_state_dict.<locals>.verify  s    s8s3x' IIcN	IIcN	)29)=%GW#551"c)n4#*c)nq.@#@@ 5!%<< $ *> rT   zAn unexpected key, z, exists. FQN is )r\   r^   r   rf   keysr   r}   rz   nextiterrP   r   popr[   
startswithrD   rC   r   requires_gradr   )ri   r   r   r   r   r   r   new_state_dictr   r   r   s              r=   _get_model_state_dictr     s    					#E<8:
 
 JOO%&$t9> *3%ws4ykD6J  4::D " ###"%8=Nse#TUU(nnS1JO= '@ /1C11~~f--//*4/N'!#f+-0G.8oN+ 2  $
  002JC""U(Ds# 	 3 )::s 
	s   G
Gc           
         UR                   (       a  U(       d  UR                  (       d  [        0 0 5      $ 0 n[        XR                  5       H  u  pE[        XUR                  5      n[        U UUR                  SSS9n[        Xg5       Hr  u  pUR                  (       a  [        R                  " 5       S:X  a?  X:w  a:  UR                  US 5      n
U
c!  UR                  (       a  [        SU S35      eOXU	'   XSU	'   Mt     M     SnUR                  (       d  UR                  (       Ga  [        5       nUR                  5        HQ  n[        R                   " U5      (       d  M   UR#                  5       S:  d  M6  UR%                  UR&                  5        MS     [        R&                  " S5      U;   a'  UR)                  [        R&                  " S5      5        Sn[+        U5      S:X  a.  UR%                  [        R,                  R/                  5       5        O[+        U5      S:  a  [1        S	5      eUR                  (       a0  [3        UUUR                  5       UR                  UR4                  S
9  O)UR                  (       a  [7        XUR                  5       S9  UR9                  U5        UR;                  5          [=        [        [?        U S5      " XR                  US95      sS S S 5        $ ! , (       d  f       g = f)NF)rk   rl   r   zMissing key: rp   metaTrs   zMultiple devices found)devicerE   rB   r   load_state_dict)r   rE   assign) r\   rF   r!   r   rI   r   zipr   r   r   rE   r   rA   rc   r   ra   	is_tensordimr   r   remover}   distributed_c10d_get_pg_default_devicer   r   rB   r   r   r^   r   r   )ri   r   r   local_state_dictr   valuer   fqns_with_prefixr   fqn_with_prefix
load_valuer  devicess                r=   _load_model_state_dictr    sO    Z8Q8Q R((08N8NO
T%;%;<$""!!&
 %($? C--A1E('^^C6
%{{*]3%q+ABB # 3=/05_- %@ P, F  D$8$8$8%%,,.Eu%%%))+/ELL) /
 <<7*NN5<</0Fw<1KK--DDFG\A566$$! {{}{{ ,, !!":V*+				5"34%kk&
 
		s   ;)K..
K<optimc                    U R                   (       a  gU R                   H#  nU[            H  nUR                  c  M      g   M%     U R                   HA  nU[            H1  nUR                  (       d  M  [
        R                  " U5      Ul        M3     MC     / nU R                   H\  nSU;   d  M  UR                  US   5        [        US   [
        R                  5      (       a  [
        R                  " S5      OSUS'   M^     U R                  SS9  U R                   H  nSU;   d  M  UR                  S5      US'   M!     U R                  SS9  g)z@
Initialize optim states by calling the step() with zero grads.
Nlrg        )closurer   T)set_to_none)r4   r2   _PARAMSgradr   ra   
zeros_liker{   rx   rb   tensorstepr   	zero_grad)r  param_groupr   lrss       r=   _init_optim_stater  c  s*    {{ )) )Ezz% * *
 )) )E""""--e4
 * * C));JJ{4() k$/>> S!  * 
JJtJ )); #
K * 
OOO%rT   c           
      J  ^	^
 S[         [        [        4   S[        S[         [        [        4   4U	U
4S jjm	S m
0 n[	        [
        U [           5      R                  5        HC  u  p#[         SU 3nUR                  T	" [	        [         [        [        4   U5      U5      5        ME     [	        [        U [           5       H_  nUR                  [        5      n[	        [        [           U5       H,  nUR                  5        H  u  pxX[         SU SU 3'   M     M.     Ma     U$ )a  
This API flattens the optimizer state_dict to support optimizer resharding for
MPMD, e.g., pipeline parallelism.

Without the API, the original optimizer state_dict looks like:
{
    "state": {
        "layer1.weight": {
            "step": 10, "exp_avg": SomeTensor, "exp_avg_sq": SomeTensor
        },
        "layer2.weight": {
            "step": 10, "exp_avg": SomeTensor, "exp_avg_sq": SomeTensor
        },
    },
    "param_groups": [
        {
            "lr": 0.0,
            "betas": (0.9, 0.95), ...,
            "params": ["layer1.weight", "layer2.weight"]
        }
    ]
}

With this API, the optimizer state_dict looks like:
{
    "state.layer1.weight.step": 10,
    "state.layer2.weight.step": 10,
    "state.layer1.weight.exp_avg": SomeTensor,
    "state.layer2.weight.exp_avg": SomeTensor,
    "state.layer1.weight.exp_avg_sq": SomeTensor,
    "state.layer2.weight.exp_avg_sq": SomeTensor,
    "param_groups.layer1.weight.lr": 0.1,
    "param_groups.layer2.weight.lr": 0.1,
    "param_groups.layer1.weight.betas": (0.9, 0.95),
    "param_groups.layer2.weight.betas": (0.9, 0.95),
}

The "state" section supports arbitrary levels of nesting for optimizers like Shampoo.
nested_dictr   rm   c                    > 0 nU R                  5        HZ  u  p4[        U5      nU(       a  U SU 3OUn[        U[        5      (       a  UR	                  T" XF5      5        MN  T" U5        XBU'   M\     U$ )z
Recursively flatten a nested dictionary with dot-separated keys.

Args:
    nested_dict: The dictionary to flatten
    prefix: The prefix to prepend to all keys

Returns:
    Flattened dictionary with dot-separated keys
rp   )r   rR   rx   r`   r   )	r  r   	flattenedr   r
  str_keyfull_key_flatten_state_nested_dict_raise_if_type_not_supporteds	          r=   r$  =_flatten_optim_state_dict.<locals>._flatten_state_nested_dict  sw     +-	%++-JC#hG06&7),GH%&&  !;E!LM -U3&+(# . rT   c                     [        U [        R                  [        [        [
        45      (       d  [        S[        U 5       S35      eg )Nz[Flattening optimizer state_dict only supports tensor, int, float, dict states now. Type is rp   )rx   ra   rb   intfloatr`   NotImplementedErrortype)vs    r=   r%  ?_flatten_optim_state_dict.<locals>._raise_if_type_not_supported  sC    !ellC=>>%7)1&  ?rT   rp   )r`   rR   r
   r&   r   r'   _STATEr   r   r(   _PGr   r  rf   )r   retr   r4   state_prefixr  r   kr,  r$  r%  s            @@r=   _flatten_optim_state_dictr3    s   R#s(^-0	c9n	 < !#C =*V*<=CCE
 3%(

&tDcNE'BLQ	
 F -z#?w'S	4(C#))+*+se1SE1#&' , ) @ JrT   c                 R   S[         S[        [         [        4   S[        [         [        4   4S jn0 n/ n[        U[        U0nU R
                   GH  nUR                  [        / 05        U[            GH#  nUR                  U    GH  n	XR                  ;   a-  Sn
U H$  nU[        :X  a  M  [         SU	 SU 3nX;   a  Sn
  O   OSn
U
(       d  MK  US   [           n[        U[        5      (       d  [        S	[        U5       35      eUR                  U	5        UR                  (       d  M  0 XI'   U R                  U    HM  n[         SU	 SU 3nX;  a  U" X5      nU[!        ["        XI   5      U'   M5  UU   [!        ["        XI   5      U'   MO     GM     GM&     [!        [        [            US   [           5      S
   nU H\  nU[        :X  a  M  U[         SU SU 3   nXS   ;  a
  UUS   U'   M2  US   U   U:w  d  M@  [%        SU SU SU SUS   U    S3	5      e   GM     U$ )z
This API unflattens the state_dict generated by _flatten_optim_state_dict().
Supports arbitrary levels of nesting in the state section through recursive reconstruction.

See the docstring of _flatten_optim_state_dict() for more detail.
flattened_keyflattened_dictrm   c                 *   U  S3n0 nUR                  5        Hw  u  pEUR                  U5      (       d  M  U[        U5      S nUR                  S5      nUnUSS  H)  n	X;  a  0 X'   [	        X   [
        5      (       d   eX   nM+     XXUS   '   My     U$ )z
Reconstructs a potentially nested value from flattened keys.
For non-nested values, returns the value directly.
For nested values, reconstructs the nested structure with string keys.
rp   Nr   )r   r   r}   rv   rx   r`   )
r5  r6  r   r  r   r
  remaining_keypartscurrentparts
             r=   _reconstruct_nested_dict=_unflatten_optim_state_dict.<locals>._reconstruct_nested_dict  s     "?!$&( )..0JC >>&))  F.M "'',E!G cr
&$&GM!'-6666!- # "'E"I9 1> rT   Frp   Tr   Expected list, got r   zaAll the parameters in the same parameter group should have the same saved param_group value. But z is z while other(s) is )rR   r`   r&   r.  r/  r2   r{   r  rY   rZ   rx   rf   rz   r+  r   r4   r   r'   r   )r  r   r   r<  r4   pg_state
return_osdr  r   r   	in_paramsr2  flatten_keyr3   
state_nameflattened_state_keyreconstructed_valuefirst_param_fqnr
  s                      r=   _unflatten_optim_state_dictrG    sv   //,0i,@/	c9n	/b E"$H&,eS(%CJ))"& )E--e4 444 %I(<$),Qse1QC&8&4(,I ) !%I !"g.!&$//(+>tF|n)MNNc" **  
"'++e"4J-3HAcU!J<*H'*<.F//+ 0 ]EJ7
C
 GQ/G]EJ7
C #5= 5 *` tCy(2,w*?@CAG|#a'8!=>E$"'Q"aE)"==L<MQqc R 3HRLO3DAG  g *@ rT   
optimizersc                 p   UR                   (       d  0 $ [        0 [        / 0nU GH  n[        U5        [	        US5      " 5       nUR
                  (       a  UR                  5          [        R                  " XU5      nS S S 5        U(       d  Mj  [        U[           R                  5       5       H=  nSU;   d  M  U[           R                  U5      U[           UR                  SS5      '   M?     U[            H3  nU[            Vs/ s H  ofR                  SS5      PM     nnX[        '   M5     GOM[        [        R                  " S UR                    5       5      5      n[#        [%        U['        [)        U5      5      5      5      n	0 n
U R+                  5        H`  u  p[-        X5      n[)        U5      S:w  a  [/        SU S[)        U5       35      e[1        [3        U5      5      nX;  a  MT  X   nXU'   XU'   Mb     [        U[           R                  5       5       H)  nX   nU[           R                  U5      U[           U'   M+     U[            H&  nU[            Vs/ s H  oU   PM	     snU[        '   M(     U(       d  GMd  [5        [6        U[           5      R9                  U[           5        [5        [:        U[           5      R=                  U[           5        GM     UR>                  (       a  [5        [@        [C        U5      5      n[E        X25      $ ! , (       d  f       GN= fs  snf s  snf )	Nr   rt   z
_orig_mod.ro   c              3   2   #    U  H  o[            v   M     g 7fr7   )r  )r   gs     r=   r   (_get_optim_state_dict.<locals>.<genexpr>  s     -UBTQjBTs   rs   r   r   )#r]   r.  r/  r  r   r_   r^   r|   r   rf   r   r   ru   r  r	   from_iterabler2   r`   r  ranger}   r   r   rz   r   r   r   r'   r   r(   extendrG   r)   r3  r   )ri   rH  r   r   r  osdr2  rK  r3   param_pid_mappingfqn_pid_mappingr   r   r   r   pidgroups                    r=   _get_optim_state_dictrU  s  s    	,2BR+@% UL13""$++E#> % #f+**,-!#?B6{q?QCK		, ;< . X?@zJz!))L"5zJ#'
  %---U%BTBT-UUVF $Ss6{1C%D E O#446
 ,t9>(23%ws4ykJ  4:&1'.'*$'*$ 7 CK,,./%* $'v;??3#7FC 	 0 SBG.!Q.3#"6.!Qg " ],V45<<S[I 0 56==c#hGe h (( 9:J K
 ))9@@k %$ K6 "Rs   %LL."L3
L+	c           
         0 n/ n[         U[        U0n0 n[        S [        [        U[            5       5       5      (       a  U$ UR
                   GH  nUR                  [        / 05        U[            GH  n	UR                  U	    GHz  n
XR                  ;   aG  Sn[        [        U[           5       H)  nU
[        [        [           U[           5      ;   d  M'  Sn  O   OSnU(       d  Me  US   [           n[        U[        5      (       d  [        S[        U5       35      eUR                  U
5        U	R                   (       aX  U
[        [        U[            5      ;   a  [        [        U[            5      U
   XJ'   O UR"                  (       a  [%        SU
 S35      e[        [        U[           5       HH  nU
[        [        [           U[           5      ;   d  M'  ['        U[           5      S-
  U[)        U5      '   MJ     GM}     GM     ['        U[           5      S	:X  d  GM  / n[        [        U[           5       HA  n['        [        [        [           U[           5      5      S	:X  d  M0  UR                  U5        MC     ['        U5      S:w  a  [+        S
5      e['        U[           5      ['        UR
                  5      :w  a  [+        S5      e['        U[           5      S-
  U[)        W5      '   GM     [        [        U[           5       HS  nUR-                  [)        U5      S5      nUS:X  a  M&  UR/                  5        H  u  nnU[        :X  a  M  UX_   U'   M     MU     U$ )a  
Extract the corresponding optim state_dict from ``optim_state_dict`` for
``optim`` and return the result optim state_dict.

Args:
    model (nn.Module): the root model.
    optim (torch.optim.Optimizer): the optimizer.
    optim_state_dict (Dict[str, ValueType]): the superset optim state_dict that
        contains the optim state_dict of ``optim``.
    info (_StateDictInfo): state dict information.

Returns:
    The optim state_dict of ``optim``.
c              3   B   #    U  H  n[        U[        5      v   M     g 7fr7   )rx   r(  )r   r2  s     r=   r   *_split_optim_state_dict.<locals>.<genexpr>  s     
U'T!:a'Ts   FTr   r>  z'Missing optimizer state for parameter 'z' in checkpoint. The parameter requires gradients but has no saved optimizer state. To load anyway, use StateDictOptions(strict=False).rs   r   zThere are param groups that have zero parameters. In such a case, DSD only support exactly one param group with zero parameters.But the loaded state_dict has zero or more than one param groups that have zero parameters.z`When there is a parameter group that has zero parameters, multiple optimizers are not supported.)r.  r/  allr   r'   r2   r{   r  rY   rZ   r(   rf   rR   rx   rz   r+  r   rE   r   r}   idr   r   r   )ri   r  r   r   r4   r?  r@  
pg_mappingr  r   r   rA  loaded_param_groupr3   r0  pg_idxr   r
  s                     r=   _split_optim_state_dictr^    s   * E"$H&,eS(%CJ!#J

UtM;KF;S'T
UUU))"& )E--e4444 %I.2)+;C+@/* $tCy2DW2M"NN(,I!/ !%I !"g.!&$//(+>tF|n)MNNc"&&d=2B62JKK%)-9I&9Q%RSV%W
*EcU KR R 
 +/%'7'<+& d49.@.IJJ=@C=QTU=U
2&8#9:	+7 5 *D {7#$)C&*+<>Ns>S&T"tDI'9''BCDIJJ12 'U 3x1} 1  #C()S1C1C-DD =  25Z_1E1IJr,-.o *r -/?/DE;4R<%++-JCg~$)HS!	 . F rT   c           
        ^ UR                   (       d  g U GH  n[        U5        U(       a@  [        U;   a  [        XX#5      nO+[	        U[        [        [        [        4   U5      U5      nO0 nUR                  (       Gap  U R                  5        GH)  u  pg[        X5      n[        XSS9n	X:X  a  M"  [        U5      S:w  a  [        SU S[        U5       35      eUR                  5       n
U	R                  5       nU[            HO  n[        [        [        [         4   U5      nU["            Vs/ s H  oR%                  X5      PM     nnX["        '   MQ     [        [&        U[           5      n[)        UR+                  5       5       H.  nU
U;   d  M  UR                  U5      UUR%                  X5      '   M0     GM,     UR-                  5          [.        R0                  " XU5      nS S S 5        GO$UR2                  (       Ga  SUl        [5        X4U5      nSUl        S mU4S jn[7        [8        R:                  UU5      nTc  [        S5      e[=        U5      u  nn[=        U5      u  nnUR>                  (       a  [A        UUTS	9  O[C        UUTS	9  U H0  nUU;  d  M  UU;  a  [        S
U S35      eUU   UU'   UU   UU'   M2     [E        UU5      nU[            H3  n["        U;  d  M  / [        [        [        [        4   U5      ["        '   M5     [G        US5      " US9  GM     g s  snf ! , (       d  f       N)= f)NF)rl   rs   zExpected 1 FQN for 'r   Tc                    > U R                  5       S:  a,  Tc  U R                  mU $ TU R                  :w  a  [        S5      eU $ )Nr   zDevice mismatch)r  r   r   )tr   s    r=   _device'_load_optim_state_dict.<locals>._deviceZ  sD    557Q;~!"   188+():;;rT   zExpected device to be setr   zExpected key 'z' in osd_mappingr  )r   )$r]   r  r.  r^  rG  r   r`   rR   r&   r_   r   r   r}   rz   r   r/  r
   r  ru   r'   rf   r   r^   r|   optim_state_dict_to_loadrA   rU  r#   ra   rb   r   rF   r   r   r   r   )ri   rH  r   r   r  r   original_fqn_r   fqns_with_compilerr   fqn_with_compilerrK  valr   r3   	osd_stater2  r	  rb  flatten_osdosd_mappingflatten_local_osdlocal_osd_mapping	optim_keypgr   s                             @r=   _load_optim_state_dictrq    s,    % ##:*$  $?4S)^ 4jA4$   " $)#9#9#; 5%.e&" -t9>(.|nGCI;O  hhj$6$:$:$<!)#.AtCH~q1CGJ7|GSC;|   $*L / !0@0HI	inn./AaxGP}}UVGW	!))C"CD 0+ $<2 ""$#'#@#@"2$  %$ !!!#(D 4UHdK#'D F ellG5EFA~$%@AA':;K'L$K3FGW3X00((%k3DVT&{4EfU
 )	$55 3,,YK7GH  4?y3I%i03>y3I%i0 )  5!#4  's+"$>@Dc9n-r27; , 	u/0<LMw @ %$s   /L*
L//
L=	c          	          [        5          [        U SSUUS9n[        X5      n[        U0 U5        UsSSS5        $ ! , (       d  f       g= f)a  
Return the model state_dict of ``model``.

See ``get_state_dict`` for the detail usage.

Args:
    model (nn.Module): the nn.Module to the model.
    submodules (deprecated): Optional[set[nn.Module]]: only return the model parameters
        that belong to the submodules.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be returned. See
        `StateDictOptions` for the details.

Returns:
    The state_dict for ``model``.

:rtype: typing.Dict[str, ValueType]
rJ   Fr   r   r   N)r>   r   r   r   )ri   r   r   r   r   s        r=   r+   r+     sI    0 
!
 1=+R6 
s	   '<
A
c          	         [        5          [        U[        R                  R                  5      (       a  U4O
[        U5      n[        U USUUS9n[        XU5      n[        0 XT5        UsSSS5        $ ! , (       d  f       g= f)a  
Return the combined state_dict for optimizers.

See ``get_state_dict`` for the detail usage.

Args:
    model (nn.Module): the nn.Module to the model.
    optimizers (Union[None, Optimizer, Iterable[Optimizer]]):
        The optimizers that are used to optimize ``model``.
    submodules (deprecated): Optional[set[nn.Module]]: only return the model parameters
        that belong to the submodules.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be returned. See
        `StateDictOptions` for the details.

Returns:
    The state_dict for ``optimizers``.

:rtype: OptimizerStateType
Trs  N)	r>   rx   ra   r  	Optimizertupler   rU  r   )ri   rH  r   r   r   r   s         r=   r,   r,     sx    6 
 *ekk&;&;<< Mz" 	
 !
 1DI2/6 
s   AA33
Bc          	          [        5          [        U[        R                  R                  5      (       a  U4O
[        U5      n[        U USUUS9n[        X5      n[        XU5      n[        XVU5        XV4sSSS5        $ ! , (       d  f       g= f)a(  
Return the model state_dict and optimizers state_dict.

``get_state_dict`` can process any module that is parallelized by PyTorch
FSDP/fully_shard, DDP/replicate, tensor_parallel/parallelize_module, and any
combination of these parallelisms. The main functions of ``get_state_dict``
are: 1.) returning a model and optimizer state_dict that can be resharded
with a different number of trainers and/or different parallelisms.
2.) hiding the parallelism-specific state_dict APIs. Users don't have to call
these APIs.
3.) sanity checking the result state_dict.

The keys of the result state dictionary are the canonical FQNs (Fully
Qualified Names).  A canonical FQN refers to the FQN based on a parameter's
position in an nn.Module hierarchy. More specifically, a canonical FQN to a
parameter is the FQN returned by ``module.named_parameters()`` or
``module.named_buffers()`` when the module is not distributed by any
parallelisms. Since the optimizer internally uses parameter IDs to represent
a parameter, there will be a conversion from the parameter IDs to the
canonical FQNs when calling this API.

``get_state_dict`` can also process a module that is not parallelized. In
such a case, ``get_state_dict`` only performs one function -- converting the
optimizer parameter IDs to the canonical FQNs.

Example:
    >>> # xdoctest: +SKIP
    >>> import torch
    >>> from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
    >>> from torch.nn.parallel import DistributedDataParallel as DDP
    >>> from torch.distributed.checkpoint.state_dict import get_state_dict

    >>> fsdp_model = FSDP(copy.deepcopy(model))
    >>> fsdp_optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    >>> ddp_model = DDP(copy.deepcopy(model))
    >>> ddp_optim = torch.optim.Adam(model.parameters(), lr=1e-3)


    >>> ddp_state_dict, ddp_optim_state_dict = get_state_dict(ddp_model, ddp_optim)
    >>> fsdp_state_dict, fsdp_optim_state_dict = get_state_dict(
    ...     fsdp_model, fsdp_optim
    ... )

    >>> # if we simply call ddp_model.state_dict() and fsdp_model.state_dict(),
    >>> # the asserts will fail.
    >>> assert ddp_state_dict == fsdp_state_dict
    >>> assert ddp_optim_state == fsdp_optim_state_dict


Args:
    model (nn.Module): the nn.Module to the model.
    optimizers (Union[None, Optimizer, Iterable[Optimizer]]):
        The optimizers that are used to optimize ``model``.
    submodules (deprecated): Optional[set[nn.Module]]: only return the model parameters
        that belong to the submodules.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be returned. See
        `StateDictOptions` for the details.

Returns:
    ``Tuple`` that contain model state_dict and optimizer state_dict.

:rtype: typing.Tuple[typing.Dict[str, ValueType], OptimizerStateType]
Frs  N)
r>   rx   ra   r  ru  rv  r   r   rU  r   )ri   rH  r   r   r   r   r   s          r=   r-   r-     s    P 
 *ekk&;&;<< Mz" 	
 !
 1=0DI+tD1! 
s   A*A??
Bc           
         U(       d  0 $ [        [        [        UR                  5       5      5      [        R
                  5      (       a  [        R                  " S[        SS9  [        [        [        R
                  [        [        [        4   4   U5      n0 nUR                  5        H  u  pEU R                  5        H  u  pgXt:w  a  M  [        X5      n[!        U5      S:w  a  [#        S5      e[        [        U5      5       S3n	UR%                  UR                  5        V
Vs0 s H
  u  pX-   U_M     snn
5        M     M     U$ [        [        [        [        4   U5      $ s  snn
f )NzPassing model_state_dict as a ``Dict[nn.Module, Dict[str, Any]]``is deprecated and will be removed in 2.5. If you need this feature, please preprocessing the model_state_dict to achieve the same functionality.r   r   rs   z/FQNs for a submodule should only have 1 elementrp   )rx   r   r   r   rg   rh   r   r   r   r   r`   rR   r&   r   r   r   r}   rz   r   )ri   r   cast_state_dictr   r   sub_state_dictrj   mr   r   subfqnr
  s               r=   _unflatten_model_state_dictr}  2  s:    	$tJOO-./;;" 	
 tBIItCN/C$CDjQ/1)8)>)>)@%I ..0> -t9>(I  !d,-Q/%%AOAUAUAWXAWV_e+AWX 1 *A Di(*55	 Ys   $E$)r   c                    [        X5      n[        5          [        U SSUS9n[        U0 U5        [	        XU5      sSSS5        $ ! , (       d  f       g= f)a  Load the model state_dict.

The counterpart of ``get_model_state_dict`` to set the state_dict to the
model. See ``set_state_dict`` for the detail usage.

Args:
    model (nn.Module): the nn.Module to the model.
    model_state_dict: (Dict[str, ValueType]):
       the model state_dict to load. If the key of the ``model_state_dict``
       is nn.Module, the key is a submodule of ``model`` and the value should
       be the state_dict of the submodule. When loading the state_dict,
       the prefix of the submodule will be append to the state_dict.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be loaded. See
        `StateDictOptions` for the details.

Returns:
    ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:
        * **missing_keys** is a list of str containing the missing keys
        * **unexpected_keys** is a list of str containing the unexpected keys

:type model_state_dict: typing.Dict[str, ValueType]
rJ   Fr   r   N)r}  r>   r   r   r  )ri   r   r   r   s       r=   r.   r.   W  sK    : .I. 
ubUGL+R6%etD	 
s   %A
Ac                    [        5          [        U[        R                  R                  5      (       a  U4O
[        U5      n[        XSUS9n[        0 X$5        [        XX$5        SSS5        g! , (       d  f       g= f)ad  Load the optimizers state_dict.

The counterpart of ``get_optimizer_state_dict`` to set the state_dict to the
optimizers. See ``set_state_dict`` for the detail usage.

WARN: ``set_optimizer_state_dict`` can only be called before ``backward()`` or after
    ``step()`` is called on the optimizers. Otherwise, the optimizer states won't be
    initialized correctly.

Args:
    model (nn.Module): the nn.Module to the model.
    optimizers (Union[Optimizer, Iterable[Optimizer]]):
        The optimizers that are used to optimize ``model``.
    optim_state_dict: OptimizerStateType:
        the optimizer state_dict to load.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be loaded. See
        `StateDictOptions` for the details.

Returns:
    None

:type optim_state_dict: typing.OptimizerStateType
Tr  N)	r>   rx   ra   r  ru  rv  r   r   rq  )ri   rH  r   r   r   s        r=   r/   r/   ~  sf    > 
 *ekk&;&;<< Mz" 	
 uT7S2/6u2BI 
s   AA//
A=c                8   [        X5      n[        5          [        U[        R                  R
                  5      (       a  U4O
[        U5      n[        XU(       + US9n[        X#U5        [        XX55        [        XU5      sSSS5        $ ! , (       d  f       g= f)a  Load the model state_dict and optimizers state_dict.

The counterpart of ``get_state_dict`` to set the state_dict to the model and
optimizers.  The given ``model_state_dict`` and ``optim_state_dict`` do not
have to be returned by ``get_state_dict`` but must meet the following
requirements: 1) all FQNs are canonical FQNs as defined in ``get_state_dict``,
2) if a tensor is sharded, it must be either a ShardedTensor or DTensor,
3) optimizer state_dict cannot contain the parameter IDs; the keys should be
the canonical FQNs.

WARN: ``set_state_dict`` can only be called before ``backward()`` or after ``step()``
    is called on the optimizers. Otherwise, the optimizer states won't be initialized
    correctly.

Args:
    model (nn.Module): the nn.Module to the model.
    optimizers (Union[Optimizer, Iterable[Optimizer]]):
        The optimizers that are used to optimize ``model``.
    model_state_dict: (Union[Dict[nn.Module, Dict[str, ValueType]], Dict[str, ValueType]]):
       the model state_dict to load. If the key of the ``model_state_dict``
       is nn.Module, the key is a submodule of ``model`` and the value should
       be the state_dict of the submodule. When loading the state_dict,
       the prefix of the submodule will be append to the state_dict.
    optim_state_dict: OptimizerStateType:
        the optimizer state_dict to load.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be loaded. See
        `StateDictOptions` for the details.

Returns:
    ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:
        * **missing_keys** is a list of str containing the missing keys of the model state_dict.
        * **unexpected_keys** is a list of str containing the unexpected keys of the model state_dict.

:type model_state_dict: typing.Dict[str, ValueType]
:type optim_state_dict: typing.OptimizerStateType
r  N)r}  r>   rx   ra   r  ru  rv  r   r   rq  r  )ri   rH  r   r   r   r   s         r=   r0   r0     s    \ .I. 
 *ekk&;&;<< Mz" 	
 .>*>
 	+tDu2BI%etD 
s   A+B
Bc                  ^^ [         R                  " [        U US9mU4S jnX l        [         R                  " [        U US9mS[
        [        [        4   4U4S jjnX0l        [        R                  U5        [        R                  U5        g)a  Patch the ``state_dict`` and ``load_state_dict`` attributes of ``model``.

Patch the ``state_dict`` and ``load_state_dict`` attributes of ``model`` to
be a partial function to call ``get_state_dict`` and ``set_state_dict``.

Example:
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
    from torch.distributed.checkpoint.state_dict import patch_model_state_dict

    model = fsdp(model)
    patch_model_state_dict(model)

Args:
    model (nn.Module): the nn.Module to the model.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be loaded. See
        `StateDictOptions` for the details.
Returns:
    None
)ri   r   c                     > T " 5       $ r7   rJ   _state_dict_calls   r=   state_dict_call0_patch_model_state_dict.<locals>.state_dict_call      !!rT   r   c                    > T" U S9  g )N)r   rJ   r   _load_state_dict_calls    r=   load_state_dict_call5_patch_model_state_dict.<locals>.load_state_dict_call      z:rT   N)r   r   r+   r   r.   r`   rR   r
   r  r5   r   )ri   r   r  r  r  r  s       @@r=   _patch_model_state_dictr    s    6 !((" '%--;c3h ; 1O,01rT   c                  ^^ [         R                  " [        U UUS9mU4S jn[         R                  " [        U UUS9mS[        [
        [        4   4U4S jjn[        R                  U5        [        R                  U5        [        U[        R                  R                  5      (       a  U4O
[        U5      nU H  nX5l        XEl        M     g)a`  Patch the ``state_dict`` and ``load_state_dict`` attributes of ``optimizers``.

Patch the ``state_dict`` and ``load_state_dict`` attributes of ``optimizers`` to
be a partial function to call ``get_state_dict`` and ``set_state_dict``.

Note that if there are multiple optimizers, all of the optimizers will be patched.
So users only need to call one of the state_dict() to get the full result.

Example:
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
    from torch.distributed.checkpoint.state_dict import patch_model_state_dict

    model = fsdp(model)
    patch_model_state_dict(model)

Args:
    model (nn.Module): the nn.Module to the model.
    options (StateDictOptions): the options to control how
        model state_dict and optimizer state_dict should be loaded. See
        `StateDictOptions` for the details.
Returns:
    None
)ri   rH  r   c                     > T " 5       $ r7   rJ   r  s   r=   r  4_patch_optimizer_state_dict.<locals>.state_dict_callH  r  rT   r   c                    > T" U S9  g )N)r   rJ   r  s    r=   r  9_patch_optimizer_state_dict.<locals>.load_state_dict_callR  r  rT   N)r   r   r,   r/   r`   rR   r
   r5   r   rx   ra   r  ru  rv  r   r  )ri   rH  r   r  r  r  r  r  s         @@r=   _patch_optimizer_state_dictr  "  s    > !(( 	" &-- 	;c3h ; O,01 j%++"7"788 
: 
 * 4 rT   )rH   TT)rH   )prd   r   r8   r   collections.abcr   r   r   dataclassesr   r   r   	itertoolsr	   typingr
   r   r   r   ra   torch.distributedr   r   torch.nnrg   'torch.distributed._shard.sharded_tensorr   #torch.distributed._state_dict_utilsr   r   r   r   r   r   ;torch.distributed.algorithms._checkpoint.checkpoint_wrapperr   torch.distributed.fsdpr   r   r   r|   r   r   r   r   r   $torch.distributed.fsdp._common_utilsr   r   torch.distributed.tensorr    torch.nn.modules.moduler!   torch.nn.parallelr"   ry   torch.utils._pytreer#   __all__r~   r/  r  r.  rc   rR   r$   rb   r(  r)  r%   rf   rv  r`   r&   r'   r(   r)   r5   rQ   r   r>   r*   rV   rh   rP   r   r   r   r  ru  r   r   r   r   no_gradr   r  r  r3  rG  rU  r^  rq  r+   r,   r-   r}  r.   r/   r0   r  r  rJ   rT   r=   <module>r     sa     	  9 9 0 0  2 2     A 	 	 	 - 5 < -" 
		Sg}ellCKL4&m(<d3CS>TT	 S)^$' #}/@@@A  &)U S] *   ,. ,. ,.^ @% @ @& . !%HE99HE
HE HE 	HE
 HE HEV	 	%"Z )-'+}99}%++'',-} }
 BII%} $} }@+3	>*+(+ + 
	+\		EKK$9$99   S#X&4	#s(^$ ?;99?;*?;	#y.?; ?;D A
99A
S)^$A
 A
 	A
 A
H'&U[[22 '&t '&T_*< _c9nAU _DA;;  AS)^$A A 	AH BA99BAekk++S01BA BA 	BA BAJa99a;;  a )a 	a
 aH dN99dNekk++S01dN #dN 	dN
 
dN dNT )-'+	" 99"  BII%"  $	" 
 
#y." R )-'+* 99* %%1F1F(GG*  BII%	* 
 $*  * b )-'+X299X2%%1F1F(GGX2 BII%	X2
 $X2 4Y!334X2v"699"6RYYS)^ 445S)^8LL"6 
#y."6R (,	$E99$E3	>*$E $	$E
 $EX (,(J99(J%%1F1F(GG(J )(J
 $(J 
(Jb (,=E99=E%%1F1F(GG=E 3	>*	=E
 )=E $=E =ED  (,129912 $12 
	12 12l 
 (,	;599;5 ekk++S01;5 $	;5
 
;5 ;5rT   