
    /h^                        S SK r S SKrS SKrS SKJr  S SKrS SKJr  S SKJ	r	J
r
  S SKJr  S SKJrJr  S SKJrJr  S SKJr  S S	KJr  S S
KJr  S SKJrJrJr  \R:                  " \R<                  S9  \" 5       r " S S\R@                  RB                  5      r"S r#S r$S r%S r&S r'S r(S r)S r*S r+\,S:X  a  \#" 5       u  r-r.r/\/R`                  S:  a   \" S5      r1\1Re                  5       (       d  \1Rg                  SSS9  \$" \/\-5      r-\/Ri                  5         \ Rj                  " 5         \" \.\/\-\15        \/Ri                  5         \(" \-\15        \)" \-\15        \*" \-\15        \+" \-\15         \/Rn                  (       a  \Rp                  " \15        \/Ri                  5         \/Rs                  5         ggg! \6 a    e f = f! \/Rn                  (       a  \Rp                  " \15        \/Ri                  5         \/Rs                  5         f = f)    N)Path)	load_file)ShardingStrategyStateDictType)
DataLoader)AcceleratorFullyShardedDataParallelPlugin)merge_commandmerge_command_parser)AcceleratorState)torch_device)RegressionDataset)merge_fsdp_weightspatch_environmentsave_fsdp_model)levelc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )	TinyModel&   c                 B  > [         TU ]  5         [        R                  R	                  SS5      U l        [        R                  R                  5       U l        [        R                  R	                  SS5      U l        [        R                  R                  5       U l
        g )N   )super__init__torchnnLinearlinear1ReLU
activationlinear2Softmaxsoftmax)self	__class__s    j/home/james-whalen/.local/lib/python3.13/site-packages/accelerate/test_utils/scripts/test_merge_weights.pyr   TinyModel.__init__'   s_    xxr2.((--/xxr2.xx'')    c                 `    U R                  U R                  U R                  U5      5      5      $ )N)r    r   r   )r#   xs     r%   forwardTinyModel.forward.   s"    ||DOODLLO<==r'   )r   r   r    r"   )__name__
__module____qualname____firstlineno__r   r*   __static_attributes____classcell__)r$   s   @r%   r   r   &   s    *> >r'   r   c                  ^   [         R                  0 :w  a  [        5       R                  5         [        [        R
                  [        R                  S9n [        5       n[        SS9   U R                  U5        S S S 5        [        U S9nUR                  U5      nXU4$ ! , (       d  f       N,= f)N)sharding_strategystate_dict_typeSIZE_BASED_WRAP)fsdp_auto_wrap_policy)fsdp_plugin)r   _shared_state_reset_stater	   r   
FULL_SHARDr   SHARDED_STATE_DICTr   r   set_auto_wrap_policyr   prepare)pluginmodelaccelerators      r%   setuprA   2   s    %%+'')+*55}GgGgF KE	1B	C##E* 
D&1K&E+%%	 
D	Cs   &B
B,c                    [        SSS9n[        USSS9n[        R                  R	                  UR                  5       SS9nU R                  X1U5      u  p1n[        S	5       Ht  nU Hk  nUR                  5         U" US
   5      n[        R                  R                  R                  XvS   5      nU R                  U5        UR                  5         Mm     Mv     U$ )N   *   )lengthseedr   F)
batch_sizeshuffleg?)lr   r)   y)r   r   r   optimSGD
parametersr=   range	zero_gradr   
functionalmse_lossbackwardstep)	r@   r?   	train_settrain_dl	optimizer_batchoutputlosss	            r%   mock_trainingr\   @   s    !26I)EBH 0 0 2s;I!,!4!4Xi!PHY1XEOO5:&F88&&//c
CD  &NN   Lr'   c                     [        UR                  5       UR                  5       5       HG  u  p4U S:X  a  [        R                  " X45      (       d   eM*  [        R                  " X45      (       d  MG   e   g )Nsame)zipvaluesr   allclose)	operationstate_1state_2weight_1weight_2s        r%   check_weightsrg   P   sW    !'.."2GNN4DE>>(5555~~h9999	 Fr'   c                     [        U S-  5      n[        5       R                  [        5      n[	        SUR                  5       UR                  5       5        UR                  U5        [	        SUR                  5       UR                  5       5        g )Nzmodel.safetensorsdiffr^   )r   r   tor   rg   
state_dictload_state_dict)pathr?   safe_state_dictsafe_loaded_models       r%   check_safetensors_weightsrp   X   so    ': :;O!|4&%**,.?.J.J.LM%%o6&%**,.?.J.J.LMr'   c                 4   [         R                  " U S-  SS9n[        5       R                  [        5      n[        SUR                  5       UR                  5       5        UR                  U5        [        SUR                  5       UR                  5       5        g )Nzpytorch_model.binT)weights_onlyri   r^   )r   loadr   rj   r   rg   rk   rl   )rm   r?   nonsafe_state_dictnonsafe_loaded_models       r%   check_pytorch_weightsrv   `   sw    D+>$>TR$;>>,7&%**,.B.M.M.OP(();<&%**,.B.M.M.OPr'   c                 6    [        US-  USS9  [        X5        g )Npytorch_model_fsdp_0Tsafe_serialization)r   rp   r?   rm   s     r%   test_merge_weights_safetensorsr|   h   s    t44dtTd*r'   c                     [         R                  [        US-  5      [        U5      /5      n[        U5        [	        X5        g )Nrx   )parser
parse_argsstrr
   rp   r?   rm   argss      r%   &test_merge_weights_command_safetensorsr   n   s6    c$)?"?@#d)LMD$d*r'   c                 6    [        US-  USS9  [        X5        g )Nrx   Fry   )r   rv   r{   s     r%   test_merge_weights_pytorchr   t   s    t44duU$&r'   c                     [         R                  [        US-  5      [        U5      S/5      n[        U5        [	        X5        g )Nrx   z--unsafe_serialization)r~   r   r   r
   rv   r   s      r%   "test_merge_weights_command_pytorchr   z   s9    c$)?"?@#d)MefgD$$&r'   __main__   test_merge_weights_fsdp_weightsT)parentsexist_ok):gcloggingshutilpathlibr   r   safetensors.torchr   2torch.distributed.fsdp.fully_sharded_data_parallelr   r   torch.utils.datar   
accelerater   r	   accelerate.commands.merger
   r   accelerate.stater   accelerate.test_utilsr   accelerate.test_utils.trainingr   accelerate.utilsr   r   r   basicConfigINFOr~   r   Moduler   rA   r\   rg   rp   rv   r|   r   r   r   r,   r?   r>   r@   num_processesout_pathexistsmkdirwait_for_everyonecollect	Exceptionis_main_processrmtreeend_training r'   r%   <module>r      s   
     ' ^ ' B I - . < S S   ',, '			> 	>& :NQ++'' z!&E6;  1$	'=>H??$$td; "+u5E))+JJLFKA))+ +5(;25(C&uh7.uh?
 **h'))+$$&7 % .  		 **h'))+$$&s   BF1 1F99F< <AH