
    /h"z                        S r SSKrSSKrSSKJr  SSKJrJr  SSKJ	r	J
r
  SSKJr  SSKrSSKJrJr  S	S
KJr  S	SKJrJr  S	SKJrJrJr  \" 5       (       a	  SSKJs  Jr  \" 5       (       a  SSKJr  S r S r!S r"S r#S r$\ SS.S jr%SDS jr&S r'S r(S r)S r*S r+S r,S r-S r. " S S \/5      r0S! r1S" r2\1S# 5       r3S$\4S% jr4S$\4S& jr5SES' jr6SFS( jr7\Rp                  S	\Rr                  S\Rt                  S)\Rv                  S*\Rx                  S+\Rz                  S,\R|                  S-\R~                  S.\R                  S/\R                  S00
rB\BR                  5        V Vs0 s H  u  pX_M	     snn rDS1 rESGS2\R                  4S3 jjrG\1SES4\H4S5 jj5       rISES4\H4S6 jjrJSHS7 jrKSES8 jrL " S9 S:\M5      rN\2SIS; j5       rOSES< jrP\1SJS= j5       rQS> rR " S? S@5      rSSA rTSB rU\SKSC j5       rVgs  snn f )LzB
A set of basic tensor ops compatible with tpu, gpu, and multigpu
    N)Mapping)contextmanagernullcontext)update_wrapperwraps)Any   )AcceleratorStatePartialState   )!TORCH_DISTRIBUTED_OPERATION_TYPES)DistributedTypeTensorInformation)is_npu_availableis_torch_distributed_availableis_torch_xla_available)ReduceOpc                 6    [        U [        R                  5      $ N)
isinstancetorchTensortensors    U/home/james-whalen/.local/lib/python3.13/site-packages/accelerate/utils/operations.pyis_torch_tensorr   ,   s    fell++    c           
      v   [        U [        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R                  [        R                  R                  [        R                  R                  [        R                  R                  5      $ r   )
r   r   xpuFloatTensor
ByteTensor	IntTensor
LongTensor
HalfTensorDoubleTensorBFloat16Tensorr   s    r   is_torch_xpu_tensorr'   0   sm    														  	 	r   c                 "    [        U [        5      $ r   )r   r   tensor_infos    r   is_tensor_informationr+   =   s    k#455r   c                 n    [        U [        5      =(       a    [        U S5      =(       a    [        U S5      $ )z
Checks if `data` is a `namedtuple` or not. Can have false positives, but only if a user is trying to mimic a
`namedtuple` perfectly.
_asdict_fields)r   tuplehasattrdatas    r   is_namedtupler3   A   s*    
 dE"\wtY'?\GDR[D\\r   c                 r    [        U 5      (       a  [        U 5      " [        U5      6 $ [        U 5      " U5      $ )zG
Cast a generator to the same type as obj (list, tuple, or namedtuple)
)r3   typelist)obj	generators     r   
honor_typer9   I   s1    
 SCy$y/**Cy##r   F	test_typeerror_on_other_typec                  ^ ^^^^ [        U[        [        45      (       a  [        UUUU UU4S jU 5       5      $ [        U[        5      (       aF  [        U5      " UR                  5        VVs0 s H  u  pgU[        T U/TQ7TTS.TD6_M     snn5      $ T" U5      (       a  T " U/TQ70 TD6$ T(       a2  [        S[        U5       ST R                   STR                   S35      eU$ s  snnf )a  
Recursively apply a function on a data structure that is a nested list/tuple/dictionary of a given base type.

Args:
    func (`callable`):
        The function to recursively apply.
    data (nested list/tuple/dictionary of `main_type`):
        The data on which to apply `func`
    *args:
        Positional arguments that will be passed to `func` when applied on the unpacked data.
    main_type (`type`, *optional*, defaults to `torch.Tensor`):
        The base type of the objects to which apply `func`.
    error_on_other_type (`bool`, *optional*, defaults to `False`):
        Whether to return an error or not if after unpacking `data`, we get on an object that is not of type
        `main_type`. If `False`, the function will leave objects of types different than `main_type` unchanged.
    **kwargs (additional keyword arguments, *optional*):
        Keyword arguments that will be passed to `func` when applied on the unpacked data.

Returns:
    The same data structure as `data` with `func` applied to every object of type `main_type`.
c              3   J   >#    U  H  n[        TU/TQ7TTS .TD6v   M     g7f)r:   Nrecursively_apply).0oargsr<   funckwargsr;   s     r   	<genexpr>$recursively_apply.<locals>.<genexpr>m   sA       A "!".7M`dj s    #r:   zUnsupported types (z) passed to `z?`. Only nested list/tuple/dicts of objects that are valid for `z` should be passed.)
r   r/   r6   r9   r   r5   itemsr@   	TypeError__name__)rD   r2   r;   r<   rC   rE   kvs   ` ````  r   r@   r@   T   s   , $&&  	
 	
 
D'	"	"Dz
 !JJL	 )DA $!".7M`dj  )	
 	
 
4D*4*6**	!$t*]4==/ J++4+=+=*>>QS
 	
 Ks   . C*
c                   ^^^ [        U 5      (       d  [        U S5      (       a  TS:X  a  Sm U R                  TTS9$ [        U [        [        45      (       a  [        U UUU4S jU  5       5      $ [        U [        5      (       ad  [        T[        5      (       a  T/mOTc  / m[        U 5      " U R                  5        VVs0 s H  u  pVXUT;   a  UO[        UTTTS9_M     snn5      $ U $ ! [         a    U R                  T5      s $ [         a5  n[        5       (       a  [        T[        5      (       a  ST 3m SnAO
UeSnAff = f U R                  TTS9$ ! [         a    U R                  T5      s $ f = fs  snnf )	as  
Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device.

Args:
    tensor (nested list/tuple/dictionary of `torch.Tensor`):
        The data to send to a given device.
    device (`torch.device`):
        The device to send the data to.

Returns:
    The same data structure as `tensor` with all tensors sent to the proper device.
tonpuznpu:0)non_blockingznpu:Nc              3   <   >#    U  H  n[        UTTTS 9v   M     g7f)rP   	skip_keysN)send_to_device)rA   tdevicerP   rS   s     r   rF   !send_to_device.<locals>.<genexpr>   s      ohncd^AvLT]^hns   rR   )r   r0   rN   rI   AssertionErrorr   r   intr/   r6   r9   r   strr5   rH   rT   )r   rV   rP   rS   errorrK   rU   s    ```   r   rT   rT      s{    v'&$"7"7U?F	99V,9?? 
FUDM	*	*ohno
 	
 
FG	$	$i%%"IIF| #LLN*DA Y1N1fS_kt,uu*
 	
 =  	%99V$$ 	  !!fc**#F8_F		%99V,9?? 	%99V$$	%s;   C,  E<
,E		E)E EEE E98E9c                     S n[        X5      $ )a/  
Recursively gathers the information needed to rebuild a nested list/tuple/dictionary of tensors.

Args:
    data (nested list/tuple/dictionary of `torch.Tensor`):
        The data to send to analyze.

Returns:
    The same data structure as `data` with [`~utils.TensorInformation`] instead of tensors.
c                 >    [        U R                  U R                  S9$ )N)shapedtype)r   r^   r_   r   s    r   _get_data_structure/get_data_structure.<locals>._get_data_structure   s     v||6<<HHr   r?   )r2   r`   s     r   get_data_structurerb      s    I 077r   c                     S n[        X5      $ )a  
Recursively gathers the shape of a nested list/tuple/dictionary of tensors as a list.

Args:
    data (nested list/tuple/dictionary of `torch.Tensor`):
        The data to send to analyze.

Returns:
    The same data structure as `data` with lists of tensor shapes instead of tensors.
c                 ,    [        U R                  5      $ r   )r6   r^   r   s    r   
_get_shapeget_shape.<locals>._get_shape   s    FLL!!r   r?   )r2   re   s     r   	get_shaperg      s    " Z..r   c                 $    S n[        X[        S9$ )z
Recursively initializes tensors from a nested list/tuple/dictionary of [`~utils.TensorInformation`].

Returns:
    The same data structure as `data` with tensors instead of [`~utils.TensorInformation`].
c                 V    [         R                  " U R                  SU R                  06$ Nr_   )r   emptyr^   r_   r)   s    r   _initialize_tensor.initialize_tensors.<locals>._initialize_tensor   s"    {{K--G[5F5FGGr   r;   )r@   r+   )data_structurerl   s     r   initialize_tensorsrp      s    H /K`aar   c                    [        U [        [        [        45      (       a'  [	        U 5      S:X  a  [        S[        U 5       S35      e[        U [        [        45      (       a  [        U S   5      $ [        U [        5      (       a%  U R                  5        H  n[        X   5      s  $    O7[        U [        R                  5      (       d  [        S[        U 5       S35      eU R                  S   $ )z
Recursively finds the batch size in a nested list/tuple/dictionary of lists of tensors.

Args:
    data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

Returns:
    `int`: The batch size.
r   z&Cannot find the batch size from empty .z0Can only find the batch size of tensors but got )r   r/   r6   r   len
ValueErrorr5   find_batch_sizekeysr   r   rI   r^   )r2   rK   s     r   ru   ru      s     $g.//SY!^A$t*QOPP$&&tAw''	D'	"	"A"47++ ell++J4PT:,VWXYY::a=r   c                 F     [        U 5      $ ! [        [        4 a     gf = f)a   
Same as [`utils.operations.find_batch_size`] except will ignore if `ValueError` and `TypeErrors` are raised

Args:
    data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

Returns:
    `int`: The batch size.
N)ru   rt   rI   r1   s    r   ignorant_find_batch_sizerx     s,    t$$	" s   
   c                     S n[        X5      $ )a;  
Recursively finds tensors in a nested list/tuple/dictionary and converts them to a list of numbers.

Args:
    data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to convert to regular numbers.

Returns:
    The same data structure as `data` with lists of numbers instead of `torch.Tensor`.
c                     U R                  5       R                  5       n U R                  [        R                  :X  a  U R                  [        R                  5      n U R                  5       $ r   )detachcpur_   r   bfloat16rN   float32tolistr   s    r   _convert_to_list!listify.<locals>._convert_to_list   sF    $$&<<5>>) YYu}}-F}}r   r?   )r2   r   s     r   listifyr     s     -44r   c                 J    S n[        XSS9n[        R                  " 5         U$ )Nc                     U R                   S:X  a  U R                  5       S    n U R                  5       (       d  U R                  5       n [        R
                  " U 5      $ )Nr   )ndimcloneis_contiguous
contiguousxm
all_gatherr   s    r   _tpu_gather_one$_tpu_gather.<locals>._tpu_gather_one-  sL    ;;!\\^D)F ##%%&&(F}}V$$r   Tr<   )r@   r   	mark_step)r   r   ress      r   _tpu_gatherr   ,  s#    % O
NCLLNJr   c                    ^^ [        5       m[        R                  R                  mTR                  R
                  S:X  a  [        R                  R                  5         UU4S jn[        XSS9$ )Nr   c                   > U R                   S:X  a  U R                  5       S    n U R                  5       (       d  U R                  5       n TR                  b  TR                  S:w  aq  [
        R                  " TR                  U R                  5       -  U R                  TR                  S9nT" X5        UR                  " S/U R                  5       SS  Q76 $ [        TR                  5       Vs/ s H  n[
        R                  " U 5      PM     nn[
        R                  R!                  X5        [
        R"                  " USS9$ s  snf )Nr   gloor_   rV   r   dim)r   r   r   r   backendr   rk   num_processesnumelr_   rV   viewsizerange
empty_likedistributedr   cat)r   output_tensors_	gather_opstates      r   _gpu_gather_one$_gpu_gather.<locals>._gpu_gather_oneC  s   ;;!\\^D)F ##%%&&(F==$&)@
 #[[##flln4ll||N
 n-!&&r>FKKM!",=>>
 AFeFYFY@Z[@Z1e..v6@ZN[((@99^33 \s   / ETr   )	r   r   r   all_gather_into_tensorrV   r5   r   synchronizer@   )r   r   r   r   s     @@r   _gpu_gatherr   ;  sS    NE!!88I ||E!		48 _$OOr   c                       \ rS rSrSrSrg)DistributedOperationExceptionib  z}
An exception class for distributed operations. Raised if the operation cannot be performed due to the shape of the
tensors.
 N)rJ   
__module____qualname____firstlineno____doc____static_attributes__r   r   r   r   r   b  s    
 	r   r   c                 0   ^  [        T 5      U 4S j5       nU$ )zn
Verifies that `tensor` is the same shape across all processes. Only ran if `PartialState().debug` is `True`.
c                  R  > [        5       R                  [        R                  :X  d  [        5       R                  (       d  T
" U 0 UD6$ T
R
                   ST
R                   3nSU;   a  US   nOU S   n[        5       R                  R                  [        U5      R                  :w  ag  [        SU SUR                  R                   S[        5       R                  R                   S[        5       R                  R                   SU S35      e[        U5      n[        U/5      nUS   bm  UR                  US   5      [        U5      :H  nU(       dF  S	R                  [!        U5       VVs/ s H  u  pxS
U SU 3PM     snn5      n	[        SU SU	 35      eT
" U 0 UD6$ s  snnf )Nrr   r   r   z%One or more of the tensors passed to z were not on the z+ while the `Accelerator` is configured for z. Please move it to the z before calling z
  - zProcess z: znCannot apply desired operation due to shape mismatches. All shapes across devices must be valid.

Operation: `z`
Input shapes:
  - )r   distributed_typer   NOdebugr   rJ   rV   r5   find_devicer   rg   gather_objectcountrs   join	enumerate)rC   rE   	operationr   shapesoutputare_sameir^   process_shape_strfunctions             r   wrapper!verify_operation.<locals>.wrapperp  s   >**o.@.@@H\H\T,V,,**+1X->->,?@	vH%F!WF>  %%V)<)A)AA/7	{BSTZTaTaTfTfSg  hS  T`  Tb  Ti  Ti  Tn  Tn  So o))5)>)>)C)C(DDTU^T__`b  6"x(!9 ||F1I.#f+=H$,MM[dek[l2m[lxqXaS5'3J[l2m$n!3''0k1GHYGZ\ 
 ((( 3ns   .F#
r   r   r   s   ` r   verify_operationr   k  s"    
 8_) )4 Nr   c                 0   ^  [        T 5      U 4S j5       nU$ )z
Checks that `verify_operation` failed and if so reports a more helpful error chaining the existing
`DistributedOperationException`.
c                     >  T" U 0 UD6$ ! [          a0  nTR                   STR                   3n[        SU S35      UeS nAff = f)Nrr   zError found while calling `z1`. Please see the earlier error for more details.)r   r   rJ   )rC   rE   er   r   s       r   r   "chained_operation.<locals>.wrapper  sc    	T,V,,, 	#../q1B1B0CDI/-i[8ij	s    
A+A  Ar   r   s   ` r   chained_operationr     s"     8_  Nr   c                     [        5       R                  [        R                  :X  a  [	        U 5      $ [        5       R                  [
        ;   a  [        U 5      $ U $ )a  
Recursively gather tensor in a nested list/tuple/dictionary of tensors from all devices.

Args:
    tensor (nested list/tuple/dictionary of `torch.Tensor`):
        The data to gather.

Returns:
    The same data structure as `tensor` with all tensors sent to the proper device.
)r   r   r   XLAr   r   r   r   s    r   gatherr     sF     ~&&/*=*==6""		(	(,M	M6""r   objectc                     [        [        5       R                  5       Vs/ s H  nS PM     nn[        R                  R                  X 5        U VVs/ s H  o3  H  oDPM     M     snn$ s  snf s  snnf r   )r   r   r   r   r   all_gather_object)r   r   output_objectsyxs        r   _gpu_gather_objectr     sa    $),.*F*F$GH$Gqd$GNH	''?%1~!q!AqA~11 I 2s   A*A/c                     [        5       R                  [        R                  :X  a  [	        S5      e[        5       R                  [
        ;   a  [        U 5      $ U $ )a  
Recursively gather object in a nested list/tuple/dictionary of objects from all devices.

Args:
    object (nested list/tuple/dictionary of picklable object):
        The data to gather.

Returns:
    The same data structure as `object` with all the objects sent to every device.
z&gather objects in TPU is not supported)r   r   r   r   NotImplementedErrorr   r   )r   s    r   r   r     sG     ~&&/*=*==!"JKK		(	(,M	M!&))r   c                 "    SS jn[        X SUS9$ )Nc                 @    [         R                  R                  XS9  U $ )Nsrc)r   r   	broadcast)r   r   s     r   _gpu_broadcast_one*_gpu_broadcast.<locals>._gpu_broadcast_one  s    ##F#4r   T)r<   r   r   r?   )r2   r   r   s      r   _gpu_broadcastr     s     /4UXYYr   c                 l  ^^ [        U [        [        45      (       a  [        U U4S j[	        U 5       5       5      $ [        U [
        5      (       aB  [        U 5      " U R                  5        VVs0 s H  u  p4U[        UT SU 3S9_M     snn5      $ [        R                  " TU U4S j5      $ s  snnf )Nc              3   F   >#    U  H  u  p[        UT S U 3S9v   M     g7f)r   nameN)_tpu_broadcast)rA   r   rU   r   s      r   rF   !_tpu_broadcast.<locals>.<genexpr>  s&     "gUfTQ>!TF!A3-#HUfs   !r   r   c                    > U T   $ r   r   )r   r   s    r   <lambda> _tpu_broadcast.<locals>.<lambda>  s	    !C&r   )r   r6   r/   r9   r   r   r5   rH   r   r   mesh_reduce)r   r   r   rK   rL   s    ``  r   r   r     s    &4-((&"gU^_eUf"ghh	FG	$	$F|RXR^R^R`aR`$!Qq$q} EER`abb>>$(899 bs   0B0
                  	   
   c                    Sn[        5       n[        R                  " U[        R                  UR                  S9nU bT  U R
                  n[        U R                     n[        R                  " [        U5      U/-   [        S9US[        U5      S-   & [        USS9nX3R                  5          n[        USS S	   5      nUSS nX64$ )
z]
Grabs the shape of `tensor` only available on one process and returns a tensor of its shape
i   r   Nr_   r   sum	reductionr   r   )r   r   rk   rY   rV   r^   TENSOR_TYPE_TO_INTr_   r   r6   rs   reducenonzero)r   max_tensor_dimensionr   base_tensorr^   tensor_dtyper_   s          r   gather_tensor_shaper     s    
 !NE++2%))ELLYK
 )&,,7(-T%[L>5QY\(]$c%j1n%6K1134KBC #$Ecr"Kr   returnc                     [        5       n[        U 5      u  p#U c5  [        R                  " U[        U   S9R                  UR                  5      n [        U SS9$ )as  
Copys a tensor that only exists on a single device and broadcasts it to other devices. Differs from `broadcast` as
each worker doesn't need to know its shape when used (and tensor can be `None`)

Args:
    tensor (`torch.tensor`):
        The tensor that should be sent to all devices. Must only have it be defined on a single device, the rest
        should be `None`.
r   r   r   )r   r   r   zerosTENSOR_INT_TO_DTYPErN   rV   r   )r   r   r^   r_   s       r   copy_tensor_to_devicesr    sN     NE&v.LE~U*=e*DEHHV&E**r   from_processc                     [        5       R                  [        R                  :X  a
  [	        XSS9$ [        5       R                  [
        ;   a	  [        XS9$ U $ )a  
Recursively broadcast tensor in a nested list/tuple/dictionary of tensors to all devices.

Args:
    tensor (nested list/tuple/dictionary of `torch.Tensor`):
        The data to gather.
    from_process (`int`, *optional*, defaults to 0):
        The process from which to send the data

Returns:
    The same data structure as `tensor` with all tensors broadcasted to the proper device.
zaccelerate.utils.broadcast)r   r   r   )r   r   r   r   r   r   r   )r   r  s     r   r   r     sI     ~&&/*=*==f=YZZ		(	(,M	Mf77r   c                 0  ^ [        5       R                  [        R                  :X  a4  [	        U 5       H#  u  p#[
        R                  " SUU4S j5      X'   M%     U $ [        5       R                  [        ;   a  [        R                  R                  U TS9  U $ )ar  
Broadcast a list of picklable objects form one process to the others.

Args:
    object_list (list of picklable objects):
        The list of objects to broadcast. This list will be modified inplace.
    from_process (`int`, *optional*, defaults to 0):
        The process from which to send the data.

Returns:
    The same list containing the objects from process 0.
z&accelerate.utils.broadcast_object_listc                    > U T   $ r   r   )r   r  s    r   r   'broadcast_object_list.<locals>.<lambda>>  s    efgsetr   r   )r   r   r   r   r   r   r   r   r   r   broadcast_object_list)object_listr  r   r7   s    `  r   r
  r
  /  s}     ~&&/*=*==,FA^^,TVY[tuKN -  
	(	(,M	M///Nr   c                      S n[        X@U5      $ )a*  
Recursively takes a slice in a nested list/tuple/dictionary of tensors.

Args:
    data (nested list/tuple/dictionary of `torch.Tensor`):
        The data to slice.
    tensor_slice (`slice`):
        The slice to take.

Returns:
    The same data structure as `data` with all the tensors slices.
c                 
    X   $ r   r   )r   tensor_slices     r   _slice_tensor$slice_tensors.<locals>._slice_tensorR  s    ##r   r?   )r2   r  process_indexr   r  s        r   slice_tensorsr  D  s    $ ],??r   c                 .  ^ ^ [        T S   [        [        45      (       a/  [        T S   U U4S j[	        [        T S   5      5       5       5      $ [        T S   [        5      (       aS  [        T S   5      " T S   R                  5        VVs0 s H   o"[        T  Vs/ s H  o3U   PM	     snTS9_M"     snn5      $ [        T S   [        R                  5      (       d  [        S[        T S   5       35      e[        R                  " T TS9$ s  snf s  snnf )a  
Recursively concatenate the tensors in a nested list/tuple/dictionary of lists of tensors with the same shape.

Args:
    data (nested list/tuple/dictionary of lists of tensors `torch.Tensor`):
        The data to concatenate.
    dim (`int`, *optional*, defaults to 0):
        The dimension on which to concatenate.

Returns:
    The same data structure as `data` with all the tensors concatenated.
r   c              3   h   >#    U  H"  n[        T Vs/ s H  o"U   PM	     snTS 9v   M$     gs  snf 7f)r   N)concatenate)rA   r   dr2   r   s      r   rF   concatenate.<locals>.<genexpr>f  s/     #lXkSTKt0Dt!1t0D#$NXk0Ds   2-
2r   z%Can only concatenate tensors but got )r   r/   r6   r9   r   rs   r   r5   rv   r  r   r   rI   r   )r2   r   rK   r  s   ``  r   r  r  X  s     $q'E4=))$q'#lX]^abfghbi^jXk#lmm	DGW	%	%DG}UYZ[U\UaUaUcdUcPQD-ADqdD-As!KKUcdeeQ..?T!WOPP99Ts## .Bds   D
D)
D
D
c                       \ rS rSrSrg)CannotPadNestedTensorWarningin  r   N)rJ   r   r   r   r   r   r   r   r  r  n  s    r   r  c           	      $    SS jn[        X@SXUS9$ )a  
Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so they
can safely be gathered.

Args:
    tensor (nested list/tuple/dictionary of `torch.Tensor`):
        The data to gather.
    dim (`int`, *optional*, defaults to 0):
        The dimension on which to pad.
    pad_index (`int`, *optional*, defaults to 0):
        The value with which to pad.
    pad_first (`bool`, *optional*, defaults to `False`):
        Whether to pad at the beginning or the end.
c                   ^^	^
 [        U SS5      (       a  [        R                  " S[        5        U $ T[	        U R
                  5      :  d  T[	        U R
                  5      * :  a  U $ TS:  a  T[	        U R
                  5      -  m[        R                  " U R
                  U R                  S9S    n[        U5      R                  5       n[        U4S jU 5       5      m	T	U R
                  T   :X  a  U $ U R
                  m
[        T
5      nT	UT'   U R                  [        U5      5      U-   nU(       a*  [        UU	U
4S j[        [	        U5      5       5       5      nO([        UU
4S j[        [	        U5      5       5       5      nXU'   U$ )	N	is_nestedFzHCannot pad nested tensors without more information. Leaving unprocessed.r   )rV   c              3   ,   >#    U  H	  oT   v   M     g 7fr   r   )rA   sr   s     r   rF   Fpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>  s     -u!vus   c              3   h   >#    U  H'  oT:X  a  [        TTT   -
  T5      O
[        S 5      v   M)     g 7fr   slice)rA   r   r   max_sizeold_sizes     r   rF   r    s4      `t[\#Xh#.95QU;V`ts   /2c              3   b   >#    U  H$  oT:X  a  [        S TT   5      O
[        S5      v   M&     g7fr   Nr!  rA   r   r   r$  s     r   rF   r    s,     oZnUVCxE!Xc]3U4[PZn   ,/)getattrwarningswarnr  rs   r^   r   r   rV   r   r|   maxr6   	new_zerosr/   r   )r   r   	pad_index	pad_firstr   sizesnew_size
new_tensorindicesr#  r$  s    `       @@r   _pad_across_processes3pad_across_processes.<locals>._pad_across_processes  sL   6;..MMZ, M#fll##sc&,,.?-?'?M73v||$$C ||FLL?Et  "-u--v||C((M<<> %%eHo6B
 `efijrfs`t G oZ_`cdl`mZnooG$7r   T)r<   r   r.  r/  r   r   Fr?   )r   r   r.  r/  r4  s        r   pad_across_processesr7  r  s!    " D 4Sir r   c           	      (    SS jn[        UU SUUUS9$ )z
Takes a `tensor` of arbitrary size and pads it so that it can work given `num_processes` needed dimensions.

New tensors are just the last input repeated.

E.g.:
  Tensor: ([3,4,4]) Num processes: 4 Expected result shape: ([4,4,4])

c                 <  ^^
 X-  nXU-  -
  nX-  S:X  a  X!-
  nOX!U-  -
  nXVU-  s=:  a  S:  a  O  OXV-
  nU R                   m
[        T
5      nX-   US'   U R                  [        U5      5      n[        UU
4S j[	        [        U5      5       5       5      n	XU	'   U$ )Nr   r   c              3   b   >#    U  H$  oT:X  a  [        S TT   5      O
[        S5      v   M&     g7fr&  r!  r'  s     r   rF   @pad_input_tensors.<locals>._pad_input_tensors.<locals>.<genexpr>  s,     kVjQR8a#/tLVjr(  )r^   r6   r-  r/   r   rs   )r   
batch_sizer   r   	remainderlast_inputsto_padr1  r2  r3  r$  s      `      @r   _pad_input_tensors-pad_input_tensors.<locals>._pad_input_tensors  s    /	 $=>&!+"/F"M&ABF &,1, )F<<> )%%eHo6
kV[\_`h\iVjkk$7r   T)r<   r<  r   r   r   r?   )r   r<  r   r   r@  s        r   pad_input_tensorsrB    s(    &  # r   c                 "    SS jn[        X0SXS9$ )a(  
Recursively reduce the tensors in a nested list/tuple/dictionary of lists of tensors across all processes by the
mean of a given operation.

Args:
    tensor (nested list/tuple/dictionary of `torch.Tensor`):
        The data to reduce.
    reduction (`str`, *optional*, defaults to `"mean"`):
        A reduction method. Can be of "mean", "sum", or "none"
    scale (`float`, *optional*):
        A default scaling value to be applied after the reduce, only valied on XLA.

Returns:
    The same data structure as `data` with all the tensors reduced.
c                    [        5       nU R                  5       nUR                  [        R                  :X  a  U$ UR                  [        R
                  :X  aR  [        R                  " 5         [        R                  " [        R                  U/U5        [        R                  " 5         OLUR                  R                  [        ;   a.  [        R                  R                  U[        R                  5        US:X  a  XCR                   -  nU$ )Nmean)r   r   r   r   r   r   r   r   
all_reduce
REDUCE_SUMvaluer   r   r   r   SUMr   )r   r   scaler   cloned_tensors        r   _reduce_across_processes(reduce.<locals>._reduce_across_processes  s    !!_%7%77  !!_%8%88
 LLNMM"---%@LLN##))-NN((E000Mr   T)r<   r   rJ  rE  g      ?r?   )r   r   rJ  rL  s       r   r   r     s    $&  di r   c                 "    S nS n[        XUS9$ )aZ  
Recursively converts the elements nested list/tuple/dictionary of tensors in FP16/BF16 precision to FP32.

Args:
    tensor (nested list/tuple/dictionary of `torch.Tensor`):
        The data to convert from FP16/BF16 to FP32.

Returns:
    The same data structure as `tensor` with all tensors that were in FP16/BF16 precision converted to FP32.
c                 "    U R                  5       $ r   )floatr   s    r   _convert_to_fp32)convert_to_fp32.<locals>._convert_to_fp32  s    ||~r   c                     [        U 5      =(       d    [        U S5      =(       a-    U R                  [        R                  [        R
                  4;   $ rj   )r   r0   r_   r   float16r}   r   s    r   _is_fp16_bf16_tensor-convert_to_fp32.<locals>._is_fp16_bf16_tensor  s@    'C767+C 
MMNNZ
 J
 	
r   rn   r?   )r   rR  rV  s      r   convert_to_fp32rX    s    
 -AUVVr   c                   *    \ rS rSrSrS rS rS rSrg)ConvertOutputsToFp32i  aD  
Decorator to apply to a function outputing tensors (like a model forward pass) that ensures the outputs in FP16
precision will be convert back to FP32.

Args:
    model_forward (`Callable`):
        The function which outputs we want to treat.

Returns:
    The same function as `model_forward` but with converted outputs.
c                 &    Xl         [        X5        g r   )model_forwardr   )selfr\  s     r   __init__ConvertOutputsToFp32.__init__!  s    *t+r   c                 8    [        U R                  " U0 UD65      $ r   )rX  r\  )r]  rC   rE   s      r   __call__ConvertOutputsToFp32.__call__%  s    t114B6BCCr   c                 .    [         R                  " S5      e)NzCannot pickle a prepared model with automatic mixed precision, please unwrap the model with `Accelerator.unwrap_model(model)` before pickling it.)picklePicklingError)r]  s    r   __getstate__!ConvertOutputsToFp32.__getstate__(  s    "" `
 	
r   )r\  N)	rJ   r   r   r   r   r^  ra  rf  r   r   r   r   rZ  rZ    s    
,D
r   rZ  c                 8   ^  [        T 5      m U 4S jnT Ul        U$ )Nc                     > T" U 0 UD6$ r   r   )rC   rE   r\  s     r   forward(convert_outputs_to_fp32.<locals>.forward1  s    d-f--r   )rZ  __wrapped__)r\  rj  s   ` r   convert_outputs_to_fp32rm  .  s!    (7M. (GNr   c                 F   [        U [        5      (       a*  U R                  5        H  n[        U5      nUc  M  Us  $    g[        U [        [
        45      (       a  U  H  n[        U5      nUc  M  Us  $    g[        U [        R                  5      (       a  U R                  $ g)z
Finds the device on which a nested dict/list/tuple of tensors lies (assuming they are all on the same device).

Args:
    (nested list/tuple/dictionary of `torch.Tensor`): The data we want to know the device of.
N)	r   r   valuesr   r/   r6   r   r   rV   )r2   r7   rV   s      r   r   r   :  s     $  ;;=C %F! ! 
D5$-	(	(C %F!  
D%,,	'	'{{ 
(r   c              #   b  #    [        5       R                  [        R                  :w  d<  [        5       R                  b2  [        5       R                  R                  5       (       d  [        5       nOSSKnUR                  R                  XX#S9nU   Sv   SSS5        g! , (       d  f       g= f7f)z|
Wrapper around `deepspeed.runtime.zero.GatheredParameters`, but if Zero-3 is not enabled, will be a no-op context
manager.
Nr   )modifier_rank
fwd_moduleenabled)
r
   r   r   	DEEPSPEEDdeepspeed_pluginis_zero3_init_enabledr   	deepspeedzeroGatheredParameters)paramsrq  rr  rs  gather_param_contextrw  s         r   ry  ry  O  s      **o.G.GG++7 "33IIKK*}(~~@@J  A  
 
 
		s   BB/B	B/
B,(B/)FNr   )r   zbroadcast tensorr   )NNr6  rN  )NNT)Wr   rd  r*  collections.abcr   
contextlibr   r   	functoolsr   r   typingr   r   r   r
   r   	constantsr   dataclassesr   r   importsr   r   r   torch_xla.core.xla_modelcore	xla_modelr   torch.distributedr   r   r'   r+   r3   r9   r@   rT   rb   rg   rp   ru   rx   r   r   r   	Exceptionr   r   r   r   r   r   r   r   rQ  doublehalfr}   uint8int8int16int32int64boolr   rH   r  r   r   r  rY   r   r
  r  r  UserWarningr  r7  rB  r   rX  rZ  rm  r   ry  )rK   rL   s   00r   <module>r     s;     # 2 +   2 8 ;  ))!##*,
6]$ 4CX] 0f1h8$/$b."5.$PN	I 	 F&  &2s 2# &Z: 
KK	LL!	JJ	NNA	KK	JJ	KK	KK	KK	JJ  );(@(@(BC(Bqt(BC 2+5<< +" C  *S *@($,	; 	 4 4n%P & &RW0
 
4	*  I Ds   $G/