
    ȅi                     V   S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
JrJrJrJr  S SKJr  S SKJrJrJrJr  S r SS\R.                  S	\S
\S\S\R2                  S\R4                  S-  S\4S jjrS\R.                  S	\S\S\4S jrS\S\S-  S\R.                  4S jrg)    N)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorProperties)ShardMetadata)
DeviceMeshDTensor	Replicater   c                     UR                  5       S:X  a  SU  SU 3$ UR                  5       S:X  a"  SU  SU S[        U5      R                  5        3$ SU  SU SX-   3$ )Ncpuzrank:/hpu:)lowerr   current_device)rankdevice_typenum_devices_per_nodes      ]/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_strr      s}    e#tfAk]++					%tfAk]!,>{,K,Z,Z,\+]^^tfAk]!D,G+HII    tensorr   
world_sizer   pgdevicereturnc                 j   U R                  USS9n[        U5      U:  a{  Xa   R                  5       nU R                  5        Vs/ s H  nSPM     n	n[        R
                  " U R                  5       S   U-  5      U-  U	S'   [        R                  " XyU5      /n
O/ n
U Vs/ s H  n[        UR                  5       5      PM     nnS/[        [        R                  " U Vs/ s H  oS   PM	     sn5      5      SS -   nS/[        US   5      S-
  -  n	U Vs/ s H  o/U	-   PM
     nnUc   [        R                  " U5      R                  OUR                  n[        [        U5      5       Vs/ s H%  n[        [         R"                  " UU5      UU5      PM'     nn[        U5      [        U5      :w  d  [        U5      [        U5      :w  a/  [%        S[        U5       S[        U5       S[        U5       35      e['        UUU5       VVVs/ s H  u  nnn[)        UUU5      PM     nnnn[+        UU R                  5       [-        U R.                  U R0                  S[2        R4                  U R7                  5       S	9S
9n[8        R:                  " U
UUS9$ s  snf s  snf s  snf s  snf s  snf s  snnnf )z
Shard a tensor to chunks along the first dimension. The local rank will gets its
corresponding chunk as the local shard to create a ShardedTensor.
r   )dimN   zQExpected chunk_sizes, chunk_offsets, and placements to have the same length, got z, F)dtypelayoutrequires_gradmemory_format
pin_memory)shards_metadatasizetensor_properties)sharded_tensor_metadataprocess_group)chunklencloner)   mathceilr   from_tensor_and_offsetslist	itertools
accumulater   _get_pg_default_devicetyperanger   distget_global_rankAssertionErrorzipr	   r   r   r#   r$   torchcontiguous_format	is_pinnedr   +_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunkslocal_shard_offsetslocal_shardsr-   chunk_sizes
chunk_sizedim0_offsetsd0chunk_offsetsr   r
placementsoffsetr)   	placementshard_metadatar+   s                            r   _create_chunk_sharded_tensorrP      s    \\*!\,F
6{Tl((*$kkm,m1m,YYv{{}Q/*<=D
55kDQR 4::6%4

%6K:3kJk
mkJK	r L cSQ(1,-G.:;lTG^lM; > 	//388[[  s;'( )A 	  Q' 	

 )   ;3}--[1AS_1T{#$Bs='9&:"S_<MO
 	
 (+=+z'R'R#FD) 	fdI.'R   4&[[]*,,==11'')

 DD.EUW ] - ;J <s$   J!#J!JJ$.,J)J.device_meshc                 h   U R                  5       R                  5       n [        UR                  5       Vs/ s H  n[	        5       PM     nn[        UR                  5       Vs/ s H  n[	        5       PM     nn[        S5      US'   [        R                  " XUSS9R                  US9$ s  snf s  snf )z
Shard a tensor to chunks along the first dimension. The local rank will gets its
corresponding chunk as the local tensor to create a DTensor.
r   r!   F)	run_check)rL   )	detachr/   r8   ndimr   DShardr   
from_localredistribute)r   r   rQ   rC   replicate_placementsshard_placementss         r   _create_chunk_dtensorr[   _   s     ]]_""$F 27{7G7G1HI1HAIK1HI-2;3C3C-DE-D	-DE!!9R1Ul#  	 JEs   B*#B/	root_meshc                     XR                   :w  a  [        S5      e[        [        R                  " U R
                  5      5      n[        5       US'   U R                  U R                   US9n U R                  5       $ )zL
All gather a DTensor in its sharded dimension and return the local tensor.
z2The device mesh of a tensor should be a root mesh.r!   )rQ   rL   )	rQ   r;   r3   copydeepcopyrL   r   rX   to_local)r   r\   rL   s      r   _all_gather_dtensorra   x   sr     &&&QRRdmmF$5$567J [JrN  && ! F
 ??r   )N)r^   r4   r0   r=   torch.distributeddistributedr9   torch._utilsr   r   'torch.distributed._shard.sharded_tensorr   r   r   r   &torch.distributed._shard.sharding_specr	   torch.distributed.tensorr
   r   r   rV   r   TensorintProcessGroupr   rP   r[   ra    r   r   <module>rl      s          + .  A T TJ #'?LL?
? ? 	?
 	? LL4? ?DLL
  	2D  \\r   