
    ȅi{                        % S SK JrJr  S SKJr  S SKrS SKJr  S SK	J
r
  S SKJr  S SKJrJrJr  S SKJrJr   " S S	\5      rSq\S-  \S
'   S\SS4S jr SS\R0                  S\S-  S\\R0                  \S-  4   4S jjr SS\R0                  S\S\S-  S\R0                  4S jjr SS\R0                  S\S\S\S\R:                  S\S-  S\R0                  4S jjr SS\R0                  S\S\S\S-  S\R0                  4
S jjr SS\R0                  S\S-  S\\R0                  \ \   4   4S jjr! SS\S\S-  S\S-  S\R0                  4S jjr"g)    )ABCabstractmethod)AnyN)ShardedTensor)Shard)_all_gather_dtensor_create_chunk_dtensor_create_chunk_sharded_tensor)
DeviceMeshDTensorc                      \ rS rSrSr\S\R                  S\\R                  \	S-  4   4S j5       r
\S\R                  S\	S\R                  4S j5       r\ SS\R                  S	\S
\S\S\R                  S\R                  S-  S\R                  4S jj5       r\S\R                  S	\S\S\R                  4S j5       r\S\R                  S\\R                  \\   4   4S j5       r\S\S\S-  S\R                  4S j5       rSrg)FSDPExtensions   z
This enables some customizable hooks to enable composability with tensor
parallelism. To activate these hooks, use :func:`_set_fsdp_extensions` to
set a custom :class:`FSDPExtensions` that implements the hooks.
tensorreturnNc                     g)z6E.g. converting ``DistributedTensor`` to local tensor.N selfr   s     a/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/fsdp/_fsdp_extensions.pypre_flatten_transform$FSDPExtensions.pre_flatten_transform   s     	    param_extensionc                     g)z6E.g. converting local tensor to ``DistributedTensor``.Nr   )r   r   r   s      r   post_unflatten_transform'FSDPExtensions.post_unflatten_transform   s     	r   rank
world_sizenum_devices_per_nodepgdevicec                     g)z6Shards a tensor to chunks and returns the local chunk.Nr   )r   r   r   r   r    r!   r"   s          r   chunk_tensorFSDPExtensions.chunk_tensor(        	r   device_meshc                     g)zAShards a tensor/DTensor to DTensor and returns the local DTensor.Nr   )r   r   r   r'   s       r   chunk_dtensorFSDPExtensions.chunk_dtensor5   s     	r   c                     g)z
This is to be called before loading a *sharded* model state dict and
should return the tensor and list of shards from which to load data.
Nr   r   s     r   pre_load_state_dict_transform,FSDPExtensions.pre_load_state_dict_transform?   s     	r   parent_meshc                     g)z
This is to be called before loading a *sharded* DTensor state dict.
This gathers tensor in FSDP dimension and returns local tensor of
TP DTensor.
Nr   )r   r   r.   s      r   all_gather_dtensor!FSDPExtensions.all_gather_dtensorJ   r&   r   r   N)__name__
__module____qualname____firstlineno____doc__r   torchTensortupler   r   r   intdistProcessGroupr"   r$   r   r)   listr   r,   r   r0   __static_attributes__r   r   r   r   r      s     
u||S4Z'	(    
	   '+

 
 	

 "
 
 t#
 

 
    	
 
   
u||T%[(	)  

  $&
 
	
 
r   r   _extensions	flattenerr   c                     U q g r2   )r@   )rA   s    r   _set_fsdp_extensionsrC   [   s    Kr   r   fsdp_extensionc                 B    Ub  UR                  U 5      u  p#Ub  X#4$ U S 4$ r2   )r   )r   rD   
new_tensorr   s       r   _ext_pre_flatten_transformrG   `   s6     !&4&J&J6&R#
&..4<r   r   c                 4    Ub  Ub  UR                  X5      $ U $ r2   )r   )r   r   rD   s      r   _ext_post_unflatten_transformrI   k   s%    
 !o&A66vOOMr   r   r   r    r!   c                 D    Ub  UR                   O[        nU" U UUUU5      $ r2   )r$   r
   )r   r   r   r    r!   rD   chunk_tensor_fns          r   _ext_chunk_tensorrL   u   s;     % 	##) 
 
 r   r'   c                 @    Ub  UR                   O[        nU" U UU5      $ r2   )r)   r	   )r   r   r'   rD   chunk_dtensor_fns        r   _ext_chunk_dtensorrO      s5     % 	$$" 
  r   c                     Ub  UR                  U 5      $ [        U 5      [        La  [        S[        U 5       35      eU R	                  5       nX4$ )NzExpected ShardedTensor, got )r,   typer   AssertionErrorlocal_shards)r   rD   shardss      r   "_ext_pre_load_state_dict_transformrU      sU     !;;FCCF|=(;DL>JKK  "Fr   r.   c                 <    Ub  UR                   O[        nU" X5      $ r2   )r0   r   )r   r.   rD   all_gather_dtensor_fns       r   _ext_all_gather_dtensorrX      s*     % 	))  
 !55r   r2   )#abcr   r   typingr   r8   torch.distributeddistributedr<   +torch.distributed._shard.sharded_tensor.apir   -torch.distributed._shard.sharded_tensor.shardr   #torch.distributed.fsdp._shard_utilsr   r	   r
   torch.distributed.tensorr   r   r   r@   __annotations__rC   r9   r:   rG   rI   r;   r=   rL   rO   r>   rU   rX   r   r   r   <module>rb      s   # #     E ? 
 9ES EP &*^d" )N t  -1LL"T) 5<<t#$ -1LL #T) \\	  -1LL
  	
 	 #T) \\4 -1	LL
  #T)	
 \\( -1
LL
"T)
 5<<e$%
  -1
6
6d"
6 #T)
6 \\	
6r   