
    6bi                     l    S r SSKrSSKrSSKJs  Jr  S rS r    S
S jr	SS jr
S r " S S	5      rg)zUtils for running models in a distribution setting.

Mostly from
https://github.com/tensorflow/models/blob/master/official/utils/misc/distribution_utils.py.
    Nc                 \   [         R                  R                  R                  R                  [         R                  R                  R                  R
                  [         R                  R                  R                  R                  S.nX;  a  [        SR                  U 5      5      eX   $ )a+  Return a CollectiveCommunication based on all_reduce_alg.

Args:
  all_reduce_alg: a string specifying which collective communication to
    pick, or None.

Returns:
  tf.distribute.experimental.CollectiveCommunication object

Raises:
  ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"]
)NringncclzqWhen used with `multi_worker_mirrored`, valid values for all_reduce_alg are [`ring`, `nccl`].  Supplied value: {})	tf
distributeexperimentalCollectiveCommunicationAUTORINGNCCL
ValueErrorformat)all_reduce_alg collective_communication_optionss     c/home/james-whalen/.local/lib/python3.13/site-packages/tf_keras/src/benchmarks/distribution_util.py_collective_communicationr      s     mm((@@EE**BBGG**BBGG($
 =GGMvH
 	
 ,;;    c                     U c  g[         R                  R                  [         R                  R                  S.nX;  a  [	        SR                  U 5      5      eX    nU" US9$ )ax  Return a CrossDeviceOps based on all_reduce_alg and num_packs.

Args:
  all_reduce_alg: a string specifying which cross device op to pick, or
    None.
  num_packs: an integer specifying number of packs for the cross device op.

Returns:
  tf.distribute.CrossDeviceOps object or None.

Raises:
  ValueError: if `all_reduce_alg` not in [None, "nccl",
    "hierarchical_copy"].
N)r   hierarchical_copyzqWhen used with `mirrored`, valid values for all_reduce_alg are [`nccl`, `hierarchical_copy`].  Supplied value: {})	num_packs)r   r   NcclAllReduceHierarchicalCopyAllReducer   r   )r   r   mirrored_all_reduce_optionscross_device_ops_classs       r   _mirrored_cross_device_opsr   7   sn     ++]]DD# 8AAGB
 	
 9H!I66r   c                    US:  a  [        S5      eU R                  5       n U S:X  a!  US:  a  [        SR                  U5      5      egU S:X  a0  [        R                  R
                  R                  [        U5      S9$ U S	:X  aU  US:X  a  [        R                  R                  S
5      $ US:  a  [        S5      e[        R                  R                  S5      $ U S:X  aO  US:X  a  S
/nO[        U5       Vs/ s H  nSU-  PM
     nn[        R                  R                  U[        X#5      S9$ [        SU  35      es  snf )a  Return a DistributionStrategy for running the model.

Args:
  distribution_strategy: a string specifying which distribution strategy to
    use. Accepted values are "off", "one_device", "mirrored", and
    "multi_worker_mirrored" -- case insensitive. "off" means not to use
    Distribution Strategy.
  num_gpus: Number of GPUs to run this model.

Returns:
  tf.distribute.DistibutionStrategy object.
Raises:
  ValueError: if `distribution_strategy` is "off" or "one_device" and
    `num_gpus` is larger than 1; or `num_gpus` is negative.
r   z`num_gpus` can not be negative.off   zNWhen {} GPUs are specified, distribution_strategy flag cannot be set to `off`.Nmulti_worker_mirrored)communication
one_devicezdevice:CPU:0z=`OneDeviceStrategy` can not be used for more than one device.zdevice:GPU:0mirroredzdevice:GPU:%d)devicescross_device_opsz$Unrecognized Distribution Strategy: )r   lowerr   r   r   r   MultiWorkerMirroredStrategyr   OneDeviceStrategyrangeMirroredStrategyr   )distribution_strategynum_gpusr   r   r#   is         r   get_distribution_strategyr-   W   s]   * !|:;;1779%a<//5vh/?   77}}))EE3NC F 
 	
 ,q===22>BBa<O  }}..~>>
*q=%&G49(ODOq*OGD}}--7 . 
 	
 
./D.EF  Es   7D=c                    [         R                  " [        R                  R	                  SS5      5      nU(       a?  [        US   R	                  S/ 5      5      [        US   R	                  S/ 5      5      -   nU$ U (       an  U R                  S5      n[        U5      nUS:  a  US:  a  [        S	5      eUS:X  a  SOUn[         R                  " SU0SUS
.S.5      [        R                  S'   U$ SnU$ )zSet multi-worker cluster spec in TF_CONFIG environment variable.

Args:
  worker_hosts: comma-separated list of worker ip:port pairs.

Returns:
  Number of workers in the cluster.
	TF_CONFIGz{}clusterchiefworker,r   r   z2Must specify task_index when number of workers > 1)typeindex)r0   task)	jsonloadsosenvirongetlensplitr   dumps)worker_hosts
task_index	tf_confignum_workersworkerss        r   configure_clusterrD      s     

2::>>+t<=I)I.227B?@3i $$Xr2D
 
& ! 
$$S)'l?zA~D  &*Q

"&**$g.!)J?#


;  r   c                 L    U (       a  U R                  5       nU$ [        5       nU$ N)scopeDummyContextManager)strategystrategy_scopes     r   get_strategy_scoperK      s)    !)  -.r   c                        \ rS rSrS rS rSrg)rH      c                     g rF    )selfs    r   	__enter__DummyContextManager.__enter__       r   c                     g rF   rO   )rP   argss     r   __exit__DummyContextManager.__exit__   rS   r   rO   N)__name__
__module____qualname____firstlineno__rQ   rV   __static_attributes__rO   r   r   rH   rH      s    r   rH   )r"   r   Nr   )N)__doc__r7   r9   tensorflow.compat.v2compatv2r   r   r   r-   rD   rK   rH   rO   r   r   <module>rb      sL     	 ! !<87B %	>BB r   