
    /h                        S SK Jr  SSKJrJrJr  SSKJrJrJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9  SSK:J;r;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrCJDrDJErEJFrFJGrGJHrH  SSKIJJrJJKrKJLrLJMrMJNrNJOrOJPrPJQrQJRrRJSrSJTrTJUrUJVrVJWrWJXrXJYrYJZrZJ[r[J\r\J]r]J^r^J_r_J`r`JaraJbrbJcrcJdrdJereJfrfJgrgJhrhJiriJjrjJkrkJlrlJmrmJnrnJoroJprpJqrqJrrrJsrsJtrtJuruJvrvJwrwJxrxJyryJzrzJ{r{J|r|J}r}J~r~JrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SS	KJrJrJrJrJrJrJr  SS
KJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJr  \X" 5       (       a  SSKJrJrJrJrJrJrJrJr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJrJrJrJrJrJr  SSKJrJrJrJrJrJrJr  \b" 5       (       a*  SSKJrJrJrJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJrJr  SSKJrGJ Gr GJGrGJGrGJGrGJGrGJGrGJGrGJGrGJGrGJ	Gr	GJ
Gr
GJGrGJGrGJGrGJGrGJGrGJGr  SSGKGJGrGJGrGJGr  SSGKGJGr  SSGKGJGr  SSGKGJGrGJGrGJGrGJGr  g)   )ParallelismConfig   )convert_model_to_fp8_ao#filter_first_and_last_linear_layershas_ao_layers)(MITA_PROFILING_AVAILABLE_PYTORCH_VERSION
MODEL_NAMEOPTIMIZER_NAMEPROFILE_PATTERN_NAMERNG_STATE_NAMESAFE_MODEL_NAMESAFE_WEIGHTS_INDEX_NAMESAFE_WEIGHTS_NAMESAFE_WEIGHTS_PATTERN_NAMESAMPLER_NAMESCALER_NAMESCHEDULER_NAME!TORCH_DISTRIBUTED_OPERATION_TYPESTORCH_LAUNCH_PARAMSWEIGHTS_INDEX_NAMEWEIGHTS_NAMEWEIGHTS_PATTERN_NAME'XPU_PROFILING_AVAILABLE_PYTORCH_VERSION) AORecipeKwargsAutocastKwargsBnbQuantizationConfigComputeEnvironmentCustomDtypeDataLoaderConfigurationDDPCommunicationHookTypeDeepSpeedPluginDistributedDataParallelKwargsDistributedTypeDynamoBackendFP8RecipeKwargsFullyShardedDataParallelPluginGradientAccumulationPluginGradScalerKwargsInitProcessGroupKwargsKwargsHandler
LoggerTypeMegatronLMPluginMSAMPRecipeKwargsPrecisionTypeProfileKwargsProjectConfigurationRNGTypeSageMakerDistributedTypeTensorInformationTERecipeKwargsTorchContextParallelConfigTorchDynamoPluginTorchTensorParallelConfigTorchTensorParallelPlugin#add_model_config_to_megatron_parser)are_libraries_initializedcheck_cuda_fp8_capabilitycheck_cuda_p2p_ib_supportclear_environmentconvert_dict_to_env_variablesget_cpu_distributed_informationget_gpu_infoget_int_from_envparse_choice_from_envparse_flag_from_envpatch_environmentpurge_accelerate_environmentset_numa_affinitystr_to_bool)9deepspeed_requiredget_ccl_versionis_4bit_bnb_availableis_8bit_bnb_availableis_aim_availableis_bf16_available'is_bitsandbytes_multi_backend_availableis_bnb_availableis_boto3_availableis_ccl_availableis_clearml_availableis_comet_ml_availableis_cuda_availableis_datasets_availableis_deepspeed_availableis_dvclive_availableis_fp8_availableis_fp16_availableis_habana_gaudi1is_hpu_availableis_import_timer_availableis_ipex_availableis_lomo_availableis_matplotlib_availableis_megatron_lm_availableis_mlflow_availableis_mlu_availableis_mps_availableis_msamp_availableis_musa_availableis_npu_availableis_pandas_availableis_peft_availableis_pippy_availableis_pynvml_availableis_pytest_availableis_rich_availableis_sagemaker_availableis_schedulefree_availableis_sdaa_availableis_swanlab_availableis_tensorboard_availableis_timm_availableis_torch_xla_availableis_torchao_availableis_torchdata_available*is_torchdata_stateful_dataloader_availableis_torchvision_availableis_trackio_availableis_transformer_engine_availableis_transformers_availableis_triton_availableis_wandb_availableis_weights_only_availableis_xccl_availableis_xpu_availabletorchao_required)align_module_devicecalculate_maximum_sizescheck_device_mapcheck_tied_parameters_in_config$check_tied_parameters_on_same_devicecompute_module_sizesconvert_file_size_to_intdtype_byte_sizefind_tied_parametersget_balanced_memoryget_grad_scalerget_max_layer_sizeget_max_memory#get_mixed_precision_context_managerhas_offloaded_paramsid_tensor_storageinfer_auto_device_mapis_peft_modelload_checkpoint_in_modelload_offloaded_weightsload_state_dictnamed_module_tensorsretie_parametersset_module_tensor_to_device)OffloadedWeightsLoaderPrefixedDatasetextract_submodules_state_dictload_offloaded_weightoffload_state_dictoffload_weightsave_offload_index)CannotPadNestedTensorWarningGatheredParameters	broadcastbroadcast_object_listconcatenateconvert_outputs_to_fp32convert_to_fp32copy_tensor_to_devicesfind_batch_sizefind_devicegathergather_objectget_data_structure
honor_typeignorant_find_batch_sizeinitialize_tensorsis_namedtupleis_tensor_informationis_torch_tensorlistifypad_across_processespad_input_tensorsrecursively_applyreducesend_to_deviceslice_tensors)compare_versionsis_torch_version)DeepSpeedEngineWrapperDeepSpeedOptimizerWrapperDeepSpeedSchedulerWrapper
DummyOptimDummySchedulerHfDeepSpeedConfigget_active_deepspeed_pluginmap_pytorch_optim_to_deepspeed)has_4bit_bnb_layersload_and_quantize_model)"disable_fsdp_ram_efficient_loading!enable_fsdp_ram_efficient_loadingensure_weights_retiedfsdp2_apply_acfsdp2_canonicalize_namesfsdp2_load_full_state_dictfsdp2_prepare_model!fsdp2_switch_optimizer_parametersget_fsdp2_grad_scalerload_fsdp_modelload_fsdp_optimizermerge_fsdp_weightssave_fsdp_modelsave_fsdp_optimizer)PrepareForLaunch_filter_argsprepare_deepspeed_cmd_envprepare_multi_gpu_envprepare_sagemager_args_inputsprepare_simple_launcher_cmd_envprepare_tpu)AbstractTrainStepBertTrainStepGPTTrainStepMegatronLMDummyDataLoaderMegatronLMDummySchedulerT5TrainStep%avg_losses_across_data_parallel_group)MegatronEngineMegatronLMOptimizerWrapperMegatronLMSchedulerWrapper"gather_across_data_parallel_groups)
initialize)prepare_data_loader)!prepare_model_optimizer_scheduler)prepare_optimizer)prepare_scheduler)find_executable_batch_sizerelease_memory)check_os_kernel clean_state_dict_for_safetensorscompile_regionscompile_regions_deepspeedconvert_bytesextract_model_from_parallelget_module_children_bottom_upget_pretty_namehas_compiled_regionsis_compiled_moduleis_port_in_useloadmerge_dictsmodel_has_dtensorrecursive_getattrsavewait_for_everyonewrite_basic_config)set_seedsynchronize_rng_statesynchronize_rng_states)install_xla)tqdm)apply_fp8_autowrapcontextual_fp8_autocastconvert_modelhas_transformer_engine_layersN(  parallelism_configr   aor   r   r   	constantsr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   dataclassesr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   environmentr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   importsrH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   modelingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   offloadr   r   r   r   r   r   r   
operationsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   versionsr   r   	deepspeedr   r   r   r   r   r   r   r   bnbr   r   
fsdp_utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   launchr   r   r   r   r   r   r   megatron_lmr   r   r   r   r   r   r   r   r   r   r   r   megatron_lm_initializer   megatron_lm_prepare_data_loaderr   -megatron_lm_prepare_model_optimizer_schedulerr   megatron_lm_prepare_optimizerr   megatron_lm_prepare_schedulermemoryr   r   otherr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   randomr   r   r  	torch_xlar  r  transformer_enginer  r  r  r       S/home/james-whalen/.local/lib/python3.13/site-packages/accelerate/utils/__init__.py<module>r$     s   3 [ [    (! ! ! ! ! ! ! ! !D    : : : : : : : : : : : : : : :v      4        8 9 	 	 	 >           BSoOO >         ( L K K " "    r"  