
    hA                        S r SSKrSSKrSSKrSSKrSSKrSSKrSSKJrJr  SSK	J	r	J
r
  SSKJr  SSKrSSKrSSKJr  SSKJrJr  SSKJr  SS	KJrJr  SS
KJrJr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)  SSK*J+r+  SSK,J-r-J.r.J/r/J0r0J1r1  SSK2J3r3J4r4  SSK5J6r6  SSK7J8r8  SSK9J:r:J;r;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrCJDrDJErEJFrF   " S S5      rGg)zz
Train a model on a dataset.

Usage:
    $ yolo mode=train model=yolo11n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16
    N)copydeepcopy)datetime	timedelta)Path)distributed)nnoptim)__version__)get_cfgget_save_dir)check_cls_datasetcheck_det_dataset)load_checkpoint)DEFAULT_CFGGIT
LOCAL_RANKLOGGERRANKTQDMYAML	callbacks	clean_urlcolorstremojis)check_train_batch_size)	check_amp
check_filecheck_imgszcheck_model_file_from_stem
print_args)ddp_cleanupgenerate_ddp_command)get_latest_run)plot_results)	TORCH_2_4EarlyStoppingModelEMAattempt_compileautocast$convert_optimizer_state_dict_to_fp16
init_seeds	one_cycleselect_devicestrip_optimizertorch_distributed_zero_firstunset_deterministicunwrap_modelc                   V   \ rS rSrSr\SS4S jrS\4S jrS\4S jr	S\4S jr
S	 rS
 rS rS rS rS/S jrS0S jrS1S\4S jjrS rS rS rS rS rS rS rS rS2S jrS rS3S jrS4S jrS5S jr S r!S  r"S! r#S" r$S# r%S$ r&S% r'S1S& jr(S' r)S( r*S) r+S* r,S+ r-S, r.S6S- jr/S.r0g)7BaseTrainer?   a+  
A base class for creating trainers.

This class provides the foundation for training YOLO models, handling the training loop, validation, checkpointing,
and various training utilities. It supports both single-GPU and multi-GPU distributed training.

Attributes:
    args (SimpleNamespace): Configuration for the trainer.
    validator (BaseValidator): Validator instance.
    model (nn.Module): Model instance.
    callbacks (defaultdict): Dictionary of callbacks.
    save_dir (Path): Directory to save results.
    wdir (Path): Directory to save weights.
    last (Path): Path to the last checkpoint.
    best (Path): Path to the best checkpoint.
    save_period (int): Save checkpoint every x epochs (disabled if < 1).
    batch_size (int): Batch size for training.
    epochs (int): Number of epochs to train for.
    start_epoch (int): Starting epoch for training.
    device (torch.device): Device to use for training.
    amp (bool): Flag to enable AMP (Automatic Mixed Precision).
    scaler (amp.GradScaler): Gradient scaler for AMP.
    data (str): Path to data.
    ema (nn.Module): EMA (Exponential Moving Average) of the model.
    resume (bool): Resume training from a checkpoint.
    lf (nn.Module): Loss function.
    scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
    best_fitness (float): The best fitness value achieved.
    fitness (float): Current fitness value.
    loss (float): Current loss value.
    tloss (float): Total loss value.
    loss_names (list): List of loss names.
    csv (Path): Path to results CSV file.
    metrics (dict): Dictionary of metrics.
    plots (dict): Dictionary of plots.

Methods:
    train: Execute the training process.
    validate: Run validation on the test set.
    save_model: Save model training checkpoints.
    get_dataset: Get train and validation datasets.
    setup_model: Load, create, or download model.
    build_optimizer: Construct an optimizer for the model.

Examples:
    Initialize a trainer and start training
    >>> trainer = BaseTrainer(cfg="config.yaml")
    >>> trainer.train()
Nc                 
   UR                  SS5      U l        [        X5      U l        U R	                  U5        [        U R                  R                  5      U l        S[        U R                  5      ;   a  [        R                  " S5      O[        U R                  5      U R                  l        SU l
        SU l        0 U l        [        U R                  R                  S-   [        -   U R                  R                   S9  [#        U R                  5      U l        U R$                  R&                  U R                  l        U R$                  S-  U l        [        S;   au  U R(                  R+                  S	S	S
9  [        U R$                  5      U R                  l        [,        R.                  " U R$                  S-  [1        U R                  5      5        U R(                  S-  U R(                  S-  sU l        U l        U R                  R6                  U l        U R                  R8                  U l        U R                  R<                  =(       d    SU l        SU l        [        S:X  a  [A        [1        U R                  5      5        U R                  RB                  S;   a  SU R                  l"        [G        U R                  RH                  5      U l$        [K        [L        5         U RO                  5       U l(        SSS5        SU l)        SU l*        SU l+        SU l,        SU l-        SU l.        SU l/        S/U l0        U R$                  S-  U l1        U Rb                  Re                  5       (       a5  U R                  Rf                  (       d  U Rb                  Ri                  5         / SQU l5        SU l6        U=(       d    [n        Rp                  " 5       U l7        [s        U R                  R                  [        5      (       aS  [u        U R                  R                  5      (       a/  [u        U R                  R                  Rw                  S5      5      nO[s        U R                  R                  [x        [z        45      (       a   [u        U R                  R                  5      nOEU R                  R                  S;   a  SnO([|        R~                  R                  5       (       a  SnOSnUS:  =(       a    S[        R                  ;  U lB        X@lC        [        S;   a:  U R                  (       d(  [n        R                  " U 5        U R                  S5        ggg! , (       d  f       GNb= f)z
Initialize the BaseTrainer class.

Args:
    cfg (str, optional): Path to a configuration file.
    overrides (dict, optional): Configuration overrides.
    _callbacks (list, optional): List of callback functions.
sessionNcudaCUDA_VISIBLE_DEVICES   )deterministicweights   r   Tparentsexist_okz	args.yamlzlast.ptzbest.ptd   r   r>   >   cpumpsLosszresults.csv)r   r:      ,r   on_pretrain_routine_start)Fpophub_sessionr   argscheck_resumer.   devicestrosgetenv	validatormetricsplotsr,   seedr   r;   r   save_dirnamewdirmkdirr   savevarslastbestsave_periodbatch
batch_sizeepochsstart_epochr!   typeworkersr    modelr0   r   get_datasetdataemalf	schedulerbest_fitnessfitnesslosstloss
loss_namescsvexistsresumeunlinkplot_idxnan_recovery_attemptsr   get_default_callbacks
isinstancelensplittuplelisttorchr8   is_availableenvironddp
world_sizeadd_integration_callbacksrun_callbacks)selfcfg	overrides
_callbacksr   s        T/home/james-whalen/.local/lib/python3.13/site-packages/ultralytics/engine/trainer.py__init__BaseTrainer.__init__r   s    %==D9C+	)$#DII$4$45@F#dkkJZ@Z299%;<`cdhdodo`p		
499>>A%,DII<S<ST %TYY/++		MMI-	7?IIOOD4O8!$T]]!3DIIIIdmmk14		?C#yy94dii)6K	499900))//ii&&-#2:tDII' ;;~- !DII 0		@
)*5((*DI 6   !	
!(===088??TYY%5%5HHOO!%&" $Hy'F'F'Hdii&&,,TYY5E5E1F1FTYY--33C89J		((5$-88TYY--.JYY/JZZ$$&&JJ>Dl"**&D$7?488//5:; $,?M 65s   #U
U#eventc                 @    U R                   U   R                  U5        g)z7Append the given callback to the event's callback list.N)r   appendr   r   callbacks      r   add_callbackBaseTrainer.add_callback   s    u$$X.    c                 $    U/U R                   U'   g)zPOverride the existing callbacks with the given callback for the specified event.N)r   r   s      r   set_callbackBaseTrainer.set_callback   s    !)
ur   c                 Z    U R                   R                  U/ 5       H  nU" U 5        M     g)z>Run all existing callbacks associated with a particular event.N)r   getr   s      r   r   BaseTrainer.run_callbacks   s$    **5"5HTN 6r   c                    U R                   (       a  U R                  R                  (       a'  [        R                  " S5        SU R                  l        U R                  R
                  S:  a)  [        SU R                   SU R                  S-   S35      e[        U 5      u  p [        R                  " [        S5       S	S
R                  U5       35        [        R                  " USS9   [        U [!        U5      5        gU R#                  5         g! [         a  nUeSnAff = f! [        U [!        U5      5        f = f)zIAllow device='', device=None on Multi-GPU systems to default to device=0.zI'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'F      ?zuAutoBatch with batch<1 not supported for Multi-GPU training, please specify a valid batch size multiple of GPU count z, i.e. batch=   .zDDP:z debug command  T)checkN)r~   rK   rectr   warningr^   
ValueErrorr   r#   infor   join
subprocessrun	Exceptionr"   rN   	_do_train)r   cmdfilees       r   trainBaseTrainer.train   s    88yy~~jk!&		yy$ OOSN__lmqm|m|  @A  nA  mB  BCD  -T2IC-x/0OPs$/ D#d), NN   D#d),s%   %AD 
D&D!!D&&D) )E c                 "  ^  T R                   R                  (       a1  [        ST R                   R                  T R                  5      T l        OU 4S jT l        [        R                  R                  T R                  T R
                  S9T l
        g)z,Initialize training learning rate scheduler.r:   c                    > [        SU TR                  -  -
  S5      STR                  R                  -
  -  TR                  R                  -   $ )Nr:   r   r   )maxr`   rK   lrf)xr   s    r   <lambda>.BaseTrainer._setup_scheduler.<locals>.<lambda>   s=    ADKK$7 ;sTYY]]?R SVZV_V_VcVc cr   )	lr_lambdaN)rK   cos_lrr-   r   r`   rh   r
   lr_schedulerLambdaLR	optimizerri   r   s   `r   _setup_schedulerBaseTrainer._setup_scheduler   sX    99499==$++>DGcDG++44T^^tww4Wr   c                 @   [         R                  R                  [        5        [         R                  " S[        5      U l        S[
        R                  S'   [        R                  " [        R                  " 5       (       a  SOS[        SS9[        U R                  S9  g	)
zGInitialize and set the DistributedDataParallel parameters for training.r8   1TORCH_NCCL_BLOCKING_WAITncclglooi0*  )seconds)backendtimeoutrankr   N)r{   r8   
set_devicer   rM   rO   r}   distinit_process_groupis_nccl_availabler   r   r   s    r   
_setup_ddpBaseTrainer._setup_ddp   sh    

d#ll64014

-."4466FFe,		
r   c           	        ^ U R                  5       nU R                  R                  U R                  5      U l        U R	                  5         [        U R                  U R                  U R                  R                  S9U l        [        U R                  R                  [        5      (       a  U R                  R                  OI[        U R                  R                  [        5      (       a  [        U R                  R                  5      O/ nS/nU Vs/ s H	  nSU S3PM     snU-   nXPl        U R                  R                  5        H  u  mn[        U4S jU 5       5      (       a#  [         R"                  " ST S35        SUl        MC  UR$                  (       a  MV  UR&                  R(                  (       d  Ms  [         R*                  " S	T S
35        SUl        M     [,        R.                  " U R                  R0                  5      R                  U R                  5      U l        U R0                  (       aj  [2        S;   a`  [4        R6                  R9                  5       n[,        R.                  " [;        U R                  5      U R                  S9U l        U[4        l        [2        S:  a=  U R<                  S:  a-  [>        R@                  " U R0                  R                  5       SS9  [C        U R0                  5      U l        [D        (       a(  [,        R0                  RG                  SU R0                  S9O0[,        RH                  R0                  RG                  U R0                  S9U l%        U R<                  S:  a3  [L        RN                  RQ                  U R                  [2        /SS9U l        [S        [        [U        U R                  S5      (       a$  U R                  RV                  RS                  5       OS5      S5      n[Y        U R                  RZ                  XSS9U R                  l-        Xl+        U R\                  S:  a0  [2        S:X  a&  U R_                  5       =U R                  l0        U l.        U R\                  [S        U R<                  S5      -  n	U Rc                  U Rd                  S   U	[f        SS9U l4        U Rc                  U Rd                  Rk                  S5      =(       d    U Rd                  Rk                  S5      U R                  Rl                  S:X  a  U	OU	S-  [f        SS9U l7        U Rq                  5       U l9        [u        U R                  5      U l;        [2        S;   a  U Rr                  Rx                  Rz                  U R}                  SS9-   n
[        [        U
S/[        U
5      -  5      5      U l<        U R                  R                  (       a  U R                  5         [S        [        U R                  R                  U R\                  -  5      S5      U lF        U R                  R                  U R\                  -  U R                  -  U R                  R                  -  n[        R                  " [        U Rh                  R                  5      [S        U R\                  U R                  R                  5      -  5      U R                  -  nU R                  U R                  U R                  R                  U R                  R                  U R                  R                  UUS9U lM        U R                  5         [        U R                  R                  S 9SsU lS        U lT        U R                  U5        U R                  S-
  U R                  lX        U R                  S!5        g"s  snf )#z8Build dataloaders and optimizer on correct rank process.)rM   modez.dflzmodel.r   c              3   ,   >#    U  H	  oT;   v   M     g 7fN ).0r   ks     r   	<genexpr>+BaseTrainer._setup_train.<locals>.<genexpr>  s     6#5a6#5s   zFreezing layer ''Fz/setting 'requires_grad=True' for frozen layer 'zE'. See ultralytics.engine.trainer for customization of frozen layers.Tr=   )rM   r>   r:   r   srcr8   )enabled)
device_idsfind_unused_parametersstride    )r   floormax_dimr   )r_   r   r   valtestobbrF   )prefix)rd   rV   lrmomentumdecay
iterations)patienceon_pretrain_routine_endN)Zsetup_modelrd   torM   set_model_attributesr)   rK   compilerv   freezerz   intrangefreeze_layer_namesnamed_parametersanyr   r   requires_graddtypeis_floating_pointr   r{   tensorampr   r   default_callbacksr   r   r   r   	broadcastboolr&   
GradScalerr8   scalerr	   parallelDistributedDataParallelr   hasattrr   r   imgszr_   
auto_batchr^   get_dataloaderrf   r   train_loaderr   tasktest_loaderget_validatorrQ   r(   rg   rR   keyslabel_loss_itemsdictziprw   rS   plot_training_labelsroundnbs
accumulateweight_decaymathceildatasetr`   build_optimizerr   lr0r   r   r'   r   stopperstopresume_trainingra   ri   
last_epochr   )r   ckptfreeze_listalways_freeze_namesr   r   vcallbacks_backupgsr_   metric_keysr  r   r   s                @r   _setup_trainBaseTrainer._setup_train  s   !ZZ]]4;;/
!!# %TZZ$))J[J[\

 $))**D11 II $))**C00 tyy''( 	  &h5@A[qcm[ADWW"4JJ//1DAq6#5666.qc34"'___)B)B)BEaS IY Y #' 2 <<		.11$++>88(::??A||Idjj$9$++NDH*:I'"91,NN488<<>q1>>GiEII   :UZZ^^MfMfosowowMfMx 	 ??Q<<TZZUYTZsw<xDJ 

H0M0MTZZ&&**,SUVXZ[%diioobTUV		 ??Q42:040AADIIOdo __DOOQ(??
 //IIg:JW 0 
  ..IIMM% 9DIIMM&$9%)YY^^u%<z*q.	 / 
 ++-DJJ'7?..00558M8MUZ8M8[[KK!s;7G1G HIDLyy))+ eDIIMMDOO$CDaHyy--?$//QTXT]T]TaTaaYYs4#4#4#<#<=DOOUYU^U^UbUb@ccdgkgrgrr
--**$$yy}}YY''! . 
 	"/9K9K"LediT"$($4$4q$8!45W Bs   ^<c                 V   U R                   S:  a  U R                  5         U R                  5         [        U R                  5      nU R
                  R                  S:  a,  [        [        U R
                  R                  U-  5      S5      OSnSnSU l	        [        R                  " 5       U l        [        R                  " 5       U l        U R                  S5        [        R                  " SU R
                  R                    SU R
                  R                    S	U R                  R"                  U R                   =(       d    S-   S
[%        SU R&                  5       S3	U R
                  R                  (       a  U R
                  R                   S3OU R(                   S3-   5        U R
                  R*                  (       aI  U R(                  U R
                  R*                  -
  U-  nU R,                  R/                  XDS-   US-   /5        U R0                  nU R2                  R5                  5          XPl        U R                  S5        [8        R:                  " 5          [8        R<                  " S5        U R>                  RA                  5         SSS5        U RC                  5         [D        S:w  a%  U R                  RF                  RI                  U5        [K        U R                  5      nXPR(                  U R
                  R*                  -
  :X  a*  U RM                  5         U R                  RO                  5         [D        S;   aA  [        R                  " U RQ                  5       5        [S        [K        U R                  5      US9nSU l*        U GH  u  pxU R                  S5        XqU-  -   n	X::  Ga%  SU/n
[        S[W        [X        RZ                  " XSU R
                  R\                  U R^                  -  /5      R                  5       5      5      U l0        [K        U R2                  Rb                  5       H  u  p[X        RZ                  " XUS:X  a  U R
                  Rd                  OSUS   U Rg                  U5      -  /5      US'   SU;   d  MZ  [X        RZ                  " XU R
                  Rh                  U R
                  Rj                  /5      US'   M     [m        U Rn                  5         U Rq                  U5      nU R
                  Rr                  (       a@  U Ru                  US   5      n[w        U Rt                  5      Ry                  X5      u  ol=        OU Ru                  U5      u  ol=        UR}                  5       U l<        [D        S:w  a  U =Rx                  U R                   -  sl<        U RT                  c  U Rz                  O!U RT                  U-  U Rz                  -   US-   -  U l*        SSS5        U R~                  R                  U Rx                  5      R                  5         X-
  U R`                  :  a  U R                  5         U	nU R
                  R                  (       a  [        R                  " 5       U R                  -
  U R
                  R                  S-  :  U lC        [D        S:w  a:  [D        S:X  a  U R                  OS/n[        R                  " US5        US   U lC        U R                  (       a    GOG[D        S;   Ga&  [        U RT                  R                  5      (       a  U RT                  R                  S   OSnUR                  SSSU-   -  -   US-    SU R(                   3U R                  5       S S3/US:  a  U RT                  O [        R                  " U RT                  S5      QUS    R                  S   PUS   R                  S   P7-  5        U R                  S!5        U R
                  R                  (       a   XR,                  ;   a  U R                  X5        U R                  S"5        GM     [K        U R2                  Rb                  5       VVs0 s H  u  nnS#U 3US   _M     snnU lM        U R                  S$5        [D        S;   a8  US-   U R(                  :  nU R                  R                  U Rt                  / S%QS&9  U R
                  R                  (       d3  W(       d,  U R                  R                  (       d  U R                  (       a,  U R                  S'S(9  U R                  5       u  U lU        U lV        U R                  U5      (       a  GM@  SU lX        [D        S;   Ga*  U R                  0 U R                  U RT                  5      EU R                  EU R                  ES)9  U =R                  U R                  US-   U R                  5      =(       d    W-  slC        U R
                  R                  (       aO  U =R                  [        R                  " 5       U R                  -
  U R
                  R                  S-  :  -  slC        U R
                  R                  (       d  W(       a!  U R                  5         U R                  S*5        [        R                  " 5       nUU R                  -
  U l	        UU l        U R
                  R                  (       a  UU R                  -
  XPR0                  -
  S-   -  n[        R                  " U R
                  R                  S-  U-  5      =U l        U R
                  l        U R                  5         U R6                  U R>                  l`        U =R                  XPR(                  :  -  slC        U R                  S+5        U R                  S'5        [D        S:w  a:  [D        S:X  a  U R                  OS/n[        R                  " US5        US   U lC        U R                  (       a  OUS-  nG	M  [        R                  " 5       U R                  -
  n[        R                  " S,XPR0                  -
  S-    S-US-  S. S/35        U R                  5         [D        S;   a<  U R
                  R                  (       a  U R                  5         U R                  S05        U R                  5         [        5         U R                  S15        g! , (       d  f       G
Nw= f! , (       d  f       GN(= fs  snnf )2z.Train the model with the specified world size.r:   r   rB   r>   Non_train_startzImage sizes z train, z val
Using z' dataloader workers
Logging results to boldz
Starting training for z	 hours...z
 epochs...rF   on_train_epoch_startignorer=   )totalon_train_batch_start        
initial_lrr   r   imgi  z%11s%11sz%11.4g/z.3gGclson_batch_endon_train_batch_endzlr/pgon_train_epoch_end)yamlncrK   namesr   class_weights)includeg      ?)	threshold)rR   on_model_saveon_fit_epoch_end
z epochs completed in z.3fz hours.on_train_endteardown)dr   r   r  rw   r   rK   warmup_epochsr   r	  
epoch_timetimeepoch_time_starttrain_time_startr   r   r   r   num_workersr   rU   r`   close_mosaicrs   extendra   r   	zero_gradepochwarningscatch_warningssimplefilterri   step_model_trainr   sampler	set_epoch	enumerate_close_dataloader_mosaicresetprogress_stringr   rm   r   npinterpr
  r_   r  param_groupswarmup_bias_lrrh   warmup_momentumr   r*   r   preprocess_batchr   rd   r2   rl   
loss_itemssumr   scalebackwardoptimizer_stepr  r   broadcast_object_listshapeset_description_get_memoryr{   	unsqueezerS   plot_training_samplesr   rg   update_attrr   r  possible_stop_clear_memoryvalidaterR   rk   _handle_nan_recoveryrt   save_metricsr  rY   
save_modelr  r  r   r  
final_evalplot_metricsr1   )r   nbnwlast_opt_stepbase_idxrC  pbarir^   nixijr   predsrl   broadcast_listloss_lengthirfinal_epochtmean_epoch_timer   s                         r   r   BaseTrainer._do_trainc  s 
   ??QOO""#>Bii>U>UXY>YStyy..34c:_a $		 $		+,499??+8DIIOO3D E&&22doo6JKL M""*64=="A!B C%& JNDIINN+;9)E`d`k`k_llv]wy	
 99!!dii&<&<<BHMM  (qL(Q,!GH    "J56((*%%h/##% + rz!!))33E:T../Dtyy'='==>--/!!'')wD0023Id&7&78CDJ ""#9:e^8RB&)!S2Atyy}}W[WfWfGfCg1h1n1n1p-q&rDO )$..*E*E F"$))aTYY%=%=SRST`RadhdkdklqdrRr$s#$ &?,.IIbtyy?X?XZ^ZcZcZlZl>m,nAjM !G dhh' 11%8Eyy(( $

5< 80<TZZ0H0M0Me0[-o04

50A-o $
DIrz		T__4	48JJ4FTZZZ[^^b^m^mMmrsvwrwLxDJ ( !!$)),557%8'')$&M yy~~%)YY[43H3H%HTYY^^^bMb$c	2:;?19dii$-ON 66~qI(6q(9DI99! 7?9<TZZ=M=M9N9N$**"2"21"5TUK((#h!k/&BB$qyk4;;-8#//1#6a8 -8!OdjjQUQ[Q[]^A_ "%L..q1	
 "%L..r2	 &&~6yy2+>225=""#78} !@ ;DDNND_D_:`a:`Qrd|QtW,:`aDG34w#ai4;;6$$TZZ9s$t yy}}t||/I/ITYY""S"1-1]]_*dl ((//)*D&w!!*jT-B-B4::-N*jRVR^R^*jbfbibi*j!k		T\\%!)T\\BQkQ	99>>II$))+0E0E"E$))..[_J_!``I 99>>[OO%&&7 		A$"7"77DO$%D!yy~~#$t'<'<#<IYIYAY\]A]"^15499>>D;PSb;b1ccdii.%%',0JJ)		Ukk11	12s# rz/3qy$))d!C**>1=*1-	yyQJEI L ))+ 5 55b!1!11A566KGVZN[^K__fgh7?yy!!#~.:&[ +*B ('` bs    1t/Dt.t%
t
t"	c                     [        U R                  U R                  R                  U R                  U R
                  US9$ )zJCalculate optimal batch size based on model and device memory constraints.)rd   r   r   r^   max_num_obj)r   rd   rK   r   r   r_   )r   r{  s     r   r   BaseTrainer.auto_batch  s2    %**))////#
 	
r   c                    Su  p#U R                   R                  S:X  aL  [        R                  R	                  5       nU(       a&  [        S5      R                  5       R                  S-  $ OrU R                   R                  S:w  aX  [        R                  R                  5       nU(       a3  [        R                  R                  U R                   5      R                  nU(       a  US:  a  X#-  $ S$ US-  $ )zJGet accelerator memory utilization in GB or as a fraction of total memory.)r   r   rD   psutilrB   rC   r   i   @)rM   rb   r{   rD   driver_allocated_memory
__import__virtual_memorypercentr8   memory_reservedget_device_propertiestotal_memory)r   fractionmemoryr$  s       r   r]  BaseTrainer._get_memory  s    ;;u$YY668F!(+::<DDsJJ [[&ZZ//1F

88ERR9AEAIW1WQVWr   r4  c                    U(       a0  SUs=::  a  S::  d   S5       e   S5       eU R                  SS9U::  a  g[        R                  " 5         U R                  R                  S:X  a  [
        R                  R                  5         gU R                  R                  S:X  a  g[
        R                  R                  5         g)	zIClear accelerator memory by calling garbage collector and emptying cache.r   r:   z"Threshold must be between 0 and 1.T)r  NrD   rC   )	r]  gccollectrM   rb   r{   rD   empty_cacher8   )r   r4  s     r   rb  BaseTrainer._clear_memory$  s    	&Q&L(LL&L(LL&.);


;;u$II!!#[[&JJ""$r   c                 ~    SSK n UR                  U R                  SS9R                  SS9$ ! [         a    0 s $ f = f)z0Read results.csv into a dictionary using polars.r   N)infer_schema_lengthF)	as_series)polarsread_csvro   to_dictr   )r   pls     r   read_results_csvBaseTrainer.read_results_csv2  sD    	;;txxT;BJJUZJ[[ 	I	s   &- <<c                 8  ^ U R                   R                  5         U R                   R                  5        Ha  u  mn[        [	        U4S jU R
                  5      5      (       d  M0  [        U[        R                  5      (       d  MQ  UR                  5         Mc     g)zSet model in training mode.c                    > U T;   $ r   r   )fns    r   r   *BaseTrainer._model_train.<locals>.<lambda>@  s	    AFr   N)
rd   r   named_modulesr   filterr   rv   r	   BatchNorm2deval)r   mr  s     @r   rH  BaseTrainer._model_train;  sd    

JJ,,.DAq6*D,C,CDEE*UVXZXfXfJgJg /r   c                    SSK nUR                  5       n[        R                  " U R                  U R
                  S[        [        U R                  R                  5      5      R                  5       U R                  R                  [        [        U R                  R                  5       5      5      U R                  R                  5       [        U R                   5      0 U R"                  ESU R$                  0EU R'                  5       [(        R*                  " 5       R-                  5       [.        [1        [2        R4                  5      [2        R6                  [2        R8                  [2        R:                  S.SSS.U5        UR=                  5       nU R>                  RA                  SSS	9  U RB                  RE                  U5        U R
                  U R$                  :X  a  U RF                  RE                  U5        U RH                  S:  aK  U R                  U RH                  -  S:X  a-  U R>                  S
U R                   S3-  RE                  U5        ggg)z9Save model training checkpoints with additional metadata.r   Nrk   )rootbranchcommitoriginz*AGPL-3.0 (https://ultralytics.com/license)zhttps://docs.ultralytics.com)rC  rj   rd   rg   updatesr   r   
train_argstrain_metricstrain_resultsdateversiongitlicensedocsTr?   rC  .pt)%ioBytesIOr{   rY   rC  rj   r   r2   rg   halfr  r+   r   
state_dictr   rZ   rK   rR   rk   r  r   now	isoformatr   rN   r   r  r  r  r  getvaluerW   rX   r[   write_bytesr\   r]   )r   r  bufferserialized_ckpts       r   rf  BaseTrainer.save_modelC  s    

 $ 1 1TXX\\ :;@@B88++A(4>>KdKdKfBgh++002"499o!NDLL!NY4M!N!%!6!6!8 002&M!jj!jj!jj	 H6), /	
2 !//+ 			t4		o.,II!!/2q tzzD4D4D'D'IYY5C00==oN (J r   c           	          U R                   R                  S:X  a!  [        U R                   R                  5      nGOU R                   R                  R	                  SS5      S   S:X  ao  SSKnSSKJn  UR                  U" U R                   R                  5      5      n[        U5      U R                   l        [        U R                   R                  5      nOU R                   R                  R	                  SS5      S   S	;   d  U R                   R                  S
;   a9  [        U R                   R                  5      nSU;   a  US   U R                   l        U R                   R                  (       a"  [         R"                  " S5        SS0WS'   SUS'   W$ ! [         a=  n[        [        S[        U R                   R                  5       SU 35      5      UeSnAff = f)z
Get train and validation datasets from data dictionary.

Returns:
    (dict): A dictionary containing the training/validation/test dataset and category names.
classifyr   r:   r>   ndjsonr   N)convert_ndjson_to_yolo>   ymlr/  >   r   posedetectsegment	yaml_filez	Dataset 'u   ' error ❌ z)Overriding class names with single class.itemr1  r0  )rK   r  r   rf   rsplitasyncioultralytics.data.converterr  r   rN   r   r   RuntimeErrorr   r   
single_clsr   r   )r   rf   r  r  	yaml_pathr   s         r   re   BaseTrainer.get_datasetl  sn   	fyy~~+(8&&sA.r2h>M#KK(>tyy~~(NO	!$Y		(8&&sA.r2oE \ J )8$&%)+%6DIIN 99KKCDKDMDJ  	fv	)DIINN2K1LLYZX[&\]^dee	fs$   9F BF B F 
G"8GGc                    [        U R                  [        R                  R                  5      (       a  gU R                  Sp!Sn[        U R                  5      R                  S5      (       a$  [        U R                  5      u  p#UR                  nOP[        U R                  R                  [
        [        45      (       a!  [        U R                  R                  5      u  p$U R                  X[        S:H  S9U l        U$ )zr
Load, create, or download model for any task.

Returns:
    (dict): Optional checkpoint to resume training from.
Nr  r>   )r   r<   verbose)rv   rd   r{   r	   ModulerN   endswithr   r/  rK   
pretrainedr   	get_modelr   )r   r   r<   r  _s        r   r   BaseTrainer.setup_model  s     djj%((//22zz4Wtzz?##E**+DJJ7MG,,C		,,sDk::()=)=>JG^^dbj^Q
r   c                    U R                   R                  U R                  5        [        R                  R
                  R                  U R                  R                  5       SS9  U R                   R                  U R                  5        U R                   R                  5         U R                  R                  5         U R                  (       a&  U R                  R                  U R                  5        gg)zVPerform a single step of the training optimizer with gradient clipping and EMA update.g      $@)max_normN)r   unscale_r   r{   r	   utilsclip_grad_norm_rd   
parametersrG  updaterB  rg   r   s    r   rY  BaseTrainer.optimizer_step  s    T^^,&&tzz'<'<'>&N(  "88HHOODJJ' r   c                     U$ )zQAllow custom preprocessing model inputs and ground truths depending on task type.r   )r   r^   s     r   rT  BaseTrainer.preprocess_batch  s    r   c                    U R                   (       aP  U R                  S:  a@  U R                   R                   R                  5        H  n[        R                  " USS9  M     U R                  U 5      nUc  gUR                  SU R                  R                  5       R                  5       R                  5       * 5      nU R                  (       a  U R                  U:  a  X0l        X#4$ )z
Run validation on val set using self.validator.

Returns:
    metrics (dict): Dictionary of validation metrics.
    fitness (float): Fitness score for the validation.
r:   r   r   )NNrk   )rg   r   buffersr   r   rQ   rI   rl   detachrC   numpyrj   )r   r  rR   rk   s       r   rc  BaseTrainer.validate  s     88!+((,,..0v1- 1..&?++i$))*:*:*<*@*@*B*H*H*J)JK  D$5$5$? 'r   c                     [        S5      e)z>Get model and raise NotImplementedError for loading cfg files.z3This task trainer doesn't support loading cfg filesNotImplementedError)r   r   r<   r  s       r   r  BaseTrainer.get_model  s    !"WXXr   c                     [        S5      e)zGReturn a NotImplementedError when the get_validator function is called.z1get_validator function not implemented in trainerr  r   s    r   r  BaseTrainer.get_validator      !"UVVr   c                     [        S5      e)z5Return dataloader derived from torch.data.Dataloader.z2get_dataloader function not implemented in trainerr  )r   dataset_pathr_   r   r   s        r   r   BaseTrainer.get_dataloader  s    !"VWWr   c                     [        S5      e)zBuild dataset.z1build_dataset function not implemented in trainerr  )r   img_pathr   r^   s       r   build_datasetBaseTrainer.build_dataset  r  r   c                     Ub  SU0$ S/$ )z
Return a loss dict with labelled training loss items tensor.

Note:
    This is not needed for classification but necessary for segmentation & detection
rl   r   )r   rU  r   s      r   r  BaseTrainer.label_loss_items  s     (2'=
#KF8Kr   c                 @    U R                   S   U R                  l        g)z/Set or update model parameters before training.r1  N)rf   rd   r1  r   s    r   r    BaseTrainer.set_model_attributes  s    99W-

r   c                     g)z-Build target tensors for training YOLO model.Nr   )r   rr  targetss      r   build_targetsBaseTrainer.build_targets      r   c                     g)z-Return a string describing training progress. r   r   s    r   rN  BaseTrainer.progress_string  s    r   c                     g)z+Plot training samples during YOLO training.Nr   )r   r^   ro  s      r   r_  !BaseTrainer.plot_training_samples  r  r   c                     g)z$Plot training labels for YOLO model.Nr   r   s    r   r   BaseTrainer.plot_training_labels  r  r   c           	         [        UR                  5       5      [        UR                  5       5      p2[        U5      S-   n[        R                  " 5       U R
                  -
  nU R                  R                  R                  SSS9  U R                  R                  5       (       a  SO'SU-  [        SS/U-   5      -  R                  S5      S	-   n[        U R                  S
SS9 nUR                  USU-  [        U R                  S-   U/U-   5      -  R                  S5      -   S	-   5        SSS5        g! , (       d  f       g= f)z$Save training metrics to a CSV file.rF   Tr?   r  z%s,rC  r<  rG   r7  azutf-8)encodingz%.6g,r:   N)rz   r  valuesrw   r<  r>  ro   parentrX   rp   ry   rstripopenwriterC  )r   rR   r  valsr  rw  sr  s           r   re  BaseTrainer.save_metrics  s   ',,.)40@+AdL1IIK$///dT:((//##B519ugv=NQU=U7V+V*^*^_b*cfj*j$((C'2aGGA1udjj1na-@4-G'HHPPQTUUX\\] 322s   $AD55
Ec                 @    [        U R                  U R                  S9  g)zPlot metrics from a CSV file.)r   on_plotN)r%   ro   r  r   s    r   rh  BaseTrainer.plot_metrics  s    $((DLL9r   c                 d    [        U5      nU[        R                  " 5       S.U R                  U'   g)z2Register plots (e.g. to be consumed in callbacks).)rf   	timestampN)r   r<  rS   )r   rV   rf   paths       r   r  BaseTrainer.on_plot  s$    Dz$(tyy{C

4r   c                    U R                   R                  5       (       a  U R                   OSn[        [        5         [        S;   ab  U R
                  R                  5       (       a  [        U R
                  5      O0 nU(       a%  [        U R                   SUR                  S5      0S9  SSS5        U(       a  [        R                  " SU S35        U R                  R                  U R                  R                  l        SU R                  R                  l        U R                  US9U l        U R                  R                  S	S5        U R!                  S
5        gg! , (       d  f       N= f)zHPerform final evaluation and validation for object detection YOLO model.Nr=   r  )r  z
Validating z...F)rd   rk   r6  )r\   rp   r0   r   r   r[   r/   r   r   r   rK   rS   rQ   r   rR   rI   r   )r   rd   r  s      r   rg  BaseTrainer.final_eval  s    !YY--//		T)*5w59YY5E5E5G5Gtyy1R#DIIRaIb7cd 6 KK-wc23(,		DNN%*/DNN'>>>6DLLLY-12  65s   A-E  
E.c                    U R                   R                  nU(       Ga,   [        U[        [        45      =(       a    [	        U5      R                  5       n[	        U(       a  [        U5      O	[        5       5      n[        U5      S   R                   n[        US   [        5      (       d:  [	        US   5      R                  5       (       d  U R                   R                  US'   Sn[        U5      U l         [        U5      =U R                   l        U R                   l        S H#  nXa;   d  M
  [        U R                   XaU   5        M%     X l        g! [         a  n[        S5      UeSnAff = f)zCCheck if resume checkpoint exists and update arguments accordingly.r   rf   T)r   r^   rM   r@  zzResume checkpoint not found. Please pass a valid checkpoint to resume from, i.e. 'yolo train resume model=path/to/last.pt'N)rK   rq   rv   rN   r   rp   r   r$   r   r  rf   r   rd   setattrr   FileNotFoundError)r   r   rq   rp   r[   	ckpt_argsr   r   s           r   rL   BaseTrainer.check_resume  s'   !!#FS$K8RT&\=P=P=R&Jv.n>NO ,D1!499	!)F"3T::4	RXHYCZCaCaCcCc(,		If%#I.	58Y>		$))"2A ~		1l;   'E s   D
E .E 
E-E((E-c                 $   UR                  S5      b  U R                  R                  US   5        UR                  S5      b  U R                  R                  US   5        U R                  (       a  UR                  S5      (       ar  [        U R                  5      U l        U R                  R                  R                  US   R                  5       R                  5       5        US   U R                  l	        UR                  SS5      U l
        g)z>Load optimizer, scaler, EMA, and best_fitness from checkpoint.r   Nr   rg   r  rj   r&  )r   r   load_state_dictr   rg   r(   rd   floatr  r  rj   )r   r  s     r   _load_checkpoint_state"BaseTrainer._load_checkpoint_state8  s    88K ,NN**4+<=88H)KK''X788

+DHHHLL((e):):)<)G)G)IJ#IDHH HH^S9r   c                 4   U R                   SL=(       a    U R                   R                  5       (       + nU R                  SL=(       a%    [        R                  " U R                  5      (       + nU R                  =(       a%    U R                  S:  =(       a    U R                  S:H  n[
        S;   =(       a    U=(       a    U=(       d    UnU(       a  SO
U(       a  SOSn[
        S:w  a+  [
        S:X  a  UOS/n[        R                  " US5        US   nU(       d  gXR                  :X  d  U R                  R                  5       (       d  [        R                  " U S	35        gU =R                  S
-  sl        U R                  S:  a  [        SU R                   S35      e[        R                  " U SU R                   S35        U R                  5         [!        U R                  5      u  pU	S   R#                  5       R%                  5       n
['        S U
R)                  5        5       5      (       d  [        SU R                   S35      e[+        U R,                  5      R/                  U
5        U R1                  U	5        A	A
US
-
  U R2                  l        g)zUDetect and recover from NaN/Inf loss and fitness collapse by loading last checkpoint.Nr   r=   zLoss NaN/InfzFitness NaN/InfzFitness collapser>   Fz- detected but can not recover from last.pt...r:      z#Training failed: NaN persisted for z epochsz detected (attempt z/3), recovering from last.pt...rg   c              3      #    U  HJ  n[        U[        R                  5      (       d  M$  [        R                  " U5      R	                  5       v   ML     g 7fr   )rv   r{   Tensorisfiniteall)r   r  s     r   r   3BaseTrainer._handle_nan_recovery.<locals>.<genexpr>[  s;     f4Fq*UVX]XdXdJe*5>>!$((**4Fs
   #A+AzCheckpoint z" is corrupted with NaN/Inf weightsT)rl   r$  rk   rO  rj   r   r   rZ  ra   r[   rp   r   r   rt   r  rH  r   r  r  r%  r  r2   rd   r  r  ri   r  )r   rC  loss_nanfitness_nanfitness_collapse	corruptedreasonrs  r  r  	ema_states              r   rd   BaseTrainer._handle_nan_recoveryD  s   99D(E1C1C1E-Ell$.Pr{{4<<7P3P,,\1B1BQ1F\4<<[\K\GOVVk6UEU	#+k1BWi2:+/19i$?N&&~q9&q)I$$$DII,<,<,>,>NNfX%RST""a'"%%)!DTE_E_D``ghii&!4T5O5O4PPopq!$)),K%%'224	fI4D4D4FfffTYYK7YZ[[TZZ 00;##D))$)AI!r   c           	         Ub  U R                   (       d  gUR                  SS5      S-   nUS:  dB   U R                  R                   SU R                   SU R                  R                   S35       e[
        R                  " S	U R                  R                   S
US-    SU R                   S35        U R                  U:  aN  [
        R                  " U R                   SUS    SU R                   S35        U =R                  US   -  sl        U R                  U5        X l        X R                  U R                  R                  -
  :  a  U R                  5         gg)z7Resume YOLO training from given epoch and best fitness.NrC  r>   r:   r   z training to zf epochs is finished, nothing to resume.
Start a new training without resuming, i.e. 'yolo train model=r   zResuming training z from epoch z to z total epochsz has been trained for z epochs. Fine-tuning for z more epochs.)rq   r   rK   rd   r`   r   r   r  ra   r@  rL  )r   r  ra   s      r   r  BaseTrainer.resume_trainingc  s>   <t{{hhw+a/Q 	
yy}T[[M :MMQYY__L]]^`	
 	((9kTUoEVVZ[_[f[fZggtuv;;$KK::,4T']OC\]a]h]h\iivw KK4=(K##D)&++		(>(>>?))+ @r   c                 h   [        U R                  R                  S5      (       a  SU R                  R                  l        [        U R                  R                  S5      (       aM  [        R
                  " S5        U R                  R                  R                  [        U R                  5      S9  gg)z5Update dataloaders to stop using mosaic augmentation.mosaicFr@  zClosing dataloader mosaic)hypN)	r   r   r  r1  r   r   r@  r   rK   r   s    r   rL  $BaseTrainer._close_dataloader_mosaicw  s    4$$,,h77/4D%%,4$$,,n==KK34%%22tDII2G >r   c                    / / / 4n[        S [        R                  R                  5        5       5      nUS:X  a  [        R
                  " [        S5       SU R                  R                   SU R                  R                   S35        U R                  R                  SS5      n	[        S	S
U	-   -  S5      n
US:  a  SOSU
S4u  p#nSU R                  l        UR                  5        H  u  pUR                  SS9 Hq  u  pU(       a  U SU 3OUnSU;   a  US   R!                  U5        M1  [#        X5      (       d  SU;   a  US   R!                  U5        M]  US   R!                  U5        Ms     M     1 SknU Vs0 s H  nUR%                  5       U_M     snR                  UR%                  5       5      nUS;   a+  ['        [(        U[(        R*                  5      " US   X4S4SS9nOQUS:X  a  [(        R,                  " US   X4S9nO2US:X  a  [(        R.                  " US   X4S S!9nO[1        S"U S#U S$35      eUR3                  US   US%.5        UR3                  US   SS%.5        [        R
                  " [        S5       S&[5        U5      R6                   S'U S(U S)[9        US   5       S*[9        US   5       S+U S,[9        US   5       S-35        U$ s  snf ).a  
Construct an optimizer for the given model.

Args:
    model (torch.nn.Module): The model for which to build an optimizer.
    name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
        based on the number of iterations.
    lr (float, optional): The learning rate for the optimizer.
    momentum (float, optional): The momentum factor for the optimizer.
    decay (float, optional): The weight decay for the optimizer.
    iterations (float, optional): The number of iterations, which determines the optimizer if
        name is 'auto'.

Returns:
    (torch.optim.Optimizer): The constructed optimizer.
c              3   :   #    U  H  u  pS U;   d  M  Uv   M     g7f)NormNr   )r   r   r  s      r   r   .BaseTrainer.build_optimizer.<locals>.<genexpr>  s     D!4!11!4s   	autoz
optimizer:z' 'optimizer=auto' found, ignoring 'lr0=z' and 'momentum=zJ' and determining best 'optimizer', 'lr0' and 'momentum' automatically... r0  
   {Gz?      i'  )SGDr:  ?AdamWr>  r&  F)recurser   biasrF   logit_scaler:   r   >   r=  Adamr8  r?  NAdamRAdamAdamaxRMSProp>   rC  r?  rD  rE  rF  g+?)r   betasr  rG  )r   r   r=  T)r   r   nesterovzOptimizer 'z,' not found in list of available optimizers zX. Request support for addition optimizers at https://github.com/ultralytics/ultralytics.)paramsr  r   z(lr=z, momentum=z) with parameter groups z weight(decay=0.0), z weight(decay=z), z bias(decay=0.0))ry   r	   __dict__itemsr   r   r   rK   r  r   rf   r   r	  rR  r  r   r   rv   lowergetattrr
   rC  RMSpropr=  r  add_param_grouprb   __name__rw   )r   rd   rV   r   r   r   r   gbnr0  lr_fitmodule_namemodule
param_nameparamfullname
optimizersr   r   s                      r   r  BaseTrainer.build_optimizer  s   " BJD!2!2!4DD6>KKL)* +!!%/?		@R@R?S TWX
 tR(B9B/3F7AE7I!3PWY_adOeDh'*DII$#(#6#6#8K%+%<%<U%<%K!
<Gk]!J<8ZX%aDKK&++}/HaDKK&aDKK& &L $9 ]
&01j	1j155djjlC@@tUZZ81"W\L]lopIYadrEIU]		!A$24PI%dV#OPZ| \i i 
 	!!QqT5"IJ!!QqT3"GH%&aY(@(@'AbTU]T^^v1Q4yk-c!A$i[ugSQTUVWXUYQZP[[km	
 ' 2s   >K)/r  r   rK   r_   r\   rj   r   ro   rf   r~   rM   rg   rC  r;  r=  r`   rk   r   rJ   r[   rh   rl   rU  rn   r   rR   rd   rt   r   rs   rS   rq   rU   r]   r   ri   ra   r  r  r   r  rm   r   r>  rQ   rW   r   )r   )Fr   )NNT)   r   r   )r   N)Nr   )r8  gMbP?r>  gh㈵>g     j@)1rQ  
__module____qualname____firstlineno____doc__r   r   rN   r   r   r   r   r   r   r  r   r   r]  r  rb  r  rH  rf  re   r   rY  rT  rc  r  r  r   r  r  r   r  rN  r_  r  re  rh  r  rg  rL   r  rd  r  rL  r  __static_attributes__r   r   r   r4   r4   ?   s	   0d '$4 T<l/# /+# +3 
6X

]6~h'T
X%u %'OR"H(( (YWXWL.
^:D
3"@
:>,(H=r   r4   )Hr`  r  r  rO   r   r<  rD  r   r   r   r   pathlibr   r  rO  r{   r   r   r	   r
   ultralyticsr   ultralytics.cfgr   r   ultralytics.data.utilsr   r   ultralytics.nn.tasksr   ultralytics.utilsr   r   r   r   r   r   r   r   r   r   r   ultralytics.utils.autobatchr   ultralytics.utils.checksr   r   r   r    r!   ultralytics.utils.distr"   r#   ultralytics.utils.filesr$   ultralytics.utils.plottingr%   ultralytics.utils.torch_utilsr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r4   r   r   r   <module>rn     s    
  	     (    %  # 1 G 0    ? o o D 2 3   "} }r   