
    h                        S SK Jr  S SKrS SKrS SKJr  S SKJrJr  S SK	J
r
JrJr  SSS jjr\
S-  SS	4       SS
 jjr\S:X  a	  \" S5        gg)    )annotationsN)Path)IMG_FORMATSimg2label_paths)DATASETS_DIRLOGGERTQDMc                   [        U 5      n[        U S35      nUS-  US-  pTUR                  SS9  UR                  SS9  UR                  SS9  UR                  5        Vs/ s H  ofR                  5       (       d  M  UPM     nn[	        S U 5       5      n[        U5       SU S3n	[        R                  " S	U S
U	 SUS SSU-
  S S3	5        U H  n
XJR                  -  R                  SS9  XZR                  -  R                  SS9  [        U
R                  S5      5      n[        R                  " U5        [        [        U5      U-  5      nUSU  H3  n[        R                  " XU
R                  -  UR                  -  5        M5     XS  H3  n[        R                  " XU
R                  -  UR                  -  5        M5     M     [        R                  " SU S35        U$ s  snf )u  
Split classification dataset into train and val directories in a new directory.

Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
structure with an 80/20 split by default.

Directory structure:
    Before:
        caltech/
        ├── class1/
        │   ├── img1.jpg
        │   ├── img2.jpg
        │   └── ...
        ├── class2/
        │   ├── img1.jpg
        │   └── ...
        └── ...

    After:
        caltech_split/
        ├── train/
        │   ├── class1/
        │   │   ├── img1.jpg
        │   │   └── ...
        │   ├── class2/
        │   │   ├── img1.jpg
        │   │   └── ...
        │   └── ...
        └── val/
            ├── class1/
            │   ├── img2.jpg
            │   └── ...
            ├── class2/
            │   └── ...
            └── ...

Args:
    source_dir (str | Path): Path to classification dataset root directory.
    train_ratio (float): Ratio for train split, between 0 and 1.

Returns:
    (Path): Path to the created split directory.

Examples:
    Split dataset with default 80/20 ratio
    >>> split_classify_dataset("path/to/caltech")

    Split with custom ratio
    >>> split_classify_dataset("path/to/caltech", 0.75)
_splittrainvalT)exist_okc              3  h   #    U  H(  n[        [        UR                  S 5      5      5      v   M*     g7f)*.*N)lenlistglob).0ds     P/home/james-whalen/.local/lib/python3.13/site-packages/ultralytics/data/split.py	<genexpr>)split_classify_dataset.<locals>.<genexpr>K   s%     DAs4u.//s   02z
 classes, z imagesz
Splitting z (z) into z.0%z train,    z val...r   NzSplit complete in u    ✅)r   mkdiriterdiris_dirsumr   r   infonamer   r   randomshuffleintshutilcopy2)
source_dirtrain_ratiosource_path
split_path
train_pathval_pathr   
class_dirstotal_imagesstats	class_dirimage_files	split_idximgs                 r   split_classify_datasetr2      s   f z"KV,-J%/e1C d#d#NNDN! )002A2hhj!2JADDDL:z,w?E
KK*[ME7'+c9J(STWbSbcfRggnop		nn	$++T+:	NN	"))4)8 9>>%01{#K(;67	z	*CLL9>>9CHHDE + z*CLL7#((BC +    KK$ZL56- Bs   "G*>G*zcoco8/images)g?g?g        Fc                >   [        U 5      n [        S U R                  S5       5       5      n[        U5      n[        R
                  " S5        [        R                  " / SQXS9n/ SQnU HD  nU R                  U-  R                  5       (       d  M'  U R                  U-  R                  5         MF     [        R                  " SU  3SU-  -   5        [        [        XS5      US	9 H  u  pU(       a6  [        [        [        U	5      /5      S   5      R                  5       (       d  MB  [!        U R                  Xh   -  S
SS9 n
U
R#                  SU	R%                  U R                  5      R'                  5        3S-   5        SSS5        M     g! , (       d  f       M  = f)aJ  
Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

Args:
    path (Path): Path to images directory.
    weights (tuple): Train, validation, and test split fractions.
    annotated_only (bool): If True, only images with an associated txt file are used.

Examples:
    Split images with default weights
    >>> from ultralytics.data.split import autosplit
    >>> autosplit()

    Split with custom weights and annotated images only
    >>> autosplit(path="path/to/images", weights=(0.8, 0.15, 0.05), annotated_only=True)
c              3  r   #    U  H-  oR                   S S R                  5       [        ;   d  M)  Uv   M/     g7f)r   N)suffixlowerr   )r   xs     r   r   autosplit.<locals>.<genexpr>y   s,     W/88AB<3E3E3G;3V11/s   (7	7r   r   )r   r      )weightsk)zautosplit_train.txtzautosplit_val.txtzautosplit_test.txtzAutosplitting images from z!, using *.txt labeled images only)totalazutf-8)encodingz./
N)r   sortedrglobr   r    seedchoicesparentexistsunlinkr   r   r	   zipr   stropenwriterelative_toas_posix)pathr:   annotated_onlyfilesnindicestxtr7   ir1   fs              r   	autosplitrU   c   s<   * :DWdjj/WWEE
A
KKNnnY=G
LCKK!O##%%[[1_$$&  KK,TF36Y\j6jjks7*!4os3xj&A!&D!E!L!L!N!NdkkCF*C'Ba"S__T[[9BBDEFMN CB 5BBs   ?F
F	__main__
caltech101)g?)r%   z
str | Pathr&   floatreturnr   )rM   r   r:   ztuple[float, float, float]rN   boolrY   None)
__future__r   r    r#   pathlibr   ultralytics.data.utilsr   r   ultralytics.utilsr   r   r	   r2   rU   __name__     r   <module>rc      sy    #    ? 8 8Sn .*9 $O
$O'$O $O 
	$ON z<( rb   