
    ΅i0                        S SK Jr  S SKJrJrJr  S SKJrJrJ	r	  S SK
Jr  S SKJrJr  S SKJr  / SQr\	" SS	S
9rS\S\4S jr\" S5       " S S\\   5      5       r\" S5       " S S\5      5       r\" S5       " S S\\   5      5       rg)    )defaultdict)CallableIteratorSized)AnyNoReturnTypeVar)functional_datapipe)	DataChunkIterDataPipe)_check_unpickable_fn)BatcherIterDataPipeGrouperIterDataPipeUnBatcherIterDataPipe_T_coT)	covariantnamereturnc                 ,    [        S[         SU  35      e)Nzmodule z has no attribute )AttributeError__name__)r   s    b/home/james-whalen/.local/lib/python3.13/site-packages/torch/utils/data/datapipes/iter/grouping.py__getattr__r      s    
78*,>tfE
FF    batchc                      ^  \ rS rSr% Sr\\S'   \\S'   \\S'   S\	4S\S\S\S\
\	   SS	4
U 4S
 jjjrS\\	   4S jrS\4S jrSrU =r$ )r      a  
Creates mini-batches of data (functional name: ``batch``).

An outer dimension will be added as ``batch_size`` if ``drop_last`` is set to ``True``, or ``length % batch_size`` for the
last batch if ``drop_last`` is set to ``False``.

Args:
    datapipe: Iterable DataPipe being batched
    batch_size: The size of each batch
    drop_last: Option to drop the last batch if it's not full
    wrapper_class: wrapper to apply onto each batch (type ``List``) before yielding,
        defaults to ``DataChunk``

Example:
    >>> # xdoctest: +SKIP
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> dp = IterableWrapper(range(10))
    >>> dp = dp.batch(batch_size=3, drop_last=True)
    >>> list(dp)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
datapipe
batch_size	drop_lastFwrapper_classr   Nc                 t   > US::  a  [        S5      e[        TU ]	  5         Xl        X l        X0l        X@l        g )Nr   z+Batch size is required to be larger than 0!)AssertionErrorsuper__init__r   r   r    r!   )selfr   r   r    r!   	__class__s        r   r%   BatcherIterDataPipe.__init__5   s8     ? !NOO $"*r   c              #   "  #    / nU R                    HD  nUR                  U5        [        U5      U R                  :X  d  M/  U R	                  U5      v   / nMF     [        U5      S:  a&  U R
                  (       d  U R	                  U5      v   g g g 7fNr   )r   appendlenr   r!   r    )r&   r   xs      r   __iter__BatcherIterDataPipe.__iter__D   sx     ALLO5zT__,((//	 
 u:>>>((// " s   :B ABc                 L   [        U R                  [        5      (       ae  U R                  (       a"  [	        U R                  5      U R
                  -  $ [	        U R                  5      U R
                  -   S-
  U R
                  -  $ [        [        U 5      R                   S35      e)N   z# instance doesn't have valid length)	
isinstancer   r   r    r,   r   	TypeErrortyper   r&   s    r   __len__BatcherIterDataPipe.__len__O   sv    dmmU++~~4==)T__<<DMM*T__<q@T__TTtDz2233VWXXr   )r   r   r    r!   )r   
__module____qualname____firstlineno____doc__r   __annotations__intboolr   r4   r%   r   r.   r6   __static_attributes____classcell__)r'   s   @r   r   r      s    , OO  )2++ + 	+
 I+ 
+ +	0(9- 	0Y Y Yr   r   unbatchc                   >    \ rS rSrSrSS\S\SS4S jjrS rS	 r	S
r
g)r   Y   a  
Undos batching of data (functional name: ``unbatch``).

In other words, it flattens the data up to the specified level within a batched DataPipe.

Args:
    datapipe: Iterable DataPipe being un-batched
    unbatch_level: Defaults to ``1`` (only flattening the top level). If set to ``2``,
        it will flatten the top two levels, and ``-1`` will flatten the entire DataPipe.

Example:
    >>> # xdoctest: +SKIP
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> source_dp = IterableWrapper([[[0, 1], [2]], [[3, 4], [5]], [[6]]])
    >>> dp1 = source_dp.unbatch()
    >>> list(dp1)
    [[0, 1], [2], [3, 4], [5], [6]]
    >>> dp2 = source_dp.unbatch(unbatch_level=2)
    >>> list(dp2)
    [0, 1, 2, 3, 4, 5, 6]
r   unbatch_levelr   Nc                     Xl         X l        g Nr   rD   )r&   r   rD   s      r   r%   UnBatcherIterDataPipe.__init__q   s     *r   c              #   x   #    U R                    H$  nU R                  XR                  S9 S h  vN   M&     g  N	7f)NrD   )r   _diverD   )r&   elements     r   r.   UnBatcherIterDataPipe.__iter__u   s0     }}Gzz'9K9KzLLL %Ls   ,:8
:c              #     #    US:  a  [        S5      eUS:X  aB  [        U[        [        45      (       a"  U H  nU R	                  USS9 S h  vN   M     g Uv   g US:X  a  Uv   g [        U[        [        45      (       a$  U H  nU R	                  X2S-
  S9 S h  vN   M     g [        SU R                   S35      e Nq N$7f)Nz unbatch_level must be -1 or >= 0rJ   r   r1   zunbatch_level z" exceeds the depth of the DataPipe)
ValueErrorr2   listr   rK   
IndexErrorrD   )r&   rL   rD   items       r   rK   UnBatcherIterDataPipe._divey   s     2?@@B'D)#455#D#zz$bzAAA $ aM'D)#455#D#zz$a>OzPPP $ !$T%7%7$88Z[  B Qs%   ACB?ACC#CCrG   )r1   )r   r8   r9   r:   r;   r   r=   r%   r.   rK   r?    r   r   r   r   Y   s,    ,+ +c +$ +Mr   r   groupbyc                       \ rS rSrSrSSSSSS.S\\   S\\/\4   S	\	S
\
S\
S-  S\
S-  S\	SS4S jjrS rS rSS jrS rS rSS jrSrg)r      a	  
Groups data from IterDataPipe by keys from ``group_key_fn``, yielding a ``DataChunk`` with batch size up to ``group_size``.

(functional name: ``groupby``).

The samples are read sequentially from the source ``datapipe``, and a batch of samples belonging to the same group
will be yielded as soon as the size of the batch reaches ``group_size``. When the buffer is full,
the DataPipe will yield the largest batch with the same key, provided that its size is larger
than ``guaranteed_group_size``. If its size is smaller, it will be dropped if ``drop_remaining=True``.

After iterating through the entirety of source ``datapipe``, everything not dropped due to the buffer capacity
will be yielded from the buffer, even if the group sizes are smaller than ``guaranteed_group_size``.

Args:
    datapipe: Iterable datapipe to be grouped
    group_key_fn: Function used to generate group key from the data of the source datapipe
    keep_key: Option to yield the matching key along with the items in a tuple,
        resulting in `(key, [items])` otherwise returning [items]
    buffer_size: The size of buffer for ungrouped data
    group_size: The max size of each group, a batch is yielded as soon as it reaches this size
    guaranteed_group_size: The guaranteed minimum group size to be yielded in case the buffer is full
    drop_remaining: Specifies if the group smaller than ``guaranteed_group_size`` will be dropped from buffer
        when the buffer is full

Example:
    >>> import os
    >>> # xdoctest: +SKIP
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> def group_fn(file):
    ...     return os.path.basename(file).split(".")[0]
    >>> source_dp = IterableWrapper(
    ...     ["a.png", "b.png", "a.json", "b.json", "a.jpg", "c.json"]
    ... )
    >>> dp0 = source_dp.groupby(group_key_fn=group_fn)
    >>> list(dp0)
    [['a.png', 'a.json', 'a.jpg'], ['b.png', 'b.json'], ['c.json']]
    >>> # A group is yielded as soon as its size equals to `group_size`
    >>> dp1 = source_dp.groupby(group_key_fn=group_fn, group_size=2)
    >>> list(dp1)
    [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
    >>> # Scenario where `buffer` is full, and group 'a' needs to be yielded since its size > `guaranteed_group_size`
    >>> dp2 = source_dp.groupby(
    ...     group_key_fn=group_fn,
    ...     buffer_size=3,
    ...     group_size=3,
    ...     guaranteed_group_size=2,
    ... )
    >>> list(dp2)
    [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
Fi'  N)keep_keybuffer_size
group_sizeguaranteed_group_sizedrop_remainingr   group_key_fnrY   rZ   r[   r\   r]   r   c                T   [        U5        Xl        X l        X0l        X@l        [        [        5      U l        SU l        XPl	        S U l
        Ub#  Ub   SUs=:  a  U::  d  O  [        S5      eXPl
        Ub#  Ub  SUs=:  a  U::  d  O  [        S5      eX`l
        Xpl        [        U l        g )Nr   z)group_size must be > 0 and <= buffer_sizezNguaranteed_group_size must be > 0 and <= group_size and group_size must be set)r   r   r^   rY   max_buffer_sizer   rQ   buffer_elementscurr_buffer_sizer[   r\   r#   r]   r   r!   )r&   r   r^   rY   rZ   r[   r\   r]   s           r   r%   GrouperIterDataPipe.__init__   s     	\* ( *7B47H !$%)"!k&=
1k1$%PQQ)3& ,!!.C*Qz*Q$d  *?&,&r   c                    S nSnS nU R                    H;  n[        U R                   U   5      U:  d  M!  [        U R                   U   5      nUnM=     U R                  bB  X R                  :  a3  U R                  (       d"  [	        S[        U R                   U   5      5      eU R                  b  X R                  :  a  U R                   U   nU =R                  U-  sl        U R                   U	 U$ )Nr   zFailed to group items)ra   r,   r\   r]   RuntimeErrorstrrb   )r&   biggest_keybiggest_sizeresult_to_yieldfindkeys        r   _remove_biggest_key'GrouperIterDataPipe._remove_biggest_key   s    ++G4''01L@"4#7#7#@A% , &&2999'''T-A-A+-N)O 
 &&.999"22;?O-  -r   c              #     #    U R                    GHE  nU R                  U5      nU R                  U   R                  U5        U =R                  S-  sl        U R
                  b  U R
                  [        U R                  U   5      :X  an  U R                  U R                  U   5      nU R                  (       a  X#4OUv   U =R                  [        U R                  U   5      -  sl        U R                  U	 U R                  U R                  :X  d  GM  U R                  5       nUc  GM  U R                  U5      nU R                  (       a  X#4OUv   GMH     [        U R                  R                  5       5       Hc  nU R                  U R                  R                  U5      5      nU =R                  [        U5      -  sl        U R                  (       a  X#4OUv   Me     g 7f)Nr1   )r   r^   ra   r+   rb   r[   r,   r!   rY   r`   rk   tuplekeyspop)r&   r-   keyresultri   s        r   r.   GrouperIterDataPipe.__iter__  s    A##A&C  %,,Q/!!Q&!*t#$$S)C 0 *.););D<P<PQT<U)V'+}}sm&@%%T-A-A#-F)GG%((-$$(<(<<"&":":"<".!//@F+/==3-fD% ( --2245C''(<(<(@(@(EFF!!S[0!#'==3-f< 6s   DG$G$+B9G$c                 :    SU l         [        [        5      U l        g r*   )rb   r   rQ   ra   r5   s    r   resetGrouperIterDataPipe.reset   s     !*40r   c           
      4   U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  4
n[        R                  b  [        R                  " U5      $ U$ rF   )r   r^   rY   r`   r[   r\   r]   r!   _valid_iterator_id_number_of_samples_yieldedr   getstate_hookr&   states     r   __getstate__ GrouperIterDataPipe.__getstate__$  s    MMMM  OO&&##++
 %%1--e44r   c                     Uu
  U l         U l        U l        U l        U l        U l        U l        U l        U l        U l	        SU l
        [        [        5      U l        g r*   )r   r^   rY   r`   r[   r\   r]   r!   rx   ry   rb   r   rQ   ra   r{   s     r   __setstate__ GrouperIterDataPipe.__setstate__5  sZ     	
MM O&#+ !*40r   c                 8    U R                   R                  5         g rF   )ra   clearr5   s    r   __del__GrouperIterDataPipe.__del__E  s    ""$r   )ry   rx   ra   rb   r   r]   r^   r[   r\   rY   r`   r!   )r   N)r   r8   r9   r:   r;   r   r   r   r   r>   r=   r%   rk   r.   ru   r}   r   r   r?   rU   r   r   r   r      s    1p  !%,0$$'u%$' w|,$'
 $' $' $J$'  #Tz$' $' 
$'L:=41"1 %r   r   N)collectionsr   collections.abcr   r   r   typingr   r   r	   %torch.utils.data.datapipes._decoratorr
   #torch.utils.data.datapipes.datapiper   r   'torch.utils.data.datapipes.utils.commonr   __all__r   rf   r   r   r   r   rU   r   r   <module>r      s    # 5 5 ) ) E G H 	4(Gc Gh G W<Y,y1 <Y <Y~ Y1L 1  1h Yw%,y1 w%  w%r   