
    h                         S SK r S SKJrJr  S SKJr  SSKJrJr  \R                  " \
5      r " S S5      r\S 5       rS	 rS
 r\\ R                   S\4S j5       5       rg)    N)PoolRLock)tqdm   )experimentalloggingc                       \ rS rSrSrSrg)ParallelBackendConfig   N )__name__
__module____qualname____firstlineno__backend_name__static_attributes__r       T/home/james-whalen/.local/lib/python3.13/site-packages/datasets/parallel/parallel.pyr
   r
      s    Lr   r
   c	                 `    [         R                  c  [        XX#XEXgU5	      $ [        XX#XEXgU5	      $ )a  
**Experimental.** Apply a function to iterable elements in parallel, where the implementation uses either
multiprocessing.Pool or joblib for parallelization.

Args:
    function (`Callable[[Any], Any]`): Function to be applied to `iterable`.
    iterable (`list`, `tuple` or `np.ndarray`): Iterable elements to apply function to.
    num_proc (`int`): Number of processes (if no backend specified) or jobs (using joblib).
    types (`tuple`): Additional types (besides `dict` values) to apply `function` recursively to their elements.
    disable_tqdm (`bool`): Whether to disable the tqdm progressbar.
    desc (`str`): Prefix for the tqdm progressbar.
    single_map_nested_func (`Callable`): Map function that applies `function` to an element from `iterable`.
        Takes a tuple of function, data_struct, types, rank, disable_tqdm, desc as input, where data_struct is an
        element of `iterable`, and `rank` is used for progress bar.
)r
   r   _map_with_multiprocessing_pool_map_with_joblib)	functioniterablenum_procbatched
batch_sizetypesdisable_tqdmdescsingle_map_nested_funcs	            r   parallel_mapr!      sD    " ))1-:lZp
 	
 Hz,Vl r   c	                    U[        U5      ::  a  UO
[        U5      n/ n	[        U5       HT  n
[        U5      U-  n[        U5      U-  nX-  [        X5      -   nX-   X:  a  SOS-   nU	R                  XX X4XZXg45        MV     [        U5      [	        S U	 5       5      :w  a*  [        S[        U5       S[	        S U	 5       5       35      e[        R                  SU S[        U5       S	U	 Vs/ s H  n[        US   5      PM     sn 35        S
u  nnU(       d  [        5       4[        R                  nn[        UUUS9 nUR                  X5      nS S S 5        [        R                  SU S35        W VVs/ s H  nU  H  nUPM     M     nnn[        R                  S[        U5       S35        U$ s  snf ! , (       d  f       Nn= fs  snnf )N   r   c              3   >   #    U  H  n[        US    5      v   M     g7fr#   Nlen.0is     r   	<genexpr>1_map_with_multiprocessing_pool.<locals>.<genexpr>7   s     :z!C!IIz   zHError dividing inputs iterable among processes. Total number of objects z
, length: c              3   >   #    U  H  n[        US    5      v   M     g7fr%   r&   r(   s     r   r+   r,   ;   s     9j3qt99jr-   z	Spawning z processes for z objects in slices of )NN)initargsinitializerz	Finished z
 processesz	Unpacked z objects)r'   rangeminappendsum
ValueErrorloggerinfor   r   set_lockr   map)r   r   r   r   r   r   r   r   r    
split_kwdsindexdivmodstartendr*   r/   r0   poolmappedproc_resobjs                         r   r   r   +   s    $s8}4x#h-HJx(mx'(mh&c%o-k%+Q158e%8'u]ipq ! 8}:z:::''*8}o 69j99:<
 	
 KK
H:_S]O;QfpRqfpabSVWXYZW[S\fpRqQrs 'Hk!&
DMM+	h{	Ct0= 
D
KK)H:Z01"(=&hHScHc&F=
KK)CK=12M Sr
 
D	C >s   .F6F;9G;
G	c	           	         ^ ^^^^^	 SS K m	T	R                  [        R                  US9   T	R	                  5       " UUU U	UU4S jU 5       5      sS S S 5        $ ! , (       d  f       g = f)Nr   )n_jobsc              3   b   >#    U  H$  nTR                  T5      " TUTTTS SS 45      v   M&     g 7f)NT)delayed)r)   rC   r   r   r   joblibr    r   s     r   r+   #_map_with_joblib.<locals>.<genexpr>U   s?      !
 NN12Hc7JX]_ceiko3pqqs   ,/)rH   parallel_backendr
   r   Parallel)
r   r   r   r   r   r   r   r   r    rH   s
   `  ```  `@r   r   r   M   sP    
 		 	 !6!C!CH	 	U  !
 !
!
 
 
V	U	Us   %A
A&r   c              #      #    U [         l        U S:X  a  SSKJn  U" 5          Sv   S[         l        g! S[         l        f = f7f)ag  
**Experimental.**  Configures the parallel backend for parallelized dataset loading, which uses the parallelization
implemented by joblib.

Args:
    backend_name (str): Name of backend for parallelization implementation, has to be supported by joblib.

 Example usage:
 ```py
 with parallel_backend('spark'):
   dataset = load_dataset(..., num_proc=2)
 ```
sparkr   )register_sparkN)r
   r   joblibsparkrN   )r   rN   s     r   rJ   rJ   [   s;       *6&w.
2-1*T*s   A2 A?A)
contextlibmultiprocessingr   r   	tqdm.autor   utilsr   r   
get_loggerr   r6   r
   r!   r   r   contextmanagerstrrJ   r   r   r   <module>rW      su     '  ) 
		H	%   4D
 23 2  2r   