
    h1                     :   S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKrSSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJrJrJr  SSKJr  SSKJrJr  SSK J!r!J"r"  SSK#J$r$  SSK%J&r&  \" \'5      r( " S S\RR                  5      r* " S S5      r+g)zDownload manager interface.    N)datetime)partial)OptionalUnion)	url_to_fs)
thread_map   )config)tqdm)ArchiveIterableFilesIterablecached_pathis_relative_path,stack_multiprocessing_download_progress_barsurl_or_path_join)get_size_checksum_dict)
get_loggerr   )NestedDataStructure
map_nested)tracked_str   )DownloadConfigc                   $    \ rS rSrSrSrSrSrSrg)DownloadMode2   a  `Enum` for how to treat pre-existing downloads and data.

The default mode is `REUSE_DATASET_IF_EXISTS`, which will reuse both
raw downloads and the prepared dataset if they exist.

The generations modes:

|                                     | Downloads | Dataset |
|-------------------------------------|-----------|---------|
| `REUSE_DATASET_IF_EXISTS` (default) | Reuse     | Reuse   |
| `REUSE_CACHE_IF_EXISTS`             | Reuse     | Fresh   |
| `FORCE_REDOWNLOAD`                  | Fresh     | Fresh   |

reuse_dataset_if_existsreuse_cache_if_existsforce_redownload N)	__name__
__module____qualname____firstlineno____doc__REUSE_DATASET_IF_EXISTSREUSE_CACHE_IF_EXISTSFORCE_REDOWNLOAD__static_attributes__r       \/home/james-whalen/.local/lib/python3.13/site-packages/datasets/download/download_manager.pyr   r   2   s     83)r)   r   c            
       0   \ rS rSrSr     SS\\   S\\   S\\   S\\   4S jjr\	S	 5       r
\	S
 5       rS\S\4S jrS rS\\   S\S\\   4S jrS\S\S\4S jrS\\\R(                  4   4S jrS\\\\   4   4S jrS rS rS rS rS rSrg)DownloadManagerG   FNdataset_namedata_dirdownload_config	base_pathc                     Xl         X l        U=(       d    [        R                  R	                  S5      U l        0 U l        XPl        U=(       d
    [        5       U l	        0 U l
        0 U l        g)a  Download manager constructor.

Args:
    data_dir:
        can be used to specify a manual directory to get the files from.
    dataset_name (`str`):
        name of dataset this instance will be used for. If
        provided, downloads will contain which datasets they were used for.
    download_config (`DownloadConfig`):
        to specify the cache directory and other
        download options
    base_path (`str`):
        base path that is used when relative paths are used to
        download files. This can be a remote url.
    record_checksums (`bool`, defaults to `True`):
        Whether to record the checksums of the downloaded files. If None, the value is inferred from the builder.
.N)_dataset_name	_data_dirospathabspath
_base_path_recorded_sizes_checksumsrecord_checksumsr   r0   downloaded_pathsextracted_paths)selfr.   r/   r0   r1   r;   s         r*   __init__DownloadManager.__init__J   sV    2 *!#;rwws';Z\& 0.B.2B "!r)   c                     U R                   $ N)r5   r>   s    r*   
manual_dirDownloadManager.manual_dirm   s    ~~r)   c                 V    [        S U R                  R                  5        5       5      $ )z+Returns the total size of downloaded files.c              3   *   #    U  H	  oS    v   M     g7f)	num_bytesNr   ).0checksums_dicts     r*   	<genexpr>2DownloadManager.downloaded_size.<locals>.<genexpr>t   s     mEl>+.Els   )sumr:   valuesrC   s    r*   downloaded_sizeDownloadManager.downloaded_sizeq   s$     mTEcEcEjEjElmmmr)   url_or_urlsdownloaded_path_or_pathsc           	          Sn[        [        [        UR                  5       UR                  5       5      5      USS9 H.  u  pE[	        XPR
                  S9U R                  [        U5      '   M0     g)z)Record size/checksum of downloaded files.   zComputing checksums)delaydesc)record_checksumN)hf_tqdmlistzipflattenr   r;   r:   str)r>   rQ   rR   rU   urlr7   s         r*   _record_sizes_checksums'DownloadManager._record_sizes_checksumsv   se     [((*,D,L,L,NOP&
IC 8N&;&;8D**3s84
r)   c                 f   U R                   R                  5       nSUl        UR                  c  SUl        [	        U R
                  US9n[        R                  " 5       n[        5          [        UUSUR                  SSSS9nSSS5        [        R                  " 5       U-
  n[        R                  S	UR                  5       S
-   S35        [        U5      n[        W5      nU R                  R!                  [#        [%        UR'                  5       UR'                  5       5      5      5        [        R                  " 5       nU R)                  X5        [        R                  " 5       U-
  n[        R                  SUR                  5       S
-   S35        UR*                  $ ! , (       d  f       GN%= f)a  Download given URL(s).

By default, only one process is used for download. Pass customized `download_config.num_proc` to change this behavior.

Args:
    url_or_urls (`str` or `list` or `dict`):
        URL or `list` or `dict` of URLs to download. Each URL is a `str`.

Returns:
    `str` or `list` or `dict`:
        The downloaded paths matching the given input `url_or_urls`.

Example:

```py
>>> downloaded_files = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
```
FNzDownloading datar0   TzDownloading data files)	map_tuplenum_procrV   batched
batch_sizezDownloading took <   z minzChecksum Computation took )r0   copyextract_compressed_filedownload_descr   _download_batchedr   nowr   r   rd   loggerinfototal_secondsr   r<   updatedictrZ   r[   r^   data)r>   rQ   r0   download_func
start_timerR   durations          r*   downloadDownloadManager.download   sf   & ..33527/((0,>O) 6 6X\\^
9;'1(11-($ < <<>J.'(>(>(@B(F'GtLM)+6#67O#P $$T#k.A.A.CE]EeEeEg*h%ij\\^
$$[K<<>J.01G1G1IR1O0PPTUV',,,+ <;s   )F!!
F0url_or_filenamesreturnc           
         [        U5      S:  GaB  UR                  5       nSUl        [        U R                  US9n[        US   5      n[        U5      (       a  [        U R                  U5      n[        U40 UR                  D6u  pTSn UR                  U5      R                  SS5      nUS:  a  [        R                  OSn[!        UUUR"                  =(       d    SS	[$        R&                  R                  S
5      S:X  aF  [(        R*                  " 5       R,                  (       a"  [(        R*                  " 5       R,                  S   OS U[.        S9$ U Vs/ s H  nU R	                  XS9PM     sn$ ! [         a     Nf = fs  snf )N   Tra   r   sizei  @r   Downloadingfiles8HF_DATASETS_STACK_MULTIPROCESSING_DOWNLOAD_PROGRESS_BARS1rb   )rV   unitpositionmax_workers
tqdm_class)lenrh   disable_tqdmr   _download_singler\   r   r   r9   r   storage_optionsrn   get	Exceptionr
   &HF_DATASETS_MULTITHREADING_MAX_WORKERSr   rj   r6   environmultiprocessingcurrent_process	_identityr   )	r>   rx   r0   rs   r7   fsr|   r   url_or_filenames	            r*   rk   !DownloadManager._download_batched   sy   
  B&-224O+/O(#D$9$9?[M '*+D%%'> I)H)HIHBDwwt}((3 BFAR==XY   $22Cm::>>"\]add#335?? )88:DDRH '  (8'7O %%o%W'7 '  &s   !E3 F3
F ?F r   c                     [        U5      n[        U5      (       a  [        U R                  U5      n[	        XS9n[        U5      nUR                  U5        U$ )Nra   )r\   r   r   r9   r   r   
set_origin)r>   r   r0   outs       r*   r    DownloadManager._download_single   sL    o.O,,.tPO/K#'
r)   path_or_bufc                 |    [        US5      (       a  [        R                  " U5      $ [        R                  " U5      $ )a  Iterate over files within an archive.

Args:
    path_or_buf (`str` or `io.BufferedReader`):
        Archive path or archive binary file object.

Yields:
    `tuple[str, io.BufferedReader]`:
        2-tuple (path_within_archive, file_object).
        File object is opened in binary mode.

Example:

```py
>>> archive = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
>>> files = dl_manager.iter_archive(archive)
```
read)hasattrr   from_buffrom_urlpath)r>   r   s     r*   iter_archiveDownloadManager.iter_archive   s3    ( ;''"++K88"//<<r)   pathsc                 .    [         R                  " U5      $ )a(  Iterate over file paths.

Args:
    paths (`str` or `list` of `str`):
        Root paths.

Yields:
    `str`: File path.

Example:

```py
>>> files = dl_manager.download_and_extract('https://huggingface.co/datasets/beans/resolve/main/data/train.zip')
>>> files = dl_manager.iter_files(files)
```
)r   from_urlpaths)r>   r   s     r*   
iter_filesDownloadManager.iter_files  s    " **511r)   c           	      p   U R                   R                  5       nSUl        [        U R                  US9n[        UUUR                  SS9n[        U5      n[        U5      nU R                  R                  [        [        UR                  5       UR                  5       5      5      5        UR                  $ )a  Extract given path(s).

Args:
    path_or_paths (path or `list` or `dict`):
        Path of file to extract. Each path is a `str`.

Returns:
    extracted_path(s): `str`, The extracted paths matching the given input
    path_or_paths.

Example:

```py
>>> downloaded_files = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
>>> extracted_files = dl_manager.extract(downloaded_files)
```
Tra   zExtracting data files)rd   rV   )r0   rh   ri   r   r   r   rd   r   r=   rp   rq   rZ   r[   rr   )r>   path_or_pathsr0   extract_funcr=   s        r*   extractDownloadManager.extract  s    $ ..33526/t44oV$$--(	
 ,M:-o>##D]-B-B-DoF]F]F_)`$ab###r)   c                 B    U R                  U R                  U5      5      $ )ah  Download and extract given `url_or_urls`.

Is roughly equivalent to:

```
extracted_paths = dl_manager.extract(dl_manager.download(url_or_urls))
```

Args:
    url_or_urls (`str` or `list` or `dict`):
        URL or `list` or `dict` of URLs to download and extract. Each URL is a `str`.

Returns:
    extracted_path(s): `str`, extracted paths of given URL(s).
)r   rv   )r>   rQ   s     r*   download_and_extract$DownloadManager.download_and_extract6  s      ||DMM+677r)   c                 6    U R                   R                  5       $ rB   )r:   rh   rC   s    r*   get_recorded_sizes_checksums,DownloadManager.get_recorded_sizes_checksumsH  s    --2244r)   c                    [        U R                  R                  5       5      [        U R                  R                  5       5      -
  n[	        U R                  R                  5       5       HU  u  p#X1;   d  M  [        R                  R                  U5      (       d  M2  [        R                  " U5        U R                  U	 MW     g rB   )
setr=   rN   r<   rY   itemsr6   r7   isfileremove)r>   paths_to_deletekeyr7   s       r*   delete_extracted_files&DownloadManager.delete_extracted_filesK  s    d2299;<s4CXCXC_C_Ca?bbd2288:;IC&277>>$+?+?		$((- <r)   c                 \    U R                   R                  (       a  U R                  5         g g rB   )r0   delete_extractedr   rC   s    r*   manage_extracted_files&DownloadManager.manage_extracted_filesR  s"    00'') 1r)   )r9   r5   r4   r:   r0   r<   r=   r;   )NNNNT)r    r!   r"   r#   is_streamingr   r\   r   r?   propertyrD   rO   r   r^   rv   rY   rk   r   r   ioBufferedReaderr   r   r   r   r   r   r   r(   r   r)   r*   r,   r,   G   s.   L '+"&48#'!"sm!" 3-!" ".1	!"
 C=!"F   n n3F bu 0-d)s)) () 
c	)V n Y\ =c23D3D.D(E =22c49n 5 2&$@8$5.*r)   r,   ),r$   enumr   r   r6   r   	functoolsr   typingr   r   fsspecfsspec.corer   tqdm.contrib.concurrentr    r
   utilsr   rX   utils.file_utilsr   r   r   r   r   r   utils.info_utilsr   utils.loggingr   utils.py_utilsr   r   utils.trackr   r0   r   r    rm   Enumr   r,   r   r)   r*   <module>r      sq     "  	  	   "  ! .  #  6 , < % + 
H	*499 **M* M*r)   