
    hq                         S SK r S SKrS SKJr  S SKJrJr  SSKJrJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&  SSK'J(r(  SSK)J*r*  SS	K+J,r,  \(" \-5      r. " S
 S5      r/g)    N)Iterable)OptionalUnion   )!SINGLE_FILE_COMPRESSION_PROTOCOLSArchiveIterableFilesIterable_get_extraction_protocol_get_path_extension!_prepare_path_and_storage_optionsis_relative_pathurl_or_path_join	xbasenamexdirname	xet_parsexexistsxgetsizexglob
xgzip_openxisdirxisfilexjoinxlistdirxnumpy_loadxopenxpandas_read_csvxpandas_read_excelxPathxpyarrow_parquet_read_tablexrelpathxsio_loadmatxsplit	xsplitextxwalkxxml_dom_minidom_parse)
get_logger)
map_nested   )DownloadConfigc            
       
   \ rS rSrSrSr    SS\\   S\\   S\\   S\\   4S	 jjr	\
S
 5       rS rS\S\4S jrS rS\S\4S jrS rS\\\R&                  4   S\\   4S jrS\\\\   4   S\\   4S jrS rS rSrg)StreamingDownloadManager/   a  
Download manager that uses the "::" separator to navigate through (possibly remote) compressed archives.
Contrary to the regular `DownloadManager`, the `download` and `extract` methods don't actually download nor extract
data, but they rather return the path or url that could be opened using the `xopen` function which extends the
built-in `open` function to stream data from remote files.
TNdataset_namedata_dirdownload_config	base_pathc                     Xl         X l        U=(       d    [        R                  R	                  S5      U l        U=(       d
    [        5       U l        S U l        SU l	        g )N.F)
_dataset_name	_data_dirospathabspath
_base_pathr)   r/   downloaded_sizerecord_checksums)selfr-   r.   r/   r0   s        f/home/james-whalen/.local/lib/python3.13/site-packages/datasets/download/streaming_download_manager.py__init__!StreamingDownloadManager.__init__9   sH     *!#;rwws';.B.2B# %    c                     U R                   $ N)r4   r;   s    r<   
manual_dir#StreamingDownloadManager.manual_dirG   s    ~~r?   c                 0    [        U R                  USS9nU$ )a  Normalize URL(s) of files to stream data from.
This is the lazy version of `DownloadManager.download` for streaming.

Args:
    url_or_urls (`str` or `list` or `dict`):
        URL(s) of files to stream data from. Each url is a `str`.

Returns:
    url(s): (`str` or `list` or `dict`), URL(s) to stream data from matching the given input url_or_urls.

Example:

```py
>>> downloaded_files = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
```
T	map_tuple)r'   _download_singler;   url_or_urlss     r<   download!StreamingDownloadManager.downloadK   s    " !!6!6tTr?   urlpathreturnc                 h    [        U5      n[        U5      (       a  [        U R                  U5      nU$ rA   )strr   r   r8   )r;   rM   s     r<   rH   )StreamingDownloadManager._download_single_   s+    g,G$$&t@Gr?   c                 0    [        U R                  USS9nU$ )aC  Add extraction protocol for given url(s) for streaming.

This is the lazy version of `DownloadManager.extract` for streaming.

Args:
    url_or_urls (`str` or `list` or `dict`):
        URL(s) of files to stream data from. Each url is a `str`.

Returns:
    url(s): (`str` or `list` or `dict`), URL(s) to stream data from matching the given input `url_or_urls`.

Example:

```py
>>> downloaded_files = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
>>> extracted_files = dl_manager.extract(downloaded_files)
```
TrF   )r'   _extract)r;   rJ   urlpathss      r<   extract StreamingDownloadManager.extractf   s    & dmm[DIr?   c                    [        U5      n[        XR                  S9nUR                  S5      S   n[	        U5      nUS;   d  UR                  S5      (       a  [        SU S35      eUc  U$ U[        ;   aW  [        R                  R                  UR                  S5      S   5      nSU;   a  US UR                  S5       OUnU S	U SU 3$ U S
U 3$ )Nr/   z::r   )tgztar)z.tar.gzz.tar.bz2z.tar.xzz+Extraction protocol for TAR archives like 'z' is not implemented in streaming mode. Please use `dl_manager.iter_archive` instead.

Example usage:

	url = dl_manager.download(url)
	tar_archive_iterator = dl_manager.iter_archive(url)

	for filename, file in tar_archive_iterator:
		...r2   z://z://::)rP   r
   r/   splitr   endswithNotImplementedErrorr   r5   r6   basenamerindex)r;   rM   protocolr6   	extension
inner_files         r<   rS   !StreamingDownloadManager._extract|   s    g,+GEYEYZ}}T"1%'-	&$--8Z*[*[%=gY G   N::))'--*=a*@AJAD
AR$<j&7&7&<=XbJZs:,b	::ZuWI..r?   c                 B    U R                  U R                  U5      5      $ )a  Prepare given `url_or_urls` for streaming (add extraction protocol).

This is the lazy version of `DownloadManager.download_and_extract` for streaming.

Is equivalent to:

```
urls = dl_manager.extract(dl_manager.download(url_or_urls))
```

Args:
    url_or_urls (`str` or `list` or `dict`):
        URL(s) to stream from data from. Each url is a `str`.

Returns:
    url(s): (`str` or `list` or `dict`), URL(s) to stream data from matching the given input `url_or_urls`.
)rU   rK   rI   s     r<   download_and_extract-StreamingDownloadManager.download_and_extract   s    $ ||DMM+677r?   urlpath_or_bufc                     [        US5      (       a  [        R                  " U5      $ [        R                  " XR                  S9$ )a  Iterate over files within an archive.

Args:
    urlpath_or_buf (`str` or `io.BufferedReader`):
        Archive path or archive binary file object.

Yields:
    `tuple[str, io.BufferedReader]`:
        2-tuple (path_within_archive, file_object).
        File object is opened in binary mode.

Example:

```py
>>> archive = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
>>> files = dl_manager.iter_archive(archive)
```
readrX   )hasattrr   from_buffrom_urlpathr/   )r;   rg   s     r<   iter_archive%StreamingDownloadManager.iter_archive   s9    ( >6**"++N;;"//PdPdeer?   rT   c                 >    [         R                  " XR                  S9$ )a(  Iterate over files.

Args:
    urlpaths (`str` or `list` of `str`):
        Root paths.

Yields:
    str: File URL path.

Example:

```py
>>> files = dl_manager.download_and_extract('https://huggingface.co/datasets/beans/resolve/main/data/train.zip')
>>> files = dl_manager.iter_files(files)
```
rX   )r	   from_urlpathsr/   )r;   rT   s     r<   
iter_files#StreamingDownloadManager.iter_files   s    " **8EYEYZZr?   c                     g rA    rB   s    r<   manage_extracted_files/StreamingDownloadManager.manage_extracted_files       r?   c                     g rA   rt   rB   s    r<   get_recorded_sizes_checksums5StreamingDownloadManager.get_recorded_sizes_checksums   rw   r?   )r8   r4   r3   r/   r9   r:   )NNNN)__name__
__module____qualname____firstlineno____doc__is_streamingr   rP   r)   r=   propertyrC   rK   rH   rU   rS   re   r   ioBufferedReaderr   tuplerm   listrq   ru   ry   __static_attributes__rt   r?   r<   r+   r+   /   s     L '+"&48#'&sm& 3-& ".1	&
 C=&  (  ,/ / /68(f5b6G6G1G+H fXV[_ f2[5d3i#8 [Xc] [&r?   r+   )0r   r5   collections.abcr   typingr   r   utils.file_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   utils.loggingr&   utils.py_utilsr'   r/   r)   r{   loggerr+   rt   r?   r<   <module>r      sb    	 	 $ "                 B ' ' + 
H	l lr?   