
    h=                     D   S r SSKrSSKJrJr  SSKJrJr  SSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJrJrJr  SSKJr  SSKJr  \" \5      r " S S\5      r     SS\S\\\\\4      S\\	   S\\\\4      S\\\\4      S\\\\4      4S jjr     SS\S\\\\4      S\\	   S\\\\4      S\\\\\4      4
S jjr!    SS\S\\\\4      S\\	   S\\\\4      S\\\\\4      S\\   4S jjr"      SS\S\\   S\\\\\   \\\\\\   4   4   4      S\\	   S\\\\4      S\\\\4      S\\\\4      S\4S jjr#      SS\S\\   S\\\\\   \\\\\\   4   4   4      S\\	   S\\\\4      S\\\\4      S\\\\4      4S jjr$g)zList and inspect datasets.    N)MappingSequence)OptionalUnion   )DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryget_dataset_builder_classload_dataset_builder)
get_logger)Versionc                       \ rS rSrSrg)SplitsNotFoundError&    N)__name__
__module____qualname____firstlineno____static_attributes__r       J/home/james-whalen/.local/lib/python3.13/site-packages/datasets/inspect.pyr   r   &   s    r   r   path
data_filesdownload_configdownload_moderevisiontokenc                 r    [        U UUUUUS9nU Vs0 s H  nU[        SU UUUUUUS.UD6_M     sn$ s  snf )a6  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('cornell-movie-review-data/rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    )r   r    r   r   r   r!   )r   config_namer   r   r   r    r!   r   )get_dataset_config_namesget_dataset_config_info)	r   r   r   r   r    r!   config_kwargsconfig_namesr#   s	            r   get_dataset_infosr(   *   s}    X ,'#L& ( (K 	, 	
#!+'	
 	
 		
 (  s   4c                 &   [        U 4UUUUS.UD6n[        U[        R                  R	                  U 5      S9n[        UR                  R                  5       5      =(       d0    UR                  R                  SUR                  =(       d    S5      /$ )a  Get the list of available config names for a particular dataset.

Args:
    path (`str`): path to the dataset repository. Can be either:

        - a local path to the dataset directory containing the data files,
            e.g. `'./dataset/squad'`
        - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
            e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
    revision (`Union[str, datasets.Version]`, *optional*):
        If specified, the dataset module will be loaded from the datasets repository at this version.
        By default:
        - it is set to the local version of the lib.
        - it will also try to load it from the main branch if it's not available at the local version of the lib.
        Specifying a version that is different from your local version of the lib might cause compatibility issues.
    download_config ([`DownloadConfig`], *optional*):
        Specific download configuration parameters.
    download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
        Download/generate mode.
    data_files (`Union[Dict, List, str]`, *optional*):
        Defining the data_files of the dataset configuration.
    **download_kwargs (additional keyword arguments):
        Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
        for example `token`.

Example:

```py
>>> from datasets import get_dataset_config_names
>>> get_dataset_config_names("nyu-mll/glue")
['cola',
 'sst2',
 'mrpc',
 'qqp',
 'stsb',
 'mnli',
 'mnli_mismatched',
 'mnli_matched',
 'qnli',
 'rte',
 'wnli',
 'ax']
```
r    r   r   r   dataset_namer#   default)r   r   osr   basenamelistbuilder_configskeysbuilder_kwargsgetDEFAULT_CONFIG_NAME)r   r    r   r   r   download_kwargsdataset_modulebuilder_clss           r   r$   r$   m   s    h ,'# N ,NIYIYZ^I_`K++0023 %%))-9X9X9e\ef8 r   returnc                 "   [        U 4UUUUS.UD6n[        U[        R                  R	                  U 5      S9n[        UR                  R                  5       5      nU(       a  [        U5      S:X  a  US   OSn	OSn	UR                  =(       d    U	$ )a  Get the default config name for a particular dataset.
Can return None only if the dataset has multiple configurations and no default configuration.

Args:
    path (`str`): path to the dataset repository. Can be either:

        - a local path to the dataset directory containing the data files,
            e.g. `'./dataset/squad'`
        - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
            e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
    revision (`Union[str, datasets.Version]`, *optional*):
        If specified, the dataset module will be loaded from the datasets repository at this version.
        By default:
        - it is set to the local version of the lib.
        - it will also try to load it from the main branch if it's not available at the local version of the lib.
        Specifying a version that is different from your local version of the lib might cause compatibility issues.
    download_config ([`DownloadConfig`], *optional*):
        Specific download configuration parameters.
    download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
        Download/generate mode.
    data_files (`Union[Dict, List, str]`, *optional*):
        Defining the data_files of the dataset configuration.
    **download_kwargs (additional keyword arguments):
        Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
        for example `token`.

Returns:
    Optional[str]: the default config name if there is one

Example:

```py
>>> from datasets import get_dataset_default_config_name
>>> get_dataset_default_config_name("openbookqa")
'main'
```
r*   r+   r   r   Nr-   )
r   r   r.   r   r/   r0   r1   r2   lenr5   )
r   r    r   r   r   r6   r7   r8   r1   default_config_names
             r   get_dataset_default_config_namer=      s    Z ,'# N ,NIYIYZ^I_`K;66;;=>O474HA4Moa0SW'**A.AAr   r#   c           
         [        U 4UUUUUUS.UD6nUR                  n	U	R                  c  U(       a  UR                  5       O	[	        5       nUb  Xcl        UR                  [        UR                  US95         UR                  [        UR                  US95       V
s0 s H  n
U
R                  U
R                  U S._M     sn
U	l        U	$ U	$ s  sn
f ! [         a  n[        S5      UeSnAff = f)aG  Get the meta information (DatasetInfo) about a dataset for a particular config

Args:
    path (`str`): path to the dataset repository. Can be either:

        - a local path to the dataset directory containing the data files,
            e.g. `'./dataset/squad'`
        - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
            e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
    config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
    data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
    download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
    download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
    revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset to load.
        As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
        You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
    token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
        If True, or not specified, will get token from `"~/.huggingface"`.
    **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

)namer   r   r   r    r!   N)	base_pathr   )r?   r,   z<The split names could not be parsed from the dataset config.)r   infosplitscopyr   r!   _check_manual_downloadr
   r@   _split_generatorsr?   	Exceptionr   )r   r#   r   r   r   r    r!   r&   builderrA   split_generatorerrs               r   r%   r%      s   > #	'#	 	G <<D{{4C/..0IY$)!&&$w/@/@Rab	
	o (/'@'@,w7H7HZij((O  $$/C/CUY&ZZ(DK K4K  	o%&deknn	os*   ;%C  $CC C 
C0C++C0c           
      n    [        U 4UUUUUUS.UD6n[        UR                  R                  5       5      $ )a  Get the list of available splits for a particular config and dataset.

Args:
    path (`str`): path to the dataset repository. Can be either:

        - a local path to the dataset directory containing the data files,
            e.g. `'./dataset/squad'`
        - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
            e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
    config_name (`str`, *optional*):
        Defining the name of the dataset configuration.
    data_files (`str` or `Sequence` or `Mapping`, *optional*):
        Path(s) to source data file(s).
    download_config ([`DownloadConfig`], *optional*):
        Specific download configuration parameters.
    download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
        Download/generate mode.
    revision ([`Version`] or `str`, *optional*):
        Version of the dataset to load.
        As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
        You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
    token (`str` or `bool`, *optional*):
        Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
        If `True`, or not specified, will get token from `"~/.huggingface"`.
    **config_kwargs (additional keyword arguments):
        Optional attributes for builder class which will override the attributes if supplied.

Example:

```py
>>> from datasets import get_dataset_split_names
>>> get_dataset_split_names('cornell-movie-review-data/rotten_tomatoes')
['train', 'validation', 'test']
```
)r#   r   r   r   r    r!   )r%   r0   rB   r2   )	r   r#   r   r   r   r    r!   r&   rA   s	            r   get_dataset_split_namesrK   *  sM    Z #	'#	 	D   "##r   )NNNNN)NNNN)NNNNNN)%__doc__r.   collections.abcr   r   typingr   r   download.download_configr   download.download_managerr	   #download.streaming_download_managerr
   rA   r   loadr   r   r   utils.loggingr   utils.versionr   r   logger
ValueErrorr   strdictr0   boolr(   r$   r=   r%   rK   r   r   r   <module>rZ      sa    ! 	 - " 4 3 I  
 & " 
H		* 	 48048<.2(,@
@tT3/0@ n-@ E,"345	@
 uS'\*+@ E$)$%@J /3048<37?
?uS'\*+? n-? E,"345	?
 tT3/0?H /3048<37;B
;BuS'\*+;B n-;B E,"345	;B
 tT3/0;B c];B@ "&_c048<.2(,:
:#: sHSM73c8TW=FX@Y;Y3ZZ[\: n-	:
 E,"345: uS'\*+: E$)$%: :~ "&_c048<.2(,7$
7$#7$ sHSM73c8TW=FX@Y;Y3ZZ[\7$ n-	7$
 E,"3457$ uS'\*+7$ E$)$%7$r   