
    hL                        S r SSKrSSKrSSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
JrJr  SSKrSSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJrJr  \" \ 5      r!\ " S S5      5       r"\ " S S5      5       r# " S S\$5      r% " S S\$5      r&\ " S S5      5       r'\ " S S5      5       r( " S S\)\*\(4   5      r+g)aw  DatasetInfo record information we know about a dataset.

This includes things that we know about the dataset statically, i.e.:
 - description
 - canonical location
 - does it have validation and tests splits
 - size
 - etc.

This also includes the things that can and should be computed once we've
processed the dataset as well:
 - number of examples (in each split)
 - etc.
    N)	dataclass)Path)ClassVarOptionalUnion)	url_to_fs)DatasetCardDatasetCardData   )config)Features)	SplitDict)Version)
get_logger)asdictunique_valuesc                   2    \ rS rSr% Sr\\S'   Sr\\S'   Srg)SupervisedKeysData7    inputoutput N)	__name__
__module____qualname____firstlineno__r   str__annotations__r   __static_attributes__r       G/home/james-whalen/.local/lib/python3.13/site-packages/datasets/info.pyr   r   7   s    E3OFCr!   r   c                   2    \ rS rSr% Sr\\S'   Sr\\S'   Srg)DownloadChecksumsEntryData=   r   keyvaluer   N)	r   r   r   r   r&   r   r   r'   r    r   r!   r"   r$   r$   =   s    CME3Or!   r$   c                       \ rS rSrSrSrg)MissingCachedSizesConfigErrorC   z;The expected cached sizes of the download file are missing.r   Nr   r   r   r   __doc__r    r   r!   r"   r)   r)   C   s    Er!   r)   c                       \ rS rSrSrSrg)NonMatchingCachedSizesErrorG   z/The prepared split doesn't have expected sizes.r   Nr+   r   r!   r"   r.   r.   G   s    9r!   r.   c                   `    \ rS rSr% Sr\\   \S'   Sr\\	   \S'   S r
\S\	SS 4S j5       rS	rg)
PostProcessedInfoK   Nfeaturesresources_checksumsc                     U R                   bF  [        U R                   [        5      (       d&  [        R                  " U R                   5      U l         g g g N)r3   
isinstancer   	from_dictselfs    r"   __post_init__PostProcessedInfo.__post_init__P   s;    ==$Zx-P-P$..t}}=DM .Q$r!   post_processed_info_dictreturnc           
          [         R                  " U 5       Vs1 s H  o"R                  iM     nnU " S0 UR                  5        VVs0 s H  u  pEXC;   d  M  XE_M     snnD6$ s  snf s  snnf Nr   dataclassesfieldsnameitems)clsr=   ffield_nameskvs         r"   r8   PostProcessedInfo.from_dictU   sb    '2'9'9#'>?'>!vv'>?]'?'E'E'G\'Gtq1K[dad'G\]] @\   A"A'A')r3   )r   r   r   r   r3   r   r   r   r4   dictr;   classmethodr8   r    r   r!   r"   r1   r1   K   sL    #'Hhx '*.$.>
 ^ ^:M ^ ^r!   r1   c                      \ rS rSr% Sr\R                  " \S9r\\	S'   \R                  " \S9r
\\	S'   \R                  " \S9r\\	S'   \R                  " \S9r\\	S'   Sr\\   \	S	'   Sr\\   \	S
'   Sr\\   \	S'   Sr\\   \	S'   Sr\\   \	S'   Sr\\   \	S'   Sr\\\\4      \	S'   Sr\\   \	S'   Sr\\   \	S'   Sr\\   \	S'   Sr\\   \	S'   Sr \\   \	S'   Sr!\\   \	S'   / SQr"\#\$\      \	S'   S r%S*S\\   4S jjr&S+S jr'S r(\)S\$S    4S j5       r*\)S,S\S\\   S S 4S! jj5       r+\)S"\S S 4S# j5       r,S-S.S$ jjr-S/S% jr.S \4S& jr/\)S'\S S 4S( j5       r0S)r1g)0DatasetInfo[   a	  Information about a dataset.

`DatasetInfo` documents datasets, including its name, version, and features.
See the constructor arguments and properties for a full list.

Not all fields are known on construction and may be updated later.

Attributes:
    description (`str`):
        A description of the dataset.
    citation (`str`):
        A BibTeX citation of the dataset.
    homepage (`str`):
        A URL to the official homepage for the dataset.
    license (`str`):
        The dataset's license. It can be the name of the license or a paragraph containing the terms of the license.
    features ([`Features`], *optional*):
        The features used to specify the dataset's column types.
    post_processed (`PostProcessedInfo`, *optional*):
        Information regarding the resources of a possible post-processing of a dataset. For example, it can contain the information of an index.
    supervised_keys (`SupervisedKeysData`, *optional*):
        Specifies the input feature and the label for supervised learning if applicable for the dataset (legacy from TFDS).
    builder_name (`str`, *optional*):
        The name of the `GeneratorBasedBuilder` subclass used to create the dataset. It is also the snake_case version of the dataset builder class name.
    config_name (`str`, *optional*):
        The name of the configuration derived from [`BuilderConfig`].
    version (`str` or [`Version`], *optional*):
        The version of the dataset.
    splits (`dict`, *optional*):
        The mapping between split name and metadata.
    download_checksums (`dict`, *optional*):
        The mapping between the URL to download the dataset's checksums and corresponding metadata.
    download_size (`int`, *optional*):
        The size of the files to download to generate the dataset, in bytes.
    post_processing_size (`int`, *optional*):
        Size of the dataset in bytes after post-processing, if any.
    dataset_size (`int`, *optional*):
        The combined size in bytes of the Arrow tables for all splits.
    size_in_bytes (`int`, *optional*):
        The combined size in bytes of all files associated with the dataset (downloaded files + Arrow files).
    **config_kwargs (additional keyword arguments):
        Keyword arguments to be passed to the [`BuilderConfig`] and used in the [`DatasetBuilder`].
)default_factorydescriptioncitationhomepagelicenseNr3   post_processedsupervised_keysbuilder_namedataset_nameconfig_nameversionsplitsdownload_checksumsdownload_sizepost_processing_sizedataset_sizesize_in_bytes)r[   r_   ra   r3   r]   _INCLUDED_INFO_IN_YAMLc                    U R                   bD  [        U R                   [        5      (       d%  [        R                  " U R                   5      U l         U R                  bC  [        U R                  [
        5      (       d$  [
        R                  U R                  5      U l        U R                  b~  [        U R                  [        5      (       d_  [        U R                  [        5      (       a  [        U R                  5      U l        O%[        R                  " U R                  5      U l        U R                  bD  [        U R                  [        5      (       d%  [        R                  " U R                  5      U l	        U R                  bx  [        U R                  [        5      (       dX  [        U R                  [        [        45      (       a  [        U R                  6 U l        g [        S0 U R                  D6U l        g g g r@   )r3   r7   r   r8   rW   r1   r\   r   r   r]   r   from_split_dictrX   r   tuplelistr9   s    r"   r;   DatasetInfo.__post_init__   sS   ==$Zx-P-P$..t}}=DM*:d>Q>QSd3e3e"3"="=d>Q>Q"RD<<#Jt||W,M,M$,,,,&t||4&00>;;":dkk9+M+M#33DKK@DK+Jt?S?SUg4h4h$..>>'94;O;O'P$'9'QD<P<P'Q$	 5i+r!   storage_optionsc                    [        U40 U=(       d    0 D6tpEUR                  [        R                  " U[        R
                  5      S5       nU R                  XbS9  SSS5        U R                  (       aP  UR                  [        R                  " U[        R                  5      S5       nU R                  U5        SSS5        gg! , (       d  f       Np= f! , (       d  f       g= f)au  Write `DatasetInfo` and license (if present) as JSON files to `dataset_info_dir`.

Args:
    dataset_info_dir (`str`):
        Destination directory.
    pretty_print (`bool`, defaults to `False`):
        If `True`, the JSON will be pretty-printed with the indent level of 4.
    storage_options (`dict`, *optional*):
        Key/value pairs to be passed on to the file-system backend, if any.

        <Added version="2.9.0"/>

Example:

```py
>>> from datasets import load_dataset
>>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation")
>>> ds.info.write_to_directory("/path/to/directory/")
```
wb)pretty_printN)
r   open	posixpathjoinr   DATASET_INFO_FILENAME
_dump_inforV   LICENSE_FILENAME_dump_license)r:   dataset_info_dirrl   ri   fs_rG   s          r"   write_to_directoryDatasetInfo.write_to_directory   s    , +G0E2GWWY^^$4f6R6RSUYZ^_OOAO9 [<<(8&:Q:QRTXY]^""1% ZY  [Z ZYs   C*C
C
C%c                     UR                  [        R                  " [        U 5      U(       a  SOSS9R	                  S5      5        g)zQDump info in `file` file-like object open in bytes mode (to support remote files)   Nindentutf-8)writejsondumpsr   encode)r:   filerl   s      r"   rq   DatasetInfo._dump_info   s-    

4::fTl1$OVVW^_`r!   c                 X    UR                  U R                  R                  S5      5        g)zTDump license in `file` file-like object open in bytes mode (to support remote files)r}   N)r~   rV   r   )r:   r   s     r"   rs   DatasetInfo._dump_license   s    

4<<&&w/0r!   dataset_infosc           	      J  ^ T Vs/ s H  o"c  M  UR                  5       PM     snm[        T5      S:  a  [        U4S jT 5       5      (       a  TS   $ SR                  [	        S T 5       5      5      R                  5       nSR                  [	        S T 5       5      5      R                  5       nSR                  [	        S T 5       5      5      R                  5       nSR                  [	        S T 5       5      5      R                  5       nS nS nU " UUUUUUS9$ s  snf )	Nr   c              3   4   >#    U  H  nTS    U:H  v   M     g7f)r   Nr   ).0	dset_infor   s     r"   	<genexpr>)DatasetInfo.from_merge.<locals>.<genexpr>   s     )gYfI-*:i*GYfs   z

c              3   8   #    U  H  oR                   v   M     g 7fr6   )rS   r   infos     r"   r   r      s     /[]T0@0@]   c              3   8   #    U  H  oR                   v   M     g 7fr6   )rT   r   s     r"   r   r           ,U}t]]}r   c              3   8   #    U  H  oR                   v   M     g 7fr6   )rU   r   s     r"   r   r      r   r   c              3   8   #    U  H  oR                   v   M     g 7fr6   )rV   r   s     r"   r   r      s     +S]TLL]r   )rS   rT   rU   rV   r3   rX   )copylenallro   r   strip)	rF   r   r   rS   rT   rU   rV   r3   rX   s	    `       r"   
from_mergeDatasetInfo.from_merge   s   ;Hb=i))=b}!c)gYf)g&g&g ##kk-/[]/["[\bbd;;},U},UUV\\^;;},U},UUV\\^++m+S]+SSTZZ\#+
 	
 cs
   D D rt   r>   c                 l   [        U40 U=(       d    0 D6tp4[        R                  SU 35        U(       d  [        S5      eUR	                  [
        R                  " U[        R                  5      SSS9 n[        R                  " U5      nSSS5        U R                  W5      $ ! , (       d  f       N= f)a  Create [`DatasetInfo`] from the JSON file in `dataset_info_dir`.

This function updates all the dynamically generated fields (num_examples,
hash, time of creation,...) of the [`DatasetInfo`].

This will overwrite all previous metadata.

Args:
    dataset_info_dir (`str`):
        The directory containing the metadata file. This
        should be the root directory of a specific dataset version.
    storage_options (`dict`, *optional*):
        Key/value pairs to be passed on to the file-system backend, if any.

        <Added version="2.9.0"/>

Example:

```py
>>> from datasets import DatasetInfo
>>> ds_info = DatasetInfo.from_directory("/path/to/directory/")
```
zLoading Dataset info from zECalling DatasetInfo.from_directory() with undefined dataset_info_dir.rr}   encodingN)r   loggerdebug
ValueErrorrm   rn   ro   r   rp   r   loadr8   )rF   rt   ri   ru   rv   rG   dataset_info_dicts          r"   from_directoryDatasetInfo.from_directory   s    4 +G0E2G12B1CDEdeeWWY^^$4f6R6RSUXcjWkop $		! l}}.// lks   5B%%
B3r   c           
          [         R                  " U 5       Vs1 s H  o"R                  iM     nnU " S0 UR                  5        VVs0 s H  u  pEXC;   d  M  XE_M     snnD6$ s  snf s  snnf r@   rA   )rF   r   rG   rH   rI   rJ   s         r"   r8   DatasetInfo.from_dict  sb    '2'9'9#'>?'>!vv'>?V'8'>'>'@U'@tqADTdad'@UVV @UrL   c                     U R                   nUR                  " S0 UR                   R                  5        VVs0 s H(  u  pEUc	  U(       a  M  U[        R                  " U5      _M*     snnD6  g s  snnf r@   )__dict__updaterE   r   deepcopy)r:   other_dataset_infoignore_none	self_dictrI   rJ   s         r"   r   DatasetInfo.update  sh    MM	 	
 /77==??DAM $4==##?	
s   A.A.c                     U R                   " S0 U R                  R                  5        VVs0 s H  u  pU[        R                  " U5      _M     snnD6$ s  snnf r@   )	__class__r   rE   r   r   )r:   rI   rJ   s      r"   r   DatasetInfo.copy(  sE    ~~XATATAV WAVDMM!$4!4AV WXX Ws   #Ac                    0 n[        U 5      nU Hm  nX0R                  ;   d  M  [        X5      n[        US5      (       a  UR	                  5       X'   MD  [        US5      (       a  UR                  5       X'   Mi  XAU'   Mo     U$ )N_to_yaml_list_to_yaml_string)r   rc   getattrhasattrr   r   )r:   	yaml_dictr   r&   r'   s        r"   _to_yaml_dictDatasetInfo._to_yaml_dict+  s{    	"4L$C111*5/22%*%8%8%:INU$566%*%:%:%<IN%*cN % r!   	yaml_datac           
         [         R                  " U5      nUR                  S5      b  [        R                  " US   5      US'   UR                  S5      b  [
        R                  " US   5      US'   [        R                  " U 5       Vs1 s H  o"R                  iM     nnU " S0 UR                  5        VVs0 s H  u  pEXC;   d  M  XE_M     snnD6$ s  snf s  snnf )Nr3   r]   r   )
r   r   getr   _from_yaml_listr   rB   rC   rD   rE   )rF   r   rG   rH   rI   rJ   s         r"   _from_yaml_dictDatasetInfo._from_yaml_dict9  s    MM),	==$0$,$<$<Yz=R$SIj!=="."+";";Ih<O"PIh'2'9'9#'>?'>!vv'>?Ny'8M'8tqA<Ldad'8MNN @Ms   C9CC)r3   rW   r]   rX   r\   )FN)Fr6   )T)r   rP   )r>   rP   )2r   r   r   r   r,   rB   fieldr   rS   r   rT   rU   rV   r3   r   r   rW   r1   rX   r   rY   rZ   r[   r\   r   r   r]   rM   r^   r_   intr`   ra   rb   rc   r   rg   r;   rw   rq   rs   rN   r   r   r8   r   r   r   r   r    r   r!   r"   rP   rP   [   s   *Z #((=K=%%c:Hc:%%c:Hc:$$S9GS9#'Hhx '26NH./648OX018 #'L(3-&"&L(3-&!%K#%-1GXeCL)*1!FHTN!)--#'M8C='*.(3-."&L(3-&#'M8C='3HT#Y/ R&&X`aeXf &:a1 
tM': 
 
. 0c 0HTN 0^k 0 0B W$ W= W W
Yt  O O O Or!   rP   c                   d    \ rS rSrS
SS jjr\SS j5       r\S\SS 4S j5       rS\SS4S jr	S	r
g)DatasetInfosDictiD  r>   Nc           	         0 n[         R                  R                  U[        R                  5      n[         R                  R                  U[        R
                  5      nU(       d  U R                  U5      nUR                  U 5        [         R                  R                  U5      (       a`  [        USSS9 nUR                  5        VV	s0 s H  u  pU[        U	5      _M     n
nn	[        R                  " XU(       a  SOS S9  S S S 5        [         R                  R                  U5      (       a#  [        R                  " U5      nUR                   nOS n[#        5       nU(       aK  UR%                  U5        Uc  [        S['        U5      -   S-   5      OUnUR)                  [+        U5      5        g g s  sn	nf ! , (       d  f       N= f)Nwr}   r   rz   r{   z---
z
---
)ospathro   r   DATASETDICT_INFOS_FILENAMEREPOCARD_FILENAMEr   r   existsrm   rE   r   r   dumpr	   r   datar
   to_dataset_card_datar   saver   )r:   dataset_infos_dir	overwriterl   total_dataset_infosdataset_infos_pathdataset_readme_pathrG   r[   r   dataset_infos_dictdataset_carddataset_card_datas                r"   rw   #DatasetInfosDict.write_to_directoryE  sv    WW\\*;V=^=^_ ggll+<f>V>VW"&"5"56G"H""4(77>>,--(#@AQdQjQjQl&Ql7M{K	!22Ql # & 		,\tT	 A 77>>-..&++,?@L , 1 1L / 1445FGMYMaGc*;&<<yHIgs  d#678 & A@s   7F8F2#!F82F88
Gc                 v   [         R                  SU 35        [        R                  R	                  [        R                  R                  U[        R                  5      5      (       aQ  [        R                  " [        U5      [        R                  -  5      R                  nSU;   a  U R                  U5      $ [        R                  R	                  [        R                  R                  U[        R                  5      5      (       a  [        [        R                  R                  U[        R                  5      SS9 nU " [        R                  " U5      R!                  5        VVs0 s H  u  pEU["        R%                  U5      _M     snn5      sS S S 5        $ U " 5       $ s  snnf ! , (       d  f       g = f)NzLoading Dataset Infos from dataset_infor}   r   )r   r   r   r   r   ro   r   r   r	   r   r   r   from_dataset_card_datar   rm   r   rE   rP   r8   )rF   r   r   rG   r[   r   s         r"   r   DatasetInfosDict.from_directorya  s=   23D2EFG77>>"'',,'8&:R:RSTT + 0 06G1H6KcKc1c d i i!22112CDD77>>"'',,'8&:[:[\]]bggll#4f6W6WXcjkop ?Ciil>P>P>R>R:K $[%:%:;L%MM>R lk 5L lks    *F**"F$F*$F**
F8r   c           	         [        UR                  S5      [        [        45      (       a  [        US   [        5      (       a@  U " US    Vs0 s H)  nUR                  SS5      [        R                  U5      _M+     sn5      $ [        R                  US   5      nUS   R                  SS5      Ul        U " UR                  U05      $ U " 5       $ s  snf )Nr   r[   default)r7   r   rg   rM   rP   r   r[   )rF   r   dataset_info_yaml_dictr   s       r"   r   'DatasetInfosDict.from_dataset_card_datau  s    '++N;dD\JJ+N;TBB
 7H6W	 7X2 /22=)LkNiNi2O  7X	   +::;L^;\]+<^+L+P+PQ^`i+j(L44lCDD5Ls   0Cc                    U (       Gax  SU;   a3  [        US   [        5      (       a  US   R                  SS5      US   0nO:SU;   a2  [        US   [        5      (       a  US    Vs0 s H	  nUS   U_M     nnO0 n0 UEU R	                  5        VVs0 s H  u  pEXER                  5       _M     snnEnUR	                  5        H	  u  pGXGS'   M     [        U5      S:X  aN  [        [        UR                  5       5      5      US'   US   R                  SS 5      nUS:w  a  SU0US   EUS'   g g / US'   [        UR	                  5       5       H1  u  pHUR                  SS 5        SU0UEnUS   R                  U5        M3     g g s  snf s  snnf )Nr   r[   r   r   )r7   rM   r   rg   rE   r   r   nextitervaluespopsortedappend)	r:   r   dataset_metadata_infosconfig_metadatar[   r   r   dset_info_yaml_dictr   s	            r"   r   %DatasetInfosDict.to_dataset_card_data  s   !22zBSTbBcei7j7j%n599-SUfguUv*&  #44DUVdDegk9l9l ,=^+L*+L $M2OC+L ' *&
 *,&#(#X\XbXbXdeXd>Tk; 7 7 99Xde#
 5H4M4M4O05@M2 5P&'1,48>Q>X>X>Z9[4\!.1/?CCMSWX)+ &{9+N;9%n5 , 57!.1;ABUB[B[B];^7K*..}dC.;[-cLb-c*%n5<<=ST	 <_C * fs   'FFr   )FF)r>   N)r>   r   )r   r   r   r   rw   rN   r   r
   r   r   r    r   r!   r"   r   r   D  sT    98  &  K]  $&Uo &U$ &Ur!   r   ),r,   r   rB   r   r   rn   r   pathlibr   typingr   r   r   fsspecfsspec.corer   huggingface_hubr	   r
   r   r   r3   r   r]   r   utilsr   utils.loggingr   utils.py_utilsr   r   r   r   r   r$   	Exceptionr)   r.   r1   rP   rM   r   r   r   r!   r"   <module>r      s        	  !  , ,  ! 8     % 1 
H	   
   
FI F:) : ^ ^ ^ eO eO eOPjUtC,- jUr!   