
    h$              	          S SK r S SKrS SKJr  S SKJr  S SKJr  S SKJ	r	J
r
Jr  S SKrS SKJr  SSKJr  SS	KJr  SS
KJrJr  SSKJr  SSKJr  \" \5      r " S S\R8                  5      rS\S\\\   \4   4S jr  " S S\!\\!\\	4   4   5      r"0 S/ _S/ _S/ _S/ _S/ _S/ _S/ _S/ _S/ _S/ _S/ _S/ _S / _S!/ _S"/ _S#/ _S$/ _0 S%/ _S&/ _S'/ _S(/ _S)/ _S*/ _S+/ _S,/ _S-/ _S./ _S// _S0/ _S1/ _S2/ _S3/ _S4/ _S5/ _E/ / / S6.Er#g)7    N)Counter)groupby)
itemgetter)AnyClassVarOptional)DatasetCardData   )METADATA_CONFIGS_FIELD)Features)DatasetInfoDatasetInfosDict)	_split_re)
get_loggerc                   2   ^  \ rS rSrS rSU 4S jjrSrU =r$ )_NoDuplicateSafeLoader   c                 Z   UR                    VVs/ s H  u  p#U R                  U   PM     nnnU Vs/ s H%  n[        U[        5      (       a  [	        U5      OUPM'     nn[        U5      nU Vs/ s H  oVU   S:  d  M  UPM     nnU(       a  [        SU 35      eg s  snnf s  snf s  snf )N   zGot duplicate yaml keys: )valueconstructed_objects
isinstancelisttupler   	TypeError)selfnodekey_node_keyskeycounterduplicate_keyss           Q/home/james-whalen/.local/lib/python3.13/site-packages/datasets/utils/metadata.py(_check_no_duplicates_on_constructed_node?_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node   s    FJjjQj{x((2jQHLMjd33c
<M$-)0E#CL14D#E77GHII 	 RMEs   B,B#/B(?B(c                 D   > [         TU ]  XS9nU R                  U5        U$ )N)deep)superconstruct_mappingr%   )r   r   r(   mapping	__class__s       r$   r*   (_NoDuplicateSafeLoader.construct_mapping   s(    '+D+<55d;     )F)__name__
__module____qualname____firstlineno__r%   r*   __static_attributes____classcell__)r,   s   @r$   r   r      s    J r.   r   readme_contentreturnc                    [        U R                  5       5      nU(       aU  US   S:X  aL  SUSS  ;   aC  USS  R                  S5      S-   nSR                  USU 5      nUSR                  XS-   S  5      4$ S SR                  U5      4$ )Nr   z---r   
)r   
splitlinesindexjoin)r6   full_contentsep_idx	yamlblocks       r$   _split_yaml_from_readmer@   $   s    1134LQ50Ul12>N5Nqr"((/!3IIl1W56	$))L1$?@@@<(((r.   c            	           \ rS rSr% Sr\r\\   \	S'   \
S\4S j5       r\S\S\\\\4      S\S	S 4S
 j5       r\S\S	S 4S j5       rS\S	S4S jrS	\\   4S jrSrg)MetadataConfigs.   z5Should be in format {config_name: {**config_params}}.
FIELD_NAMEmetadata_configc                 *   U R                  S5      nUb  [        R                  " SU S35      n[        U[        [
        45      (       d  [        U5      e[        U[        5      (       a  U H  n[        U[
        [        45      (       a{  [        U[        5      (       d  M5  [        U5      S:X  aU  SU;   aO  [        R                  " [        US   5      (       a,  [        UR                  S5      [
        [        45      (       a  M  [        U5      e   g g g )N
data_filesz
                Expected data_files in YAML to be either a string or a list of strings
                or a list of dicts with two keys: 'split' and 'path', but got a  
                Examples of data_files in YAML:

                   data_files: data.csv

                   data_files: data/*.png

                   data_files:
                    - part0/*
                    - part1/*

                   data_files:
                    - split: train
                      path: train/*
                    - split: test
                      path: test/*

                   data_files:
                    - split: train
                      path:
                      - train/part1/*
                      - train/part2/*
                    - split: test
                      path: test/*

                PS: some symbols like dashes '-' are not allowed in split names
                r
   splitpath)gettextwrapdedentr   r   str
ValueErrordictlenrematchr   )rE   yaml_data_filesyaml_error_messageyaml_data_files_items       r$   $_raise_if_data_files_field_not_valid4MetadataConfigs._raise_if_data_files_field_not_valid3   s    )--l;&!)OO^N_ `"> oc{;; !344/400,;(&';c4[II%&:DAA 45: '+? ? "4H4Q R R *+?+C+CF+KcSW[ Y Y ));<< -< 1E 'r.   parquet_commit_hashexported_parquet_filesdataset_infosr7   c                    [        U[        S5      5       VVVVVs0 s H  u  pEU[        U[        S5      5       VVVs/ s H.  u  pgUU Vs/ s H  nUS   R                  SU5      PM     snS.PM0     snnn[        UR	                  U[        5       5      R                  =(       d    S5      S._M     n	nnnnnU(       ah  UR                  5        VV
VVs0 s HG  u  pJUU
R                   VVs/ s H  nX   S     H  nUS   U:X  d  M  UPM     M!     snnX   S	   S._MI     n	nn
nnU " U	5      $ s  snf s  snnnf s  snnnnnf s  snnf s  snnn
nf )
NconfigrH   urlzrefs%2Fconvert%2Fparquet)rH   rI   z0.0.0)rG   versionrG   r^   )	r   r   replacerM   rJ   r   r^   itemssplits)clsrX   rY   rZ   config_nameparquet_files_for_config
split_nameparquet_files_for_splitparquet_filemetadata_configsdataset_info	data_files               r$   ._from_exported_parquet_files_and_dataset_infos>MetadataConfigs._from_exported_parquet_files_and_dataset_infosf   s   * :AAWYcdlYm9n
 :o5  @GG_aklsat?u	 @v;
 ", 1H!0G )/778RTgh0G! @v	 }00kmLTT_X_`  :o 	 
   2?1D1D1F  2G-K  +7*=*=#*=J)9)F|)TI$W-; ")T "*=#  0<YG  2G    #$$3!	
(# sG   "D<D5D0 *	D53A D<E
-E	
EE
0D55D<E
dataset_card_datac                 P   UR                  U R                  5      (       a  XR                     n[        U[        5      (       d  [	        SU R                   SU S35      eU H)  nSU;  a  [	        SU S35      eU R                  U5        M+     U " U VVVs0 s Hm  nUR                  5       =n(       d  M  UR                  S5      UR                  5        VVs0 s H#  u  pEXDS:w  a  UO[        R                  " U5      _M%     snn_Mo     snnn5      $ U " 5       $ s  snnf s  snnnf )Nz	Expected z to be a list, but got ''rc   zUEach config must include `config_name` field with a string name of a config, but got z. features)rJ   rD   r   r   rN   rV   copypopr`   r   _from_yaml_list)rb   rm   rh   rE   paramr   r\   s          r$   from_dataset_card_data&MetadataConfigs.from_dataset_card_data   s8     000@.55 9S^^,<<TUeTffg!hii#3 7$##2"327  88I $4  ,<
 ,<"1"6"6"888FJJ}-,2LLN0,:LE (;uAYAYZ_A``,:0  ,<	 	 u0s   D!6#D!*DD!D!Nc                    U (       a  U R                  5        H  nU R                  U5        M     U R                  U5      n[        [	        0 UEU ER                  5       5      5      nUR                  5        H  u  pVUR                  SS 5        M     UR                  5        VVs/ s H  u  pVSU0UEPM     snnXR                  '   g g s  snnf )Nrc   )valuesrV   ru   rO   sortedr`   rr   rD   )r   rm   rE   current_metadata_configstotal_metadata_configsrc   config_metadatas          r$   to_dataset_card_data$MetadataConfigs.to_dataset_card_data   s    #';;=99/J $1'+'B'BCT'U$%)&1U4L1UPT1U1[1[1]*^%_"0F0L0L0N,##M48 1O 5K4P4P4R24R0K ??4R2oo. 2s   %C	c                     S nU R                  5        HI  u  p#[        U 5      S:X  d  US:X  d  UR                  S5      (       d  M2  Uc  UnM9  [        SU SU S35      e   U$ )Nr   defaultz&Dataset has several default configs: 'z' and 'z'.)r`   rP   rJ   rN   )r   default_config_namerc   rE   s       r$   get_default_config_name'MetadataConfigs.get_default_config_name   sw    ",0JJL(K4yA~	!9_=P=PQZ=[=[&.*5'$@AT@UU\]h\iikl  -9 #"r.   r/   )r0   r1   r2   r3   __doc__r   rD   r   rM   __annotations__staticmethodrO   rV   classmethodr   r   r   rk   r	   ru   r}   r   r   r4   r/   r.   r$   rB   rB   .   s    ? 6J60=d 0= 0=d $% $% !%T#s(^ 4$% (	$%
 
$% $%L  K\  0o $ 
## 
#r.   rB   zimage-classificationtranslationzimage-segmentationz	fill-maskzautomatic-speech-recognitionztoken-classificationzsentence-similarityzaudio-classificationzquestion-answeringsummarizationzzero-shot-classificationztable-to-textzfeature-extractionotherzmultiple-choiceztext-classificationztext-to-imageztext2text-generationzzero-shot-image-classificationztabular-classificationztabular-regressionzimage-to-imageztabular-to-textzunconditional-image-generationztext-retrievalztext-to-speechzobject-detectionzaudio-to-audioztext-generationconversationalztable-question-answeringzvisual-question-answeringzimage-to-textzreinforcement-learning)zvoice-activity-detectionztime-series-forecastingzdocument-question-answering)$rQ   rK   collectionsr   	itertoolsr   operatorr   typingr   r   r   yamlhuggingface_hubr	   r\   r   rp   r   infor   r   namingr   utils.loggingr   r0   logger
SafeLoaderr   rM   r   r@   rO   rB   known_task_idsr/   r.   r$   <module>r      s@   	     * *  + +  0  & 
H	T__ )C )E(3-:L4M )O#d3S#X./ O#j&B&2& "& 	&
 #B& B& 2& B& "& R& & R& "& R& r&  2!&" R#&$ B%&& %b'&( b)&* "+&, b-&. r/&0 %b1&2 b3&4 b5&6 7&8 b9&: r;&< b=&> ?&@  A&B RC&D bE&F !#!#%K&r.   