
    h                      F   S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJrJ	r	  S SK
rS SKrS SKrS SKrS SKJrJr  \R$                  R&                  R)                  \5      rS rS\S\\   S\\   S	\S
\\R4                     S\\\\4   4S jr " S S\R:                  5      rg)    N)Path)OptionalUnion)camelcase_to_snakecasefilenames_for_dataset_splitc                 H    [        U 5      R                  5       R                  $ N)r   statst_mtime)cached_directory_paths    _/home/james-whalen/.local/lib/python3.13/site-packages/datasets/packaged_modules/cache/cache.py_get_modification_timer      s    &'--/888    dataset_nameconfig_name	cache_dirconfig_kwargscustom_featuresreturnc                 b   U(       d  U(       d  U(       a-  [         R                  " U=(       d    S5      R                  X4S9nOS n[        R                  R                  [        U=(       d    [         R                  R                  5      5      nU R                  S5      n[        US   5      US'   SR                  U5      n[        R                  R                  X'5      n[        R                  " [        R                  R                  X=(       d    SSS5      5       V	s/ s H  n	[        R                  R                  U	5      (       d  M)  U(       dS  U(       dL  [        R                  " [!        U	S5      R#                  SS	95      S
   [!        U	5      R$                  S   :X  d  M  U	PM     n
n	U
(       d  [        R                  " [        R                  R                  USSS5      5       V	s/ s H+  n	[        R                  R                  U	5      (       d  M)  U	PM-     n
n	['        U
 V	s1 s H  n	[!        U	5      R$                  S   iM     sn	5      n[)        SU  3U(       a  SU S3OS-   U(       a  SU 3-   5      eS-   5      e[!        ['        U
[*        S9S   5      n	U	R$                  SS  u  p[        R                  " [        R                  R                  USX5      5       Vs/ s H  n[        R                  R                  U5      (       d  M)  U(       dS  U(       dL  [        R                  " [!        US5      R#                  SS	95      S
   [!        U5      R$                  S   :X  d  M  [!        U5      R$                  S   PM     nnU(       d9  [-        U5      S:  a*  [)        SU  SSR                  U5       SU  SUS    S3	5      eU	R$                  S   nSU SU	 S[.        R0                  " [+        U	5      5       S3n[2        R5                  U5        XU4$ s  sn	f s  sn	f s  sn	f s  snf )Ndefault)r   r   /___*zdataset_info.jsonzutf-8)encodingr   zCouldn't find cache for z for config '' z!
Available configs in the cache: )key   zThere are multiple 'z' configurations in the cache: z, zR
Please specify which configuration to reload from the cache, e.g.
	load_dataset('z', 'r   z')z/Found the latest cached dataset configuration 'z' at z (last modified on z).)datasetsBuilderConfigcreate_config_idospath
expanduserstrconfigHF_DATASETS_CACHEsplitr   joinglobisdirjsonloadsr   	read_textpartssorted
ValueErrorr   lentimectimeloggerwarning)r   r   r   r   r   	config_idnamespace_and_dataset_namecached_relative_path#cached_datasets_directory_path_rootr   cached_directory_pathsavailable_configsversionhash_cached_directory_pathother_configswarning_msgs                    r   _find_hash_in_cacherF      s    m**;+C)DUU' V 
	 	""3y'UHOO4U4U#VWI!-!3!3C!8%;<VWY<Z%[r" ::&@A*,'',,y*W' &*YYGGLL<>N3PSUXY&
&
! 77==./	 	 zz$46IJTT^eTfghuv)*0045 	&
   " *.277<<@cehjmor3s)t"
)t%ww}}23 ")t 	 "

 #PfgPf7LT'(..r2Pfg
 &|n5/8yk+bBK\34E3FGf
 	
 cef
 	
 !(>DZ![\^!_`)//4MG '+ii=`begn0u&v
&v"77==/0 	/ zz$57JKUU_fUghivw*+11"56 	/#$**2.&v  
 ]+a/"<.0OPTPYPYZgPhOi!!-d=3C2DBH
 	

 (--b1K
9+eLaKb c!ZZ(>?T(UVWWY	[  NN;%%k"
 h
s8    (PAP&P-(P"P"*"P'((P,AP,.P,c                     ^  \ rS rSr              SS\\   S\\   S\\   S\\   S\\   S\\   S\\R                     S	\\R                     S
\\	\
\4      S\\   S\\	\\\\R                  R                  4      S\\   S\\   S\\   4U 4S jjjrS\R                  4S jrSS\\   4S jjrS rS rSrU =r$ )Cacheb   r   r   r   rA   rB   	base_pathinfofeaturestokenrepo_id
data_filesdata_dirstorage_optionswriter_batch_sizec                    > U
c  Uc  [        S5      eUb  XS'   Ub  XS'   US:X  a   US:X  a  [        U
=(       d    UUUUUS9u  p4nOUS:X  d  US:X  a  [        S5      e[        TU ]  UUUUUUUU	U
UUS9  g )NzArepo_id or dataset_name is required for the Cache dataset builderrO   rP   auto)r   r   r   r   r   z0Pass both hash='auto' and version='auto' instead)r   r   r   rA   rB   rJ   rK   rM   rN   rQ   rR   )r5   rF   NotImplementedErrorsuper__init__)selfr   r   r   rA   rB   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   r   	__class__s                   r   rW   Cache.__init__c   s    $ ?|3`aa!*4,'(0*%6>g/)<$4'#+ (*&K$ V^w&0%&XYY%#+/ 	 	
r   r   c                 ,    [         R                  " 5       $ r	   )r#   DatasetInfo)rX   s    r   _infoCache._info   s    ##%%r   
output_dirc                    [         R                  R                  U R                  5      (       d%  [	        SU R
                   SU R                   35      eUb2  XR                  :w  a"  [        R                  " U R                  U5        g g g )NzCache directory for z doesn't exist at )r&   r'   existsr   r5   r   shutilcopytree)rX   r_   argskwargss       r   download_and_prepareCache.download_and_prepare   sl    ww~~dnn--3D4E4E3FFXY]YgYgXhijj!jNN&BOODNNJ7 'C!r   c                    [        U R                  R                  [        R                  5      (       a.  [        U R                  R                  R                  5       5      nO%[        SU R                   SU R                   35      eU Vs/ s HW  n[        R                  " UR                  S[        U R                  U R                  UR                  SUR                  S90S9PMY     sn$ s  snf )NzMissing splits info for z in cache directory filesarrow)r   r,   filetype_suffixshard_lengths)name
gen_kwargs)
isinstancerK   splitsr#   	SplitDictlistvaluesr5   r   r   SplitGeneratorrm   r   rl   )rX   
dl_managersplit_infos
split_infos       r   _split_generatorsCache._split_generators   s    dii&&(:(:;;489I9I9P9P9R4SK78I8I7JJ^_c_m_m^nopp *
 *
 ##__8%)%6%6(oo(/&0&>&> *
 	
 
s   AC,c              #     #    [        U5       Hx  u  p#[        US5       n [        [        R                  R	                  U5      5       H0  u  pV[        R
                  R                  U/5      nU SU 3U4v   M2      S S S 5        Mz     g ! [         a-  n[        R                  SU S[        U5       SU 35        e S nAff = f! , (       d  f       M  = f7f)Nrb_zFailed to read file 'z' with error z: )	enumerateopenpaipcopen_streamTablefrom_batchesr5   r9   errortype)	rX   ri   file_idxfilef	batch_idxrecord_batchpa_tablees	            r   _generate_tablesCache._generate_tables   s     '.NHdD!Q	3<RVV=O=OPQ=R3S/	#%88#8#8,#H "*
!I;7AA 4T "! / " LL#8mDQRG9TVWXVY!Z[ "!s:   CCAB>C
C(B==CC
C	C )NNNz0.0.0NNNNNNNNNNr	   )__name__
__module____qualname____firstlineno__r   r)   r#   r\   Featuresr   boolrr   dictrO   DataFilesDictintrW   r]   rf   rx   r   __static_attributes____classcell__)rY   s   @r   rH   rH   b   s`    $(&*%)!("#'/304,0!%Z^"&*.+/.
C=.
 sm.
 c]	.

 #.
 sm.
 C=.
 x++,.
 8,,-.
 dCi().
 #.
 U3dH4G4G4U4U#UVW.
 3-.
 "$.
 $C=.
 .
`&x++ &8x} 8
, r   rH   )r.   r0   r&   rb   r7   pathlibr   typingr   r   pyarrowr   r#   datasets.configdatasets.data_filesdatasets.namingr   r   utilslogging
get_loggerr   r9   r   r)   r   r   tuplerF   ArrowBasedBuilderrH   r   r   r   <module>r      s      	    "     O 
			*	*8	49G&G&#G& }G& 	G&
 h//0G& 3S=G&T]H&& ]r   