
    h                         S SK r S SKrS SKJr  S SKJr  SSKJr  SSKJ	r	J
r
JrJrJrJr  SSKJr  \" \5      r " S	 S
\ R&                  5      rSS\\   S\4S jjrS\\   S\4S jrSS\S\S\4S jjrS rg)    N)Optional)insecure_hashlib   )config) ExpectedMoreDownloadedFilesErrorExpectedMoreSplitsErrorNonMatchingChecksumErrorNonMatchingSplitsSizesErrorUnexpectedDownloadedFileErrorUnexpectedSplitsError   )
get_loggerc                   $    \ rS rSrSrSrSrSrSrg)VerificationMode   a  `Enum` that specifies which verification checks to run.

The default mode is `BASIC_CHECKS`, which will perform only rudimentary checks to avoid slowdowns
when generating/downloading a dataset for the first time.

The verification modes:

|                           | Verification checks                                                           |
|---------------------------|------------------------------------------------------------------------------ |
| `ALL_CHECKS`              | Split checks, uniqueness of the keys yielded in case of the GeneratorBuilder  |
|                           | and the validity (number of files, checksums, etc.) of downloaded files       |
| `BASIC_CHECKS` (default)  | Same as `ALL_CHECKS` but without checking downloaded files                    |
| `NO_CHECKS`               | None                                                                          |


all_checksbasic_checks	no_checks N)	__name__
__module____qualname____firstlineno____doc__
ALL_CHECKSBASIC_CHECKS	NO_CHECKS__static_attributes__r       S/home/james-whalen/.local/lib/python3.13/site-packages/datasets/utils/info_utils.pyr   r      s      J!LIr   r   expected_checksumsrecorded_checksumsc                 6   U c  [         R                  S5        g [        [        U 5      [        U5      -
  5      S:  a)  [	        [        [        U 5      [        U5      -
  5      5      e[        [        U5      [        U 5      -
  5      S:  a)  [        [        [        U5      [        U 5      -
  5      5      eU  Vs/ s H  o0U   X   :w  d  M  UPM     nnUb  SU-   OSn[        U5      S:  a  [        SU SU S35      e[         R                  SU-   5        g s  snf )	NzUnable to verify checksums.r   z for  zChecksums didn't matchz:
zY
Set `verification_mode='no_checks'` to skip checksums verification and ignore this errorz&All the checksums matched successfully)loggerinfolensetr   strr   r	   )r!   r"   verification_nameurlbad_urlsfor_verification_names         r    verify_checksumsr.   ,   s   !12
3!"S);%<<=A.s37I3JSQcMd3d/eff
3!"S);%<<=A+C4F0G#N`Ja0a,bcc1h15LPbPg5g1Hh;L;XG&77^`
8}q&$%:$;3j gg
 	

 KK8;PPQ is   8D
Dexpected_splitsrecorded_splitsc                 V   U c  [         R                  S5        g [        [        U 5      [        U5      -
  5      S:  a)  [	        [        [        U 5      [        U5      -
  5      5      e[        [        U5      [        U 5      -
  5      S:  a)  [        [        [        U5      [        U 5      -
  5      5      eU  Vs/ s H,  nX   R                  X   R                  :w  d  M#  X   X   S.PM.     nn[        U5      S:  a  [        [        U5      5      e[         R                  S5        g s  snf )NzUnable to verify splits sizes.r   )expectedrecordedz$All the splits matched successfully.)	r%   r&   r'   r(   r   r)   r   num_examplesr
   )r/   r0   name
bad_splitss       r    verify_splitsr7   ?   s   45
3#o"667!;%c#o*>_AU*U&VWW
3#o"667!;#CO(<s??S(S$TUU $#D --1F1S1SS 	O_*8MN#  
 :)#j/::
KK67s   8"D&D&pathrecord_checksumreturnc                 B  ^ U(       ac  [         R                  " 5       n[        U S5       m[        U4S jS5       H  nUR	                  U5        M     UR                  5       nSSS5        OSn[        R                  R                  U 5      WS.$ ! , (       d  f       N0= f)z7Compute the file size and the sha256 checksum of a filerbc                  &   > T R                  S5      $ )Ni   )read)fs   r    <lambda>(get_size_checksum_dict.<locals>.<lambda>V   s    affWor   r   N)	num_byteschecksum)	r   sha256openiterupdate	hexdigestosr8   getsize)r8   r9   mchunkrC   r?   s        @r    get_size_checksum_dictrM   Q   sx    ##%$5s; <{{}H 
 .HEE s   9B
Bc                 b    U (       a(  [         R                  (       a  U [         R                  :  $ g)zCheck if `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.

Args:
    dataset_size (int): Dataset size in bytes.

Returns:
    bool: Whether `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.
F)r   IN_MEMORY_MAX_SIZE)dataset_sizes    r    is_small_datasetrQ   ^   s#     11f7777r   )N)T)enumrI   typingr   huggingface_hub.utilsr   r$   r   
exceptionsr   r   r	   r
   r   r   loggingr   r   r%   Enumr   dictr.   r7   r)   boolrM   rQ   r   r   r    <module>rZ      s     	  2     
H	tyy ,R$ RT R&88D> 8D 8$
F 
Ft 
Ft 
Fr   