
    h8                         S r SSKJr  SSKJr  S\\\\\4   S\4S jr	 " S S\
5      r " S	 S
\
5      r " S S5      rg)a  
Hashing function for dataset keys using `hashlib.md5`

Requirements for the hash function:

- Provides a uniformly distributed hash from random space
- Adequately fast speed
- Working with multiple input types (in this case, `str`, `int` or `bytes`)
- Should be platform independent (generates same hash on different OS and systems)

The hashing function provides a unique 128-bit integer hash of the key provided.

The split name is being used here as the hash salt to avoid having same hashes
in different splits due to same keys
    )Union)insecure_hashlib	hash_datareturnc                    [        U [        [        45      (       a  U $ [        U [        5      (       a  U R	                  SS5      n O,[        U [
        5      (       a  [        U 5      n O[        U 5      eU R                  S5      $ )zl
Returns the input hash_data in its bytes form

Args:
hash_data: the hash salt/key to be converted to bytes
\/zutf-8)
isinstancebytes	bytearraystrreplaceintInvalidKeyErrorencode)r   s    J/home/james-whalen/.local/lib/python3.13/site-packages/datasets/keyhash.py	_as_bytesr   &   sq     )eY/00	Is	#	# %%dC0		Is	#	#	N	 i((G$$    c                   ,   ^  \ rS rSrSrU 4S jrSrU =r$ )r   =   z6Raises an error when given key is of invalid datatype.c                    > SU l         SU S[        U5       3U l        SU l        [        TU ]  U R                    U R                   U R                   35        g )Nz7
FAILURE TO GENERATE DATASET: Invalid key type detectedz
Found Key z	 of type z-
Keys should be either str, int or bytes type)prefixtypeerr_msgsuffixsuper__init__)selfr   	__class__s     r   r   InvalidKeyError.__init__@   sP    P%i[	$y/9JKFDKK=t{{mDEr   )r   r   r   __name__
__module____qualname____firstlineno____doc__r   __static_attributes____classcell__r   s   @r   r   r   =   s    @F Fr   r   c                   0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )DuplicatedKeysErrorG   z(Raise an error when duplicate key found.c                 x  > Xl         X l        X0l        SU l        [	        U5      S::  a  SSR                  U5       SU 3U l        O.SSR                  US S 5       S[	        U5      S-
   SU 3U l        U(       a  SU-   OS	U l        [        TU ]%  U R                   U R                   U R                   35        g )
Nz3Found multiple examples generated with the same key   z
The examples at index z, z have the key z... (z more) have the key 
 )
keyduplicate_key_indicesfix_msgr   lenjoinr   r   r   r   )r   r1   r2   r3   r   s       r   r   DuplicatedKeysError.__init__J   s    %:"K$%+5dii@U6V5WWefiejkDL5dii@UVYWY@Z6[5\\abef{b|  @B  cB  bC  CW  X[  W\  ]DL(/dWnRDKK=t{{mDEr   )r2   r   r3   r1   r   r   )r0   r!   r)   s   @r   r+   r+   G   s    2
F 
Fr   r+   c                   D    \ rS rSrSrS\4S jrS\\\\	4   S\4S jr
Srg	)
	KeyHasherW   z,KeyHasher class for providing hash using md5	hash_saltc                 L    [         R                  " [        U5      5      U l        g )N)r   md5r   
_split_md5)r   r:   s     r   r   KeyHasher.__init__Z   s    *..y/CDr   r1   r   c                     U R                   R                  5       n[        U5      nUR                  U5        [	        UR                  5       S5      $ )zReturns 128-bits unique hash of input key

Args:
key: the input key to be hashed (should be str, int or bytes)

Returns: 128-bit int hash key   )r=   copyr   updater   	hexdigest)r   r1   r<   byte_keys       r   hashKeyHasher.hash]   s>     oo""$S>

83==?B''r   )r=   N)r"   r#   r$   r%   r&   r   r   r   r   r   rE   r'    r   r   r8   r8   W   s1    6E# E(c3o. (3 (r   r8   N)r&   typingr   huggingface_hub.utilsr   r   r   r   r   r   	Exceptionr   r+   r8   rG   r   r   <module>rK      sY   "   2%sC	9: %u %.Fi FF) F ( (r   