
    :i^                        % S SK r S SKrS SKJr  S SKJr  S SKrS SKJr  S SK	J
r
  \\S'    S SKrS SKrSr\ R"                  " \5      rSS
 jrS\\   S\S	\\\      4S jrS\S\S-  S	\\   4S jrS\\   S	\\\\4      4S jrS\\   S	\4S jr     SS\S\\-  S-  S\\-  S-  S\S-  S\S\S	\4S jjrg! \ a    Sr Nf = f)    N)Path)Iterator)TokenizerVersion)MultiModalVersion_hub_installedTFreturnc                  0    [         (       d  [        S5      eg )NzxPlease install the `huggingface_hub` package to use this method.
Run `pip install mistral-common[hf-hub]` to install it.)r   ImportError     `/home/james-whalen/.local/lib/python3.13/site-packages/mistral_common/tokens/tokenizers/utils.py_assert_hub_installedr      s    >F
 	
 r   lst
chunk_sizec              #   V   #    [        S[        U 5      U5       H  nXX!-    v   M     g7f)zChunk a list into smaller lists of a given size.

Args:
    lst: The list to chunk.
    chunk_size: The size of each chunk.

Returns:
    An iterator over the chunks.

Examples:
    >>> all_chunks = list(chunks([1, 2, 3, 4, 5], 2))
r   N)rangelen)r   r   is      r   chunksr      s+      1c#h
+an%% ,s   ')repo_idrevisionc                 2   [        5         [        [        R                  R                  5      [        R                  R
                  R                  S/U R                  S5      Q5      -  nUc_  US-  [        R                  R                  -  nUR                  5       (       a*  UR                  S5       nUR                  5       nSSS5        U(       a3  US-  U-  nUR                  5       (       a  [        R                  " U5      $ / $ ! , (       d  f       NJ= f)zlist the files of a local Hugging Face repo.

Args:
    repo_id: The Hugging Face repo ID.
    revision: The revision of the model to use. If `None`, the latest revision will be used.
models/Nrefsr	snapshots)r   r   huggingface_hub	constantsHF_HUB_CACHEREPO_ID_SEPARATORjoinsplitDEFAULT_REVISIONis_fileopenreadis_diroslistdir)r   r   
repo_cacherevision_filefilerevision_dirs         r   list_local_hf_repo_filesr/   0   s     o//<<=@Y@Y@k@k@p@p	'GMM#&'A J "V+o.G.G.X.XX  ""##C(D99; ) !K/(:  ::l++I )(s   3D
Dfilesc                    / n[        [        R                  5      n[        [        R                  5      S/-   nU VVs/ s H  oC  H
  nSU U 3PM     M     snnS/-   nU  Hh  n[	        U5      nUR
                  n	SR                  UR                  5      n
U	S:X  a  UR                  X45        MO  X;   d  MV  UR                  X45        Mj     U$ s  snnf )zFilter the valid tokenizer files from a list of files.

Args:
    files: The list of files to filter.

Returns:
    The list of tuples of file names and paths to the valid tokenizer files.
 z.model.z.modeltekken.json)	listr   __members__r   r   namer"   suffixesappend)r0   valid_tokenizer_filesinstruct_versionsmm_versionsvmsentencepiece_suffixesr-   pathlib_file	file_namesuffixs              r   _filter_valid_tokenizer_filesrB   K   s     -99:(445<K8I_8I1S^as1#.S^.8I_ckbllDz %%	../%!(()):;-!(()):;  !  `s   Cc                     [        U 5      n[        U5      S:X  a  [        S5      e[        U5      S:  aA  U H  u  p#SU:X  d  M  Us  $    [        US S9S   S   n[        R                  SU S	35        U$ US   S   nU$ )
zGet one valid tokenizer file from a list of files.

Args:
    files: The list of files to filter.

Returns:
    The path to the tokenizer file.
r   zNo tokenizer file found.   r3   c                     U S   $ )Nr   r   )xs    r   <lambda>.get_one_valid_tokenizer_file.<locals>.<lambda>x   s    TUVWTXr   )keyz,Multiple valid tokenizer files found. Using .)rB   r   
ValueErrorsortedloggerwarning)r0   $valid_tokenizer_file_names_and_filesr@   tokenizer_files       r   get_one_valid_tokenizer_filerR   f   s     ,I+O(
/0A5344
/014)M%I	)%% *N   D.YZ\]^_`EnEUUVWX  >a@Cr   	cache_dirtokenforce_downloadlocal_files_onlyc           
         [        5         U(       a  U(       a  [        S5      eU(       d)   [        R                  " 5       nUR	                  XUS9nSnO*[        XS9n[        U5      S:X  a  [        SU  S	U S35      e [        US9n	[        R                  " U UU	UUUUS9n
U
$ ! [
        R                  [
        R                  [
        R                  4 aU  nU(       a  Ue[        XS9nSn[        R                  S5        [        U5      S:X  a  [        SU  S	U S
35      Ue SnANSnAff = f! [         a    [        SU  S35      ef = f)a  Download the tokenizer file of a Mistral model from the Hugging Face Hub.

See [here](https://huggingface.co/mistralai/models) for a list of our OSS models.

Note:
    You need to install the `huggingface_hub` package to use this method.

    Please run `pip install mistral-common[hf-hub]` to install it.

Args:
    repo_id: The Hugging Face repo ID.
    cache_dir: The directory where the tokenizer will be cached.
    token: The Hugging Face token to use to download the tokenizer.
    revision: The revision of the model to use. If `None`, the latest revision will be used.
    force_download: Whether to force the download of the tokenizer. If `True`, the tokenizer will be downloaded
        even if it is already cached.
    local_files_only: Whether to only use local files. If `True`, the tokenizer will be downloaded only if it is
        already cached.

Returns:
    The downloaded tokenizer local path for the given model ID.
zSYou cannot force the download of the tokenizer if you only want to use local files.)r   rT   F)r   r   TzBCould not connect to the Hugging Face Hub. Using local files only.r   zXCould not connect to the Hugging Face Hub and no local files were found for the repo ID z and revision z6. Please check your internet connection and try again.Nz%No local files found for the repo ID zz. Please check the repo ID and the revision or try to download the tokenizer without setting `local_files_only` to `True`.)r0   z*No valid tokenizer file found in the repo rK   )r   rS   filenamerT   r   rV   rU   )r   rL   r   HfApilist_repo_filesrequestsConnectionError	HTTPErrorTimeoutr/   rN   infor   FileNotFoundErrorrR   hf_hub_download)r   rS   rT   r   rU   rV   hf_api
repo_fileserQ   tokenizer_paths              r   download_tokenizer_from_hf_hubrf      s   < *noo	$**,F//RW/XJ$  .gQ
z?a#7yxj Yo o 
R5JG %44)%N I (((*<*<h>N>NO 	1'UJ#KK\]:!#'novnw$XJ.df  $	0  REgYaPQQRs$   'B$ ?	D/ $3D,AD''D,/E	)r   N)NNNFF)loggingr)   pathlibr   typingr   r[   %mistral_common.tokens.tokenizers.baser   &mistral_common.tokens.tokenizers.imager   bool__annotations__r   huggingface_hub.constantsr   r
   	getLogger__name__rN   r   r4   strintr   r/   tuplerB   rR   rf   r   r   r   <module>rt      sX    	    B D $N 
		8	$
&S	 &s &xS	/B &"c S4Z DI 6!c !tE#s(O7L !6S	 c 8 $(# "LLTzD L #:L Dj	L
 L L 	L_  Ns   
B< <CC