
    bCi                     .   S r SSKrSSKrSSKrSSKrSSKJr  SSKJrJ	r	J
r
  SSKJr  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJr  SSKJrJrJrJrJrJr  SSKJ r   SSK!J"r"  SSK#J$r$J%r%J&r&J'r'J(r(  \" 5       (       a  SSK)J*r*  OSr*\RV                  " \,5      r-\\.\/\	\.   \	\.   4   4   " / SS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PS S\" 5       (       a  S!OS44PS"S\" 5       (       a  SOS44PS#PS$\" 5       (       a  S%OS\" 5       (       a  S&OS44PS'PS(S\" 5       (       a  SOS44PS)\" 5       (       a  S*OSS44PS+PS,PS-\" 5       (       a  S.OS\" 5       (       a  S/OS44PS0S1\" 5       (       a  S2OS44PS3PS4S\" 5       (       a  S5OS44PS6PS7PS8S\" 5       (       a  SOS44PS9S:\" 5       (       a  S;OS44PS<S\" 5       (       a  S=OS44PS>S\" 5       (       a  S5OS44PS?S@\" 5       (       a  SAOS44PSBS\" 5       (       a  SOS44PSCPSD\" 5       (       a  SEOS\" 5       (       a  SFOS44PSGPSH\" 5       (       a  SOS\" 5       (       a  SOS44PSIS\" 5       (       a  SOS44PSJS@\" 5       (       a  SAOS44PSKS\" 5       (       a  SOS44PSLS\" 5       (       a  SOS44PSMPSN\" 5       (       a  SOOS\" 5       (       a  SPOS44PSQSR\" 5       (       a  SSOS44PSTS\" 5       (       a  S!OS44PSUS\" 5       (       a  S!OS44PSVS\" 5       (       a  SOS44PSWSX\" 5       (       a  SYOS44PSZS[\" 5       (       a  S\OS44PS]\" 5       (       a  S^OS\" 5       (       a  S_OS44PS`PSaS\" 5       (       a  S5OS44PSbPScPSdS@\" 5       (       a  SAOS44PSeS:\" 5       (       a  S;OS44PSfSg\" 5       (       a  ShOS44PSi\" 5       (       a  SjOS\" 5       (       a  SkOS44PSl\" 5       (       a  SOS\" 5       (       a  SOS44PSm\" 5       (       a  SOS\" 5       (       a  SOS44PSn\" 5       (       a  SOS\" 5       (       a  SOS44PSo\" 5       (       a  SOS\" 5       (       a  SOS44PSpPSq\" 5       (       a  SOS\" 5       (       a  SOS44PSrSs\" 5       (       a  StOS44PSuSv\" 5       (       a  SwOS44PSxSy\" 5       (       a  SzOS44PS{S:\" 5       (       a  S;OS44PS|S\" 5       (       a  SOS44PS}S\" 5       (       a  SOS44PS~S\" 5       (       a  SOS44PS\" 5       (       a  SOSS44PSPS\" 5       (       a  S:OS\" 5       (       a  S;OS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  SOS44PS\" 5       (       a  SOSS44PSPSS\" 5       (       a  S;OS44PSS\" 5       (       a  SOS44PSPSS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  S5OS44PS\" 5       (       a  SOSS44PSS:\" 5       (       a  S;OS44PSS:\" 5       (       a  S;OS44PSS:\" 5       (       a  S;OS44PSS\" 5       (       a  SOS44PSPSS\" 5       (       a  S5OS44PSS:\" 5       (       a  S;OS44PSPSPSPSPSPSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  SOS44PSPSS@\" 5       (       a  SAOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS:\" 5       (       a  S;OS44PSS:\" 5       (       a  S;OS44PSSX\" 5       (       a  SYOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSPS\" 5       (       a  SOS\" 5       (       a  SOS44PSS\" 5       (       a  S5OS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSPSS\" 5       (       a  SOS44PS\" 5       (       a  SOSS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PS\" 5       (       a  SOSS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PS\" 5       (       a  SOS\" 5       (       a  SOS44PSS@\" 5       (       a  SAOS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  SOS44PSPS\" 5       (       a  S:OS\" 5       (       a  S;OS44PS\" 5       (       a  SO\" 5       (       a  SOS\" 5       (       a  \" 5       (       d  SOS44PS\" 5       (       a  SO\" 5       (       a  SOS\" 5       (       a  \" 5       (       d  SOS44PS\" 5       (       a  SO\" 5       (       a  SOS\" 5       (       a  \" 5       (       d  SOS44PS\" 5       (       a  SO\" 5       (       a  SOS\" 5       (       a  \" 5       (       d  SOS44PSS\" 5       (       a  SOS44PS\" 5       (       a  SOSS44PSS\" 5       (       a  SOS44PSS\" 5       (       a  GS OS44PGSS\" 5       (       a  S5OS44PGSS\" 5       (       a  S5OS44PGSS\" 5       (       a  S5OS44PGSGS\" 5       (       a  GSOS44PGSS\" 5       (       a  SOS44PGSS@\" 5       (       a  SAOS44PGS	\" 5       (       a  GS
OS\" 5       (       a  GSOS44PGSS\" 5       (       a  SOS44PGSS\" 5       (       a  SOS44PGSGS\" 5       (       a  GSOS44PGSPGSS\" 5       (       a  S5OS44PGSS\" 5       (       a  SOS44PGS\" 5       (       a  GSOS\" 5       (       a  GSOS44PGS\" 5       (       a  GSOS\" 5       (       a  GSOS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44PGSS\" 5       (       a  SOS44PGSS\" 5       (       a  SOS44PGSS\" 5       (       a  S;OS44PGSS\" 5       (       a  SOS44PGSS\" 5       (       a  SOS44PGSS\" 5       (       a  SOS44PGSGS \" 5       (       a  GS!OS44PGS"S:\" 5       (       a  S;OS44PGS#S\" 5       (       a  SOS44PGS$S\" 5       (       a  SOS44PGS%S\" 5       (       a  SOS44PGS&PGS'\" 5       (       a  S1OS\" 5       (       a  S2OS44PGS(\" 5       (       a  S1OS\" 5       (       a  S2OS44PGS)PGS*\" 5       (       a  SOS\" 5       (       a  SOS44PGS+SR\" 5       (       a  SSOS44PGS,S\" 5       (       a  SOS44PGS-S\" 5       (       a  SOS44PGS.PGS/S\" 5       (       a  SOS44PGS0S\" 5       (       a  SO\" 5       (       a  S5OS44PGS1\" 5       (       a  GS2OSS44PGS3PGS4S\" 5       (       a  SOS44PGS5SX\" 5       (       a  SYOS44PGS6SX\" 5       (       a  SYOS44PGS7SX\" 5       (       a  SYOS44PGS8SX\" 5       (       a  SYOS44PGS9SX\" 5       (       a  SYOS44PGS:SX\" 5       (       a  SYOS44PGS;SX\" 5       (       a  SYOS44PGS<SX\" 5       (       a  SYOS44PGS=SX\" 5       (       a  SYOS44PGS>SX\" 5       (       a  SYOS44PGS?SX\" 5       (       a  SYOS44PGS@SX\" 5       (       a  SYOS44PGSAPGSBGSC\" 5       (       a  GSDOS44PGSE\" 5       (       a  SOS\" 5       (       a  SOS44PGSF\" 5       (       a  GSGOS\" 5       (       a  GSHOS44PGSI\" 5       (       a  GSJOS\" 5       (       a  GSKOS44PGSLGSM\" 5       (       a  GSNOS44PGSOS@\" 5       (       a  SAOS44PGSPS@\" 5       (       a  SAOS44PGSQPGSRGSS\" 5       (       a  GSTOS44PGSUS\" 5       (       a  SOS44PGSV\" 5       (       a  GSWOS\" 5       (       a  GSXOS44PGSY\" 5       (       a  GSWOS\" 5       (       a  GSXOS44PGSZ\" 5       (       a  SOS\" 5       (       a  SOS44PGS[\" 5       (       a  GS\OSS44PGS]\" 5       (       a  SOS\" 5       (       a  SOS44PGS^S\" 5       (       a  S5OS44PGS_\" 5       (       a  GS`OSS44PGSaPGSb\" 5       (       a  GScOSS44PGSdPGSeGSf\" 5       (       a  GSgOS44PGShS\" 5       (       a  SOS44PGSiS:\" 5       (       a  S;OS44PGSj\" 5       (       a  SOS\" 5       (       a  SOS44PGSk\" 5       (       a  SOS\" 5       (       a  SOS44PGSl\" 5       (       a  SOS\" 5       (       a  SOS44PGSmPGSnPGSoPGSpS\" 5       (       a  SOS44PGSq\" 5       (       a  GSrOS\" 5       (       a  GSsOS44PGSt\" 5       (       a  SOS\" 5       (       a  SOS44PGSuS\" 5       (       a  SOS44PGSvS\" 5       (       a  SOS44PGSwS\" 5       (       a  SOS44PGSxS\" 5       (       a  SOS44PGSyPGSz\" 5       (       a  SOS\" 5       (       a  \" 5       (       d  S5OS44PGS{PGS|PGS}PGS~PGSGS\" 5       (       a  GSOS44PGSS\" 5       (       a  SOS44PGS\" 5       (       a  GSOS\" 5       (       a  GSOS44PGSPGS\" 5       (       a  GSOSS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44PGS\" 5       (       a  GSOS\" 5       (       a  GSOS44PGSS\" 5       (       a  SOS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44PGS\" 5       (       a  SOS\" 5       (       a  SOS44P5      r0\"" \$\05      r1\$Rd                  " 5        V Vs0 s H  u  pX_M	     snn r3GS\.GS\
\4\   S4   4GS jr5        GSGS\
\.\Rl                  \.   4   GS\	\
\.\Rl                  \.   4      GS\7GS\	\7   GS\	\8\.\.4      GS\	\
\7\.4      GS\	\.   GS\7GS\.GS\8\.\4   4GS jjr9 " GS GS5      r:GSGS/r;gs  snn f (  zAuto Tokenizer class.    N)OrderedDict)AnyOptionalUnion)is_mistral_common_available   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)PreTrainedTokenizer)TOKENIZER_CONFIG_FILE)cached_fileextract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)PreTrainedTokenizerFastaimv2CLIPTokenizerCLIPTokenizerFastalbertAlbertTokenizerAlbertTokenizerFastalignBertTokenizerBertTokenizerFastarceeLlamaTokenizerLlamaTokenizerFastaria
aya_visionCohereTokenizerFastbark)bart)BartTokenizerBartTokenizerFastbarthezBarthezTokenizerBarthezTokenizerFast)bartpho)BartphoTokenizerNbertzbert-generationBertGenerationTokenizer)zbert-japanese)BertJapaneseTokenizerN)bertweet)BertweetTokenizerNbig_birdBigBirdTokenizerBigBirdTokenizerFastbigbird_pegasusPegasusTokenizerPegasusTokenizerFast)biogpt)BioGptTokenizerNbitnetr   )
blenderbot)BlenderbotTokenizerBlenderbotTokenizerFast)zblenderbot-small)BlenderbotSmallTokenizerNblipzblip-2GPT2TokenizerGPT2TokenizerFastbloomBloomTokenizerFastbltbridgetowerRobertaTokenizerRobertaTokenizerFastbros)byt5)ByT5TokenizerN	camembertCamembertTokenizerCamembertTokenizerFast)canine)CanineTokenizerN	chameleonchinese_clipclapclipclipseg)clvp)ClvpTokenizerN
code_llamaCodeLlamaTokenizerCodeLlamaTokenizerFastcodegenCodeGenTokenizerCodeGenTokenizerFastcoherecohere2colpalicolqwen2Qwen2TokenizerQwen2TokenizerFastconvbertConvBertTokenizerConvBertTokenizerFastcpmCpmTokenizerCpmTokenizerFast)cpmant)CpmAntTokenizerNcsm)ctrl)CTRLTokenizerN)zdata2vec-audioWav2Vec2CTCTokenizerNzdata2vec-textdbrxdebertaDebertaTokenizerDebertaTokenizerFastz
deberta-v2DebertaV2TokenizerDebertaV2TokenizerFastdeepseek_v2deepseek_v3deepseek_vldeepseek_vl_hybrid)dia)DiaTokenizerN	diffllama
distilbertDistilBertTokenizerDistilBertTokenizerFastdprDPRQuestionEncoderTokenizerDPRQuestionEncoderTokenizerFastelectraElectraTokenizerElectraTokenizerFastemu3ernieernie4_5ernie4_5_moeernie_mErnieMTokenizer)esm)EsmTokenizerNexaone4falconfalcon_mambaGPTNeoXTokenizerFastfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubert)FlaubertTokenizerN	flex_olmofnetFNetTokenizerFNetTokenizerFast)fsmt)FSMTTokenizerNfunnelFunnelTokenizerFunnelTokenizerFastgemmaGemmaTokenizerGemmaTokenizerFastgemma2gemma3gemma3_textgemma3ngemma3n_textgitglmglm4glm4_moeglm4v	glm4v_moezgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japanese)GPTNeoXJapaneseTokenizerNgpt_ossgptj)zgptsan-japanese)GPTSanJapaneseTokenizerN)graniterJ   N)
granitemoer   )granitemoehybridr   )granitemoesharedr   zgrounding-dinogroupvitheliumherbertHerbertTokenizerHerbertTokenizerFast)hubertrx   ibertideficsidefics2idefics3instructblipinstructblipvideointernvljambajanusjetmoe)jukebox)JukeboxTokenizerNzkosmos-2XLMRobertaTokenizerXLMRobertaTokenizerFastz
kosmos-2.5layoutlmLayoutLMTokenizerLayoutLMTokenizerFast
layoutlmv2LayoutLMv2TokenizerLayoutLMv2TokenizerFast
layoutlmv3LayoutLMv3TokenizerLayoutLMv3TokenizerFast	layoutxlmLayoutXLMTokenizerLayoutXLMTokenizerFastledLEDTokenizerLEDTokenizerFastliltllamallama4llama4_textllava
llava_nextllava_next_videollava_onevision
longformerLongformerTokenizerLongformerTokenizerFastlongt5T5TokenizerT5TokenizerFast)luke)LukeTokenizerNlxmertLxmertTokenizerLxmertTokenizerFastm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermbartMBartTokenizerMBartTokenizerFastmbart50MBart50TokenizerMBart50TokenizerFastmegazmegatron-bert
metaclip_2)zmgp-str)MgpstrTokenizerNminimax	ministralMistralCommonTokenizermistralmistral3mixtralmllamamlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizerMobileBertTokenizerFast
modernbert	moonshinemoshimpnetMPNetTokenizerMPNetTokenizerFastmptmramt5MT5TokenizerMT5TokenizerFastmusicgenmusicgen_melodymvpMvpTokenizerMvpTokenizerFast)myt5)MyT5TokenizerNnemotronnezhanllbNllbTokenizerNllbTokenizerFastznllb-moenystromformerolmoolmo2olmo3olmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizerOpenAIGPTTokenizerFastoptowlv2owlvit	paligemma)parakeet)ParakeetCTCTokenizerNpegasus	pegasus_x)	perceiver)PerceiverTokenizerN	persimmonphiphi3phimoe)phobert)PhobertTokenizerN
pix2structpixtralplbartPLBartTokenizer)
prophetnet)ProphetNetTokenizerNqdqbertqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)rag)RagTokenizerNrealmRealmTokenizerRealmTokenizerFastrecurrent_gemmareformerReformerTokenizerReformerTokenizerFastrembertRemBertTokenizerRemBertTokenizerFast	retribertRetriBertTokenizerRetriBertTokenizerFastrobertazroberta-prelayernorm)roc_bert)RoCBertTokenizerNroformerRoFormerTokenizerRoFormerTokenizerFastrwkvseamless_m4tSeamlessM4TTokenizerSeamlessM4TTokenizerFastseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2smollm3speech_to_textSpeech2TextTokenizer)speech_to_text_2)Speech2Text2TokenizerNspeecht5SpeechT5Tokenizer)splinter)SplinterTokenizerSplinterTokenizerFastsqueezebertSqueezeBertTokenizerSqueezeBertTokenizerFaststablelm
starcoder2switch_transformerst5t5gemma)tapas)TapasTokenizerN)tapex)TapexTokenizerN)z
transfo-xl)TransfoXLTokenizerNtvpudopUdopTokenizerUdopTokenizerFastumt5video_llavaviltvipllavavisual_bert)vits)VitsTokenizerNvoxtral)wav2vec2rx   )zwav2vec2-bertrx   )zwav2vec2-conformerrx   )wav2vec2_phoneme)Wav2Vec2PhonemeCTCTokenizerNwhisperWhisperTokenizerWhisperTokenizerFastxclipxglmXGLMTokenizerXGLMTokenizerFast)xlm)XLMTokenizerNzxlm-prophetnetXLMProphetNetTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerXLNetTokenizerFastxlstmxmodyosozambazamba2
class_namereturnc                    U S:X  a  [         $ [        R                  5        Ha  u  pX;   d  M  [        U5      nUS;   a  U S:X  a  [        R
                  " SS5      nO[        R
                  " SU 3S5      n [        X05      s  $    [        R                  R                  5        H#  nU H  n[        USS 5      U :X  d  M  Us  s  $    M%     [        R
                  " S5      n[        XP5      (       a  [        XP5      $ g ! [         a     M  f = f)	Nr   )r  r  r  r  z.tokenization_mistral_commontransformers.ztransformers.models__name__)r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattr)r  module_name
tokenizersmodule	tokenizermain_modules         d/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/auto/tokenization_auto.pytokenizer_class_from_namer  '  s   ..&&#:#@#@#B#3K@KAAjTlFl"001OQ_`"001[M1BDYZv22 $C (66==?
#Iy*d3zA   $ @ )).9K{''{// " s   7
C==
D
Dpretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_only	subfolderc	                    U	R                  SS5      n
U
b+  [        R                  " S[        5        Ub  [	        S5      eU
nU	R                  S5      n[        U [        UUUUUUUUSSSUS9nUc  [        R                  S5        0 $ [        X5      n[        US	S
9 n[        R                  " U5      nSSS5        UWS'   U$ ! , (       d  f       N= f)a=  
Loads the tokenizer configuration from a pretrained model tokenizer configuration.

Args:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        This can be either:

        - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
          huggingface.co.
        - a path to a *directory* containing a configuration file saved using the
          [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

    cache_dir (`str` or `os.PathLike`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
        cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force to (re-)download the configuration files and override the cached versions if they
        exist.
    resume_download:
        Deprecated and ignored. All downloads are now resumed by default when possible.
        Will be removed in v5 of Transformers.
    proxies (`dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
    token (`str` or *bool*, *optional*):
        The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
        when running `hf auth login` (stored in `~/.huggingface`).
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.
    local_files_only (`bool`, *optional*, defaults to `False`):
        If `True`, will only try to load the tokenizer configuration from local files.
    subfolder (`str`, *optional*, defaults to `""`):
        In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
        specify the folder name here.

<Tip>

Passing `token=True` is required when you want to use a private model.

</Tip>

Returns:
    `dict`: The configuration of the tokenizer.

Examples:

```python
# Download configuration from huggingface.co and cache.
tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
# This model does not have a tokenizer config so the result will be an empty dict.
tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

# Save a pretrained tokenizer locally and you can reload its config
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer.save_pretrained("tokenizer-test")
tokenizer_config = get_tokenizer_config("tokenizer-test")
```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`._commit_hashF)r  r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  z\Could not locate the tokenizer configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorgetr   r   loggerinfor   openjsonload)r  r  r  r  r  r  r  r  r  kwargsr  commit_hashresolved_config_filereaderresults                  r  get_tokenizer_configr  E  s    R ZZ 0$7N! A	
 uvv**^,K&%%'))..305   #rs	%&:HK	"W	56" 
6(F>M 
6	5s   C
Cc                   X    \ rS rSrSrS r\\" \5      S 5       5       r	\
SS j5       rSrg)	AutoTokenizeri  a  
This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
created with the [`AutoTokenizer.from_pretrained`] class method.

This class cannot be instantiated directly using `__init__()` (throws an error).
c                     [        S5      e)Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    r  __init__AutoTokenizer.__init__  s    _
 	
    c           	      
   UR                  SS5      nUb<  [        R                  " S[        5        UR	                  S5      b  [        S5      eXCS'   UR                  SS5      nSUS'   UR                  S	S5      nUR                  S
S5      nUR                  SS5      nUR	                  S5      n	Ub  Sn
[        R	                  US5      nUc,  [        SU SSR                  S [         5       5       S35      eUu  pU(       a$  Ub  [        U5      n
O[        R                  S5        U
c  [        U5      n
U
c  [        SU S35      eU
R                  " U/UQ70 UD6$ [        U40 UD6nSU;   a  US   US'   UR	                  S5      nSnSU;   a9  [        US   [        [        45      (       a  US   nOUS   R	                  SS5      nUc  [        U[         5      (       dP  U	(       a0  [#        X40 UD6n[%        USS9S   n[&        R(                  " S(0 UD6nO[&        R                  " U4SU0UD6nUR*                  n[-        US5      (       a  SUR.                  ;   a  UR.                  S   nUSLn[1        U5      [2        ;   =(       d/    USL=(       a$    [        U5      SL=(       d    [        US-   5      SLnU(       aC  U(       a  US   b  US   nOUS   nSU;   a  UR5                  S5      S   nOSn[7        XUUU5      nU(       aN  U(       aG  [9        WU40 UD6n
UR                  SS5      nU
R;                  5         U
R                  " U/UQ7SU0UD6$ Ubg  Sn
U(       a&  UR=                  S5      (       d  U S3n[        U5      n
U
c  Un[        U5      n
U
c  [        SW S 35      eU
R                  " U/UQ70 UD6$ [        U[>        5      (       a{  [1        UR@                  5      [1        URB                  5      LaD  [        R                  S!URB                  RD                   S"UR@                  RD                   S#35        URB                  n[G        [1        U5      RH                  5      nUb`  [2        [1        U5         u  nnU(       a   U(       d  Uc  UR                  " U/UQ70 UD6$ Ub  UR                  " U/UQ70 UD6$ [        S$5      e[        S%URD                   S&SR                  S' [2         5       5       S35      e))ae  
Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
falling back to using pattern matching on `pretrained_model_name_or_path`:

List options

Params:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        Can be either:

            - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
            - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
              using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
            - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
              single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
              applicable to all derived classes)
    inputs (additional positional arguments, *optional*):
        Will be passed along to the Tokenizer `__init__()` method.
    config ([`PretrainedConfig`], *optional*)
        The configuration object used to determine the tokenizer class to instantiate.
    cache_dir (`str` or `os.PathLike`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the
        standard cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download the model weights and configuration files and override the
        cached versions if they exist.
    resume_download:
        Deprecated and ignored. All downloads are now resumed by default when possible.
        Will be removed in v5 of Transformers.
    proxies (`dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.
    subfolder (`str`, *optional*):
        In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
        facebook/rag-token-base), specify it here.
    use_fast (`bool`, *optional*, defaults to `True`):
        Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
        a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
        is returned instead.
    tokenizer_type (`str`, *optional*):
        Tokenizer type to be loaded.
    trust_remote_code (`bool`, *optional*, defaults to `False`):
        Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
        should only be set to `True` for repositories you trust and in which you have read the code, as it will
        execute code present on the Hub on your local machine.
    kwargs (additional keyword arguments, *optional*):
        Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
        `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
        `additional_special_tokens`. See parameters in the `__init__()` for more details.

Examples:

```python
>>> from transformers import AutoTokenizer

>>> # Download vocabulary from huggingface.co and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

>>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

>>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
>>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

>>> # Download vocabulary from huggingface.co and define model-specific arguments
>>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
```r  Nr  r  r  configT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3   $   #    U  H  ov   M     g 7fN .0cs     r  	<genexpr>0AutoTokenizer.from_pretrained.<locals>.<genexpr>+  s      D,Cq,Cs   r  zt`use_fast` is set to `True` but the tokenizer class does not have a fast version.  Falling back to the slow version.zTokenizer class z is not currently imported.r  tokenizer_classauto_mapr  F)return_tensorsFastr   r   z--code_revisionz- does not exist or is not currently imported.z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.zzThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   8   #    U  H  oR                   v   M     g 7fr  )r  r  s     r  r  r    s     4[IZAZZIZs   r  )%r  r  r  r  r  r  r  joinr  r  warningfrom_pretrainedr  
isinstancetuplelistr	   r   r   r   	for_modelr   r  r  typer  splitr   r
   register_for_auto_classendswithr   decoderencoder	__class__r   r  )clsr  inputsr  r  r  r  r  r  r  r   tokenizer_class_tupletokenizer_class_nametokenizer_fast_class_nametokenizer_configconfig_tokenizer_classtokenizer_auto_map	gguf_pathconfig_dicthas_remote_codehas_local_code	class_refupstream_repo_tokenizer_class_candidate
model_typetokenizer_class_pytokenizer_class_fasts                               r  r  AutoTokenizer.from_pretrained  s   Z  $4d;%MM E zz'". l  -7OHd+#|::j$/$4d;"JJ':DAJJ{+	 %"O$;$?$?PT$U!$, .~.>>qyy D,C DDEQH 
 ?T; ,8&?@Y&ZONN= &";<P"Q& #34H3IId!eff"223PdSYd]cdd 00MXQWX--%5n%EF>"!1!5!56G!H!))*:6FF%5j%A"%5j%A%E%EoW[%\" ")f&677 +,I _X^ _I"6yQV"WX`"aK'11@K@F'775IZ^dF &,%;%;"vz**&///Q%+___%E",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	 .q1=.q1	.q1	y  ) 5a 8 $ 9!.Racp! 0;IGdohnoO

?D1A335"22-06J[_e  $/"O 6 ? ? G G/E.Fd,K)";<U"V&,B)";<U"V& &'@&AAno  #223PdSYd]cdd f233FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EF
!7Hf7V4 4#5G5O+;;<Ym\bmflmm%1-==>[o^dohnoo$: 
 /0@0@/A B++/994[IZ4[+[*\\]_
 	
r  Nc                    Uc  Uc  [        S5      eUb   [        U[        5      (       a  [        S5      eUb   [        U[        5      (       a  [        S5      eUbD  UbA  [        U[        5      (       a,  UR                  U:w  a  [        SUR                   SU S35      eU [
        R                  ;   a  [
        U    u  pEUc  UnUc  Un[
        R                  XU4US9  g)	ar  
Register a new tokenizer in this mapping.


Args:
    config_class ([`PretrainedConfig`]):
        The configuration corresponding to the model to register.
    slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
        The slow tokenizer to register.
    fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
        The fast tokenizer to register.
NzKYou need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_classz:You passed a fast tokenizer in the `slow_tokenizer_class`.z:You passed a slow tokenizer in the `fast_tokenizer_class`.zThe fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not consistent with the slow tokenizer class you passed (fast tokenizer has z and you passed z!. Fix one of those so they match!)exist_ok)r  
issubclassr   r   slow_tokenizer_classr  r  register)config_classr+  fast_tokenizer_classr)  existing_slowexisting_fasts         r  r,  AutoTokenizer.register  s     ',@,Hjkk+
;OQh0i0iYZZ+
;OQd0e0eYZZ !,$0/1HII$99=QQ['<<==MNbMc d!!  ,;;;+<\+J(M#+'4$#+'4$""<H\1]hp"qr  r  )NNF)r  
__module____qualname____firstlineno____doc__r  classmethodr   r  r  staticmethodr,  __static_attributes__r  r  r  r  r    sH    
 &'>?`
 @ `
D )r )rr  r  r  )NFNNNNF )<r5  r  r  osr  collectionsr   typingr   r   r   transformers.utils.import_utilsr   configuration_utilsr	   dynamic_module_utilsr
   r   modeling_gguf_pytorch_utilsr   tokenization_utilsr   tokenization_utils_baser   utilsr   r   r   r   r   r   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_fastr   
get_loggerr  r  strr
  r  r  r  CONFIG_TO_TYPEr  r  PathLikebooldictr  r  __all__)kvs   00r  <module>rQ     sa'      	  # ' ' G 3 \ ? 5 <  3 *  B" 
		H	% &c5#1M+N&NOc'>'@'@#d	
c %?%A%A!t)@)B)B%	
c 
?;R;T;T$7Z^_`c  
#=T=V=V%9\`ab!c" 
"<S<U<U$8[_`a#c$ 
7N7P7P3VZ[\%c& 
/:Q:S:S#6Y]^_'c( 	9)c, &@&B&B"*A*C*C&	
+c8 	09c: 
/:Q:S:S#6Y]^_;c< 
:T:V:V6\`bfgh=c> 	;?c@ 	2AcD &@&B&B"*A*C*C&	
CcP 
/KbKdKd1GjnopQcR 	.ScT 
D7N7P7P3VZ[\UcV 	KWcX 	AYcZ 
/:Q:S:S#6Y]^_[c\ 
O<S<U<U%8[_`a]c^ 
41H1J1J-PTUV_c` 
4K4M4M0SWXYacb 
+G^G`G`-Cfjklccd 
/:Q:S:S#6Y]^_ecf 	*gcj (B(D(D$$,C,E,E(4	
icv 	.wcz $>$@$@ d(?(A(A$t	
ycF 
/BYB[B[+>aefgGcJ "*A*C*C&	
IcX '>'@'@#d	
Wcf '>'@'@#d	
ecr 	*scv (B(D(D$$,C,E,E(4	
ucB 
'CZC\C\)?bfghCcD 
D3J3L3L/RVWXEcF 
T4K4M4M0SWXYGcH 
%?V?X?X';^bcdIcJ 
&@W@Y@Y(<_cdeKcL 
)F]F_F_+BeijkMcP "<">">D&=&?&?"T	
Oc\ 	.]c^ 
4K4M4M0SWXY_c` 	*acb 	;ccd 
-I`IbIb/Ehlmnecf 
/:Q:S:S#6Y]^_gch 
'CZC\C\)?bfghicl (B(D(D$$,C,E,E(4	
kcz $>$@$@ d(?(A(A$t	
ycH $>$@$@ d(?(A(A$t	
GcV $>$@$@ d(?(A(A$t	
Ucd !$>$@$@ d(?(A(A$t	
ccp 	(qct $>$@$@ d(?(A(A$t	
sc@ 
-LcLeLe/HkopqAcD -5L5N5N1TX	
CcP 
'CZC\C\)?bfghQcR 
/:Q:S:S#6Y]^_ScT 
?;R;T;T$7Z^_`UcV 
d4K4M4M0SWXYWcX 
$8O8Q8Q 4W[\]YcZ 
*D*F*F&DRVWX[c\ 	(]c` #:#<#<$'>'@'@#d	
_cl 
D7N7P7P3VZ[\mcn 
$:Q:S:S 6Y]^_ocr $0C0E0E,4QUV	
qcx 	2ycz 
t4K4M4M0SWXY{c| 
/:Q:S:S#6Y]^_}c~ 	*c@ 
%@W@Y@Y'<_cdeAcD $>$@$@ d(?(A(A$t	
CcR $>$@$@ d(?(A(A$t	
Qc` $>$@$@ d(?(A(A$t	
_cn $>$@$@ d(?(A(A$t	
mc| $>$@$@ d(?(A(A$t	
{cJ $>$@$@ d(?(A(A$t	
IcV 
9P9R9R"5X\]^WcX 
4K4M4M0SWXYYcZ 
$5L5N5N1TXYZ[c\ 
d9P9R9R5X\]^]c^ 
46M6O6O2UYZ[_c` 
t:Q:S:S6Y]^_acb 
*D*F*F&DRVWXccd 
/:Q:S:S#6Y]^_ecf 
AXAZAZ*=`defgch 
_=T=V=V&9\`abicj 
d6M6O6O2UYZ[kcl 	Bmcn 
T8O8Q8Q4W[\]ocp 
/:Q:S:S#6Y]^_qcr 	?sct 	-ucv 	0wcx 	6ycz 	6{c| 
OD[D]D]-@cghi}c~ 
o>U>W>W':]abcc@ 
D7N7P7P3VZ[\AcB 
'CZC\C\)?bfghCcD 	3EcF 
%AXAZAZ'=`defGcH 
T3J3L3L/RVWXIcJ 
&@W@Y@Y(<_cdeKcL 
&@W@Y@Y(<_cdeMcN 
/BYB[B[+>aefgOcP 
G^G`G`0CfjklQcR 
&@W@Y@Y(<_cdeScV $>$@$@ d(?(A(A$t	
Ucb 
41H1J1J-PTUVccf $>$@$@ d(?(A(A$t	
ecr 	0scv )C)E)E%4-D-F-F)D	
ucB	 
;R;T;T7Z^_`C	cD	 
)F]F_F_+BeijkE	cF	 
-LcLeLe/HkopqG	cH	 
-LcLeLe/HkopqI	cJ	 
+I`IbIb-EhlmnK	cL	 
7N7P7P!3VZ[\M	cN	 
'F]F_F_)BeijkO	cR	 $>$@$@ d(?(A(A$t	
Q	c`	 $>$@$@ d(?(A(A$t	
_	cn	 $>$@$@ d(?(A(A$t	
m	cz	 
#=T=V=V%9\`ab{	c|	 
(BYB[B[*>aefg}	c~	 
.H_HaHa0Dgklm	c@
 
-G^G`G`/CfjklA
cB
 
-LcLeLe/HkopqC
cF
 !;!=!=4%<%>%>!D	
E
cR
 	*S
cT
 
%@W@Y@Y'<_cdeU
cV
 
*D*F*F&DRVWXW
cX
 
43J3L3L/RVWXY
cZ
 
D4K4M4M0SWXY[
c\
 
)C)E)E%4QUVW]
c`
 $>$@$@ d(?(A(A$t	
_
cn
 &@&B&B"*A*C*C&	
m
cz
 
$@W@Y@Y&<_cde{
c|
 
?CZC\C\,?bfgh}
c@ %-D-F-F)D	

cL 	/McP #=#?#?T'>'@'@#d	
Oc^  /00 )*D*F*F&D(?(A(AJeJgJg$mq		
]cp  /00 )*D*F*F&D(?(A(AJeJgJg$mq		
ocB  /00 )*D*F*F&D(?(A(AJeJgJg$mq		
AcT  /00 )*D*F*F&D(?(A(AJeJgJg$mq		
Scd 
$>U>W>W&:]abcecf 
'A'C'C#tTUgch 
G^G`G`0Cfjklicj 
-LcLeLe/Hkopqkcl 
;R;T;T7Z^_`mcn 
t:Q:S:S6Y]^_ocp 
46M6O6O2UYZ[qcr 
#=T=V=V%9\`absct 
1H1J1J-PTUVucv 
#?V?X?X%;^bcdwcz "<">">D&=&?&?"T	
ycF 
m:Q:S:S%6Y]^_GcH 
]AXAZAZ,=`defIcJ 
7N7P7P!3VZ[\KcL 	*McN 
d9P9R9R5X\]^OcP 
?;R;T;T$7Z^_`QcT #=#?#?T'>'@'@#d	
Scb #=#?#?T'>'@'@#d	
acp %?%A%A!t)@)B)B%	
oc| 
$2I2K2K.QUVW}c~ 
43J3L3L/RVWXc@ 
40G0I0I,tTUAcB 
43J3L3L/RVWXCcF 5L5N5N1TXY	
EcL 
?V?X?X(;^bcdMcP !?V?X?X#;^bc	
OcV 
9P9R9R"5X\]^WcX 
?;R;T;T$7Z^_`YcZ 
O<S<U<U%8[_`a[c\ 
'AXAZAZ)=`def]c^ 	5_cb &@&B&B"*A*C*C&	
acp &@&B&B"*A*C*C&	
oc|	
}cL $>$@$@ d(?(A(A$t	
KcX 
#?V?X?X%;^bcdYcZ 
"<S<U<U$8[_`a[c\ 
$>U>W>W&:]abc]c^ 	0_c` 
<S<U<U'8[_`aacd .00 )3J3L3L/RV		
cct 
)C)E)E%4QUVWucv 	6wcx 
_=T=V=V&9\`abyc|  (?(A(A$t	
{cH 
*D[D]D],@cghiIcJ 
(BYB[B[*>aefgKcL 
)CZC\C\+?bfghMcP  (?(A(A$t	
Oc\ 
&@W@Y@Y(<_cde]c`  (?(A(A$t	
_cn  (?(A(A$t	
mc|  (?(A(A$t	
{cH 
,F]F_F_.BeijkIcJ 
&@W@Y@Y(<_cdeKcL 
*D[D]D],@cghiMcN 	(OcP 
#=T=V=V%9\`abQcT $>$@$@ d(?(A(A$t	
Scb 'A'C'C#+B+D+D'$	
acp &@&B&B"*A*C*C&	
oc| 
+I`IbIb-Ehlmn}c~ 
'CZC\C\)?bfghcB #;R;T;T!7Z^_	
AcH 	1IcJ 
)F]F_F_+BeijkKcL 
$2I2K2K.QUVWMcP *D*F*F&D.E.G.G*T	
Oc^ *D*F*F&D.E.G.G*T	
]cl $>$@$@ d(?(A(A$t	
kcx 
)C)E)E%4QUVWyc| $>$@$@ d(?(A(A$t	
{cH 
T8O8Q8Q4W[\]IcJ 
6P6R6R2X\^bcdKcL 	>McN 
-G-I-I)tUYZ[OcP 	EQcT #CZC\C\%?bfg	
ScZ 
d6M6O6O2UYZ[[c\ 
@W@Y@Y)<_cde]c` "!;!=!=4%<%>%>!D	
_cn !;!=!=4%<%>%>!D	
mc| $>$@$@ d(?(A(A$t	
{cH 	,IcJ 	,KcL 	5McN 
9P9R9R"5X\]^OcR #=#?#?T'>'@'@#d	
Qc` !;!=!=4%<%>%>!D	
_cl 
)CZC\C\+?bfghmcn 
/:Q:S:S#6Y]^_ocp 
&@W@Y@Y(<_cdeqcr 
AXAZAZ*=`defsct 	*ucx ,G,I,I(t-D-F-FOjOlOl)rv	
wcD 	5EcF 	:GcH 	?IcJ 	DKcL 
'CZC\C\)?bfghMcN 
?;R;T;T$7Z^_`OcR #=#?#?T'>'@'@#d	
Qc^ 	(_c` 
8R8T8T4Z^`defacd )C)E)E%4-D-F-F)D	
ccr )C)E)E%4-D-F-F)D	
qc@ $>$@$@ d(?(A(A$t	
cL 
43J3L3L/RVWXMcP )C)E)E%4-D-F-F)D	
Oc^ %?%A%A!t)@)B)B%	
]cl $>$@$@ d(?(A(A$t	
kcz $>$@$@ d(?(A(A$t	
yce N %%9;RS #7#=#=#?@#?41!$#?@# %S	42H @ 9= &*(,(,""l#(bkk#.>)>#?lc2;;s#3345l l d^	l
 d38n%l E$)$%l sml l l 
#s(^l^\r \r~ 
0_ As   Z.A]