
    cCi                         S SK r S SKJrJr  S SKrS SKJr  SSKJ	r	  SSK
JrJr  SSKJr  \" 5       (       a  S S	KJr  \" S
S9 " S S\	R"                  R$                  5      5       rS/rg)    N)OptionalUnion)pad_model_inputs   )keras)is_keras_nlp_availablerequires   )GPT2Tokenizer)BytePairTokenizer)	keras_nlp)backendsc            
          ^  \ rS rSrSr  SS\\\4   S\\   S\	\   S\	\   4U 4S jjjr
\S\4S	 j5       r\S
\\\R                   4   4S j5       r\S 5       rS rSS\	\   4S jjrSrU =r$ )TFGPT2Tokenizer   a  
This is an in-graph tokenizer for GPT2. It should be initialized similarly to other tokenizers, using the
`from_pretrained()` method. It can also be initialized with the `from_tokenizer()` method, which imports settings
from an existing standard tokenizer object.

In-graph tokenizers, unlike other Hugging Face tokenizers, are actually Keras layers and are designed to be run
when the model is called, rather than during preprocessing. As a result, they have somewhat more limited options
than standard tokenizer classes. They are most useful when you want to create an end-to-end model that goes
straight from `tf.string` inputs to outputs.

Args:
    vocab (dict[str, int]): Vocabulary dict for Byte Pair Tokenizer
    merges (list[str]): Merges list for Byte Pair Tokenizer
vocabmerges
max_lengthpad_token_idc                 p   > [         TU ]  5         X@l        X0l        Xl        X l        [        XUS9U l        g )N)sequence_length)super__init__r   r   r   r   r   tf_tokenizer)selfr   r   r   r   	__class__s        g/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/gpt2/tokenization_gpt2_tf.pyr   TFGPT2Tokenizer.__init__!   s4     	($
-eZX    	tokenizerc                     UR                    Vs/ s H  nSR                  U5      PM     nnUR                  5       nU " Xe/UQ70 UD6$ s  snf )a  Creates TFGPT2Tokenizer from GPT2Tokenizer

Args:
    tokenizer (GPT2Tokenizer)

Examples:

```python
from transformers import AutoTokenizer, TFGPT2Tokenizer

tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
tf_tokenizer = TFGPT2Tokenizer.from_tokenizer(tokenizer)
```
 )	bpe_ranksjoin	get_vocab)clsr    argskwargsmr   r   s          r   from_tokenizerTFGPT2Tokenizer.from_tokenizer0   sP      (1':':;':!#((1+':;##%5242622 <s   Apretrained_model_name_or_pathc                 b    [         R                  " U/UQ70 UD6nU R                  " U/UQ70 UD6$ )a  Creates TFGPT2Tokenizer from pretrained GPT2Tokenizer

Args:
    pretrained_model_name_or_path (Union[str, os.PathLike]): Path to pretrained model

Examples:

```python
from transformers import TFGPT2Tokenizer

tf_tokenizer = TFGPT2Tokenizer.from_pretrained("openai-community/gpt2")
```
)r   from_pretrainedr*   )r&   r,   init_inputsr(   r    s        r   r.   TFGPT2Tokenizer.from_pretrainedD   s<     "112OhR]hagh	!!)DkDVDDr   c                     U " S0 UD6$ )z{Creates TFGPT2Tokenizer from configurations

Args:
    config (Dict): Dictionary with keys such as stated in `get_config`.
 r2   )r&   configs     r   from_configTFGPT2Tokenizer.from_configV   s     }V}r   c                 `    U R                   U R                  U R                  U R                  S.$ )Nr   r   r   r   r7   )r   s    r   
get_configTFGPT2Tokenizer.get_config_   s*    ZZkk// --	
 	
r   c                     U R                  U5      n[        R                  " U5      nU R                  b*  Ub  UOU R                  nUb  [        X2U R                  S9u  p4XCS.$ )N)max_seq_length	pad_value)attention_mask	input_ids)r   tf	ones_liker   r   r   )r   xr   r>   r=   s        r   callTFGPT2Tokenizer.callg   se    %%a(	i0('1'=4??J%,<DDUDU-)	 #1IIr   )r   r   r   r   r   )NN)N)__name__
__module____qualname____firstlineno____doc__dictstrintlistr   r   classmethodr   r*   r   osPathLiker.   r4   r8   rB   __static_attributes____classcell__)r   s   @r   r   r      s    & %)&*YCH~Y S	Y SM	Y
 smY Y 3} 3 3& EE#r{{BR<S E E"  
J(3- J Jr   r   )rN   typingr   r   
tensorflowr?   tensorflow_textr   modeling_tf_utilsr   utils.import_utilsr   r	   tokenization_gpt2r   keras_nlp.tokenizersr   layersLayerr   __all__r2   r   r   <module>r\      sc    	 "  , & B , 6 
>"cJell(( cJ #cJL 
r   