
    bCiPH                         S r SSKJr  SSKJr  \R
                  " \5      r " S S\5      r " S S\5      r	 " S S	\5      r
 " S
 S\5      r " S S\5      r/ SQrg)zBlt model configuration   )PretrainedConfig)loggingc                   T   ^  \ rS rSrSrSr                SU 4S jjrSrU =r$ )BltLocalEncoderConfig   z:
Configuration class for the Blt Local Encoder component.
blt_local_encoderc                 l  > Xl         X l        X0l        X@l        XPl        X`l        U=(       d    UU l        XV-  U l        U=(       d    [        SU-  S-  5      U l	        Xl
        Xl        Xl        Xl        Xl        Xl        Xl        UU l        UR%                  SS 5        [&        TU ]P  " S0 UDSS0D6  g N   r   tie_word_embeddingsF 
vocab_sizecross_attn_all_layerscross_attn_khidden_size_globalhidden_sizenum_attention_headsnum_key_value_headshead_dimintintermediate_sizenum_hidden_layersrms_norm_epsdropoutmax_position_embeddings
rope_thetarope_scaling
hidden_actinitializer_rangepopsuper__init__selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    kwargs	__class__s                     c/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/blt/configuration_blt.pyr#   BltLocalEncoderConfig.__init__       ( %%:"("4&#6 #6#M:M #:!2!Nc!k/A:M6N!2('>$$($!2 	

($/=6=u=    r   r   r   r   r   r   r   r    r   r   r   r   r   r   r   r   r   )  F            N   h㈵>         `      ANsilu   {Gz?	__name__
__module____qualname____firstlineno____doc__
model_typer#   __static_attributes____classcell__r'   s   @r(   r   r      sK     %J #  %#(> (>r+   r   c                   T   ^  \ rS rSrSrSr                SU 4S jjrSrU =r$ )BltLocalDecoderConfigJ   z:
Configuration class for the Blt Local Decoder component.
blt_local_decoderc                 l  > Xl         X l        X0l        X@l        XPl        X`l        U=(       d    UU l        XV-  U l        U=(       d    [        SU-  S-  5      U l	        Xl
        Xl        Xl        Xl        Xl        Xl        Xl        UU l        UR%                  SS 5        [&        TU ]P  " S0 UDSS0D6  g r
   r   r$   s                     r(   r#   BltLocalDecoderConfig.__init__Q   r*   r+   r,   )r-   Tr.   r/   r0   r1   N	   r3   r4   r5   r6   Nr7   r8   r9   r:   rC   s   @r(   rE   rE   J   sK     %J "  %#(> (>r+   rE   c                   L   ^  \ rS rSrSrSr            SU 4S jjrSrU =r$ )BltGlobalTransformerConfig|   z?
Configuration class for the Blt Global Transformer component.
blt_global_transformerc                 :  > Xl         X l        U=(       d    UU l        X-  U l        U=(       d    [	        SU-  S-  5      U l        X@l        XPl        X`l        Xpl	        Xl
        Xl        Xl        Xl        UR                  SS 5        [        TU ]@  " S0 UDSS0D6  g r
   )r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   )r%   r   r   r   r   r   r   r   r   r   r   r   r    r&   r'   s                 r(   r#   #BltGlobalTransformerConfig.__init__   s      '#6 #6#M:M #:!2!Nc!k/A:M6N!2('>$$($!2 	

($/=6=u=r+   )r   r   r   r   r    r   r   r   r   r   r   r   r   )r/   r1   N   r3   r4      r6   Nr7   i   r9   r:   rC   s   @r(   rL   rL   |   s?     *J   $ >  >r+   rL   c                   L   ^  \ rS rSrSrSr            SU 4S jjrSrU =r$ )BltPatcherConfig   a7	  
Configuration class for the Blt Patcher/Entropy model component.

Args:
        vocab_size (`int`, *optional*, defaults to 260):
            Vocabulary size of the Blt patcher model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling the patcher model.
        hidden_size (`int`, *optional*, defaults to 768):
            Dimension of the hidden representations.
        num_hidden_layers (`int`, *optional*, defaults to 14):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
            `num_attention_heads`.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the rms normalization layers.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        intermediate_size (`int`, *optional*, defaults to 2048):
            Dimension of the MLP representations.
        rope_scaling (`dict`, *optional*):
            Dictionary containing the RoPE scaling configuration.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
blt_patcherc                 T  > Xl         X l        X0l        X@l        X$-  U l        Ub  UOUU l        X`l        Xpl        Xl        Xl	        SU l
        U
=(       d    [        SU R                  -  S-  5      U l        Xl        Xl        UR                  SS 5        [         TU ]D  " S0 UDSS0D6  g )Nr7   r   r   r   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   )r%   r   r   r   r   r   r   r   r   r   r   r   r    r&   r'   s                 r(   r#   BltPatcherConfig.__init__   s      %&!2#6 #::M:Y#6_r '>$($ !2!Sc!d>N>N:NQR:R6S(!2 	

($/=6=u=r+   )r   r   r   r   r    r   r   r   r   r   r   r   r   r   )r-   i         Ni    r3   r4   g     @r/   Nr9   r:   rC   s   @r(   rT   rT      s@    #J J   $!> !>r+   rT   c                   p   ^  \ rS rSrSrSrS/r\\\	\
S.r                    SU 4S jjrSrU =r$ )		BltConfig   a  
This is the configuration class to store the configuration of a [`BltModel`]. It is used to instantiate a
Blt model according to the specified arguments, defining the model architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
        vocab_size (`int`, *optional*, defaults to 260):
            Vocabulary size of the Blt model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`BltModel`].
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with.
        patch_in_forward (`bool`, *optional*, defaults to `True`):
            Whether to perform patching during the forward pass.
        patch_size (`int`, *optional*, defaults to 4):
            Size of the patches used in the patching mechanism.
        patching_mode (`str`, *optional*, defaults to `"entropy"`):
            The mode used for patching, such as entropy-based patching.
        patching_threshold (`float`, *optional*, defaults to 1.34):
            Threshold value used for determining when to apply patches.
        patching_batch_size (`int`, *optional*, defaults to 1):
            Batch size used during the patching process.
        max_patch_length (`int`, *optional*):
            Maximum length of patches that can be generated.
        cross_attn_k (`int`, *optional*, defaults to 2):
            Number of cross-attention heads used in the model.
        encoder_hash_byte_group_size (`list`, *optional*):
            List of byte group sizes used in the encoder hash function.
        encoder_hash_byte_group_vocab (`int`, *optional*, defaults to 500002):
            Vocabulary size for the encoder hash byte groups.
        encoder_hash_byte_group_nb_functions (`int`, *optional*, defaults to 1):
            Number of hash functions used in the encoder byte grouping.
        patcher_config (`BltPatcherConfig`, *optional*):
            Configuration for the patcher component of the model.
        encoder_config (`BltLocalEncoderConfig`, *optional*):
            Configuration for the local encoder component of the model.
        decoder_config (`BltLocalDecoderConfig`, *optional*):
            Configuration for the local decoder component of the model.
        global_config (`BltGlobalTransformerConfig`, *optional*):
            Configuration for the global transformer component of the model.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rope_theta (`float`, *optional*, defaults to 500000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`dict`, *optional*):
            Dictionary containing the RoPE scaling configuration.

```python
>>> from transformers import BltModel, BltConfig

>>> # Initializing a Blt configuration
>>> configuration = BltConfig()

>>> # Initializing a model from the configuration
>>> model = BltModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```

Checkpoint: [facebook/blt](https://huggingface.co/facebook/blt)
bltpast_key_values)patcher_configencoder_configdecoder_configglobal_configc                 H  > Xl         X l        UU l        UU l        UU l        X0l        X@l        XPl        X`l        Xpl	        Xl
        UR                  SS5      U l        UR                  SS5      U l        UR                  S5      U l        UR                  SS5      U l        Xl        U
=(       d    / SQU l        Xl        Xl        Uc$  [)        US	9U l        [,        R/                  S
5        OS[1        U[2        5      (       a#  UR5                  SU5        [)        S0 UD6U l        O[1        U[(        5      (       a  Xl        Uc$  [7        US	9U l        [,        R/                  S5        OS[1        U[2        5      (       a#  UR5                  SU5        [7        S0 UD6U l        O[1        U[6        5      (       a  Xl        Uc$  [;        US	9U l        [,        R/                  S5        OS[1        U[2        5      (       a#  UR5                  SU5        [;        S0 UD6U l        O[1        U[:        5      (       a  Xl        Uc$  [?        US	9U l         [,        R/                  S5        OT[1        U[2        5      (       a#  UR5                  SU5        [?        S0 UD6U l         O[1        U[>        5      (       a  UU l         U R8                  RB                  U R                   -  nUU R@                  RB                  :w  a  UOS U R@                  l"        URG                  SS 5        [H        TU ]  " SSU0UD6  g )Npatching_devicecudarealtime_patchingTpatching_threshold_addmonotonicityF)r               r   )r    z8patcher_config is None, using default Blt patcher configr    z8encoder_config is None, using default Blt encoder configz8decoder_config is None, using default Blt decoder configz6global_config is None, using default Blt global configr   r   )&r   r   r    r   r   patch_in_forward
patch_sizepatching_modepatching_thresholdpatching_batch_sizemax_patch_lengthgetre   rg   rh   ri   r   encoder_hash_byte_group_sizeencoder_hash_byte_group_vocab$encoder_hash_byte_group_nb_functionsrT   r`   loggerinfo
isinstancedict
setdefaultr   ra   rE   rb   rL   rc   r   encoder_cross_output_sizer!   r"   r#   )r%   r   r   rn   ro   rp   rq   rr   rs   r   ru   rv   rw   r`   ra   rb   rc   r   r    r   r   r&   r}   r'   s                          r(   r#   BltConfig.__init__>  s   2 %'>$!2$( !1$*"4#6  0%zz*;VD!',?!F&,jj1I&J#"JJ~u= ) -I,^L^)-J*4X1 !"2EV"WDKKRS--%%&9;LM"2"D^"DD(899"0!"7J["\DKKRS--%%&9;LM"7"I."ID(=>>"0!"7J["\DKKRS--%%&9;LM"7"I."ID(=>>"0 !;N_!`DKKPQt,,$$%8:KL!;!Lm!LD'ABB!.D %)$7$7$C$CdFWFW$W!)BdFXFXFdFd)d%jn 	4
 	

($/K-@KFKr+   )r   rb   ra   rw   ru   rv   rc   r    rs   r   ri   rn   ro   r`   rr   re   rp   rq   rh   rg   r   r   r   )r-   rR   Trj   entropyg   ]?r2   Nr.   Ni" r2   NNNNFr9   r6   N)r;   r<   r=   r>   r?   r@   keys_to_ignore_at_inferencerT   r   rE   rL   sub_configsr#   rA   rB   rC   s   @r(   r\   r\      sy    @D J#4"5*//3	K  $,%)&,-.!+`L `Lr+   r\   )r\   rT   r   rE   rL   N)r?   configuration_utilsr   utilsr   
get_loggerr;   rx   r   rE   rL   rT   r\   __all__r   r+   r(   <module>r      su     3  
		H	%/>, />d/>, />d'>!1 '>TI>' I>XlL  lL^r+   