ó
    cCi‹<  ã                   ó€   • S SK Jr  SSKJrJr   " S S\5      r " S S\5      r " S S	\5      r " S
 S\5      r/ SQr	g)é   )ÚPretrainedConfigé   )ÚCONFIG_MAPPINGÚ
AutoConfigc                   óX   ^ • \ rS rSrSrSrSrS\0r            SU 4S jjr	Sr
U =r$ )	ÚEdgeTamVisionConfigé   a\  
This is the configuration class to store the configuration of a [`EdgeTamVisionModel`]. It is used to instantiate a SAM
vision encoder according to the specified arguments, defining the model architecture. Instantiating a configuration
defaults will yield a similar configuration to that of SAM 2.1 Hiera-tiny
[facebook/EdgeTAM](https://huggingface.co/facebook/EdgeTAM) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    backbone_config (`Union[dict, "PretrainedConfig"]`, *optional*):
        Configuration for the vision backbone. This is used to instantiate the backbone using
        `AutoModel.from_config`.
    backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
        The list of channel dimensions for the backbone.
    backbone_feature_sizes (`List[List[int]]`, *optional*, defaults to `[[256, 256], [128, 128], [64, 64]]`):
        The spatial sizes of the feature maps from the backbone.
    fpn_hidden_size (`int`, *optional*, defaults to 256):
        The hidden dimension of the FPN.
    fpn_kernel_size (`int`, *optional*, defaults to 1):
        The kernel size for the convolutions in the neck.
    fpn_stride (`int`, *optional*, defaults to 1):
        The stride for the convolutions in the neck.
    fpn_padding (`int`, *optional*, defaults to 0):
        The padding for the convolutions in the neck.
    fpn_top_down_levels (`List[int]`, *optional*, defaults to `[2, 3]`):
        The levels for the top-down FPN connections.
    num_feature_levels (`int`, *optional*, defaults to 3):
        The number of feature levels from the FPN to use.
    hidden_act (`str`, *optional*, defaults to `"gelu"`):
        The non-linear activation function in the neck.
    layer_norm_eps (`float`, *optional*, defaults to 1e-06):
        The epsilon for the layer normalization.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

Úvision_configÚedgetam_vision_modelÚbackbone_configc                 óà  >• [         TU ]  " S0 UD6  Uc  / SQOUnUc  SS/SS/SS//OUnUc  SS/OUn[        U[        5      (       a(  UR	                  SS5      US'   [
        US      " S0 UD6nO6[        U[        5      (       a  UnOUc  [        R                  " S	SS
/ SQS.S9nXl        X l	        X0l
        X@l        XPl        X`l        Xpl        X€l        Xl        X l        X°l        XÀl        g )N)i€  éÀ   é`   é0   é   é€   é@   r   r   Ú
model_typeÚtimm_wrapperztimm/repvit_m1.dist_in1kT)é    é   r   r   )Úin_chansÚfeatures_onlyÚout_indices)Ú
model_args© )ÚsuperÚ__init__Ú
isinstanceÚdictÚgetr   r   Úfrom_pretrainedr   Úbackbone_channel_listÚbackbone_feature_sizesÚfpn_hidden_sizeÚfpn_kernel_sizeÚ
fpn_strideÚfpn_paddingÚfpn_top_down_levelsÚnum_feature_levelsÚ
hidden_actÚlayer_norm_epsÚinitializer_range)Úselfr   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   ÚkwargsÚ	__class__s                 €Úk/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/edgetam/configuration_edgetam.pyr   ÚEdgeTamVisionConfig.__init__G   s  ø€ ô  	‰ÒÑ"˜6Ò"à6KÑ6SÓ 2ÐYnÐà2HÑ2Pˆc3ˆZ˜#˜s˜ b¨" XÑ.ÐVlð 	ð )<Ñ(C˜q !™fÐI\Ðäo¤t×,Ñ,Ø,;×,?Ñ,?ÀÈnÓ,]ˆO˜LÑ)Ü,¨_¸\Ñ-JÒKÑ^ÈoÑ^‰OÜ˜¬×4Ñ4Ø-‰OØÑ$Ü(×8Ò8Ø*Ø()¸DÒQ]Ñ^ñˆOð
  /Ôð &;Ô"Ø&<Ô#Ø.ÔØ.ÔØ$ŒØ&ÔØ#6Ô Ø"4Ôà$ŒØ,ÔØ!2Õó    )r#   r   r$   r%   r&   r(   r'   r)   r+   r-   r,   r*   )NNNr   r   r   r   Nr   Úgeluçíµ ÷Æ°>ç{®Gáz”?)Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__Úbase_config_keyr   r   Úsub_configsr   Ú__static_attributes__Ú__classcell__©r0   s   @r1   r   r      sQ   ø† ñ$ðL &€OØ'€Jà˜:ð€Kð Ø"Ø#ØØØØØ ØØØØ÷13õ 13r3   r   c                   óD   ^ • \ rS rSrSrSr        SU 4S jjrSrU =r$ )ÚEdgeTamPromptEncoderConfigé{   aê  
This is the configuration class to store the configuration of a [`EdgeTamPromptEncoder`]. The [`EdgeTamPromptEncoder`]
module is used to encode the input 2D points and bounding boxes.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    hidden_size (`int`, *optional*, defaults to 256):
        Dimensionality of the hidden states.
    image_size (`int`, *optional*, defaults to 1024):
        The expected output resolution of the image.
    patch_size (`int`, *optional*, defaults to 16):
        The size (resolution) of each patch.
    mask_input_channels (`int`, *optional*, defaults to 16):
        The number of channels to be fed to the `MaskDecoder` module.
    num_point_embeddings (`int`, *optional*, defaults to 4):
        The number of point embeddings to be used.
    hidden_act (`str`, *optional*, defaults to `"gelu"`):
        The non-linear activation function in the encoder and pooler.
    layer_norm_eps (`float`, *optional*, defaults to 1e-06):
        The epsilon used by the layer normalization layers.
    scale (`float`, *optional*, defaults to 1):
        The scale factor for the prompt encoder.
Úprompt_encoder_configc	                 ó†   >• [         T
U ]  " S0 U	D6  Xl        X l        X0l        X@l        XPl        X`l        Xpl        X€l	        g ©Nr   )
r   r   Úhidden_sizeÚ
image_sizeÚ
patch_sizeÚmask_input_channelsÚnum_point_embeddingsr+   r,   Úscale)r.   rG   rH   rI   rJ   rK   r+   r,   rL   r/   r0   s             €r1   r   Ú#EdgeTamPromptEncoderConfig.__init__˜   sA   ø€ ô 	‰ÒÑ"˜6Ò"Ø&ÔØ$ŒØ$ŒØ#6Ô Ø$8Ô!Ø$ŒØ,ÔØ
r3   )r+   rG   rH   r,   rJ   rK   rI   rL   )r   i   é   rN   é   r4   r5   r   ©	r7   r8   r9   r:   r;   r<   r   r>   r?   r@   s   @r1   rB   rB   {   s3   ø† ñð4 .€Oð ØØØØØØØ÷õ r3   rB   c                   óL   ^ • \ rS rSrSrSr            SU 4S jjrSrU =r$ )ÚEdgeTamMaskDecoderConfigé¯   a  
This is the configuration class to store the configuration of a [`EdgeTamMaskDecoder`]. It is used to instantiate a EDGETAM
memory encoder according to the specified arguments, defining the model architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    hidden_size (`int`, *optional*, defaults to 256):
        Dimensionality of the hidden states.
    hidden_act (`str`, *optional*, defaults to `"gelu"`):
        The non-linear activation function in the EDGETAM mask decoder.
    mlp_dim (`int`, *optional*, defaults to 2048):
        The dimension of the MLP in the two-way transformer.
    num_hidden_layers (`int`, *optional*, defaults to 2):
        The number of hidden layers in the two-way transformer.
    num_attention_heads (`int`, *optional*, defaults to 8):
        The number of attention heads in the two-way transformer.
    attention_downsample_rate (`int`, *optional*, defaults to 2):
        The downsample rate for the attention layers.
    num_multimask_outputs (`int`, *optional*, defaults to 3):
        The number of multimask outputs.
    iou_head_depth (`int`, *optional*, defaults to 3):
        The depth of the IoU head.
    iou_head_hidden_dim (`int`, *optional*, defaults to 256):
        The hidden dimension of the IoU head.
    dynamic_multimask_via_stability (`bool`, *optional*, defaults to `True`):
        Whether to use dynamic multimask via stability.
    dynamic_multimask_stability_delta (`float`, *optional*, defaults to 0.05):
        The stability delta for the dynamic multimask.
    dynamic_multimask_stability_thresh (`float`, *optional*, defaults to 0.98):
        The stability threshold for the dynamic multimask.

Úmask_decoder_configc                 óÂ   >• [         TU ]  " S0 UD6  Xl        Xpl        X l        X€l        Xl        X l        X°l        XÀl	        X@l
        Xl        XPl        X0l        X`l        g rF   )r   r   rG   Únum_multimask_outputsr+   Úiou_head_depthÚiou_head_hidden_dimÚdynamic_multimask_via_stabilityÚ!dynamic_multimask_stability_deltaÚ"dynamic_multimask_stability_threshÚnum_hidden_layersÚnum_attention_headsÚmlp_dimÚattention_downsample_rate)r.   rG   r+   r^   r\   r]   r_   rV   rW   rX   rY   rZ   r[   r/   r0   s                 €r1   r   Ú!EdgeTamMaskDecoderConfig.__init__Õ   sc   ø€ ô  	‰ÒÑ"˜6Ò"à&ÔØ%:Ô"Ø$ŒØ,ÔØ#6Ô Ø/NÔ,Ø1RÔ.Ø2TÔ/ð "3ÔØ&ÔØ#6Ô ØŒØ)BÕ&r3   )r_   rZ   r[   rY   r+   rG   rW   rX   r^   r]   r\   rV   )r   r4   i   r   é   r   r   r   r   Tgš™™™™™©?g\Âõ(\ï?rP   r@   s   @r1   rR   rR   ¯   sB   ø† ñ!ðF ,€Oð ØØØØØ"#ØØØØ(,Ø*.Ø+/÷ Cõ  Cr3   rR   c                   óH   ^ • \ rS rSrSrSr\\\S.r	    SU 4S jjr
SrU =r$ )ÚEdgeTamConfigéø   aì  
[`EdgeTamConfig`] is the configuration class to store the configuration of a [`EdgeTamModel`]. It is used to instantiate a
EDGETAM model according to the specified arguments, defining the memory attention, memory encoder, and image encoder
configs. Instantiating a configuration defaults will yield a similar configuration to that of the SAM 2.1 Hiera-tiny
[facebook/edgetam.1-hiera-tiny](https://huggingface.co/facebook/edgetam.1-hiera-tiny) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    vision_config (Union[`dict`, `EdgeTamVisionConfig`], *optional*):
        Dictionary of configuration options used to initialize [`EdgeTamVisionConfig`].
    prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
        Dictionary of configuration options used to initialize [`EdgeTamPromptEncoderConfig`].
    mask_decoder_config (Union[`dict`, `EdgeTamMaskDecoderConfig`], *optional*):
        Dictionary of configuration options used to initialize [`EdgeTamMaskDecoderConfig`].
    initializer_range (`float`, *optional*, defaults to 0.02):
        Standard deviation for parameter initialization.

Example:

```python
>>> from transformers import (
...     EdgeTamVisionConfig,
...     EdgeTamPromptEncoderConfig,
...     EdgeTamMaskDecoderConfig,
...     EdgeTamModel,
... )

>>> # Initializing a EdgeTamConfig with `"facebook/edgetam.1_hiera_tiny"` style configuration
>>> configuration = EdgeTamconfig()

>>> # Initializing a EdgeTamModel (with random weights) from the `"facebook/edgetam.1_hiera_tiny"` style configuration
>>> model = EdgeTamModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config

>>> # We can also initialize a EdgeTamConfig from a EdgeTamVisionConfig, EdgeTamPromptEncoderConfig, and EdgeTamMaskDecoderConfig

>>> # Initializing EDGETAM vision encoder, memory attention, and memory encoder configurations
>>> vision_config = EdgeTamVisionConfig()
>>> prompt_encoder_config = EdgeTamPromptEncoderConfig()
>>> mask_decoder_config = EdgeTamMaskDecoderConfig()

>>> config = EdgeTamConfig(vision_config, prompt_encoder_config, mask_decoder_config)
```Úedgetam)r
   rD   rT   c                 ó´  >• [         TU ]  " S0 UD6  Ub  UO0 nUb  UO0 nUb  UO0 n[        U[        5      (       a'  UR	                  SS5      US'   [
        US      " S0 UD6n[        U[        5      (       a  UR                  5       n[        U[        5      (       a  UR                  5       nXl	        [        S0 UD6U l
        [        S0 UD6U l        X@l        g )Nr   r   r   )r   r   r   r    r!   r   rB   Úto_dictrR   r
   rD   rT   r-   )r.   r
   rD   rT   r-   r/   r0   s         €r1   r   ÚEdgeTamConfig.__init__0  så   ø€ ô 	‰ÒÑ"˜6Ò"Ø)6Ñ)B™ÈˆØ9NÑ9ZÑ 5Ð`bÐØ5HÑ5TÑ1ÐZ\Ðäm¤T×*Ñ*Ø*7×*;Ñ*;¸LÐJ`Ó*aˆM˜,Ñ'Ü*¨=¸Ñ+FÒGÑXÈ-ÑXˆMÜÐ+Ô-G×HÑHØ$9×$AÑ$AÓ$CÐ!ÜÐ)Ô+C×DÑDØ"5×"=Ñ"=Ó"?Ðà*ÔÜ%?Ñ%XÐBWÑ%XˆÔ"Ü#;Ñ#RÐ>QÑ#RˆÔ à!2Õr3   )r-   rT   rD   r
   )NNNr6   )r7   r8   r9   r:   r;   r   r   rB   rR   r=   r   r>   r?   r@   s   @r1   rc   rc   ø   s8   ø† ñ.ð` €Jà#Ø!;Ø7ñ€Kð Ø"Ø Ø÷3õ 3r3   rc   )rc   r   rB   rR   N)
Úconfiguration_utilsr   Úautor   r   r   rB   rR   rc   Ú__all__r   r3   r1   Ú<module>rl      sU   ðõ, 4ß -ô^3Ð*ô ^3ôB1Ð!1ô 1ôhFCÐ/ô FCôRQ3Ð$ô Q3òh mr3   