
    cCib                        S r SSKrSSKJr  SSKJr  SSKrSSKJs  J	r
  SSKJrJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJr  SSKJr  SSKJrJrJrJr  SSKJ r J!r!  SSK"J#r#  SSK$J%r%  \RL                  " \'5      r( " S S\%5      r)\\" SS9 " S S\5      5       5       r* " S S\!5      r+ " S S\5      r, " S S\5      r- " S S\#5      r. " S  S!\5      r/ " S" S#\5      r0 " S$ S%\Rb                  5      r2 " S& S'\Rf                  5      r4 " S( S)\Rf                  5      r5 " S* S+\Rf                  5      r6\ " S, S-\5      5       r7\" S.S9 " S/ S0\ 5      5       r8/ S1Qr9g)2zPyTorch EoMT model.    N)	dataclass)Optional)Tensornn   )ACT2FN)ModelOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)check_model_inputs   )Dinov2EmbeddingsDinov2LayerDinov2LayerScaleDinov2PatchEmbeddings)#Mask2FormerForUniversalSegmentationMask2FormerLoss)SiglipAttention)	ViTConfigc                      ^  \ rS rSrSrSr                          SS\S\S\S\S\S	\S
\4U 4S jjjrSr	U =r
$ )
EomtConfig3   a  
This is the configuration class to store the configuration of a [`EomtForUniversalSegmentation`]. It is used to instantiate an EoMT model
according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the EoMT
[tue-mps/coco_panoptic_eomt_large_640](https://huggingface.co/tue-mps/coco_panoptic_eomt_large_640)
architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
    hidden_size (`int`, *optional*, defaults to 1024):
        Dimensionality of the hidden representations.
    num_hidden_layers (`int`, *optional*, defaults to 24):
        Number of hidden layers in the Transformer encoder.
    num_attention_heads (`int`, *optional*, defaults to 16):
        Number of attention heads in each attention layer.
    mlp_ratio (`int`, *optional*, defaults to 4):
        Ratio of the MLP hidden dimensionality to the hidden size.
    hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
        The non-linear activation function (function or string) in the encoder.
    hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
        The dropout probability for all fully connected layers in the embeddings and encoder.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    layer_norm_eps (`float`, *optional*, defaults to 1e-06):
        The epsilon used by the layer normalization layers.
    image_size (`int`, *optional*, defaults to 640):
        The size (resolution) of each input image.
    patch_size (`int`, *optional*, defaults to 16):
        The size (resolution) of each patch.
    num_channels (`int`, *optional*, defaults to 3):
        The number of input channels.
    layerscale_value (`float`, *optional*, defaults to 1.0):
        Initial value for the LayerScale parameter.
    drop_path_rate (`float`, *optional*, defaults to 0.0):
        The stochastic depth rate (drop path) used during training.
    num_upscale_blocks (`int`, *optional*, defaults to 2):
        Number of upsampling blocks used in the decoder or segmentation head.
    attention_dropout (`float`, *optional*, defaults to 0.0):
        Dropout probability applied after attention projection.
    use_swiglu_ffn (`bool`, *optional*, defaults to `False`):
        Whether to use the SwiGLU feedforward neural network.
    num_blocks (`int`, *optional*, defaults to 4):
        Number of feature blocks or stages in the architecture.
    no_object_weight (`float`, *optional*, defaults to 0.1):
        Loss weight for the 'no object' class in panoptic/instance segmentation.
    class_weight (`float`, *optional*, defaults to 2.0):
        Loss weight for classification targets.
    mask_weight (`float`, *optional*, defaults to 5.0):
        Loss weight for mask prediction.
    dice_weight (`float`, *optional*, defaults to 5.0):
        Loss weight for the dice loss component.
    train_num_points (`int`, *optional*, defaults to 12544):
        Number of points to sample for mask loss computation during training.
    oversample_ratio (`float`, *optional*, defaults to 3.0):
        Oversampling ratio used in point sampling for mask training.
    importance_sample_ratio (`float`, *optional*, defaults to 0.75):
        Ratio of points to sample based on importance during training.
    num_queries (`int`, *optional*, defaults to 200):
        Number of object queries in the Transformer.
    num_register_tokens (`int`, *optional*, defaults to 4):
        Number of learnable register tokens added to the transformer input.

Example:

```python
>>> from transformers import EomtConfig, EomtForUniversalSegmentation

>>> # Initialize configuration
>>> config = EomtConfig()

>>> # Initialize model
>>> model = EomtForUniversalSegmentation(config)

>>> # Access config
>>> config = model.config
```eomtno_object_weightclass_weightmask_weightdice_weighttrain_num_pointsoversample_ratioimportance_sample_ratioc                 *  > [         TU ]  " SUUUUUUUU	U
US.
UD6  U ?U ?U ?U ?U ?U ?X@l        Xl	        Xl
        Xl        Xl        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        g )N)
hidden_sizenum_hidden_layersnum_attention_headshidden_dropout_prob
hidden_actinitializer_rangelayer_norm_eps
image_size
patch_sizenum_channels )super__init__intermediate_sizeqkv_bias
pooler_actpooler_output_sizeencoder_strideattention_probs_dropout_prob	mlp_ratioattention_dropoutlayerscale_valuedrop_path_ratenum_upscale_blocksuse_swiglu_ffn
num_blocksr   r   r   r    r!   r"   r#   num_queriesnum_register_tokens)selfr%   r&   r'   r8   r)   r(   r*   r+   r,   r-   r.   r:   r;   r<   r9   r=   r>   r   r   r   r    r!   r"   r#   r?   r@   kwargs	__class__s                               _/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/eomt/modular_eomt.pyr1   EomtConfig.__init__   s    < 	 	
#/ 3 3!/)!!%	
 	
 "MO#-"!2 0,"4,$ 0(&& 0 0'>$&#6     )r9   r   r    r;   r#   r:   r   r8   r   r>   r?   r@   r<   r"   r!   r=   )i            gelu        g{Gz?ư>i  rH   r         ?rK   r   rK   FrI   g?g       @      @rN   i 1  g      @g      ?   rI   )__name__
__module____qualname____firstlineno____doc__
model_typefloatintr1   __static_attributes____classcell__rC   s   @rD   r   r   3   s    M^ J "%!   %"%)-7B7&  'B7( )B7* +B7, -B7. /B70  1B72 "'3B7 B7rF   r   a  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )custom_introc                   D   \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S	'   Sr\\\R$                        \	S
'   Srg)"EomtForUniversalSegmentationOutput   a  
loss (`torch.Tensor`, *optional*):
    The computed loss, returned when labels are present.
class_queries_logits (`torch.FloatTensor`):
    A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
    query. Note the `+ 1` is needed because we incorporate the null class.
masks_queries_logits (`torch.FloatTensor`):
    A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
    query.
last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
    Last hidden states (final feature map) of the last layer.
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
    shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
    Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`. Self and Cross Attentions weights from transformer decoder.
patch_offsets (`list[torch.Tensor]`, *optional*):
    list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
Nlossclass_queries_logitsmasks_queries_logitslast_hidden_statehidden_states
attentionspatch_offsetsr/   )rP   rQ   rR   rS   rT   r_   r   torchFloatTensor__annotations__r`   ra   rb   rc   tuplerd   re   listr   rX   r/   rF   rD   r]   r]      s    * )-D(5$$
%,8<(5#4#45<8<(5#4#45<59x 1 1298<M8E%"3"345<59Ju0012926M8D./6rF   r]   c                       \ rS rSrSrg)EomtLoss   r/   NrP   rQ   rR   rS   rX   r/   rF   rD   rl   rl          rF   rl   c                       \ rS rSrSrg)EomtPatchEmbeddings   r/   Nrn   r/   rF   rD   rq   rq      ro   rF   rq   c                   f    \ rS rSrS\SS4S jrS rS\R                  S\R                  4S jr	S	r
g)
EomtEmbeddings   configreturnNc                    [         R                  R                  U 5        Xl        UR                  U l        [         R
                  " [        R                  " SSUR                  5      5      U l	        [         R
                  " [        R                  " SUR                  UR                  5      5      U l        [        U5      U l        U R                  R                  n[         R                   " UR"                  5      U l        SUR                  -   U l        [         R(                  " X!R                  5      U l        U R-                  S[        R.                  " U5      R1                  S5      SS9  g )N   position_ids)ry   F)
persistent)r   Moduler1   rv   r-   	Parameterrf   randnr%   	cls_tokenzerosr@   register_tokensrq   patch_embeddingsnum_patchesDropoutr(   dropoutnum_prefix_tokens	Embeddingposition_embeddingsregister_bufferarangeexpand)rA   rv   r   s      rD   r1   EomtEmbeddings.__init__   s    
		4  ++ekk!Q8J8J&KL!||EKK6;U;UW]WiWi,jk 3F ;++77zz&"<"<=!"V%?%?!?#%<<=O=O#P ^U\\+-F-M-Mg-VchirF   c                     [        S5      e)NzNot needed for Eomt ModelAttributeErrorrA   s    rD   interpolate_pos_encoding'EomtEmbeddings.interpolate_pos_encoding  s    899rF   pixel_valuesc                    UR                   u  n    nU R                  R                  R                  R                  nU R                  UR                  US95      nU R                  R                  USS5      nU R                  R                  USS5      nXPR                  U R                  5      -   n[        R                  " XgU/SS9nU R                  U5      nU$ )N)dtyper{   ry   dim)shaper   
projectionweightr   tor   r   r   r   rz   rf   catr   )rA   r   
batch_size_target_dtype
embeddings
cls_tokensr   s           rD   forwardEomtEmbeddings.forward  s    *00
Aq!,,77>>DD**<???+NO
^^**:r2>
..55j"bI":":4;L;L"MM
YY
ZHaP
\\*-
rF   )r   rv   r   r   r   r-   r   r   )rP   rQ   rR   rS   r   r1   r   rf   r   r   rX   r/   rF   rD   rt   rt      s8    jz jd j :ELL U\\ rF   rt   c                       \ rS rSrSrg)EomtAttentioni   r/   Nrn   r/   rF   rD   r   r      ro   rF   r   c                       \ rS rSrSrg)EomtLayerScalei$  r/   Nrn   r/   rF   rD   r   r   $  ro   rF   r   c                   r    \ rS rSr SS\R
                  S\\R
                     S\R
                  4S jjrSrg)		EomtLayeri(  Nrc   	head_maskrw   c                 &   U R                  U5      nU R                  X25      u  pEU R                  U5      nU R                  U5      U-   nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU$ N)norm1	attentionlayer_scale1	drop_pathnorm2mlplayer_scale2)rA   rc   r   hidden_states_normself_attention_outputr   layer_outputs          rD   r   EomtLayer.forward)  s    
 "ZZ6#'>>2D#P  $ 1 12G H '<=M zz-0xx-((6 ~~l3mCrF   r/   r   )	rP   rQ   rR   rS   rf   r   r   r   rX   r/   rF   rD   r   r   (  s>     -1|| ELL) 
	 rF   r   c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )EomtLayerNorm2di@  c                 "   > [         TU ]  XUS9  g )N)epselementwise_affine)r0   r1   )rA   r.   r   affinerC   s       rD   r1   EomtLayerNorm2d.__init__A  s    6JrF   hidden_staterw   c                     UR                  SSSS5      n[        R                  " XR                  U R                  U R
                  U R                  5      nUR                  SSSS5      nU$ )Nr   r   r   ry   )permuteF
layer_normnormalized_shaper   biasr   )rA   r   s     rD   r   EomtLayerNorm2d.forwardD  s`    #++Aq!Q7||L2G2GVZV_V_aeaiaij#++Aq!Q7rF   r/   )rL   T)
rP   rQ   rR   rS   r1   rf   r   r   rX   rY   rZ   s   @rD   r   r   @  s)    KELL U\\  rF   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )EomtScaleLayeriK  rv   c           	         > [         TU ]  5         UR                  n[        R                  " X"SSS9U l        [        UR                     U l        [        R                  " UUSSUSS9U l
        [        U5      U l        g )Nr   )kernel_sizestrider   ry   F)r   paddinggroupsr   )r0   r1   r%   r   ConvTranspose2dconv1r   r)   
activationConv2dconv2r   layernorm2drA   rv   r%   rC   s      rD   r1   EomtScaleLayer.__init__L  ss    ((''aXYZ
 !2!23YY

 +;7rF   rc   rw   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   r   rA   rc   s     rD   r   EomtScaleLayer.forward\  sB    

=16

=1((7rF   )r   r   r   r   rP   rQ   rR   rS   r   r1   rf   r   r   rX   rY   rZ   s   @rD   r   r   K  s/    8z 8 U\\ ell  rF   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )EomtScaleBlockid  rv   c                    > [         TU ]  5         UR                  U l        [        R
                  " [        U R                  5       Vs/ s H  n[        U5      PM     sn5      U l        g s  snf r   )	r0   r1   r<   r>   r   
ModuleListranger   block)rA   rv   r   rC   s      rD   r1   EomtScaleBlock.__init__e  sM     33]]E$//DZ#[DZqN6$:DZ#[\
#[s   A*rc   rw   c                 <    U R                    H  nU" U5      nM     U$ r   )r   )rA   rc   r   s      rD   r   EomtScaleBlock.forwardj  s     ZZE!-0M  rF   )r   r>   r   rZ   s   @rD   r   r   d  s1    ]z ]
U\\ ell  rF   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )EomtMaskHeadip  rv   c                   > [         TU ]  5         UR                  n[        R                  " X"5      U l        [        R                  " X"5      U l        [        R                  " X"5      U l        [        UR                     U l
        g r   )r0   r1   r%   r   Linearfc1fc2fc3r   r)   r   r   s      rD   r1   EomtMaskHead.__init__q  s[    ((99[699[699[6 !2!23rF   rc   rw   c                     U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U5      nU$ r   r   r   r   r   r   s     rD   r   EomtMaskHead.forwardz  sD    (?@(?@/rF   r   r   rZ   s   @rD   r   r   p  s/    4z 4U\\ ell  rF   r   c                   j    \ rS rSr% Sr\\S'   SrSrSr	S/r
Sr\\S	.rS
\R                   SS4S jrSrg)EomtPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
rv   r   r   Fr   T)rc   rd   modulerw   Nc                 D   U R                   R                  n[        U[        R                  [        R
                  [        R                  45      (       a  [        R                  R                  UR                  [        R                  " S5      S9  UR                  by  [        R                  R                  UR                  5      u  p4US:  a  S[        R                  " U5      -  OSn[        R                  R                  UR                  U* U5        g g [        U[        R                  5      (       aJ  UR                  R                   R#                  S5        UR                  R                   R%                  5         g [        U[        R&                  5      (       ad  UR                  R                   R)                  SSS9  UR*                  b2  UR                  R                   UR*                     R%                  5         g g [        U[,        5      (       aL  [/        US5      (       a:  UR0                  R                   R#                  U R                   R2                  5        g g [        U[4        5      (       a  [        R                  R7                  UR8                  R                   R;                  [<        R>                  5      SUS9R;                  UR8                  R@                  5      UR8                  l        URB                  R                   R%                  5         g g )	N   )ar   ry   rM   rK   )meanstdlambda1)"rv   r*   
isinstancer   r   r   r   initkaiming_uniform_r   mathsqrtr   _calculate_fan_in_and_fan_outuniform_	LayerNormdatafill_zero_r   normal_padding_idxr   hasattrr   r:   rt   trunc_normal_r   r   rf   float32r   r   )rA   r   r   fan_inr   bounds         rD   _init_weights!EomtPreTrainedModel._init_weights  s!   kk++fryy"))R5G5GHIIGG$$V]]diil$C{{&GGAA&--P	17!DIIf--  ufe< ' --MM$$S)KK""$--MM&&CQ&7!!-""6#5#56<<> .//vy))##))$++*F*FG *//$&GG$9$9  %%((7cs %: %b!!''( ! ""''--/	 0rF   r/   )rP   rQ   rR   rS   rT   r   rh   base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpar   r   _can_record_outputsr   r}   r
  rX   r/   rF   rD   r   r     sR    
 $O&+#$N"#
0BII 0$ 0rF   r   zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                       \ rS rSrS\4S jrS rS rS\R                  4S jr
\S 5       r\" 5       \   SS
\	S\\\	      S\\\	      S\\\	      S\\   S\4S jj5       5       rSrg	)EomtForUniversalSegmentationi  rv   c                    [         R                  " X5        Xl        UR                  U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  UR                  5      U l        [        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        [%        U5      U l        [)        U5      U l        [        R,                  " UR                  UR.                  S-   5      U l        UR2                  UR4                  -  UR2                  UR4                  -  4U l        UR8                  UR:                  UR<                  S.U l        [A        XR>                  S9U l!        U RE                  S[F        RH                  " URJ                  5      5        U RM                  5         g s  snf )N)r   ry   )loss_cross_entropy	loss_mask	loss_dice)rv   weight_dictattn_mask_probs)'r
   r1   rv   r&   rt   r   r   r   r%   r+   	layernormr   r?   queryr   r   r   layersr   upscale_blockr   	mask_headr   
num_labelsclass_predictorr,   r-   	grid_sizer   r   r    r  rl   	criterionr   rf   onesr>   	post_init)rA   rv   r   s      rD   r1   %EomtForUniversalSegmentation.__init__  sr     .!'!9!9(0f&8&8f>S>ST\\&"4"4f6H6HI
mmfF^F^@_$`@_1Yv%6@_$`a+F3%f-!yy););V=N=NQR=RS ++v/@/@@&BSBSW]WhWhBhi"("5"5++++.
 "=M=MN.

6;L;L0MN% %as   G0c                 .    U R                   R                  $ r   )r   r   r   s    rD   get_input_embeddings1EomtForUniversalSegmentation.get_input_embeddings  s    ///rF   c                     [        S5      e)NzNote needed for Eomt Model.r   r   s    rD   get_auxiliary_logits1EomtForUniversalSegmentation.get_auxiliary_logits  s    :;;rF   logitsc                    US S 2S U R                   R                  2S S 24   nU R                  U5      nUS S 2U R                   R                  U R                  R                  -   S 2S S 24   nUR                  SS5      nUR                  " UR                  S   S/U R                  Q76 nU R                  U5      nU R                  U5      n[        R                  " SX$5      nXS4$ )Nry   r   r   r{   zbqc, bchw -> bqhw)rv   r?   r   r   r   	transposereshaper   r!  r  r  rf   einsum)rA   r,  query_tokensclass_logitsprefix_tokensmask_logitss         rD   predict$EomtForUniversalSegmentation.predict  s    a!:4;;#:#:!:A=>++L9q$++"9"9DOO<]<]"]"_abbc%//15%--m.A.A!.DbZ4>>Z~~l3**=9ll#6T((rF   c                 ~    US:  a6  [         R                  " U R                  S   X$S9U:  nSU S S 2S U2US 24   U'   U $ )Nry   r   )device)rf   randr   )	attn_maskprobnum_query_tokensencoder_start_tokensr8  random_queriess         rD   _disable_attention_mask4EomtForUniversalSegmentation._disable_attention_mask  sT    !8"ZZ	(:<L\_ccN VWIa***,@,AAB>RrF   Nr   mask_labelsclass_labelsre   rB   rw   c           	         Su  pgSnUc  [        S5      eU R                  U5      n	[        U R                  5       GH  u  pXR                  U R
                  R                  -
  :X  am  U R                  R                  SSS2SS24   R                  U	R                  S   SS5      R                  U	R                  5      n[        R                  " X4SS9n	XR                  U R
                  R                  -
  :  Ga  U R                  (       d7  U R                   XR                  -
  U R
                  R                  -      S:  Ga  U R#                  U	5      nU R%                  U5      u  pXn4-  nX4-  n[        R&                  " U	R                  S   U	R                  S   U	R                  S   U	R                  [        R(                  S9n[*        R,                  " XR.                  S	S
9nUR1                  UR3                  S5      UR3                  S5      S5      nU R
                  R4                  nUU R                  R6                  -   nUS:  USS2SU2US24'   U R9                  UU R                   XR                  -
  U R
                  R                  -      UUUR                  S9nUSS2SS4   R                  SU R
                  R:                  SS5      nUR=                  5       R?                  U) S5      nU" X5      n	GM     U R#                  U	5      nU R%                  U5      u  pXn4-  nX4-  nSnUb@  Ub=  Sn[A        Xg5       H,  u  pU RC                  UUUUSS9nUU RE                  U5      -  nM.     [G        UUUUUS9$ )a'  
mask_labels (`list[torch.Tensor]`, *optional*):
    list of mask labels of shape `(num_labels, height, width)` to be fed to a model
class_labels (`list[torch.LongTensor]`, *optional*):
    list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
    labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
patch_offsets (`list[torch.Tensor]`, *optional*):
    list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
)r/   r/   Nz You have to specify pixel_valuesr   r{   ry   r   )r8  r   bilinear)sizemode)r;  r<  r=  r8  .g    erK   )ra   r`   rA  rB  auxiliary_predictions)r_   ra   r`   rb   re   )$
ValueErrorr   	enumerater  r&   rv   r>   r  r   r   r   r   r8  rf   r   trainingr  r  r5  r#  boolr   interpolater!  viewrE  r?   r   r?  r'   rV   masked_fillzipget_loss_dictget_lossr]   )rA   r   rA  rB  re   rB   masks_queries_logits_per_layerclass_queries_logits_per_layerattention_maskrc   idxlayer_moduler  norm_hidden_statesra   r`   interpolated_logitsr<  r=  sequence_outputr_   	loss_dicts                         rD   r   $EomtForUniversalSegmentation.forward  s   ( JPF&?@@5!*4;;!7C,,t{{/E/EEE

))$1*5<<]=P=PQR=SUWY[\__`m`t`tu %		5*@a H,,t{{/E/EEE!5!5c<R<R6RUYU`U`UkUk6k!lop!p%)^^M%B"=A\\J\=]:$.2II..2II.!&!''*!''*!''*(//**" '(mm4H~~dn&o#&9&>&>',,Q/1D1I1I!1Lb'# $(;;#:#: '7$//:[:['[$ ObdeNeq"3#3"35I5JJK "&!=!="--c4J4J.JT[[McMc.cd%5)=)00 "> " "04!=!D!DRIhIhjlnp!q!/!5!5!7!C!C^OUY!Z(GM] "8` ..759\\/5R2&*AA&&*AA&"|'?D>A.?:$ !..)=)= +!-*. / 	 i00? 2!5!5-'
 	
rF   )r   rv   r"  r   r!  r  r  r  r&   r  r  r  )NNN)rP   rQ   rR   rS   r   r1   r'  r*  rf   r   r5  staticmethodr?  r   r   r   rj   r   r   r]   r   rX   r/   rF   rD   r  r    s    z 80<)ell )     /3/304e
e
 d6l+e
 tF|,	e

  V-e
 +,e
 
,e
  e
rF   r  )r   r   r  ):rT   r   dataclassesr   typingr   rf   torch.nn.functionalr   
functionalr   r   activationsr   
file_utilsr	   modeling_utilsr
   processing_utilsr   utilsr   r   r   utils.genericr   dinov2.modeling_dinov2r   r   r   r    mask2former.modeling_mask2formerr   r   siglip.modeling_siglipr   vit.configuration_vitr   
get_loggerrP   loggerr   r]   rl   rq   rt   r   r   r   r   r   r}   r   r   r   r   r  __all__r/   rF   rD   <module>rn     s|     !      ! . & 
 0  d 4 - 
		H	%T7 T7n 	7 7	 7>	 		/ 	!% !H	O 		% 	 0bll RYY 2	RYY 	299 " '0/ '0 '0T 
e
#F e

e
P PrF   