
    cCi`                        S SK rS SKrS SKJr  S SKJrJr  S SKr	S SK
r
S SKJs  Jr  S SK
JrJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJrJ r   SSK!J"r"  SSK#J$r$  \" 5       (       a  S SK%J&r&  \ " 5       (       a  S SK'J(r(  S SK)J*r*  \\" SS9 " S S\5      5       5       r+ SNS\
R                  S\
R                  S\
R                  4S jjr,S\S\S\4S jr-S\
R                  S\
R                  S\
R                  4S jr. " S S\R^                  5      r0S\S\S \1S\4S! jr2S\
R                  S\
R                  S \1S\
R                  4S" jr3 " S# S$\R^                  5      r4 " S% S&\R^                  5      r5 " S' S(\R^                  5      r6 SOS)\R^                  S*\
R                  S+\
R                  S,\
R                  S-\\
R                     S.\7S/\74S0 jjr8 " S1 S2\R^                  5      r9 " S3 S4\R^                  5      r:SPS5\
R                  S6\7S7\;S\
R                  4S8 jjr< " S9 S:\R^                  5      r= " S; S<\R^                  5      r> " S= S>\R^                  5      r? " S? S@\5      r@ " SA SB\R                  5      rB " SC SD\R^                  5      rC " SE SF\R^                  5      rD " SG SH\R^                  5      rE\ " SI SJ\5      5       rF\" SKS9 " SL SM\F5      5       rGSJSM/rHg)Q    N)	dataclass)CallableOptional)Tensornn   )ACT2FN)ModelOutputis_scipy_availablerequires_backends)GradientCheckpointingLayer)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringis_accelerate_available)check_model_inputs   )
EomtConfig)linear_sum_assignment)PartialState)reducea  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )custom_introc                   D   \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S	'   Sr\\\R$                        \	S
'   Srg)"EomtForUniversalSegmentationOutput2   a  
loss (`torch.Tensor`, *optional*):
    The computed loss, returned when labels are present.
class_queries_logits (`torch.FloatTensor`):
    A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
    query. Note the `+ 1` is needed because we incorporate the null class.
masks_queries_logits (`torch.FloatTensor`):
    A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
    query.
last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
    Last hidden states (final feature map) of the last layer.
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
    shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
    Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`. Self and Cross Attentions weights from transformer decoder.
patch_offsets (`list[torch.Tensor]`, *optional*):
    list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
Nlossclass_queries_logitsmasks_queries_logitslast_hidden_statehidden_states
attentionspatch_offsets )__name__
__module____qualname____firstlineno____doc__r   r   torchFloatTensor__annotations__r   r    r!   r"   tupler#   r$   listr   __static_attributes__r%       `/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/eomt/modeling_eomt.pyr   r   2   s    * )-D(5$$
%,8<(5#4#45<8<(5#4#45<59x 1 1298<M8E%"3"345<59Ju0012926M8D./6r1   r   input_featurespoint_coordinatesreturnc                     UR                  5       S:X  a  SnUR                  S5      n[        R                  R                  R
                  " U SU-  S-
  40 UD6nU(       a  UR                  S5      nU$ )a  
A wrapper around `torch.nn.functional.grid_sample` to support 3D point_coordinates tensors.

Args:
    input_features (`torch.Tensor` of shape (batch_size, channels, height, width)):
        A tensor that contains features map on a height * width grid
    point_coordinates (`torch.Tensor` of shape (batch_size, num_points, 2) or (batch_size, grid_height, grid_width,:
    2)):
        A tensor that contains [0, 1] * [0, 1] normalized point coordinates
    add_dim (`bool`):
        boolean value to keep track of added dimension

Returns:
    point_features (`torch.Tensor` of shape (batch_size, channels, num_points) or (batch_size, channels,
    height_grid, width_grid):
        A tensor that contains features for points in `point_coordinates`.
r   T   g       @      ?)dim	unsqueezer+   r   
functionalgrid_samplesqueeze)r3   r4   add_dimkwargspoint_featuress        r2   sample_pointrA   ]   st    ( !#-77: XX((44^SK\E\_bEbmflmN'//2r1   inputslabelsc                    U R                  5       R                  S5      n S[        R                  " XR                  5      -  nU R                  S5      SS2S4   UR                  S5      SSS24   -   nSUS-   US-   -  -
  nU$ )a  
A pair wise version of the dice loss, see `dice_loss` for usage.

Args:
    inputs (`torch.Tensor`):
        A tensor representing a mask
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).

Returns:
    `torch.Tensor`: The computed loss between each pairs.
r   r7   N)sigmoidflattenr+   matmulTsum)rB   rC   	numeratordenominatorr   s        r2   pair_wise_dice_lossrM   }   sz     ^^%%a(FELL22I**R.D)FJJrN47,CCK	A+/22DKr1   c                 Z   U R                   S   n[        R                  " SS9nU" U [        R                  " U 5      5      nU" U [        R
                  " U 5      5      n[        R                  " XB-  UR                  5      n[        R                  " XR-  SU-
  R                  5      nXg-   nU$ )a  
A pair wise version of the cross entropy loss, see `sigmoid_cross_entropy_loss` for usage.

Args:
    inputs (`torch.Tensor`):
        A tensor representing a mask.
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).

Returns:
    loss (`torch.Tensor`): The computed loss between each pairs.
r   none	reduction)shaper   BCEWithLogitsLossr+   	ones_like
zeros_likerH   rI   )	rB   rC   height_and_width	criterioncross_entropy_loss_poscross_entropy_loss_negloss_posloss_negr   s	            r2   $pair_wise_sigmoid_cross_entropy_lossr\      s     ||A$$v6I&vuv/FG&vu/?/?/GH||2EvxxPH||2EF
~~VHDKr1   c                      ^  \ rS rSrSr SS\S\S\S\4U 4S jjjr\R                  " 5       S\R                  S	\R                  S
\R                  S\R                  S\\\
      4
S j5       rSrU =r$ )EomtHungarianMatcher   aa  This class computes an assignment between the labels and the predictions of the network.

For efficiency reasons, the labels don't include the no_object. Because of this, in general, there are more
predictions than labels. In this case, we do a 1-to-1 matching of the best predictions, while the others are
un-matched (and thus treated as non-objects).

cost_class	cost_mask	cost_dice
num_pointsc                    > [         TU ]  5         US:X  a  US:X  a  US:X  a  [        S5      eX@l        Xl        X l        X0l        g)a  Creates the matcher

Params:
    cost_class (`float`, *optional*, defaults to 1.0):
        Relative weight of the classification error in the matching cost.
    cost_mask (`float`, *optional*,  defaults to 1.0):
        This is the relative weight of the focal loss of the binary mask in the matching cost.
    cost_dice (`float`, *optional*, defaults to 1.0):
        This is the relative weight of the dice loss of the binary mask in the matching cost.
    num_points (`int`, *optional*, defaults to 12544):
        No. of points to sample on which the mask loss will be calculated. The same set of K points are
        uniformly sampled for all prediction and ground truth masks to construct the cost matrix for bipartite
        matching.
r   zAll costs can't be 0N)super__init__
ValueErrorrc   r`   ra   rb   )selfr`   ra   rb   rc   	__class__s        r2   rf   EomtHungarianMatcher.__init__   sC    " 	?yA~)q.344$$""r1   r    r   mask_labelsclass_labelsr5   c           
         / nUR                   S   n[        U5       GH  nX'   R                  S5      nX   n	USS2XG   4   * n
X7   R                  U	5      nUSS2S4   nU	SS2S4   n	[        R
                  " SU R                  SU	R                  S9nUR                  UR                   S   SS5      n[        XSS9R                  S5      nUR                  U	R                   S   SS5      n[        XSS9R                  S5      n	[        X5      n[        X5      nU R                  U-  U R                  U
-  -   U R                  U-  -   n[        R                   " U[        R"                  " S	5      5      n[        R$                  " U[        R"                  " S
5      5      n[        R&                  " US5      n[)        UR+                  5       5      nUR-                  U5        GM     U VVs/ s HL  u  nn[        R.                  " U[        R0                  S9[        R.                  " U[        R0                  S94PMN     nnnU$ s  snnf )a  
Params:
    masks_queries_logits (`torch.Tensor`):
        A tensor of dim `batch_size, num_queries, num_labels` with the classification logits.
    class_queries_logits (`torch.Tensor`):
        A tensor of dim `batch_size, num_queries, height, width` with the predicted masks.
    class_labels (`torch.Tensor`):
        A tensor of dim `num_target_boxes` (where num_target_boxes is the number of ground-truth objects in the
        target) containing the class labels.
    mask_labels (`torch.Tensor`):
        A tensor of dim `num_target_boxes, height, width` containing the target masks.

Returns:
    matched_indices (`list[tuple[Tensor]]`): A list of size batch_size, containing tuples of (index_i, index_j)
    where:
        - index_i is the indices of the selected predictions (in order)
        - index_j is the indices of the corresponding selected labels (in order)
    For each batch element, it holds:
        len(index_i) = len(index_j) = min(num_queries, num_target_boxes).
r   rE   Nr   r7   deviceFalign_cornersg    _Bg    _©dtype)rR   rangesoftmaxtor+   randrc   ro   repeatrA   r=   r\   rM   ra   r`   rb   minimumtensormaximum
nan_to_numr   cpuappend	as_tensorint64)rh   r    r   rk   rl   indices
batch_sizei
pred_probs	pred_maskr`   target_maskr4   target_coordinatespred_coordinatesra   rb   cost_matrixassigned_indicesjmatched_indicess                        r2   forwardEomtHungarianMatcher.forward   s   8 *, *//2
z"A-088<J,/I %Q%788J%.++I6K%ag.K!!T'*I !&

1dooqIYIY Z!2!9!9+:K:KA:NPQST!U&{V[\ddefgK077	8JAqQ$YPUV^^_`aI =YTI+ICI..94t7SSVZVdVdgpVppK--U\\$5GHK--U\\%5HIK**;:K0EkooFW0XNN+,? #F ho
gn_c_`bcU__Qekk2EOOAU[[4YZgn 	 
 
s   ,AIr`   rb   ra   rc   )r8   r8   r8   i 1  )r&   r'   r(   r)   r*   floatintrf   r+   no_gradr   r/   r.   r   r0   __classcell__ri   s   @r2   r^   r^      s     jo##27#JO#cf# #4 ]]_D#llD $llD \\	D
 llD 
eFm	D Dr1   r^   	num_masksc                     U R                  5       R                  S5      nSX1-  R                  S5      -  nUR                  S5      UR                  S5      -   nSUS-   US-   -  -
  nUR                  5       U-  nU$ )a  
Compute the DICE loss, similar to generalized IOU for masks as follows:

$$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x \cap y }{x \cup y + 1}} $$

In practice, since `labels` is a binary mask, (only 0s and 1s), dice can be computed as follow

$$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x * y }{x + y + 1}} $$

Args:
    inputs (`torch.Tensor`):
        A tensor representing a mask.
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).
    num_masks (`int`):
        The number of masks present in the current batch, used for normalization.

Returns:
    `torch.Tensor`: The computed loss.
r   r7   rE   )rF   rG   rJ   )rB   rC   r   probsrK   rL   r   s          r2   	dice_lossr     sx    , NN$$Q'EU^((,,I))B-&**R.0K	A+/22D88:	!DKr1   c                     [         R                  " SS9nU" X5      nUR                  S5      R                  5       U-  nU$ )aX  
Args:
    inputs (`torch.Tensor`):
        A float tensor of arbitrary shape.
    labels (`torch.Tensor`):
        A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
        (0 for the negative class and 1 for the positive class).

Returns:
    loss (`torch.Tensor`): The computed loss.
rO   rP   r   )r   rS   meanrJ   )rB   rC   r   rW   cross_entropy_lossr   s         r2   sigmoid_cross_entropy_lossr   7  sB     $$v6I"62""1%))+i7DKr1   c                     ^  \ rS rSrS\S\\\4   4U 4S jjrS\	\	\
      S\	\
   4S jrS\	\   S\\\4   4S	 jrS
\S\	\   S\\R                      S\\\4   4S jrS\R                  S\	\R                     S\\R                      S\
S\\\R                  4   4
S jrS rS rS\R                  S\R                  4S jrS\R                  S\
S\
S\S\R                  4
S jr SS\R                  S
\R                  S\	\R                     S\	\R                     S\\\\R                  4      S\\\R                  4   4S jjrS\R                  S\R4                  S\R                  4S jrSrU =r$ ) EomtLossiK  configweight_dictc                   > [         TU ]  5         [        U S/5        UR                  U l        X l        UR
                  U l        [        R                  " U R                  S-   5      nU R                  US'   U R                  SU5        UR                  U l        UR                  U l        UR                  U l        [        UR                  UR                   UR"                  U R                  S9U l        g)a   
The Eomt Loss. The loss is computed very similar to DETR. The process happens in two steps: 1) we
compute hungarian assignment between ground truth masks and the outputs of the model 2) we supervise each pair
of matched ground-truth / prediction (supervise class and mask)

Args:
    config (`EomtConfig`):
        The configuration for Eomt model also containing loss calculation specific parameters.
    weight_dict (`dict[str, float]`):
        A dictionary of weights to be applied to the different losses.
scipyr   rE   empty_weightr   N)re   rf   r   
num_labelsr   no_object_weighteos_coefr+   onesregister_buffertrain_num_pointsrc   oversample_ratioimportance_sample_ratior^   class_weightdice_weightmask_weightmatcher)rh   r   r   r   ri   s       r2   rf   EomtLoss.__init__L  s     	$	* ++& //zz$//A"56==R^\: !11 & 7 7'-'E'E$+**((((	
r1   sizesr5   c                 p    US   nUSS   H'  n[        U5       H  u  pE[        X$   U5      X$'   M     M)     U$ )Nr   r   )	enumeratemax)rh   r   maxessublistindexitems         r2   _max_by_axisEomtLoss._max_by_axiso  sC    aQRyG(1"5<6  2 ! r1   tensorsc                 T   U R                  U Vs/ s H  n[        UR                  5      PM     sn5      n[        U5      /U-   nUu  pVpxUS   R                  n	US   R
                  n
[        R                  " XIU
S9n[        R                  " XWU4[        R                  U
S9n[        XU5       Ho  u  p-nUS UR                  S   2S UR                  S   2S UR                  S   24   R                  U5        SUS UR                  S   2S UR                  S   24'   Mq     X4$ s  snf )Nr   rs   ro   r   r7   F)r   r/   rR   lenrs   ro   r+   zerosr   boolzipcopy_)rh   r   rz   max_sizebatch_shaper   _heightwidthrs   ro   padded_tensorspadding_maskspadded_tensorpadding_masks                  r2   _pad_images_to_max_in_batch$EomtLoss._pad_images_to_max_in_batchw  s'   $$w%OwVd6<<&8w%OP7|nx/'2$
v
  ""[fM

J#>ejjY_`36wP]3^/F<+FLLO+->v||A->@Q&,,q/@QQRXXY_`AFL*6<<?*,=fll1o,==> 4_ ,, &Ps   D%r   rl   r   c           	         UnUR                   u  pVn[        R                  " U R                  S9nU R	                  U5      n	[
        R                  " [        X#5       V
VVs/ s H  u  n
u  p{X   PM     snnn
5      n[
        R                  " XV4U R                  [
        R                  UR                  S9nXU	'   UR                  SS5      nU" X5      nSU0nU$ s  snnn
f )a-  Compute the losses related to the labels using cross entropy.

Args:
    class_queries_logits (`torch.Tensor`):
        A tensor of shape `batch_size, num_queries, num_labels`
    class_labels (`list[torch.Tensor]`):
        List of class labels of shape `(labels)`.
    indices (`tuple[np.array])`:
        The indices computed by the Hungarian matcher.

Returns:
    `dict[str, Tensor]`: A dict of `torch.Tensor` containing the following key:
    - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
)weight)
fill_valuers   ro   r   r7   loss_cross_entropy)rR   r   CrossEntropyLossr   $_get_predictions_permutation_indicesr+   catr   fullr   r   ro   	transpose)rh   r   rl   r   pred_logitsr   num_queriesr   rW   idxtargetr   target_classes_otarget_classespred_logits_transposedloss_celossess                    r2   loss_labelsEomtLoss.loss_labels  s    " +%0%6%6"
''t/@/@A	77@ 99-0-GH-G>66AVY-GH
 %$//]h]o]o
 /s!,!6!6q!!<2C&0 Is    Cr    rk   r   c                   ^  T R                  U5      nT R                  U5      nX   nT R                  U5      u  pX   nUSS2S4   nUSS2S4   n[        R                  " 5          T R                  UU 4S jT R                  T R                  T R                  5      n
[        XSS9R                  S5      nSSS5        [        UW
SS9R                  S5      n[        UWU5      [        XU5      S.nAAU$ ! , (       d  f       NF= f)a$  Compute the losses related to the masks using sigmoid_cross_entropy_loss and dice loss.

Args:
    masks_queries_logits (`torch.Tensor`):
        A tensor of shape `(batch_size, num_queries, height, width)`.
    mask_labels (`torch.Tensor`):
        List of mask labels of shape `(labels, height, width)`.
    indices (`tuple[np.array])`:
        The indices computed by the Hungarian matcher.
    num_masks (`int)`:
        The number of masks, used for normalization.

Returns:
    losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing two keys:
    - **loss_mask** -- The loss computed using sigmoid cross entropy loss on the predicted and ground truth.
      masks.
    - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth,
      masks.
Nc                 &   > TR                  U 5      $ N)calculate_uncertainty)logitsrh   s    r2   <lambda>%EomtLoss.loss_masks.<locals>.<lambda>  s    t99&Ar1   Frp   r   )	loss_mask	loss_dice)r    _get_targets_permutation_indicesr   r+   r   sample_points_using_uncertaintyrc   r   r   rA   r=   r   r   )rh   r    rk   r   r   src_idxtgt_idx
pred_maskstarget_masksr   r4   point_labelspoint_logitsr   s   `             r2   
loss_masksEomtLoss.loss_masks  s   4 ;;GD77@)2
 ::;G#,  4(
#AtG, ]]_ $ D DA%%,,! (W\]eefghL  $J0AQVW__`ab 4L,PYZ"<yI

 ) _s   &AC77
Dc                    [         R                  " [        U5       VVVs/ s H  u  nu  p4[         R                  " X25      PM      snnn5      n[         R                  " U VVs/ s H  u  p4UPM	     snn5      nXV4$ s  snnnf s  snnf r   r+   r   r   	full_like)rh   r   r   srcr   batch_indicespredictions_indicess          r2   r   -EomtLoss._get_predictions_permutation_indices  sh    		iX_N`"aN`{q(35??3#:N`"ab#iiW(EW#W(EF11 #b(E   %A<#B
c                    [         R                  " [        U5       VVVs/ s H  u  nu  p4[         R                  " XB5      PM      snnn5      n[         R                  " U VVs/ s H  u  p4UPM	     snn5      nXV4$ s  snnnf s  snnf r   r   )rh   r   r   r   tgtr   target_indicess          r2   r   )EomtLoss._get_targets_permutation_indices  sg    		iX_N`"aN`{q(15??3#:N`"ab#@HQC#@A,, #b#@r   r   c                 4    [         R                  " U5      * nU$ )a2  
In Eomt paper, uncertainty is estimated as L1 distance between 0.0 and the logit prediction in 'logits'
for the foreground class in `classes`.

Args:
    logits (`torch.Tensor`):
    A tensor of shape (R, 1, ...) for class-specific or class-agnostic, where R is the total number of predicted masks in all images and C is:
    the number of foreground classes. The values are logits.

Returns:
    scores (`torch.Tensor`): A tensor of shape (R, 1, ...) that contains uncertainty scores with the most
    uncertain locations having the highest uncertainty score.
)r+   abs)rh   r   uncertainty_scoress      r2   r   EomtLoss.calculate_uncertainty  s      %yy01!!r1   rc   r   r   c           
      h   UR                   S   n[        X4-  5      n[        R                  " XgSUR                  S9n[        XSS9n	U" U	5      n
[        XS-  5      nX;-
  n[        R                  " U
SS2SSS24   USS9S   nU[        R                  " U[        R                  UR                  S	9-  nXSS2S4   -  nUR                  S
S5      UR                  S
5      SS24   R                  XkS5      nUS:  a5  [        R                  " U[        R                  " XlSUR                  S9/SS9nU$ )a  
This function is meant for sampling points in [0, 1] * [0, 1] coordinate space based on their uncertainty. The
uncertainty is calculated for each point using the passed `uncertainty function` that takes points logit
prediction as input.

Args:
    logits (`float`):
        Logit predictions for P points.
    uncertainty_function:
        A function that takes logit predictions for P points and returns their uncertainties.
    num_points (`int`):
        The number of points P to sample.
    oversample_ratio (`int`):
        Oversampling parameter.
    importance_sample_ratio (`float`):
        Ratio of points that are sampled via importance sampling.

Returns:
    point_coordinates (`torch.Tensor`):
        Coordinates for P sampled points.
r   r7   rn   Frp   Nr   )kr9   r   rE   r9   )rR   r   r+   rw   ro   rA   topkarangelongviewr   )rh   r   uncertainty_functionrc   r   r   	num_boxesnum_points_sampledr4   r   point_uncertaintiesnum_uncertain_pointsnum_random_pointsr   shifts                  r2   r   (EomtLoss.sample_points_using_uncertainty  s3   < LLO	 !>? "JJyaPVP]P]^#FUS2<@"#:#GH&=jj,Q1W59MSTUVWX"U\\)5::V\VcVc%ddQW~-222q9#((2,/JOOPYqrsq  %		"EJJyQW]WdWd$ef! ! r1   auxiliary_predictionsc                    U R                  XX45      nU R                  XDS   R                  S9n0 U R                  XXg5      EU R	                  X$U5      EnUbk  [        U5       H\  u  pU
S   nU
S   nU R                  XX45      nUR                  5        VVs0 s H  u  pU SU	 3U_M     nnnUR                  U5        M^     U$ s  snnf )a  
This performs the loss computation.

Args:
    masks_queries_logits (`torch.Tensor`):
        A tensor of shape `(batch_size, num_queries, height, width)`.
    class_queries_logits (`torch.Tensor`):
        A tensor of shape `(batch_size, num_queries, num_labels)`.
    mask_labels (`torch.Tensor`):
        List of mask labels of shape `(labels, height, width)`.
    class_labels (`list[torch.Tensor]`):
        List of class labels of shape `(labels)`.
    auxiliary_predictions (`dict[str, torch.Tensor]`, *optional*):
        if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], then it contains the logits from
        the inner layers of the EomtMaskedAttentionDecoder.

Returns:
    losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing three keys:
    - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
    - **loss_mask** -- The loss computed using sigmoid cross_entropy loss on the predicted and ground truth
      masks.
    - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth
      masks.
    if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], the dictionary contains additional
    losses for each auxiliary predictions.
r   rn   r    r   r   )	r   get_num_masksro   r   r   r   r   itemsupdate)rh   r    r   rk   rl   r  r   r   r   r   aux_outputs	loss_dictkeyvalues                 r2   r   EomtLoss.forward<  s    H ,,3;e&&|O<R<R&S	%
oo2T%
37K%

 !,$-.C$D '23I'J$'23I'J$ LL)=U`o	EN__EVWEVzsuAcU^U2EV	Wi( %E  Xs   Cro   c                 (   [        S U 5       5      n[        R                  " U[        R                  US9nSn[	        5       (       a3  [
        R                  0 :w  a  [        U5      n[        5       R                  n[        R                  " X4-  SS9nU$ )z[
Computes the average number of target masks across the batch, for normalization purposes.
c              3   8   #    U  H  n[        U5      v   M     g 7fr   )r   ).0classess     r2   	<genexpr>)EomtLoss.get_num_masks.<locals>.<genexpr>w  s     ALGLs   r   r   )min)
rJ   r+   r   r   r   r   _shared_stater   num_processesclamp)rh   rl   ro   r   
world_sizes        r2   r  EomtLoss.get_num_maskss  sv     ALAA	OOIU[[P	
"$$))R/"9-	)^99
KK	 6A>	r1   )r   r   r   r   rc   r   r   r   )r&   r'   r(   r)   r   dictstrr   rf   r/   r   r   r   r.   r   nparrayr   r+   r   r   r   r   r   r   r   ro   r  r0   r   r   s   @r2   r   r   K  s   !
z !
S%Z8H !
F$tCy/ d3i -4< -E&RX.DY -" $* :>v, QVWYW_W_Q` 	c6k	 D<#ll< %,,'< rxx	<
 < 
c5<<	 <|2-"ELL "U\\ ""5!5! 	5!
 5! "'5! 
5!z DH5#ll5 $ll5 %,,'	5
 5<<(5  (S%,,->(?@5 
c5<<	 5n%,,  QVQ]Q]  r1   r   c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )EomtPatchEmbeddingsi  z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   kernel_sizestride)re   rf   
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterablenum_patchesr   Conv2d
projection)rh   r   r4  r5  r6  r7  r<  ri   s          r2   rf   EomtPatchEmbeddings.__init__  s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:ir1   pixel_valuesr5   c                     UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  U5      R	                  S5      R                  SS5      nU$ )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r7   )rR   r6  rg   r>  rG   r   )rh   r@  r6  
embeddingss       r2   r   EomtPatchEmbeddings.forward  sx    #))!,,,,!../yaI  __\2::1=GG1M
r1   )r4  r6  r<  r5  r>  )r&   r'   r(   r)   r*   rf   r+   r   r   r0   r   r   s   @r2   r/  r/    s.    jELL U\\  r1   r/  c                   r   ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
EomtEmbeddingsi  zE
Construct the CLS token, mask token, position and patch embeddings.
r   r5   Nc                   > [         TU ]  5         Xl        UR                  U l        [        R
                  " [        R                  " SSUR                  5      5      U l	        [        R
                  " [        R                  " SUR                  UR                  5      5      U l        [        U5      U l        U R                  R                  n[        R                   " UR"                  5      U l        SUR                  -   U l        [        R(                  " X!R                  5      U l        U R-                  S[        R.                  " U5      R1                  S5      SS9  g )Nr   position_ids)r   rE   F)
persistent)re   rf   r   r5  r   	Parameterr+   randnr7  	cls_tokenr   num_register_tokensregister_tokensr/  patch_embeddingsr<  Dropouthidden_dropout_probdropoutnum_prefix_tokens	Embeddingposition_embeddingsr   r	  expand)rh   r   r<  ri   s      r2   rf   EomtEmbeddings.__init__  s     ++ekk!Q8J8J&KL!||EKK6;U;UW]WiWi,jk 3F ;++77zz&"<"<=!"V%?%?!?#%<<=O=O#P ^U\\+-F-M-Mg-Vchir1   r@  c                    UR                   u  n    nU R                  R                  R                  R                  nU R                  UR                  US95      nU R                  R                  USS5      nU R                  R                  USS5      nXPR                  U R                  5      -   n[        R                  " XgU/SS9nU R                  U5      nU$ )Nrr   rE   r   r  )rR   rO  r>  r   rs   rv   rL  rV  rN  rU  rH  r+   r   rR  )rh   r@  r   r   target_dtyperC  
cls_tokensrN  s           r2   r   EomtEmbeddings.forward  s    *00
Aq!,,77>>DD**<???+NO
^^**:r2>
..55j"bI":":4;L;L"MM
YY
ZHaP
\\*-
r1   )rL  r   rR  rS  rO  r5  rU  rN  )r&   r'   r(   r)   r*   r   rf   r+   r   r   r0   r   r   s   @r2   rF  rF    s>    jz jd j ELL U\\  r1   rF  modulequeryr  r  attention_maskscalingrR  c                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )NrE   )r9   rs   )ptrainingr   r7   )r+   rH   r   r   r;   ru   float32rv   rs   rR  rc  
contiguous)
r\  r]  r  r  r^  r_  rR  r?   attn_weightsattn_outputs
             r2   eager_attention_forwardrh    s     <<}}R'<=GL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$r1   c            
          ^  \ rS rSrSrU 4S jr S	S\R                  S\\R                     S\	\R                  \\R                     4   4S jjr
SrU =r$ )
EomtAttentioni  z=Multi-headed attention from 'Attention Is All You Need' paperc                    > [         TU ]  5         Xl        UR                  U l        UR
                  U l        U R                  U R                  -  U l        U R                  U R                  -  U R                  :w  a&  [        SU R                   SU R                   S35      eU R                  S-  U l	        UR                  U l        SU l        [        R                  " U R                  U R                  5      U l        [        R                  " U R                  U R                  5      U l        [        R                  " U R                  U R                  5      U l        [        R                  " U R                  U R                  5      U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      F)re   rf   r   r7  	embed_dimnum_attention_heads	num_headshead_dimrg   scaleattention_dropoutrR  	is_causalr   Lineark_projv_projq_projout_projrh   r   ri   s     r2   rf   EomtAttention.__init__  s   ++33$..8==4>>)T^^;MdnnM] ^NN#2'  ]]D(
//ii?ii?ii?		$..$..Ar1   r"   r^  r5   c                 2   UR                   u  pEnU R                  U5      nU R                  U5      nU R                  U5      n	UR	                  XEU R
                  U R                  5      R                  SS5      nUR	                  XEU R
                  U R                  5      R                  SS5      nU	R	                  XEU R
                  U R                  5      R                  SS5      n	[        n
U R                  R                  S:w  a  [        U R                  R                     n
U
" U UUU	UU R                  U R                  U R                  (       d  SOU R                  S9u  pUR!                  XEU5      R#                  5       nU R%                  U5      nX4$ )z#Input shape: Batch x Time x Channelr   r7   eager        )rr  r_  rR  )rR   rv  rt  ru  r  rn  ro  r   rh  r   _attn_implementationr   rr  rp  rc  rR  reshapere  rw  )rh   r"   r^  r?   r   
seq_lengthrl  querieskeysvaluesattention_interfacerg  rf  s                r2   r   EomtAttention.forward  sS    -:,?,?)
	++m,{{=)]+,,zt~~t}}U__`acdeyyOYYZ[]^_ZT^^T]]S]]^_abc(?;;++w6"9$++:Z:Z"[$7nnJJ#}}C$,,	%
! "))*)LWWYmmK0((r1   )r   rR  rl  ro  rr  rt  rn  rw  rv  rp  ru  r   )r&   r'   r(   r)   r*   rf   r+   r   r   r.   r   r0   r   r   s   @r2   rj  rj    s[    GB. 26$)||$) !.$)
 
u||Xell33	4$) $)r1   rj  c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )EomtLayerScalei  r5   c                    > [         TU ]  5         [        R                  " UR                  [
        R                  " UR                  5      -  5      U l        g r   )	re   rf   r   rJ  layerscale_valuer+   r   r7  lambda1rx  s     r2   rf   EomtLayerScale.__init__  s8    ||F$;$;ejjI[I[>\$\]r1   hidden_statec                     XR                   -  $ r   r  rh   r  s     r2   r   EomtLayerScale.forward#  s    ll**r1   r  r5   N
r&   r'   r(   r)   rf   r+   r   r   r0   r   r   s   @r2   r  r    s)    ^+ELL +U\\ + +r1   r  input	drop_probrc  c                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
r|  r   r   )r   r   )rR   ndimr+   rw   rs   ro   floor_div)r  r  rc  	keep_probrR   random_tensoroutputs          r2   	drop_pathr  '  s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr1   c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )EomtDropPathi;  zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr  r5   c                 .   > [         TU ]  5         Xl        g r   )re   rf   r  )rh   r  ri   s     r2   rf   EomtDropPath.__init__>  s    "r1   r"   c                 B    [        XR                  U R                  5      $ r   )r  r  rc  rh   r"   s     r2   r   EomtDropPath.forwardB  s    FFr1   c                      SU R                    3$ )Nzp=r  rh   s    r2   
extra_reprEomtDropPath.extra_reprE  s    DNN#$$r1   r  r   )r&   r'   r(   r)   r*   r   r   rf   r+   r   r   r+  r  r0   r   r   s   @r2   r  r  ;  sQ    b#(5/ #T # #GU\\ Gell G%C % %r1   r  c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )EomtMLPiI  r5   c                 z  > [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[
        R                  " X$SS9U l        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [
        R                  " XCSS9U l        g )NTbias)re   rf   r7  r   	mlp_ratior   rs  fc1r8  
hidden_actr+  r	   
activationfc2rh   r   in_featuresout_featureshidden_featuresri   s        r2   rf   EomtMLP.__init__J  s    %+%7%77f0063C3CCD99[Ef''--$V%6%67DO$//DO99_Fr1   r  c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r  r  r  r  s     r2   r   EomtMLP.forwardU  s2    xx-|4xx-r1   )r  r  r  r  r  r   s   @r2   r  r  I  s)    	GELL U\\  r1   r  c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )EomtSwiGLUFFNi\  r5   c                 $  > [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[        US-  S-  5      S-   S-  S-  n[
        R                  " USU-  SS9U l        [
        R                  " XCSS9U l        g )Nr7   r         Tr  )	re   rf   r7  r   r  r   rs  
weights_inweights_outr  s        r2   rf   EomtSwiGLUFFN.__init__]  s    %+%7%77f0063C3CCD2Q67!;AAE))K_1D4P99_Nr1   r  c                     U R                  U5      nUR                  SSS9u  p#[        R                  R	                  U5      U-  nU R                  U5      $ )Nr7   rE   r  )r  chunkr   r;   silur  )rh   r  x1x2hiddens        r2   r   EomtSwiGLUFFN.forwardf  sQ    |4##A2#.##B'",''r1   )r  r  r  r  r   s   @r2   r  r  \  s)    O(ELL (U\\ ( (r1   r  c                      ^  \ rS rSrSrS\SS4U 4S jjr SS\R                  S\	\R                     S\R                  4S	 jjr
S
rU =r$ )	EomtLayerim  zCThis corresponds to the Block class in the original implementation.r   r5   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        [        U5      U l        [        U5      U l
        UR                  S:  a  [        UR                  5      O[        R                  " 5       U l        [        R                  " UR                  UR
                  S9U l        UR                   (       a  [#        U5      U l        O['        U5      U l        [        U5      U l        g )Nepsr|  )re   rf   r   	LayerNormr7  layer_norm_epsnorm1rj  	attentionr  layer_scale1drop_path_rater  Identityr  norm2use_swiglu_ffnr  mlpr  layer_scale2rx  s     r2   rf   EomtLayer.__init__p  s    \\&"4"4&:O:OP
&v.*62@F@U@UX[@[f&;&;<acalalan\\&"4"4&:O:OP
  $V,DHvDH*62r1   r"   	head_maskc                 &   U R                  U5      nU R                  X25      u  pEU R                  U5      nU R                  U5      U-   nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU$ r   )r  r  r  r  r  r  r  )rh   r"   r  hidden_states_normself_attention_outputr   layer_outputs          r2   r   EomtLayer.forward  s    
 "ZZ6#'>>2D#P  $ 1 12G H '<=M zz-0xx-((6 ~~l3mCr1   )r  r  r  r  r  r  r  r   )r&   r'   r(   r)   r*   r   rf   r+   r   r   r   r0   r   r   s   @r2   r  r  m  sU    M3z 3d 3& -1|| ELL) 
	 r1   r  c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )EomtLayerNorm2di  c                 "   > [         TU ]  XUS9  g )N)r  elementwise_affine)re   rf   )rh   r6  r  affineri   s       r2   rf   EomtLayerNorm2d.__init__  s    6Jr1   r  r5   c                     UR                  SSSS5      n[        R                  " XR                  U R                  U R
                  U R                  5      nUR                  SSSS5      nU$ )Nr   r7   r   r   )permuteF
layer_normnormalized_shaper   r  r  r  s     r2   r   EomtLayerNorm2d.forward  s`    #++Aq!Q7||L2G2GVZV_V_aeaiaij#++Aq!Q7r1   r%   )gư>Tr  r   s   @r2   r  r    s)    KELL U\\  r1   r  c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )EomtScaleLayeri  r   c           	         > [         TU ]  5         UR                  n[        R                  " X"SSS9U l        [        UR                     U l        [        R                  " UUSSUSS9U l
        [        U5      U l        g )Nr7   r1  r   r   F)r2  paddinggroupsr  )re   rf   r7  r   ConvTranspose2dconv1r	   r  r  r=  conv2r  layernorm2drh   r   r7  ri   s      r2   rf   EomtScaleLayer.__init__  ss    ((''aXYZ
 !2!23YY

 +;7r1   r"   r5   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r  r  r  r  r  s     r2   r   EomtScaleLayer.forward  sB    

=16

=1((7r1   )r  r  r  r  r&   r'   r(   r)   r   rf   r+   r   r   r0   r   r   s   @r2   r  r    s/    8z 8 U\\ ell  r1   r  c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )EomtScaleBlocki  r   c                    > [         TU ]  5         UR                  U l        [        R
                  " [        U R                  5       Vs/ s H  n[        U5      PM     sn5      U l        g s  snf r   )	re   rf   num_upscale_blocks
num_blocksr   
ModuleListrt   r  blockrh   r   r   ri   s      r2   rf   EomtScaleBlock.__init__  sM     33]]E$//DZ#[DZqN6$:DZ#[\
#[s   A*r"   r5   c                 <    U R                    H  nU" U5      nM     U$ r   )r  )rh   r"   r  s      r2   r   EomtScaleBlock.forward  s     ZZE!-0M  r1   )r  r  r  r   s   @r2   r  r    s1    ]z ]
U\\ ell  r1   r  c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )EomtMaskHeadi  r   c                   > [         TU ]  5         UR                  n[        R                  " X"5      U l        [        R                  " X"5      U l        [        R                  " X"5      U l        [        UR                     U l
        g r   )re   rf   r7  r   rs  r  r  fc3r	   r  r  r  s      r2   rf   EomtMaskHead.__init__  s[    ((99[699[699[6 !2!23r1   r"   r5   c                     U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U5      nU$ r   r  r  r  r  r  s     r2   r   EomtMaskHead.forward  sD    (?@(?@/r1   r
  r  r   s   @r2   r  r    s/    4z 4U\\ ell  r1   r  c                   j    \ rS rSr% Sr\\S'   SrSrSr	S/r
Sr\\S	.rS
\R                   SS4S jrSrg)EomtPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
r   eomtr@  Fr  T)r"   r#   r\  r5   Nc                 D   U R                   R                  n[        U[        R                  [        R
                  [        R                  45      (       a  [        R                  R                  UR                  [        R                  " S5      S9  UR                  by  [        R                  R                  UR                  5      u  p4US:  a  S[        R                  " U5      -  OSn[        R                  R                  UR                  U* U5        g g [        U[        R                  5      (       aJ  UR                  R                   R#                  S5        UR                  R                   R%                  5         g [        U[        R&                  5      (       ad  UR                  R                   R)                  SSS9  UR*                  b2  UR                  R                   UR*                     R%                  5         g g [        U[,        5      (       aL  [/        US5      (       a:  UR0                  R                   R#                  U R                   R2                  5        g g [        U[4        5      (       a  [        R                  R7                  UR8                  R                   R;                  [<        R>                  5      SUS9R;                  UR8                  R@                  5      UR8                  l        URB                  R                   R%                  5         g g )	N   )ar   r   r8   r|  )r   stdr  )"r   initializer_ranger8  r   rs  r=  r  initkaiming_uniform_r   mathsqrtr  _calculate_fan_in_and_fan_outuniform_r  datafill_zero_rT  normal_padding_idxr  hasattrr  r  rF  trunc_normal_rL  rv   r+   rd  rs   rN  )rh   r\  r  fan_inr   bounds         r2   _init_weights!EomtPreTrainedModel._init_weights  s!   kk++fryy"))R5G5GHIIGG$$V]]diil$C{{&GGAA&--P	17!DIIf--  ufe< ' --MM$$S)KK""$--MM&&CQ&7!!-""6#5#56<<> .//vy))##))$++*F*FG *//$&GG$9$9  %%((7cs %: %b!!''( ! ""''--/	 0r1   r%   )r&   r'   r(   r)   r*   r   r-   base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpar  rj  _can_record_outputsr   Moduler#  r0   r%   r1   r2   r  r    sR    
 $O&+#$N"#
0BII 0$ 0r1   r  zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                   2  ^  \ rS rSrSrS\4U 4S jjrS\S\S\S\S	\\	\4   S
\\	\4   4S jr
S\\	\4   S
\4S jr\" 5       \   SS\S\\\      S\\\      S\\\      S\\   S
\4S jj5       5       rS rS\R                  4S jr\S 5       rSrU =r$ )EomtForUniversalSegmentationi  r@  r   c                   > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  UR                  5      U l        [        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        [%        U5      U l        [)        U5      U l        [        R,                  " UR                  UR.                  S-   5      U l        UR2                  UR4                  -  UR2                  UR4                  -  4U l        UR8                  UR:                  UR<                  S.U l        [A        XR>                  S9U l!        U RE                  S[F        RH                  " URJ                  5      5        U RM                  5         g s  snf )Nr  r   )r   r   r   )r   r   attn_mask_probs)'re   rf   r   num_hidden_layersrF  rC  r   r  r7  r  	layernormrT  r   r]  r  rt   r  layersr  upscale_blockr  	mask_headrs  r   class_predictorr4  r5  	grid_sizer   r   r   r   r   rW   r   r+   r   r  	post_initr   s      r2   rf   %EomtForUniversalSegmentation.__init__  sr    !'!9!9(0f&8&8f>S>ST\\&"4"4f6H6HI
mmfF^F^@_$`@_1Yv%6@_$`a+F3%f-!yy););V=N=NQR=RS ++v/@/@@&BSBSW]WhWhBhi"("5"5++++.
 "=M=MN.

6;L;L0MN% %as   =G*r    r   rk   rl   r  r5   c                     U R                  UUUUUS9nU R                  R                  5        H)  u  pxUR                  5        H  u  pXy;   d  M  X-  n
M     M+     U$ )Nr    r   rk   rl   r  )rW   r   r  )rh   r    r   rk   rl   r  r  r  r   loss_keyr   s              r2   get_loss_dict*EomtForUniversalSegmentation.get_loss_dict'  sj     (,~~!5!5#%"7 (6 (
	  ++113KC"+//"3?ND #4 4
 r1   r  c                 4    [        UR                  5       5      $ r   )rJ   r  )rh   r  s     r2   get_loss%EomtForUniversalSegmentation.get_loss?  s    9##%&&r1   r$   r?   c           	         Su  pgSnUc  [        S5      eU R                  U5      n	[        U R                  5       GH  u  pXR                  U R
                  R                  -
  :X  am  U R                  R                  SSS2SS24   R                  U	R                  S   SS5      R                  U	R                  5      n[        R                  " X4SS9n	XR                  U R
                  R                  -
  :  Ga  U R                  (       d7  U R                   XR                  -
  U R
                  R                  -      S:  Ga  U R#                  U	5      nU R%                  U5      u  pXn4-  nX4-  n[        R&                  " U	R                  S   U	R                  S   U	R                  S   U	R                  [        R(                  S9n[*        R,                  " XR.                  S	S
9nUR1                  UR3                  S5      UR3                  S5      S5      nU R
                  R4                  nUU R                  R6                  -   nUS:  USS2SU2US24'   U R9                  UU R                   XR                  -
  U R
                  R                  -      UUUR                  S9nUSS2SS4   R                  SU R
                  R:                  SS5      nUR=                  5       R?                  U) S5      nU" X5      n	GM     U R#                  U	5      nU R%                  U5      u  pXn4-  nX4-  nSnUb@  Ub=  Sn[A        Xg5       H,  u  pU RC                  UUUUSS9nUU RE                  U5      -  nM.     [G        UUUUUS9$ )a'  
mask_labels (`list[torch.Tensor]`, *optional*):
    list of mask labels of shape `(num_labels, height, width)` to be fed to a model
class_labels (`list[torch.LongTensor]`, *optional*):
    list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
    labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
patch_offsets (`list[torch.Tensor]`, *optional*):
    list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
)r%   r%   Nz You have to specify pixel_valuesr   rE   r   r  )ro   rs   bilinear)sizemode)probnum_query_tokensencoder_start_tokensro   .g    er|  r:  )r   r    r   r!   r$   )$rg   rC  r   r2  r0  r   r  r]  r   rV  rR   rv   ro   r+   r   rc  r/  r1  predictr   r   r  interpolater6  r  rC  r   rS  _disable_attention_maskrm  r   masked_fillr   r<  r?  r   )rh   r@  rk   rl   r$   r?   masks_queries_logits_per_layerclass_queries_logits_per_layerr^  r"   r   layer_moduler]  norm_hidden_statesr    r   interpolated_logitsrF  rG  sequence_outputr   r  s                         r2   r   $EomtForUniversalSegmentation.forwardB  s   ( JPF&?@@5!*4;;!7C,,t{{/E/EEE

))$1*5<<]=P=PQR=SUWY[\__`m`t`tu %		5*@a H,,t{{/E/EEE!5!5c<R<R6RUYU`U`UkUk6k!lop!p%)^^M%B"=A\\J\=]:$.2II..2II.!&!''*!''*!''*(//**" '(mm4H~~dn&o#&9&>&>',,Q/1D1I1I!1Lb'# $(;;#:#: '7$//:[:['[$ ObdeNeq"3#3"35I5JJK "&!=!="--c4J4J.JT[[McMc.cd%5)=)00 "> " "04!=!D!DRIhIhjlnp!q!/!5!5!7!C!C^OUY!Z(GM] "8` ..759\\/5R2&*AA&&*AA&"|'?D>A.?:$ !..)=)= +!-*. / 	 i00? 2!5!5-'
 	
r1   c                 .    U R                   R                  $ r   )rC  rO  r  s    r2   get_input_embeddings1EomtForUniversalSegmentation.get_input_embeddings  s    ///r1   r   c                    US S 2S U R                   R                  2S S 24   nU R                  U5      nUS S 2U R                   R                  U R                  R                  -   S 2S S 24   nUR                  SS5      nUR                  " UR                  S   S/U R                  Q76 nU R                  U5      nU R                  U5      n[        R                  " SX$5      nXS4$ )Nr   r7   r   rE   zbqc, bchw -> bqhw)r   r   r5  rC  rS  r   r~  rR   r6  r4  r3  r+   einsum)rh   r   query_tokensclass_logitsprefix_tokensmask_logitss         r2   rH  $EomtForUniversalSegmentation.predict  s    a!:4;;#:#:!:A=>++L9q$++"9"9DOO<]<]"]"_abbc%//15%--m.A.A!.DbZ4>>Z~~l3**=9ll#6T((r1   c                 ~    US:  a6  [         R                  " U R                  S   X$S9U:  nSU S S 2S U2US 24   U'   U $ )Nr   r   rn   )r+   rw   rR   )	attn_maskrE  rF  rG  ro   random_queriess         r2   rJ  4EomtForUniversalSegmentation._disable_attention_mask  sT    !8"ZZ	(:<L\_ccN VWIa***,@,AAB>Rr1   )r5  r   rW   rC  r6  r1  r2  r4  r0  r]  r3  r   )NNN)r&   r'   r(   r)   r&  r   rf   r   r*  r+  r<  r?  r   r   r   r/   r   r   r   r   rT  r+   rH  staticmethodrJ  r0   r   r   s   @r2   r-  r-    s:    %Oz 8$ % 	
   $CK0 
c6k	0'$sF{"3 ' '  /3/304e
e
 d6l+e
 tF|,	e

  V-e
 +,e
 
,e
  e
N0)ell )   r1   r-  )F)r|  )r|  F)Icollections.abcr9  r  dataclassesr   typingr   r   numpyr,  r+   torch.nn.functionalr   r;   r  r   activationsr	   
file_utilsr
   r   r   modeling_layersr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   configuration_eomtr   scipy.optimizer   
accelerater   accelerate.utilsr   r   rA   rM   r\   r+  r^   r   r   r   r   r/  rF  r   rh  rj  r  r   r  r  r  r  r  r  r  r  r  r  r  r-  __all__r%   r1   r2   <module>rs     s  ,   ! %      ! L L 9 F & P P / * 4'' 	7 7	 7B LQLL5:\\
\\@  6 , u|| X]XdXd 8g299 gTf f   <u|| U\\ VY ^c^j^j (uryy up	")) B"RYY "X %II%<<% 
% <<	%
 U\\*% % %.;)BII ;)|+RYY +U\\ e T V[VbVb (%299 %bii &(BII ("'* 'Tbll RYY 2	RYY 	299 " '0/ '0 '0T 
#6 
D !"@
Ar1   