
    cCi                   d   S r SSKJr  SSKrSSKrSSKrSSKJr  SSKJ	r	  SSK
Jr  SSKJrJr  SSKrSS	KJr  SS
KJrJrJrJrJrJr  SSKJr  SSKJrJrJrJ r J!r!J"r"  SSK#J$r$  \!RJ                  " \&5      r'Sr(Sr)/ SQr*Sr+Sr,\	 " S S\5      5       r-\	 " S S\5      5       r.\	 " S S\5      5       r/\	 " S S\5      5       r0SMS jr1SNS jr2 SO         SPS jjr3 " S S\Rh                  Rj                  5      r6 " S  S!\Rh                  Rj                  5      r7 " S" S#\Rh                  Rj                  5      r8 " S$ S%\Rh                  Rj                  5      r9 " S& S'\Rh                  Rj                  5      r: " S( S)\Rh                  Rj                  5      r; " S* S+\Rh                  Rj                  5      r< " S, S-\Rh                  Rj                  5      r= " S. S/\Rh                  Rj                  5      r> " S0 S1\Rh                  Rj                  5      r? " S2 S3\Rh                  Rj                  5      r@ " S4 S5\Rh                  Rj                  5      rA " S6 S7\5      rBS8rCS9rDSQS: jrE " S; S<\Rh                  Rj                  5      rF\ " S= S>\Rh                  Rj                  5      5       rG\" S?\C5       " S@ SA\B5      5       rH " SB SC\Rh                  Rj                  5      rI " SD SE\Rh                  Rj                  5      rJ\" SF\C5       " SG SH\B5      5       rK\" SI\C5       " SJ SK\B\5      5       rL/ SLQrMg)RzTF 2.0 Swin Transformer model.    )annotationsN)Iterable)	dataclass)partial)AnyCallable   )ACT2FN)TFPreTrainedModelTFSequenceClassificationLossget_initializerkeraskeras_serializableunpack_inputs)
shape_list)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
SwinConfigr   z&microsoft/swin-tiny-patch4-window7-224)r   1   i   ztabby, tabby catc                  R    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   S
r
g)TFSwinEncoderOutputD   a  
Swin encoder's outputs, with potential hidden states and attentions.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
Ntf.Tensor | Nonelast_hidden_statetuple[tf.Tensor, ...] | Nonehidden_states
attentionsreshaped_hidden_states )__name__
__module____qualname____firstlineno____doc__r   __annotations__r!   r"   r#   __static_attributes__r$       c/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/swin/modeling_tf_swin.pyr   r   D   s7    2 +/'.26M/6/3J,3;?8?r,   r   c                  `    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S\S
'   Srg)TFSwinModelOutpute   a  
Swin model's outputs that also contains a pooling of the last hidden states.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    pooler_output (`tf.Tensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
        Average pooling of the last layer hidden-state.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
Nr   r   pooler_outputr    r!   r"   r#   r$   )r%   r&   r'   r(   r)   r   r*   r1   r!   r"   r#   r+   r$   r,   r-   r/   r/   e   sB    6 +/'.&*M#*26M/6/3J,3;?8?r,   r/   c                  p    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S\S
'   \S 5       rSrg)TFSwinMaskedImageModelingOutput   a3  
Swin masked image model outputs.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `bool_masked_pos` is provided):
        Masked image modeling (MLM) loss.
    reconstruction (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
        Reconstructed pixel values.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
Nr   lossreconstructionr    r!   r"   r#   c                P    [         R                  " S[        5        U R                  $ )Nzlogits attribute is deprecated and will be removed in version 5 of Transformers. Please use the reconstruction attribute to retrieve the final output instead.)warningswarnFutureWarningr6   selfs    r-   logits&TFSwinMaskedImageModelingOutput.logits   s%    ]	

 """r,   r$   )r%   r&   r'   r(   r)   r5   r*   r6   r!   r"   r#   propertyr=   r+   r$   r,   r-   r3   r3      sS    6 "D
!'+N$+26M/6/3J,3;?8?# #r,   r3   c                  `    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S\S
'   Srg)TFSwinImageClassifierOutput   am  
Swin outputs for image classification.

Args:
    loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
        `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
Nr   r5   r=   r    r!   r"   r#   r$   )r%   r&   r'   r(   r)   r5   r*   r=   r!   r"   r#   r+   r$   r,   r-   rA   rA      sA    6 "D
!#F#26M/6/3J,3;?8?r,   rA   c           	         [        U 5      u  p#pE[        R                  " U X#U-  XU-  X45      n [        R                  " U S5      n[        R                  " USXU45      nU$ )z*
Partitions the given input into windows.
r   r   r	            )r   tfreshape	transpose)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowss          r-   window_partitionrS      sg     /9.G+JJJ	{*K+9M{iM ll=*<=Gjj2{"NOGNr,   c           	     l   [         R                  " U 5      S   n[         R                  " X#-  X-  -  [         R                  5      n[         R                  R                  XE5      n[         R                  " XX!-  X1-  XS45      n [         R                  " U S5      n [         R                  " XX#S45      n U $ )z7
Merges windows to produce higher resolution features.
r   rH   rD   )rI   shapecastint32mathfloordivrJ   rK   )rR   rM   rO   rP   xyrN   s          r-   window_reverser\      s     	!A
+";<bhhGA!!!'Jjjf3U5I;eghG ll7$67Gjjvb"ABGNr,   c                   US:X  d  U(       d  U $ SU-
  n[        U 5      n[        U5      nUS   /S/US-
  -  -   n[        R                  R	                  U5      n[        R
                  " X:*  SS5      nUS:  a  U(       a  X-  nX-  $ )zZ
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
        r   r         ?)r   lenrI   randomuniformwhere)	input	drop_probtrainingscale_by_keep	keep_probinput_shapendimrU   random_tensors	            r-   	drop_pathrl      s     CxIIU#K{D^sdQh//EII%%e,MHH]7cBM3="  r,   c                  \   ^  \ rS rSrSrSSU 4S jjjrS	S jr S
       SS jjrSrU =r	$ )TFSwinEmbeddingsi
  zO
Construct the patch and position embeddings. Optionally, also the mask token.
c                  > [         TU ]  " S0 UD6  [        USS9U l        U R                  R                  U l        U R                  R
                  U l        UR                  U l        X l        UR                  U l	        [        R                  R                  SSS9U l        [        R                  R                  UR                  SS9U l        Xl        g )Npatch_embeddingsnamenormh㈵>)rr   epsilondropoutr$   )super__init__TFSwinPatchEmbeddingsrp   num_patches	grid_size
patch_grid	embed_dimuse_mask_tokenuse_absolute_embeddingsr   layersLayerNormalizationrs   Dropouthidden_dropout_probrv   config)r<   r   r~   kwargs	__class__s       r-   rx   TFSwinEmbeddings.__init__  s    "6" 5fCU V00<<//99)),'-'E'E$LL333N	||++F,F,FY+Wr,   c                   U R                   (       a$  U R                  SSU R                  4SSS9U l        OS U l        U R                  (       a1  U R                  SU R
                  S-   U R                  4SSS9U l        OS U l        U R                  (       a  g SU l        [        U SS 5      bN  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        [        U S	S 5      be  [        R                  " U R                  R                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U S
S 5      bO  [        R                  " U R                   R                  5         U R                   R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)Nr   zeros
mask_tokenrU   initializerrr   positional_embeddings)r   rr   Trp   rs   rv   )r~   
add_weightr}   r   r   rz   position_embeddingsbuiltgetattrrI   
name_scoperp   rr   buildrs   r   rv   r<   ri   s     r-   r   TFSwinEmbeddings.build  sv   "ooQ4>>4JX_frosDO"DO'''+D$$q($..9wUl (7 (D$ (,D$::
4+T2>t4499:%%++D1 ;4&2tyy~~.		tT[[-B-B CD /4D)5t||001""4( 21 6 ;: /. 21s$   G33G&G.
G
G+.
G<c                   U R                  XS9u  pEU R                  XCS9n[        U5      u  pgnUb}  [        R                  " U R
                  US5      n	[        R                  " XS5      n	[        R                  " US5      n
[        R                  " XR                  5      n
USU
-
  -  X-  -   nU R                  b  X@R                  -   nU R                  XCS9nXE4$ )Nrf   r   r   rH   r_   )rp   rs   r   rI   repeatr   expand_dimsrV   dtyper   rv   )r<   pixel_valuesbool_masked_posrf   
embeddingsoutput_dimensionsrN   seq_len_mask_tokensmasks              r-   callTFSwinEmbeddings.call6  s     )-(=(=l(=(^%
YYzY=
!+J!7
Q&))DOOZCK))K!<K>>/26D774!2!23D#sTz2[5GGJ##/#&>&>>J\\*\@
,,r,   )r   r   rv   r}   r   rs   rz   rp   r|   r   r   r~   F)r   r   r~   boolreturnNoneri   tf.TensorShaper   r   )NF)r   	tf.Tensorr   bool | Nonerf   r   r   !tuple[tf.Tensor, tuple[int, int]])
r%   r&   r'   r(   r)   rx   r   r   r+   __classcell__r   s   @r-   rn   rn   
  sH     )6 ^c-%-8C-VZ-	*- -r,   rn   c                  N   ^  \ rS rSrSrU 4S jrSS jrS	S
S jjrSS jrSr	U =r
$ )ry   iN  z
Image to Patch Embedding.
c                B  > [         TU ]  " S0 UD6  UR                  UR                  pCUR                  UR
                  pe[        U[        R                  R                  5      (       a  UOX34n[        U[        R                  R                  5      (       a  UOXD4nUS   US   -  US   US   -  -  nX0l        X@l        XPl        Xpl
        US   US   -  US   US   -  4U l        [        R                  R                  UU R                  U R                  SSS9U l        g )Nr   r   valid
projection)filterskernel_sizestridespaddingrr   r$   )rw   rx   
image_size
patch_sizerQ   r}   
isinstancecollectionsabcr   rz   r{   r   r   Conv2Dr   )	r<   r   r   r   r   rQ   hidden_sizerz   r   s	           r-   rx   TFSwinPatchEmbeddings.__init__S  s   "6"!'!2!2F4E4EJ$*$7$79I9Ik#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&$Q-:a=8*Q-:VW=:XY,,--OO . 
r,   c                N   X0R                   S   -  S:w  a=  SSSSU R                   S   X0R                   S   -  -
  44n[        R                  " X5      nX R                   S   -  S:w  a=  SSSU R                   S   X R                   S   -  -
  4S4n[        R                  " X5      nU$ )Nr   r   r   r   )r   rI   pad)r<   r   rO   rP   
pad_valuess        r-   	maybe_padTFSwinPatchEmbeddings.maybe_padh  s    ??1%%* &&1dooa6H5SbSbcdSeKe6e2fgJ66,;LOOA&&!+ &1dooa.@6OO\]L^C^.^*_aghJ66,;Lr,   c                   [        U5      u  p4pV[        R                  " 5       (       a  X@R                  :w  a  [	        S5      eU R                  XU5      n[        R                  " US5      nU R                  XS9n[        R                  " US5      n[        U5      u  ppVXV4n
[        R                  " XxU	S45      n[        R                  " US5      nXz4$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   rE   r	   r   r   r   r	   r   rE   rH   r   rE   r   )	r   rI   executing_eagerlyrQ   
ValueErrorr   rK   r   rJ   )r<   r   rf   r   rQ   rO   rP   r   rN   channelsr   s              r-   r   TFSwinPatchEmbeddings.callq  s    )3L)A&!!l6G6G&Gw  ~~lEB ||L,?__\_E
 \\*l;
.8.D+
f#OZZ
2,FG
\\*i8
,,r,   c                .   U R                   (       a  g SU l         [        U SS 5      b]  [        R                  " U R                  R
                  5         U R                  R                  S S S U R                  /5        S S S 5        g g ! , (       d  f       g = f)NTr   )r   r   rI   r   r   rr   r   rQ   r   s     r-   r   TFSwinPatchEmbeddings.build  sm    ::
4t,8t334%%tT49J9J&KL 54 944s   *B
B)r   r{   r   rQ   rz   r   r   )r   r   rO   intrP   r   r   r   r   )r   r   rf   r   r   r   Nr%   r&   r'   r(   r)   rx   r   r   r   r+   r   r   s   @r-   ry   ry   N  s#    
*-0M Mr,   ry   c                  f   ^  \ rS rSrSr S       S	U 4S jjjrS
S jrSSS jjrSS jrSr	U =r
$ )TFSwinPatchMergingi  a  
Patch Merging Layer.

Args:
    input_resolution (`tuple[int]`):
        Resolution of input feature.
    dim (`int`):
        Number of input channels.
    norm_layer (`keras.layer.Layer`, *optional*, defaults to `keras.layers.LayerNormalization`):
        Normalization layer class.
c                   > [         TU ]  " S	0 UD6  Xl        X l        [        R
                  R                  SU-  SSS9U l        Uc$  [        R
                  R                  SSS9U l	        g U" SS9U l	        g )
NrE   F	reduction)use_biasrr   rt   rs   ru   rr   rq   r$   )
rw   rx   input_resolutiondimr   r   Denser   r   rs   )r<   r   r   
norm_layerr   r   s        r-   rx   TFSwinPatchMerging.__init__  sn     	"6" 0++AGe++V7767RDI"/DIr,   c                    US-  S:H  =(       d    US-  S:H  nU(       a&  SSUS-  4SUS-  4S4n[         R                  " X5      nU$ )NrE   r   r   r   )rI   r   )r<   rL   rO   rP   
should_padr   s         r-   r   TFSwinPatchMerging.maybe_pad  sQ    qjAo:519>
 1fqj/Auqy>6JJFF==Mr,   c                   Uu  pE[        U5      u  pgn[        R                  " XXEU45      nU R                  XU5      nUS S 2SS S2SS S2S S 24   n	US S 2SS S2SS S2S S 24   n
US S 2SS S2SS S2S S 24   nUS S 2SS S2SS S2S S 24   n[        R                  " XX/S5      n[        R                  " XSSU-  45      nU R                  XS9nU R                  XS9nU$ )Nr   rE   r   rH   rF   r   )r   rI   rJ   r   concatrs   r   )r<   rL   input_dimensionsrf   rO   rP   rN   r   rQ   input_feature_0input_feature_1input_feature_2input_feature_3s                r-   r   TFSwinPatchMerging.call  s   (&0&?#
|

=vl2[\}eD'14a4Aq(89'14a4Aq(89'14a4Aq(89'14a4Aq(89		?_"fhjk

A,<=
 		-	C}Hr,   c                ,   U R                   (       a  g SU l         [        U SS 5      b^  [        R                  " U R                  R
                  5         U R                  R                  S S SU R                  -  /5        S S S 5        [        U SS 5      b_  [        R                  " U R                  R
                  5         U R                  R                  S S SU R                  -  /5        S S S 5        g g ! , (       d  f       N|= f! , (       d  f       g = f)NTr   rF   rs   )	r   r   rI   r   r   rr   r   r   rs   r   s     r-   r   TFSwinPatchMerging.build  s    ::
4d+7t~~223$$dD!dhh,%?@ 44&2tyy~~.		tQ\ :; /. 3 43 /.s   ,C4>,D4
D
D)r   r   r   rs   r   r   )r   tuple[int, int]r   r   r   Callable | Noner   r   )rL   r   rO   r   rP   r   r   r   r   )rL   r   r   r   rf   r   r   r   r   r   s   @r-   r   r     sM    
 Z^0 /0690GV0	0 06	< 	<r,   r   c                  B   ^  \ rS rSrSrSSU 4S jjjrSS	S jjrSrU =r$ )
TFSwinDropPathi  zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).c                >   > [         TU ]  " S0 UD6  Xl        X l        g Nr$   )rw   rx   re   rg   )r<   re   rg   r   r   s       r-   rx   TFSwinDropPath.__init__  s    "6""*r,   c                B    [        XR                  X R                  5      $ r   )rl   re   rg   )r<   rd   rf   s      r-   r   TFSwinDropPath.call  s    :L:LMMr,   )re   rg   )NT)re   zfloat | Nonerg   r   r   r   r   )rd   r   rf   r   r   r   	r%   r&   r'   r(   r)   rx   r   r+   r   r   s   @r-   r   r     s    b+ +
N Nr,   r   c                  l   ^  \ rS rSrSU 4S jjrSS jrS	S jr    S
           SS jjrSrU =r	$ )TFSwinSelfAttentioni  c                h  > [         TU ]  " S	0 UD6  X#-  S:w  a  [        SU SU S35      eX0l        [	        X#-  5      U l        U R                  U R
                  -  U l        UR                  n[        U[        R                  R                  5      (       a  UOXU4U l        [        R                  R                  U R                  [        UR                   5      UR"                  SS9U l        [        R                  R                  U R                  [        UR                   5      UR"                  SS9U l        [        R                  R                  U R                  [        UR                   5      UR"                  SS9U l        [        R                  R+                  UR,                  5      U l        g )
Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()query)kernel_initializerr   rr   keyvaluer$   )rw   rx   r   num_attention_headsr   attention_head_sizeall_head_sizerM   r   r   r   r   r   r   r   r   initializer_rangeqkv_biasr   r   r   r   attention_probs_dropout_probrv   )r<   r   r   	num_headsr   rM   r   s         r-   rx   TFSwinSelfAttention.__init__  sz   "6"?a#C5(^_h^iijk  $- #&s#7 !558P8PP((%k;??3K3KLLKS^Rl 	 \\''.v/G/GH__	 ( 

 <<%%.v/G/GH__	 & 
 \\''.v/G/GH__	 ( 

 ||++F,O,OPr,   c           	     <   U R                  SU R                  S   -  S-
  SU R                  S   -  S-
  -  U R                  4SSS9U l        U R                  U R                  S   S-  U R                  S   S-  4S[        R
                  SS	9U l        [        R                  " U R                  S   5      n[        R                  " U R                  S   5      n[        R                  " [        R                  " X#S
S95      n[        R                  " U[        U5      S   S45      nUS S 2S S 2S 4   US S 2S S S 24   -
  n[        R                  " US5      n[        R                  " USS9u  pxXpR                  S   S-
  -  nUSU R                  S   -  S-
  -  nXR                  S   S-
  -  n[        R                  " Xx/SS9nU R                  R                  [        R                  " [        R                   " USS9[        R
                  5      5        U R"                  (       a  g SU l        [%        U SS 5      b[  [        R&                  " U R(                  R*                  5         U R(                  R-                  S S U R.                  /5        S S S 5        [%        U SS 5      b[  [        R&                  " U R0                  R*                  5         U R0                  R-                  S S U R.                  /5        S S S 5        [%        U SS 5      b\  [        R&                  " U R2                  R*                  5         U R2                  R-                  S S U R.                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NrE   r   r   r   relative_position_bias_tabler   Frelative_position_index)rU   	trainabler   rr   ij)indexingrH   )r   rE   r   axisTr   r   r   )r   rM   r   r  rI   rW   r  rangestackmeshgridrJ   r   rK   unstackassignrV   
reduce_sumr   r   r   r   rr   r   r   r   r   )	r<   ri   coords_hcoords_wcoordscoords_flattenrelative_coordsstack_0stack_1s	            r-   r   TFSwinSelfAttention.build  s   ,0OO))!,,q0Q9I9I!9L5Lq5PQTXTlTlm/ -< -
)
 (,##A&!+T-=-=a-@A-EF((*	 (7 (
$ 88D,,Q/088D,,Q/0"++h4HIFZ-?-BB,GH(At4~aqj7QQ,,	B::oA>##A&**1t''**Q..##A&**((G#5A>$$++BGGBMM/XZ4[]_]e]e,fg::
4$'3tzz/

  $d.@.@!AB 04%1txx}}-dD,>,>?@ .4$'3tzz/

  $d.@.@!AB 0/ 4 0/ .- 0/s$   &)M+)M<8)N+
M9<
N

Nc                    [        U5      S S U R                  U R                  /-   n[        R                  " X5      n[        R
                  " US5      $ )NrH   r   rE   r   r	   )r   r   r   rI   rJ   rK   )r<   rZ   new_x_shapes      r-   transpose_for_scores(TFSwinSelfAttention.transpose_for_scores6  sG     mCR(D,D,DdF^F^+__JJq&||A|,,r,   c                v   [        U5      u  pgnU R                  U5      n	U R                  U R                  U5      5      n
U R                  U R	                  U5      5      nU R                  U	5      n[
        R                  " U[
        R                  " U
S5      5      nU[        R                  " U R                  5      -  n[
        R                  " U R                  [
        R                  " U R                  S5      5      n[
        R                  " UU R                  S   U R                  S   -  U R                  S   U R                  S   -  S45      n[
        R                  " US5      nU[
        R                   " US5      -   nUb  [        U5      S   n[
        R                  " XU-  XR"                  Xw45      n[
        R                   " US5      n[
        R                   " US5      nX-   n[
        R                  " USU R"                  Xw45      n[
        R$                  R'                  USS9nU R)                  UUS9nUb  UU-  n[
        R                  " UU5      n[
        R                  " US	5      n[        U5      S S
 U R*                  /-   n[
        R                  " UU5      nU(       a  UU4nU$ U4nU$ )N)r   r   r	   rE   rH   r   r   rH   )rE   r   r   r  r   r  )r   r   r  r   r   rI   matmulrK   rX   sqrtr   gatherr  rJ   r  rM   r   r   nnsoftmaxrv   r   )r<   r!   attention_mask	head_maskoutput_attentionsrf   rN   r   r   mixed_query_layer	key_layervalue_layerquery_layerattention_scoresrelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputss                       r-   r   TFSwinSelfAttention.call;  s    (6
 JJ}5--dhh}.EF	//

=0IJ//0AB 99[",,y,2WX+dii8P8P.QQ!#--rzz$:V:VX]/^"
 "$"a 4#3#3A#668H8H8KdN^N^_`Na8acef"

 "$.Di!P+bnn=SUV.WW%#N3A6J!zz #;ZIaIacf"l   ^^NA>N^^NA>N/@!zz*:RAYAY[^<de %%--(8r-B ,,,J  -	9O		/;?]LA",]";CR"@D
 #
 

=2IJ6G=/2 O\M]r,   )r   r   r   rv   r   r   r   r  r  r   rM   r   r   r   r   r  r   r   r   r   rZ   r   r   r   NNFF)r!   r   r(  r   r)  r   r*  r   rf   r   r   tuple[tf.Tensor, ...])
r%   r&   r'   r(   rx   r   r  r   r+   r   r   s   @r-   r   r     sh    "QH(CT- ,0&*"'= = )= $	=
  = = 
= =r,   r   c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFSwinSelfOutputi{  c                   > [         TU ]  " S0 UD6  [        R                  R	                  USS9U l        [        R                  R                  UR                  SS9U l        X l	        g Ndenserq   rv   r$   )
rw   rx   r   r   r   r?  r   r  rv   r   r<   r   r   r   r   s       r-   rx   TFSwinSelfOutput.__init__|  sU    "6"\\'''':
||++F,O,OV_+`r,   c                F    U R                  U5      nU R                  XS9nU$ Nr   r?  rv   )r<   r!   input_tensorrf   s       r-   r   TFSwinSelfOutput.call  &    

=1]Fr,   c                   U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr?  rv   )	r   r   rI   r   r?  rr   r   r   rv   r   s     r-   r   TFSwinSelfOutput.build  s    ::
4$'3tzz/

  $dhh!78 04D)5t||001""4( 21 6 0/ 21s   )C!;C2!
C/2
D )r   r?  r   rv   r   r   r   r   r   r   r   )r!   r   rE  r   rf   r   r   r   r   	r%   r&   r'   r(   rx   r   r   r+   r   r   s   @r-   r<  r<  {  s    
	) 	)r,   r<  c                  h   ^  \ rS rSrSU 4S jjrS r    S           S	S jjrS
S jrSrU =r	$ )TFSwinAttentioni  c                   > [         TU ]  " S0 UD6  [        XUSS9U l        [	        XSS9U l        [        5       U l        g )Nr<   rq   outputr$   )rw   rx   r   r<   r<  self_outputsetpruned_heads)r<   r   r   r  r   r   s        r-   rx   TFSwinAttention.__init__  s<    "6"'YVL	+FhGEr,   c                    [         e)z|
Prunes heads of the model. See base class PreTrainedModel heads: dict of {layer_num: list of heads to prune in
this layer}
)NotImplementedError)r<   headss     r-   prune_headsTFSwinAttention.prune_heads  s
    
 "!r,   c                `    U R                  XX4US9nU R                  US   XS9nU4USS  -   nU$ )Nr   r   r   )r<   rP  )	r<   r!   r(  r)  r*  rf   self_outputsattention_outputr5  s	            r-   r   TFSwinAttention.call  sL     yy	goyp++LO]+^#%QR(88r,   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr<   rP  )r   r   rI   r   r<   rr   r   rP  r   s     r-   r   TFSwinAttention.build  s    ::
4&2tyy~~.		% /4-9t//445  &&t, 65 : /. 65   C.C%
C"%
C3)r   rR  r<   rP  r7  r9  )r!   r   r(  r   r)  r   r*  r   rf   r   r   r   r   )
r%   r&   r'   r(   rx   rW  r   r   r+   r   r   s   @r-   rM  rM    sd    "" ,0&*"'  ) $	
    
	- 	-r,   rM  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFSwinIntermediatei  c                6  > [         TU ]  " S0 UD6  [        R                  R	                  [        UR                  U-  5      SS9U l        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        X l        g )Nr?  rq   r$   )rw   rx   r   r   r   r   	mlp_ratior?  r   
hidden_actstrr
   intermediate_act_fnr   r@  s       r-   rx   TFSwinIntermediate.__init__  sw    "6"\\''F,<,<s,B(C''R
f''--'-f.?.?'@D$'-'8'8D$r,   c                J    U R                  U5      nU R                  U5      nU$ r   )r?  rf  )r<   r!   s     r-   r   TFSwinIntermediate.call  s&    

=100?r,   c                ,   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       g = fNTr?  )r   r   rI   r   r?  rr   r   r   r   s     r-   r   TFSwinIntermediate.build  sg    ::
4$'3tzz/

  $dhh!78 0/ 4//s   )B
B)r   r?  r   rf  rJ  )r!   r   r   r   r   rK  r   s   @r-   ra  ra    s    
9 9r,   ra  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFSwinOutputi  c                   > [         TU ]  " S0 UD6  [        R                  R	                  USS9U l        [        R                  R                  UR                  S5      U l        Xl	        X l
        g r>  )rw   rx   r   r   r   r?  r   r   rv   r   r   r@  s       r-   rx   TFSwinOutput.__init__  sV    "6"\\'''':
||++F,F,F	Rr,   c                F    U R                  U5      nU R                  XS9nU$ rC  rD  )r<   r!   rf   s      r-   r   TFSwinOutput.call  rG  r,   c           	     l   U R                   (       a  g SU l         [        U SS 5      b|  [        R                  " U R                  R
                  5         U R                  R                  S S [        U R                  R                  U R                  -  5      /5        S S S 5        g g ! , (       d  f       g = frk  )r   r   rI   r   r?  rr   r   r   r   rc  r   r   s     r-   r   TFSwinOutput.build  s{    ::
4$'3tzz/

  $c$++2G2G$((2R.S!TU 0/ 4//s   A	B%%
B3)r   r   r?  r   rv   rJ  r   )r!   r   rf   r   r   r   r   rK  r   s   @r-   rn  rn    s    
V Vr,   rn  c                     ^  \ rS rSr  S         S	U 4S jjjrS
S jr          SS jr   S           SS jjrSS jrSr	U =r
$ )TFSwinLayeri  c                v  > [         T	U ]  " S0 UD6  UR                  U l        [        R                  " U5      nXR
                  ::  a  UOUR
                  U l        XR
                  ::  a  SOUU l        X0l        [        R                  R                  UR                  SS9U l        [        XUSS9U l        US:  a
  [        USS9O[        R                  R!                  SSS9U l        [        R                  R                  UR                  S	S9U l        ['        XS
S9U l        [+        XSS9U l        X l        g )Nr   layernorm_beforer   	attentionrq   r^   rl   linearlayernorm_afterintermediaterO  r$   )rw   rx   chunk_size_feed_forwardrI   
reduce_minrM   
shift_sizer   r   r   r   layer_norm_epsrx  rM  ry  r   
Activationrl   r{  ra  r|  rn  swin_outputr   )
r<   r   r   r   r  drop_path_rater  r   min_resr   s
            r-   rx   TFSwinLayer.__init__  s    	"6"'-'E'E$-- 01&-1C1C&C7I[I[&*:*::!
 0 % ? ?H]H]dv ? w(ikR # ><(((D 	
  %||>>vG\G\ct>u.vP'(Cr,   c           
        [         R                  " X45      nSU* 4U* U* 4U* S44nSU* 4U* U* 4U* S44nUS:  a  SnU H  n	U H  n
[         R                  " U	S   U-  U	S   U-  S-   5      n[         R                  " U
S   U-  U
S   U-  S-   5      n[         R                  " [         R                  " [         R
                  " X5      SS9S5      n[        U5      S:  aC  [         R                  " [        U5      4UR                  S9U-  n[         R                  " X]U5      nUS-  nM     M     [         R                  " US5      n[         R                  " US5      n[        XS5      n[         R                  " USX3-  45      n[         R                  " US5      [         R                  " US5      -
  n[         R                  " US:g  SU5      n[         R                  " US:H  S	U5      nU$ )
Nr   rH   r   r  )rH   rE   )r   rE   g      Yr^   )rI   r   r  rJ   r  r  r`   onesr   tensor_scatter_nd_updater   rS   rc   )r<   rO   rP   rM   r  img_maskheight_sliceswidth_slicescountheight_slicewidth_sliceheight_inds
width_indsindicesupdatesmask_windows	attn_masks                    r-   get_attn_maskTFSwinLayer.get_attn_mask  s   88VO,k\*k\J;,G:+WYIZ[[L)[L:++F*VXHYZ >E -#/K"$((<?V+C\RS_W]E]`aEa"bK!#+a.5*@+a.SXBX[\B\!]J jj"++k2V]_)`bijG7|q("$''3w</"PSX"X#%#>#>xRY#ZQJE $0 !. >>(B/>>(A.'>zz,[5N0OPNN<3bnn\ST6UU	HHY!^VY?	HHY!^S)<	r,   c                    X$U-  -
  U-  nX#U-  -
  U-  nSS/SU/SU/SS//n[         R                  " X5      n[         R                  " US5      nX4$ )Nr   r!  )rI   r   rJ   )r<   r!   rM   rO   rP   	pad_right
pad_bottomr   s           r-   r   TFSwinLayer.maybe_pad"  sm     !;#66+E	![$88KG
!fq*o9~1vF
}9ZZ
E2
((r,   c                   [         R                  " U5      nX`R                  ::  a  SOU R                  nX`R                  ::  a  UOU R                  nUu  p[	        U5      u  pnUnU R                  XS9n[         R                  " XXU45      nU R                  XX5      u  p[	        U5      u  nnnnUS:  a  [         R                  " X* U* 4SS9nOUn[        UU5      n[         R                  " USX-  U45      nU R                  UUXS9nU R                  UUX4US9nUS   n[         R                  " USXU45      n[        UUUU5      nUS:  a  [         R                  " UXw4SS9nOUnUS   S:  =(       d    US	   S:  nU(       a  US S 2S U	2S U
2S S 24   n[         R                  " UXU
-  U45      nXR                  UUS9-   nU R                  XS9nU R                  U5      nXR!                  UUS9-   nU(       a	  UUS
   4nU$ U4nU$ )Nr   r   )r   rE   )shiftr  rH   )rO   rP   rM   r  )r*  rf   r	   rG   r   )rI   r~  rM   r  r   rx  rJ   r   rollrS   r  ry  r\   rl   r{  r|  r  )r<   r!   r   r)  r*  rf   r  r  rM   rO   rP   rN   r   r   shortcutr   
height_pad	width_padshifted_hidden_stateshidden_states_windowsr  attention_outputsr[  attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputss                               r-   r   TFSwinLayer.call,  sn    -- 01!%5%55Q4??
!(,<,<!<g$BRBR(",]";
x --m-O

=vh2WX$(NN=v$]!&0&?#:y!>$&GGM+PZ{A[bh$i!$1! !11F T "

+@2{G`bjBk l&&YK ' 
	 !NN!9igo + 
 -Q/JJ'7"kX`9ab():KU^_ > "
?W^d e /]Q&;*Q-!*;
 1!WfWfufa2G HJJ'8:PU~W_:`a >>2Ch>#WW++M+M((6$'7'7x'7'XX@Q'8';< YeWfr,   c                   U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GNV= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTrx  ry  rl   r{  r|  r  )r   r   rI   r   rx  rr   r   r   ry  rl   r{  r|  r  r   s     r-   r   TFSwinLayer.buildn  s   ::
4+T2>t4499:%%++T4,BC ;4d+7t~~223$$T* 44d+7t~~223$$T* 44*D1=t33889$$**D$+AB :4.:t00556!!''- 74-9t//445  &&t, 65 : ;: 43 43 :9 76 65sH   )I;I0J3)JJ%8J6
I-0
I?
J
J"%
J36
K)ry  r   r}  r   rl   r   r|  r{  rx  r  r  rM   )r^   r   )
r   r   r  r   r  floatr  r   r   r   )
rO   r   rP   r   rM   r   r  r   r   r   )
r!   r   rM   r   rO   r   rP   r   r   ztuple[tf.Tensor, tf.Tensor]NFF)r!   r   r   r   r)  r   r*  r   rf   r   r   r   r   )r%   r&   r'   r(   rx   r  r   r   r   r+   r   r   s   @r-   rv  rv    s     !$ *	
    
 :8)&)58)BE)NQ)	$) '+"'@ @ *@ $	@
  @ @ 
@D- -r,   rv  c                     ^  \ rS rSr                SU 4S jjr   S           SS jjrS	S jrSrU =r$ )
TFSwinStagei  c                ^  > [         T
U ]  " S
0 UD6  Xl        X l        [	        U5       V	s/ s H/  n	[        UUUUU	S-  S:X  a  SOUR                  S-  Xi   SU	 3S9PM1     sn	U l        Ub/  U" UU[        [        R                  R                  SS9SS9U l        OS U l        S	U l        g s  sn	f )NrE   r   zblocks.)r   r   r   r  r  r  rr   rt   )ru   
downsample)r   r   rr   Fr$   )rw   rx   r   r   r  rv  rM   blocksr   r   r   r   r  pointing)r<   r   r   r   depthr  rl   r  r   ir   s             r-   rx   TFSwinStage.__init__  s     	"6" 5\
 " !1#!"Q!1&2D2D2I(|qc] "
 !( "5<<#B#BDQ!	DO #DO1
s   6B*c           	     
   Uu  pg[        U R                  5       H  u  pUb  X8   OS n
U	" XXUS9nUS   nM     U R                  b'  US-   S-  US-   S-  pXgX4nU R                  WS   X%S9nOXgXg4nX4nU(       a  UWSS  -  nU$ )Nr   r   r   rE   )	enumerater  r  )r<   r!   r   r)  r*  rf   rO   rP   r  layer_modulelayer_head_maskr  height_downsampledwidth_downsampledr   stage_outputss                   r-   r   TFSwinStage.call  s     )(5OA.7.CilO(^fM *!,M  6 ??&5;aZA4EPQ	VWGW 1!'0B V OOM!,<>NObM!' >&:]12..Mr,   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       Nk= f! , (       d  f       Mc  = f)NTr  r  )r   r   rI   r   r  rr   r   r  r<   ri   layers      r-   r   TFSwinStage.build  s    ::
4t,8t334%%d+ 544(4]]5::.KK% /. % 5 54 /.s   C3C$
C!$
C3	)r  r   r   r   r  r  )r   r   r   r   r   r   r  r   r  r   rl   zlist[float]r  r   r   r   r  )r!   r   r   r   r)  r   r*  r   rf   r   r   r:  r   rK  r   s   @r-   r  r    s    && & *	&
 & & & $& 
&X '+).  * $	
 '  
>
& 
&r,   r  c                  l   ^  \ rS rSrSU 4S jjr     S               SS jjrS	S jrSrU =r$ )
TFSwinEncoderi  c                  > [         TU ]  " S0 UD6  [        UR                  5      U l        Xl        [        [        R                  " SS[        UR                  5      5      UR                  -  R                  5       5      n[        U R                  5       Vs/ s H  n[        U[        UR                  SU-  -  5      US   SU-  -  US   SU-  -  4UR                  U   UR                   U   U[        UR                  S U 5      [        UR                  S US-    5       XPR                  S-
  :  a  ["        OS SU 3S9PM     snU l        SU l        g s  snf )Nr   r   rE   zlayers.)r   r   r   r  r  rl   r  rr   Fr$   )rw   rx   r`   depths
num_layersr   listrI   linspacesumr  numpyr  r  r   r}   r  r   r   gradient_checkpointing)r<   r   r{   r   dpri_layerr   s         r-   rx   TFSwinEncoder.__init__  sG   "6"fmm,BKK1c&--&89F<Q<QQXXZ[ !1
 2 ((1g:56"+A,1g:">	!QRT[Q[@\!]mmG, **73c&--"9:S}QX[\Q\A]=^_29OOa<O2O-VZwi(	 2
 ',#
s   B.Ec           	        SnU(       a  SOS n	U(       a  SOS n
U(       a  SOS nU(       aK  [        U5      u  pn[        R                  " X/UQUP75      n[        R                  " US5      nX4-  n	X4-  n
[	        U R
                  5       H  u  nnUb  UU   OS nU" XUXGS9nUS   nUS   nUS   US   4nX4-  nU(       aK  [        U5      u  pn[        R                  " X/UQUP75      n[        R                  " US5      nX4-  n	X4-  n
U(       d  M  UUSS  -  nM     U(       d  [        S	 XU4 5       5      $ [        UU	UU
S
9$ )Nr$   r   r   r   r   r"  rH   rE   c              3  .   #    U  H  oc  M  Uv   M     g 7fr   r$   ).0vs     r-   	<genexpr>%TFSwinEncoder.call.<locals>.<genexpr>!  s     m$[q$[s   	)r   r!   r"   r#   )r   rI   rJ   rK   r  r   tupler   )r<   r!   r   r)  r*  output_hidden_statesreturn_dictrf   all_input_dimensionsall_hidden_statesall_reshaped_hidden_statesall_self_attentionsrN   r   r   reshaped_hidden_stater  r  r  r  r   s                        r-   r   TFSwinEncoder.call  s     ""6BD+?RT"$5b4)3M)B&J;$&JJ}>jL\>j^i>j$k!$&LL1F$U!!11&*BB&(5OA|.7.CilO(BSM *!,M -a 0 1" 57H7LM $77 #-7-F*
{(*

=BnP`BnbmBn(o%(*5JL(Y%!%55!*.FF*  #}QR'88#-  60 m]GZ$[mmm"++*#=	
 	
r,   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = f)NTr   )r   r   r   rI   r   rr   r   r  s      r-   r   TFSwinEncoder.build*  s`    ::
44(4]]5::.KK% /. % 5..s   A77
B	)r   r   r  r   r  )r   r   r{   r   )NFFTF)r!   r   r   r   r)  r   r*  r   r  r   r  r   rf   r   r   z+tuple[tf.Tensor, ...] | TFSwinEncoderOutputr   rK  r   s   @r-   r  r    sw    ,2 '+"'%* 7
 7
 *7
 $	7

  7
 #7
 7
 7
 
57
r& &r,   r  c                  $    \ rS rSrSr\rSrSrSr	g)TFSwinPreTrainedModeli4  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
swinr   r$   N)
r%   r&   r'   r(   r)   r   config_classbase_model_prefixmain_input_namer+   r$   r,   r-   r  r  4  s    
 L$Or,   r  a`  
    This model is a Tensorflow
    [keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) sub-class. Use it as a
    regular Tensorflow Module and refer to the Tensorflow documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`SwinConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a:  
    Args:
        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`ViTImageProcessor.__call__`]
            for details.
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                    U c  [         R                  R                  5       n U R                  5       nUS;  a  [	        S[        U 5      -   5      eU$ )z
From tensorflow addons
https://github.com/tensorflow/addons/blob/8cec33fcaaf1cf90aec7bdd55a0fcdbb251ce5c2/tensorflow_addons/utils/keras_utils.py#L71
>   channels_lastchannels_firstzWThe `data_format` argument must be one of "channels_first", "channels_last". Received: )r   backendimage_data_formatlowerr   re  )r   data_formats     r-   normalize_data_formatr  a  sS    
 }//1++-K==ehklqhrr
 	
 r,   c                  ~   ^  \ rS rSrSr\R                  S4       S	U 4S jjjrS
S jrSS jr	SU 4S jjr
SrU =r$ )AdaptiveAveragePooling1Dip  a4  
Args:
Average 1D Pooling with adaptive kernel size.
  output_size: An integer or tuple/list of a single integer, specifying pooled_features.
    The new size of output channels.
  data_format: A string,
    one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs.
    `channels_last` corresponds to inputs with shape `(batch, steps, channels)` while `channels_first` corresponds
    to inputs with shape `(batch, channels, steps)`.
Input shape:
  - If `data_format='channels_last'`: 3D tensor with shape `(batch, steps, channels)`.
  - If `data_format='channels_first'`: 3D tensor with shape `(batch, channels, steps)`.
Output shape:
  - If `data_format='channels_last'`: 3D tensor with shape `(batch_size, pooled_steps, channels)`.
  - If `data_format='channels_first'`: 3D tensor with shape `(batch_size, channels, pooled_steps)`.

Adapted from [tensorflow-addon's adaptive pooling.py](
    https://github.com/tensorflow/addons/blob/8cec33fcaaf1cf90aec7bdd55a0fcdbb251ce5c2/tensorflow_addons/layers/adaptive_pooling.py#L90-L120
)
Nc                   > [        U5      U l        X l        [        U[        5      (       a  U4O
[        U5      U l        [        TU ]   " S0 UD6  g r   )	r  r  reduce_functionr   r   r  output_sizerw   rx   )r<   r  r  r  r   r   s        r-   rx   !AdaptiveAveragePooling1D.__init__  sF     1=.-7S-I-IK>uU`Oa"6"r,   c                0   U R                   S   nU R                  S:X  a<  [        R                  " XSS9n[        R                  " USS9nU R                  USS9nU$ [        R                  " XSS9n[        R                  " USS9nU R                  USS9nU$ )Nr   r  r   r  rE   r	   )r  r  rI   splitr  r  )r<   inputsargsbinssplitsout_vects         r-   r   AdaptiveAveragePooling1D.call  s    ".XXf3FXXf1-F++F+;H
  XXf3FXXf1-F++F+;Hr,   c                "   [         R                  " U5      R                  5       nU R                  S:X  a.  [         R                  " US   U R                  S   US   /5      nU$ [         R                  " US   US   U R                  S   /5      nU$ )Nr  r   rE   r   )rI   TensorShapeas_listr  r  )r<   ri   rU   s      r-   compute_output_shape-AdaptiveAveragePooling1D.compute_output_shape  s    nn[199;.NNKND4D4DQ4GUV#XYE  NNKNKNDDTDTUVDW#XYEr,   c                ^   > U R                   U R                  S.n[        TU ]  5       n0 UEUE$ )N)r  r  )r  r  rw   
get_config)r<   r   base_configr   s      r-   r  #AdaptiveAveragePooling1D.get_config  s;    ++++
 g(*(+(((r,   )r  r  r  )r  zint | Iterable[int]r  r   r  z
str | Noner   r   )r  r   r   r   )ri   zIterable[int]r   r   )r   zdict[str, Any])r%   r&   r'   r(   r)   rI   reduce_meanrx   r   r   r  r+   r   r   s   @r-   r  r  p  sX    0 %'NN"&	
#(
# "
#  	
# 

# 
#
) )r,   r  c                     ^  \ rS rSr\r S	       S
U 4S jjjrSS jrSS jrSS jr	\
       S               SS jj5       rSS jrSrU =r$ )TFSwinMainLayeri  c                  > [         TU ]  " S0 UD6  Xl        [        UR                  5      U l        [        UR                  SU R
                  S-
  -  -  5      U l        [        XSS9U l
        [        XR                  R                  SS9U l        [        R                  R!                  UR"                  SS9U l        U(       a  ['        S	S
9U l        g S U l        g )NrE   r   r   )r~   rr   encoderrq   	layernormr   r   )r  r$   )rw   rx   r   r`   r  r  r   r}   num_featuresrn   r   r  r|   r
  r   r   r   r  r  r  poolerr<   r   add_pooling_layerr~   r   r   s        r-   rx   TFSwinMainLayer.__init__  s     	"6"fmm, 0 0119L3M MN*6Wcd$V__-G-GiX88AVAV]h8iDU.4@[_r,   c                .    U R                   R                  $ r   )r   rp   r;   s    r-   get_input_embeddings$TFSwinMainLayer.get_input_embeddings  s    ///r,   c                    UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr
  r  ry  rW  )r<   heads_to_pruner  rV  s       r-   _prune_headsTFSwinMainLayer._prune_heads  s<    
 +002LELLu%//;;EB 3r,   c                Z    Ub  [         eS /[        U R                  R                  5      -  $ r   )rU  r`   r   r  )r<   r)  s     r-   get_head_maskTFSwinMainLayer.get_head_mask  s*     %%vDKK..///r,   c           
     ^   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  U5      nU R                  XUS9u  pU R                  UU	UUUUUS9n
U
S   nU R                  XS9nS nU R                  b7  [        U5      u  pnU R                  U5      n[        R                  " XU45      nU(       d  X4U
SS  -   nU$ [        UUU
R                  U
R                  U
R                   S9$ )N You have to specify pixel_values)r   rf   r)  r*  r  r  rf   r   r   r   )r   r1   r!   r"   r#   )r   r*  r  use_return_dictr   r  r   r
  r  r  r   rI   rJ   r/   r!   r"   r#   )r<   r   r   r)  r*  r  r  rf   embedding_outputr   encoder_outputssequence_outputpooled_outputrN   r   r  rO  s                    r-   r   TFSwinMainLayer.call  sa    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@ &&y1	-1__H .= .
* ,,/!5# ' 
 *!,...L;;"*4_*E'J< KK8MJJ}<6PQM%58KKFM -')77&11#2#I#I
 	
r,   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r
  r  )
r   r   rI   r   r   rr   r   r
  r  r  r   s     r-   r   TFSwinMainLayer.build  s    ::
4t,8t334%%d+ 54D)5t||001""4( 24d+7t~~223$$dD$2C2C%DE 43 8 54 21 43s$   D=.E
)E=
E
E
E-)r   r   r   r
  r  r  r  r  TFr   r   r  r   r~   r   r   r   )r   ry   )r  zdict[int, list])r)  z
Any | Noner   r  NNNNNNFr   r   r   r   r)  r   r*  r   r  r   r  r   rf   r   r   z)TFSwinModelOutput | tuple[tf.Tensor, ...]r   )r%   r&   r'   r(   r   r  rx   r  r  r  r   r   r   r+   r   r   s   @r-   r  r    s    L Z_` `59`RV`	` `0C0
  *.,0&*)-,0#':
&:
 *:
 $	:

 ':
 *:
 !:
 :
 
3:
 :
xF Fr,   r  z^The bare Swin Model transformer outputting raw hidden-states without any specific head on top.c            	         ^  \ rS rSr S       S	U 4S jjjr\" \5      \" \\	\
S\S9\       S
               SS jj5       5       5       rSS jrSrU =r$ )TFSwinModeli  c                R   > [         TU ]  " U40 UD6  Xl        [        USS9U l        g )Nr  rq   )rw   rx   r   r  r  r  s        r-   rx   TFSwinModel.__init__"  s*     	*6*#F8	r,   vision)
checkpointoutput_typer  modalityexpected_outputc           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  UUUUUUUS9nU$ )z
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
r  )r   r   r)  r*  r  r  rf   )r   r*  r  r   r   r  )	r<   r   r   r)  r*  r  r  rf   swin_outputss	            r-   r   TFSwinModel.call)  s    . 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@yy%+/!5# ! 
 r,   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTr  )r   r   rI   r   r  rr   r   r   s     r-   r   TFSwinModel.buildU  sZ    ::
4&2tyy~~.		% /. 3..s   A88
B)r   r   r  r(  r)  r*  r+  r   )r%   r&   r'   r(   rx   r   SWIN_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr/   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r   r   r+   r   r   s   @r-   r-  r-    s     Z_9 9599RV9	9 9 ++@A&%$.  *.,0&*)-,0#'!&! *! $	!
 '! *! !! ! 
3!  B!F& &r,   r-  c                  :   ^  \ rS rSrSrSU 4S jjrSS jrSrU =r$ )TFSwinPixelShufflei^  z0TF layer implementation of torch.nn.PixelShufflec                   > [         TU ]  " S0 UD6  [        U[        5      (       a  US:  a  [	        SU 35      eXl        g )NrE   z1upscale_factor must be an integer value >= 2 got r$   )rw   rx   r   r   r   upscale_factor)r<   rA  r   r   s      r-   rx   TFSwinPixelShuffle.__init__a  sB    "6".#...12DPQ_P`abb,r,   c           
        Un[        U5      u  n  pEU R                  S-  n[        XV-  5      n[        R                  " [        U5       VV	s/ s H  n[        U5        H	  oX-  -   PM     M     sn	n/5      n
[        R                  " U[        R                  " XS/5      SS9n[        R                  R                  X R                  SS9nU$ s  sn	nf )NrE   r   rH   )paramsr  
batch_dimsNHWC)
block_sizer  )
r   rA  r   rI   constantr  r%  tiler&  depth_to_space)r<   rZ   r!   rN   r   num_input_channelsblock_size_squaredoutput_depthr  jpermutations              r-   r   TFSwinPixelShuffle.callg  s    /9-/H,
Aq!00!3-BC
 kk278J2Ki2KQUZ[gUhPQ!((Uh(2Kij
 		bcUd@ertu,,]GZGZhn,o	 js   #C
)rA  )rA  r   r   r   r8  r   r   s   @r-   r?  r?  ^  s    :- r,   r?  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFSwinDecoderix  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  S-  UR                  -  SSSS9U l        [        UR
                  SS9U l	        Xl
        g )NrE   r   0)r   r   r   rr   1rq   r$   )rw   rx   r   r   r   encoder_striderQ   conv2dr?  pixel_shuffler   )r<   r   r   r   s      r-   rx   TFSwinDecoder.__init__y  sl    "6"ll))))1,v/B/BBPQ[\cf * 
 00E0ECPr,   c                    Un[         R                  " US5      nU R                  U5      nU R                  U5      n[         R                  " US5      nU$ )Nr   r   )rI   rK   rW  rX  )r<   rZ   r!   s      r-   r   TFSwinDecoder.call  sK    ]LAM2**=9]LAr,   c                   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTrW  rX  )
r   r   rI   r   rW  rr   r   r   r   rX  r   s     r-   r   TFSwinDecoder.build  s    ::
44(4t{{//0!!4tT[[5L5L"MN 14$/;t11667""((. 87 < 10 87s   4C,C=,
C:=
D)r   r   rW  rX  r   r   r8  r   rK  r   s   @r-   rR  rR  x  s    	/ 	/r,   rR  z~Swin Model with a decoder on top for masked image modeling, as proposed in [SimMIM](https://huggingface.co/papers/2111.09886).c                     ^  \ rS rSrSU 4S jjr\" \5      \" \\	S9\
       S               S	S jj5       5       5       rS
S jrSrU =r$ )TFSwinForMaskedImageModelingi  c                d   > [         TU ]  U5        [        USSSS9U l        [	        USS9U l        g )NFTr  )r  r~   rr   decoderrq   )rw   rx   r  r  rR  rb  r<   r   r   s     r-   rx   %TFSwinForMaskedImageModeling.__init__  s2     #FeTX_ef	$V)<r,   )r2  r  c           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9nUS   n	[        R                  " U	S5      n	[        U	5      u  pn[        US-  5      =p[        R                  " XXU45      n	U R                  U	5      nSnUGb  U R                   R                  U R                   R                  -  n[        R                  " USUU45      n[        R                  " X R                   R                  S5      n[        R                  " UU R                   R                  S5      n[        R                  " US5      n[        R                  " U[        R                  5      n[        R                   R#                  [        R                  " US	5      [        R                  " US	5      5      n[        R                  " US5      n[        R$                  " UU-  5      n[        R$                  " U5      S
-   U R                   R&                  -  nUU-  n[        R                  " US5      nU(       d  U4USS -   nUb  U4U-   $ U$ [)        UUUR*                  UR,                  UR.                  S9$ )a  
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

Returns:

Examples:
```python
>>> from transformers import AutoImageProcessor, TFSwinForMaskedImageModeling
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
>>> model = TFSwinForMaskedImageModeling.from_pretrained("microsoft/swin-tiny-patch4-window7-224")

>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
>>> pixel_values = image_processor(images=image, return_tensors="tf").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = tf.random.uniform((1, num_patches)) >= 0.5

>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
>>> loss, reconstructed_pixel_values = outputs.loss, outputs.reconstruction
>>> list(reconstructed_pixel_values.shape)
[1, 3, 224, 224]
```N)r   r)  r*  r  r  rf   r   r   g      ?rH   r   rE   )r   rE   r	   r   rt   r  )r5   r6   r!   r"   r#   )r   r   r  rI   rK   r   r   rJ   rb  r   r   r   r   rV   float32r   lossesmean_absolute_errorr  rQ   r3   r!   r"   r#   )r<   r   r   r)  r*  r  r  rf   r5  r#  rN   rQ   sequence_lengthrO   rP   reconstructed_pixel_valuesmasked_im_losssizer   reconstruction_loss
total_lossnum_masked_pixelsrO  s                          r-   r   !TFSwinForMaskedImageModeling.call  s5   T &1%<k$++B]B]))+/!5#  
 "!*,,	B4>4O1
/_c122**_<Y^6_` &*\\/%B"&;;))T[[-C-CCD jj2tT:JKO99_kk.D.DaHD99T4;;#9#91=D>>$*D774,D"',,"B"B\<87F#
 #%..1Da"H':T'ABJ!#t!4t!;t{{?W?W W'*;;NZZ=N02WQR[@F3A3M^%.YSYY.5!//))#*#A#A
 	
r,   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  rb  )r   r   rI   r   r  rr   r   rb  r   s     r-   r   "TFSwinForMaskedImageModeling.build  s    ::
4&2tyy~~.		% /4D)5t||001""4( 21 6 /. 21r_  )r   rb  r  r^  r*  )r   r   r   r   r)  r   r*  r   r  r   r  r   rf   r   r   z'tuple | TFSwinMaskedImageModelingOutputr   )r%   r&   r'   r(   rx   r   r:  r   r3   r<  r   r   r   r+   r   r   s   @r-   r`  r`    s    = ++@A+JYhi *.,0&*)-,0#'[
&[
 *[
 $	[

 '[
 *[
 ![
 [
 
1[
  j B[
z	) 	)r,   r`  z
    Swin Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                     ^  \ rS rSrSU 4S jjr\" \5      \" \\	\
\S9\       S               S	S jj5       5       5       rS
S jrSrU =r$ )TFSwinForImageClassificationi  c                &  > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a.  [
        R                  R                  UR                  SS9U l	        g [
        R                  R                  SSS9U l	        g )Nr  rq   r   
classifierrz  )
rw   rx   
num_labelsr  r  r   r   r   r  rv  rc  s     r-   rx   %TFSwinForImageClassification.__init__  s      ++#F8	
   1$ LLv00|D 	 (((E 	r,   )r1  r2  r  r4  c           	     @   Ub  UOU R                   R                  nU R                  UUUUUUS9nUS   n	U R                  XS9n
Uc  SOU R	                  X:5      nU(       d  U
4USS -   nUb  U4U-   $ U$ [        UU
UR                  UR                  UR                  S9$ )a[  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   r   rE   )r5   r=   r!   r"   r#   )	r   r   r  rv  hf_compute_lossrA   r!   r"   r#   )r<   r   r)  labelsr*  r  r  rf   r5  r$  r=   r5   rO  s                r-   r   !TFSwinForImageClassification.call%  s    0 &1%<k$++B]B]))/!5#  
  
B~t4+?+?+OY,F)-)9TGf$EvE*!//))#*#A#A
 	
r,   c                R   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b  [        U R                  S5      (       af  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g g ! , (       d  f       N= f! , (       d  f       g = f)NTr  rv  rr   )
r   r   rI   r   r  rr   r   hasattrrv  r  r   s     r-   r   "TFSwinForImageClassification.buildZ  s    ::
4&2tyy~~.		% /4t,8t//]]4??#7#78OO))4tyy7M7M*NO 98 0 9 /. 98s   D	3D
D
D&)r   rv  rw  r  r^  r*  )r   r   r)  r   r{  r   r*  r   r  r   r  r   rf   r   r   z3tuple[tf.Tensor, ...] | TFSwinImageClassifierOutputr   )r%   r&   r'   r(   rx   r   r:  r   _IMAGE_CLASS_CHECKPOINTrA   r<  _IMAGE_CLASS_EXPECTED_OUTPUTr   r   r   r+   r   r   s   @r-   rt  rt    s    
 ++@A*/$4	  *.&*#')-,0#'+
&+
 $+
 !	+

 '+
 *+
 !+
 +
 
=+
  B+
Z
P 
Pr,   rt  )rt  r`  r-  r  )rL   r   rM   r   r   r   )
rR   r   rM   r   rO   r   rP   r   r   r   )r^   FT)
rd   r   re   r  rf   r   rg   r   r   r   )r   re  r   re  )Nr)   
__future__r   collections.abcr   rX   r8   r   dataclassesr   	functoolsr   typingr   r   
tensorflowrI   activations_tfr
   modeling_tf_utilsr   r   r   r   r   r   tf_utilsr   utilsr   r   r   r   r   r   configuration_swinr   
get_loggerr%   loggerr<  r;  r=  r  r  r   r/   r3   rA   rS   r\   rl   r   Layerrn   ry   r   r   r   r<  rM  ra  rn  rv  r  r  r  SWIN_START_DOCSTRINGr:  r  r  r  r-  r?  rR  r`  rt  __all__r$   r,   r-   <module>r     sz   % "    $ !     $  #  + 
		H	%  ? %  C 1  @+ @ @@  @  @  @F )#k )# )#X  @+  @  @F  ]a!!!&!8<!UY!!&A-u||)) A-HAMELL.. AMHF<++ F<R	NU\\'' 	NQ%,,,, Qh)u||)) )0$-ell(( $-N9++ 90V5<<%% V,[-%,,$$ [-|R&%,,$$ R&jV&ELL&& V&r%- %
  ,<)u||11 <)~ jFell(( jF jFZ d:&' :&	:&z++ 4/ELL&& /> ;
q)#8 q)
q)h  MP#8:V MPMP` sr,   