
    cCi                       S r SSKJr  SSKrSSKJr  SSKJrJ	r	J
r
Jr  SSKJrJrJrJr  SSKJrJrJrJrJr  SS	KJrJr  SS
KJr  SSKJr  \R<                  " \5      r Sr!Sr"/ SQr#Sr$Sr%S@SAS jjr& " S S\RN                  RP                  5      r) " S S\RN                  RP                  5      r* " S S\RN                  RP                  5      r+ " S S\RN                  RP                  5      r, " S S\RN                  RP                  5      r- " S S\RN                  RP                  5      r. " S S\RN                  RP                  5      r/ " S  S!\RN                  RP                  5      r0 " S" S#\RN                  RP                  5      r1 " S$ S%\RN                  RP                  5      r2 " S& S'\RN                  RP                  5      r3 " S( S)\RN                  RP                  5      r4\ " S* S+\RN                  RP                  5      5       r5 " S, S-\5      r6S.r7S/r8\	" S0\75       " S1 S2\65      5       r9\	" S3\75       " S4 S5\6\5      5       r: " S6 S7\RN                  RP                  5      r; " S8 S9\RN                  RP                  5      r< " S: S;\RN                  RP                  5      r=\	" S<\75       " S= S>\65      5       r>/ S?Qr?g)BzTensorFlow 2.0 MobileViT model.    )annotationsN   )get_tf_activation)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings)TFBaseModelOutputTFBaseModelOutputWithPooling&TFImageClassifierOutputWithNoAttention(TFSemanticSegmenterOutputWithNoAttention)TFPreTrainedModelTFSequenceClassificationLosskeraskeras_serializableunpack_inputs)
shape_liststable_softmax)logging   )MobileViTConfigr   zapple/mobilevit-small)r   i     r   ztabby, tabby catc                |    Uc  Un[        U[        XS-  -   5      U-  U-  5      nUSU -  :  a  X1-  n[        U5      $ )z
Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the
original TensorFlow repo. It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
   g?)maxint)valuedivisor	min_value	new_values       m/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/mobilevit/modeling_tf_mobilevit.pymake_divisibler"   >   sO     	Is5Q;#677BWLMI3;	y>    c                  ~   ^  \ rS rSr      S                     SU 4S jjjrSS	S jjrS
S jrSrU =r$ )TFMobileViTConvLayerM   c                  > [         TU ]  " S0 UD6  [        R                  SU R                  R
                   S35        [        US-
  S-  5      U-  n[        R                  R                  U5      U l
        X6-  S:w  a  [        SU SU S35      e[        R                  R                  UUUS	UUUS
S9U l        U	(       a%  [        R                  R                  SSSS9U l        OS U l        U
(       ar  [!        U
["        5      (       a  [%        U
5      U l        OS[!        UR(                  ["        5      (       a  [%        UR(                  5      U l        OUR(                  U l        OS U l        X l        X0l        g )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPUr   r   r   zOutput channels (z) are not divisible by z groups.VALIDconvolution)filterskernel_sizestridespaddingdilation_rategroupsuse_biasnamegh㈵>g?normalization)epsilonmomentumr2    )super__init__loggerwarning	__class____name__r   r   layersZeroPadding2Dr.   
ValueErrorConv2Dr*   BatchNormalizationr3   
isinstancestrr   
activation
hidden_actin_channelsout_channels)selfconfigrF   rG   r,   strider0   biasdilationuse_normalizationuse_activationkwargsr.   r;   s                r!   r8   TFMobileViTConvLayer.__init__N   s[    	"6"(() *E E	

 {Q!+,x7||11': A%0>UV\U]]efgg <<.. #" / 	
 !&!@!@X[bq!@!rD!%D.#.."3N"CF--s33"3F4E4E"F"("3"3"DO&(r#   c                    U R                  U5      nU R                  U5      nU R                  b  U R                  XS9nU R                  b  U R                  U5      nU$ Ntraining)r.   r*   r3   rD   )rH   featuresrT   padded_featuress       r!   callTFMobileViTConvLayer.call   s\    ,,x0##O4)))()FH??&x0Hr#   c                \   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S S U R                  /5        S S S 5        [        U SS 5      by  [        U R                  S5      (       a]  [        R                  " U R                  R
                  5         U R                  R                  S S S U R                  /5        S S S 5        g g g ! , (       d  f       N= f! , (       d  f       g = f)NTr*   r3   r2   )builtgetattrtf
name_scoper*   r2   buildrF   hasattrr3   rG   rH   input_shapes     r!   r^   TFMobileViTConvLayer.build   s    ::
4-9t//445  &&dD$:J:J'KL 64$/;t))622]]4#5#5#:#:;&&,,dD$@Q@Q-RS <; 3 < 65 <;s   *D*D
D
D+)rD   rZ   r*   rF   r3   rG   r.   )r   r   Fr   TT)rI   r   rF   r   rG   r   r,   r   rJ   r   r0   r   rK   boolrL   r   rM   rc   rN   z
bool | strreturnNoneFrU   	tf.TensorrT   rc   rd   rh   N	r<   
__module____qualname____firstlineno__r8   rW   r^   __static_attributes____classcell__r;   s   @r!   r%   r%   M   s     "&%)4)4) 4) 	4)
 4) 4) 4) 4) 4)  4) #4) 
4) 4)l
T 
Tr#   r%   c                  d   ^  \ rS rSrSr S           SU 4S jjjrS	S
S jjrSS jrSrU =r	$ )TFMobileViTInvertedResidual   zQ
Inverted residual block (MobileNetv2): https://huggingface.co/papers/1801.04381
c                @  > [         TU ]  " S0 UD6  [        [        [	        X!R
                  -  5      5      S5      nUS;  a  [        SU S35      eUS:H  =(       a    X#:H  U l        [        XUSSS9U l	        [        UUUSUUUS	S
9U l
        [        UUUSSSS9U l        g )Nr   )r   r   zInvalid stride .r   
expand_1x1rF   rG   r,   r2   r   conv_3x3)rF   rG   r,   rJ   r0   rL   r2   F
reduce_1x1rF   rG   r,   rN   r2   r6   )r7   r8   r"   r   roundexpand_ratior?   use_residualr%   rv   rx   ry   )	rH   rI   rF   rG   rJ   rL   rO   expanded_channelsr;   s	           r!   r8   $TFMobileViTInvertedResidual.__init__   s     	"6"*3u[CVCV5V/W+XZ[\vha899#q[K{/J.:KYZam
 -)*$	
 /)% 
r#   c                    UnU R                  XS9nU R                  XS9nU R                  XS9nU R                  (       a  X1-   $ U$ rR   )rv   rx   ry   r}   )rH   rU   rT   residuals       r!   rW    TFMobileViTInvertedResidual.call   sM    ??8??====??8??&*&7&7x"EXEr#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)NTrv   rx   ry   )	rZ   r[   r\   r]   rv   r2   r^   rx   ry   r`   s     r!   r^   !TFMobileViTInvertedResidual.build   s    ::
4t,8t334%%d+ 54T*6t}}112##D) 34t,8t334%%d+ 54 9 54 32 54s$   D0.E
E0
D>
E
E )rZ   rx   rv   ry   r}   r   )rI   r   rF   r   rG   r   rJ   r   rL   r   rd   re   rf   rg   ri   
r<   rk   rl   rm   __doc__r8   rW   r^   rn   ro   rp   s   @r!   rr   rr      sZ    
 jk!
%!
47!
GJ!
TW!
cf!
	!
 !
FF, ,r#   rr   c                  b   ^  \ rS rSr  S           SU 4S jjjrSS	S jjrS
S jrSrU =r$ )TFMobileViTMobileNetLayer   c           	        > [         T	U ]  " S0 UD6  / U l        [        U5       H8  n[	        UUUUS:X  a  UOSSU 3S9nU R                  R                  U5        UnM:     g )Nr   r   layer.)rF   rG   rJ   r2   r6   )r7   r8   r=   rangerr   append)
rH   rI   rF   rG   rJ   
num_stagesrO   ilayerr;   s
            r!   r8   "TFMobileViTMobileNetLayer.__init__   sk     	"6"z"A/')!"avQaS\E KKu%&K #r#   c                8    U R                    H	  nU" XS9nM     U$ rR   r=   )rH   rU   rT   layer_modules       r!   rW   TFMobileViTMobileNetLayer.call   s     KKL#H@H (r#   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = fNTr=   rZ   r[   r=   r\   r]   r2   r^   rH   ra   r   s      r!   r^   TFMobileViTMobileNetLayer.build   d    ::
44(4 $]]<#4#45 &&t, 65 !, 555   A77
B	rZ   r=   )r   r   )rI   r   rF   r   rG   r   rJ   r   r   r   rd   re   rf   rg   ri   rj   rp   s   @r!   r   r      s^     '' ' 	'
 ' ' 
' '.
- -r#   r   c                  N   ^  \ rS rSrSU 4S jjrSS jrS	S
S jjrSS jrSrU =r	$ )TFMobileViTSelfAttentioni  c                r  > [         TU ]  " S
0 UD6  X!R                  -  S:w  a  [        SU SUR                   S35      eUR                  U l        [	        X!R                  -  5      U l        U R                  U R
                  -  U l        [        R                  " U R
                  [        R                  S9n[        R                  R                  U5      U l        [        R                  R                  U R                  UR                   SS9U l        [        R                  R                  U R                  UR                   SS9U l        [        R                  R                  U R                  UR                   S	S9U l        [        R                  R)                  UR*                  5      U l        X l        g )Nr   zThe hidden size z4 is not a multiple of the number of attention heads ru   dtypequery)r1   r2   keyr   r6   )r7   r8   num_attention_headsr?   r   attention_head_sizeall_head_sizer\   castfloat32mathsqrtscaler   r=   Denseqkv_biasr   r   r   Dropoutattention_probs_dropout_probdropouthidden_size)rH   rI   r   rO   r   r;   s        r!   r8   !TFMobileViTSelfAttention.__init__  sW   "6"333q8";- 0334A7 
 $*#=#= #&{5O5O'O#P !558P8PP00

CWW\\%(
\\''(:(:V__[b'c
<<%%d&8&86??Y^%_\\''(:(:V__[b'c
||++F,O,OP&r#   c                    [         R                  " U5      S   n[         R                  " XSU R                  U R                  4S9n[         R
                  " U/ SQS9$ )Nr   shaper   r   r   r   perm)r\   r   reshaper   r   	transpose)rH   x
batch_sizes      r!   transpose_for_scores-TFMobileViTSelfAttention.transpose_for_scores  sG    XXa[^
JJqR1I1I4KcKc de||AL11r#   c                   [         R                  " U5      S   nU R                  U R                  U5      5      nU R                  U R	                  U5      5      nU R                  U R                  U5      5      n[         R                  " XdSS9nXpR                  -  n[        USS9nU R                  XS9n[         R                  " X5      n	[         R                  " U	/ SQS9n	[         R                  " XSU R                  4S	9n	U	$ )
Nr   T)transpose_br   axisrS   r   r   r   )r\   r   r   r   r   r   matmulr   r   r   r   r   r   )
rH   hidden_statesrT   r   	key_layervalue_layerquery_layerattention_scoresattention_probscontext_layers
             r!   rW   TFMobileViTSelfAttention.call  s    XXm,Q/
--dhh}.EF	//

=0IJ//

=0IJ 99[N+jj8 ))9C ,,,J		/?]F

=RI[I[8\]r#   c                   U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r   r   )
rZ   r[   r\   r]   r   r2   r^   r   r   r   r`   s     r!   r^   TFMobileViTSelfAttention.build5  s	   ::
4$'3tzz/

  $d.>.>!?@ 04%1txx}}-dD,<,<=> .4$'3tzz/

  $d.>.>!?@ 0/ 4 0/ .- 0/s$   )E;)E($)E9
E%(
E69
F)
r   r   rZ   r   r   r   r   r   r   r   rI   r   r   r   rd   re   )r   rh   rd   rh   rf   r   rh   rT   rc   rd   rh   ri   )
r<   rk   rl   rm   r8   r   rW   r^   rn   ro   rp   s   @r!   r   r     s    ',2
0A Ar#   r   c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFMobileViTSelfOutputiD  c                   > [         TU ]  " S0 UD6  [        R                  R	                  USS9U l        [        R                  R                  UR                  5      U l        X l	        g Ndenser2   r6   )
r7   r8   r   r=   r   r   r   hidden_dropout_probr   r   rH   rI   r   rO   r;   s       r!   r8   TFMobileViTSelfOutput.__init__E  sP    "6"\\''''B
||++F,F,FG&r#   c                F    U R                  U5      nU R                  XS9nU$ rR   r   r   )rH   r   rT   s      r!   rW   TFMobileViTSelfOutput.callK  s&    

=1]Fr#   c                ,   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       g = fNTr   rZ   r[   r\   r]   r   r2   r^   r   r`   s     r!   r^   TFMobileViTSelfOutput.buildP  i    ::
4$'3tzz/

  $d.>.>!?@ 0/ 4//   )B
B)rZ   r   r   r   r   rf   r   ri   rj   rp   s   @r!   r   r   D  s    '
A Ar#   r   c                  J   ^  \ rS rSrSU 4S jjrS rSS	S jjrS
S jrSrU =r	$ )TFMobileViTAttentioniY  c                b   > [         TU ]  " S0 UD6  [        XSS9U l        [	        XSS9U l        g )N	attentionr   outputr6   )r7   r8   r   r   r   dense_outputr   s       r!   r8   TFMobileViTAttention.__init__Z  s0    "6"1&KX1&HUr#   c                    [         eri   NotImplementedError)rH   headss     r!   prune_heads TFMobileViTAttention.prune_heads_  s    !!r#   c                B    U R                  XS9nU R                  X2S9nU$ rR   )r   r   )rH   r   rT   self_outputsattention_outputs        r!   rW   TFMobileViTAttention.callb  s,    ~~m~G,,\,Mr#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr   r   )rZ   r[   r\   r]   r   r2   r^   r   r`   s     r!   r^   TFMobileViTAttention.buildg  s    ::
4d+7t~~223$$T* 44.:t00556!!''- 76 ; 43 76   C.C%
C"%
C3)r   rZ   r   r   rf   r   ri   )
r<   rk   rl   rm   r8   r   rW   r^   rn   ro   rp   s   @r!   r   r   Y  s    V
" 
	. 	.r#   r   c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFMobileViTIntermediateis  c                  > [         TU ]  " S0 UD6  [        R                  R	                  USS9U l        [        UR                  [        5      (       a  [        UR                  5      U l
        OUR                  U l
        X l        g r   )r7   r8   r   r=   r   r   rB   rE   rC   r   intermediate_act_fnr   rH   rI   r   intermediate_sizerO   r;   s        r!   r8    TFMobileViTIntermediate.__init__t  si    "6"\\''(9'H
f''--'89J9J'KD$'-'8'8D$&r#   c                J    U R                  U5      nU R                  U5      nU$ ri   )r   r   )rH   r   s     r!   rW   TFMobileViTIntermediate.call}  s&    

=100?r#   c                ,   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       g = fr   r   r`   s     r!   r^   TFMobileViTIntermediate.build  r   r   )rZ   r   r   r   rI   r   r   r   r   r   rd   re   )r   rh   rd   rh   ri   rj   rp   s   @r!   r   r   s  s    '
A Ar#   r   c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFMobileViTOutputi  c                   > [         TU ]  " S0 UD6  [        R                  R	                  USS9U l        [        R                  R                  UR                  5      U l        X0l	        g r   )
r7   r8   r   r=   r   r   r   r   r   r   r   s        r!   r8   TFMobileViTOutput.__init__  sP    "6"\\''''B
||++F,F,FG!2r#   c                N    U R                  U5      nU R                  XS9nX-   nU$ rR   r   )rH   r   input_tensorrT   s       r!   rW   TFMobileViTOutput.call  s.    

=1]F%4r#   c                ,   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       g = fr   )rZ   r[   r\   r]   r   r2   r^   r   r`   s     r!   r^   TFMobileViTOutput.build  si    ::
4$'3tzz/

  $d.D.D!EF 0/ 4//r   )rZ   r   r   r   r  rf   )r   rh   r
  rh   rT   rc   rd   rh   ri   rj   rp   s   @r!   r  r    s    3G Gr#   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFMobileViTTransformerLayeri  c                D  > [         TU ]  " S0 UD6  [        XSS9U l        [	        XUSS9U l        [        XUSS9U l        [        R                  R                  UR                  SS9U l        [        R                  R                  UR                  SS9U l        X l        g )	Nr   r   intermediater   layernorm_beforer4   r2   layernorm_afterr6   )r7   r8   r   r   r   r  r  mobilevit_outputr   r=   LayerNormalizationlayer_norm_epsr  r  r   r   s        r!   r8   $TFMobileViTTransformerLayer.__init__  s    "6"-fT3FIZaop 1&GX_g h % ? ?H]H]dv ? w$||>>vG\G\ct>u&r#   c                    U R                  U R                  U5      US9nX1-   nU R                  U5      nU R                  U5      nU R	                  XAUS9nU$ rR   )r   r  r  r  r  )rH   r   rT   r   layer_outputs        r!   rW    TFMobileViTTransformerLayer.call  se    >>$*?*?*NYa>b(8++M:((6,,\S[,\r#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       GN= f! , (       d  f       GNQ= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r  r  r  r  )rZ   r[   r\   r]   r   r2   r^   r  r  r  r   r  r`   s     r!   r^   !TFMobileViTTransformerLayer.build  s   ::
4d+7t~~223$$T* 44.:t00556!!''- 74+T2>t4499:%%++D1 ;4+T2>t4499:%%++T49I9I,JK ;4*D1=t33889$$**D$8H8H+IJ :9 > 43 76 ;: ;: :9s<   H.H
H&&)H8)I	
H
H#&
H58
I	
I)r   rZ   r   r  r  r  r  r  rf   r   ri   rj   rp   s   @r!   r  r    s    'K Kr#   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFMobileViTTransformeri  c           	        > [         TU ]  " S0 UD6  / U l        [        U5       HB  n[	        UU[        X!R                  -  5      SU 3S9nU R                  R                  U5        MD     g )Nr   )r   r   r2   r6   )r7   r8   r=   r   r  r   	mlp_ratior   )rH   rI   r   r   rO   r   transformer_layerr;   s          r!   r8   TFMobileViTTransformer.__init__  si    "6"z"A ;'"%k4D4D&D"EaS\	! KK01 #r#   c                8    U R                    H	  nU" XS9nM     U$ rR   r   )rH   r   rT   r   s       r!   rW   TFMobileViTTransformer.call  s      KKL(JM (r#   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = fr   r   r   s      r!   r^   TFMobileViTTransformer.build  r   r   r   )rI   r   r   r   r   r   rd   re   rf   r   ri   rj   rp   s   @r!   r  r    s    2
- -r#   r  c                     ^  \ rS rSrSr S	               S
U 4S jjjrSS jrSS jrSSS jjrSS jr	Sr
U =r$ )TFMobileViTLayeri  z;
MobileViT block: https://huggingface.co/papers/2110.02178
c           
       > [         T	U ]  " S0 UD6  UR                  U l        UR                  U l        US:X  a)  [        UUUUS:X  a  UOSUS:  a  US-  OSSS9U l        UnOS U l        [        UUUUR                  SS9U l	        [        UUUSSSSS	9U l
        [        XUS
S9U l        [        R                  R                  UR                   SS9U l        [        XUSSS9U l        [        USU-  UUR                  SS9U l        XPl        g )Nr   r   downsampling_layer)rF   rG   rJ   rL   r2   conv_kxkrw   Fconv_1x1)rF   rG   r,   rM   rN   r2   transformer)r   r   r2   	layernormr  conv_projectionfusionr6   )r7   r8   
patch_sizepatch_widthpatch_heightrr   r+  r%   conv_kernel_sizer,  r-  r  r.  r   r=   r  r  r/  r0  r1  r   )
rH   rI   rF   rG   rJ   r   r   rL   rO   r;   s
            r!   r8   TFMobileViTLayer.__init__  s?    	"6"!,,"--Q;&A')!)QvA*2Q,QA)'D# 'K&*D#,#$//
 -#$# 
 2

 88AVAV]h8i3+ST[l 
 +K$//
 'r#   c                   U R                   U R                  p2[        R                  " X#-  S5      n[        R                  " U5      S   n[        R                  " U5      S   n[        R                  " U5      S   n[        R                  " U5      S   n[        R                  " [        R
                  R                  Xc-  5      U-  S5      n	[        R                  " [        R
                  R                  Xr-  5      U-  S5      n
X:g  =(       d    X:g  nU(       a   [        R                  R                  XU
4SS9nX-  nX-  nX-  n[        R                  " U/ SQ5      n[        R                  " XU-  U-  X<U45      n[        R                  " U/ S	Q5      n[        R                  " XXU45      n[        R                  " U/ S
Q5      n[        R                  " XU-  X45      nXg4UUUUUUS.nUU4$ )Nint32r   r   r   r   bilinearsizemethodr   r   r   r   r   r   r   r   r   )	orig_sizer   channelsinterpolatenum_patchesnum_patches_widthnum_patches_height)r3  r4  r\   r   r   r   ceilimageresizer   r   )rH   rU   r3  r4  
patch_arear   orig_height
orig_widthr@  
new_height	new_widthrA  num_patch_widthnum_patch_heightrB  patches	info_dicts                    r!   	unfoldingTFMobileViTLayer.unfolding,  s   $($4$4d6G6G\WW[7A
XXh'*
hhx(+XXh'*
88H%a(WWRWW\\+*DETV]^
GGBGGLL)AB[PRYZ	-J1Jxxx96MV`aH $2%5&8 <<,7**H,/??`kl
 ,,w5**W8*&UV,,w5**WJ'>&VW &2$ &&!0"2
	 	!!r#   c                    U R                   U R                  pC[        X4-  5      nUS   nUS   nUS   nUS   n	US   n
[        R                  " XXXS45      n[        R
                  " USS9n[        R                  " XU-  U	-  XU45      n[        R
                  " US	S9n[        R                  " XXyU-  X-  45      n[        R
                  " US
S9nUS   (       a!  [        R                  R                  XS   SS9nU$ )Nr   r@  rB  rD  rC  r   r>  r   r   r   r   r   r   rA  r?  r9  r:  )r3  r4  r   r\   r   r   rF  rG  )rH   rO  rP  r3  r4  rH  r   r@  rB  rN  rM  rU   s               r!   foldingTFMobileViTLayer.foldingX  s   $($4$4d6G6G\34
|,
Z(.$%9:#$78 ::gJR'PQ<<|<::H,/??`kl
 <<|<::8-LoNkl
 <<|<]#xxx6LU_`Hr#   c                t   U R                   (       a  U R                  XS9nUnU R                  XS9nU R                  XS9nU R                  U5      u  pEU R	                  XBS9nU R                  U5      nU R                  XE5      nU R                  XS9nU R                  [        R                  " X1/SS9US9nU$ )NrS   r   r   )r+  r,  r-  rQ  r.  r/  rU  r0  r1  r\   concat)rH   rU   rT   r   rO  rP  s         r!   rW   TFMobileViTLayer.callt  s    ""..x.KH ======== "^^H5 ""7">..) <<3'''D;;ryy()=BGRZ;[r#   c                R   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       GNF= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN[= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)	NTr,  r-  r.  r/  r0  r1  r+  )rZ   r[   r\   r]   r,  r2   r^   r-  r.  r/  r   r0  r1  r+  r`   s     r!   r^   TFMobileViTLayer.build  s   ::
4T*6t}}112##D) 34T*6t}}112##D) 34-9t//445  &&t, 64d+7t~~223$$dD$2B2B%CD 44*D1=t33889$$**40 :44(4t{{//0!!$' 14-t4@t66;;<''--d3 =< A# 32 32 65 43 :9 10 =<sT   J-.J?
K&)K#K5+LL-
J<?
K
K #
K25
L
L
L&)rZ   r-  r,  r0  r+  r1  r   r/  r4  r3  r.  r   )rI   r   rF   r   rG   r   rJ   r   r   r   r   r   rL   r   rd   re   )rU   rh   rd   ztuple[tf.Tensor, dict])rO  rh   rP  dictrd   rh   rf   rg   ri   )r<   rk   rl   rm   r   r8   rQ  rU  rW   r^   rn   ro   rp   s   @r!   r)  r)    s     ?'?' ?' 	?'
 ?' ?' ?' ?' 
?' ?'B*"X824 4r#   r)  c                  \   ^  \ rS rSrSU 4S jjr   S         SS jjrS	S jrSrU =r$ )
TFMobileViTEncoderi  c                  > [         TU ]  " S0 UD6  Xl        / U l        S=p4UR                  S:X  a  SnSnOUR                  S:X  a  SnSn[        UUR                  S   UR                  S   SSSS9nU R                  R                  U5        [        UUR                  S   UR                  S	   S	S
SS9nU R                  R                  U5        [        UUR                  S	   UR                  S
   S	UR                  S   S	SS9nU R                  R                  U5        U(       a  US	-  n[        UUR                  S
   UR                  S   S	UR                  S   SUSS9n	U R                  R                  U	5        U(       a  US	-  n[        UUR                  S   UR                  S   S	UR                  S	   S
USS9n
U R                  R                  U
5        g )NFr   T   r   r   zlayer.0)rF   rG   rJ   r   r2   r   r   zlayer.1zlayer.2)rF   rG   rJ   r   r   r2      zlayer.3)rF   rG   rJ   r   r   rL   r2      zlayer.4r6   )
r7   r8   rI   r=   output_strider   neck_hidden_sizesr   r)  hidden_sizes)rH   rI   rO   dilate_layer_4dilate_layer_5rL   layer_1layer_2layer_3layer_4layer_5r;   s              r!   r8   TFMobileViTEncoder.__init__  s   "6" +0/1$!N!N!!R'!N+00311!4
 	7#+00311!4
 	7#"00311!4++A.
 	7#MH"00311!4++A.	
 	7#MH"00311!4++A.	
 	7#r#   c                    U(       a  SOS n[        U R                  5       H  u  pgU" XS9nU(       d  M  XQ4-   nM     U(       d  [        S X4 5       5      $ [        XS9$ )Nr6   rS   c              3  .   #    U  H  oc  M  Uv   M     g 7fri   r6   ).0vs     r!   	<genexpr>*TFMobileViTEncoder.call.<locals>.<genexpr>  s     X$Fq$Fs   	)last_hidden_stater   )	enumerater=   tupler
   )rH   r   output_hidden_statesreturn_dictrT   all_hidden_statesr   r   s           r!   rW   TFMobileViTEncoder.call  sc     #7BD(5OA(JM##$58H$H!	  6 X]$FXXX =bbr#   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = fr   r   r   s      r!   r^   TFMobileViTEncoder.build  r   r   )rZ   rI   r=   rI   r   rd   re   )FTF)
r   rh   rw  rc   rx  rc   rT   rc   rd   ztuple | TFBaseModelOutputri   rj   rp   s   @r!   r^  r^    sZ    L$b &+ c c #c 	c
 c 
#c(- -r#   r^  c                  v   ^  \ rS rSr\rSSU 4S jjjrS r\    S	         S
S jj5       r	SS jr
SrU =r$ )TFMobileViTMainLayeri  c           	     p  > [         TU ]  " S0 UD6  Xl        X l        [	        UUR
                  UR                  S   SSSS9U l        [        USS9U l	        U R                  (       a,  [	        UUR                  S   UR                  S	   S
SS9U l
        [        R                  R                  SSS9U l        g )Nr   r   r   	conv_stem)rF   rG   r,   rJ   r2   encoderr   rb     r   conv_1x1_exprw   channels_firstpooler)data_formatr2   r6   )r7   r8   rI   expand_outputr%   num_channelsrd  r  r^  r  r  r   r=   GlobalAveragePooling2Dr  )rH   rI   r  rO   r;   s       r!   r8   TFMobileViTMainLayer.__init__  s    "6"*-++11!4
 *&yA 4"44Q7#55a8#!D ll99FV]e9fr#   c                    [         e)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
r   )rH   heads_to_prunes     r!   _prune_heads!TFMobileViTMainLayer._prune_heads4  s
    
 "!r#   c                   Ub  UOU R                   R                  nUb  UOU R                   R                  n[        R                  " USS9nU R                  XS9nU R                  XRX4S9nU R                  (       a=  U R                  US   5      n[        R                  " U/ SQS9nU R                  U5      nOUS   n[        R                  " U/ SQS9nS nU(       dB  Ub  Xx4OU4n	U R                  (       d!  USS  n
[        S U
S    5       5      n
U
4n
X-   $ XSS  -   $ U(       a  [        S	 US    5       5      n[        UUU(       a  WS
9$ UR                  S
9$ )NrT  r   rS   rw  rx  rT   r   r=  r   c              3  L   #    U  H  n[         R                  " US S9v   M     g7fr=  r   Nr\   r   rp  hs     r!   rr  ,TFMobileViTMainLayer.call.<locals>.<genexpr>g  s      2@\1BLL6@\   "$c              3  L   #    U  H  n[         R                  " US S9v   M     g7fr  r  r  s     r!   rr  r  q  s     !aN`",,q|"DN`r  )rt  pooler_outputr   )rI   rw  use_return_dictr\   r   r  r  r  r  r  rv  r   r   )rH   pixel_valuesrw  rx  rT   embedding_outputencoder_outputsrt  pooled_outputr   remaining_encoder_outputsr   s               r!   rW   TFMobileViTMainLayer.call;  s    %9$D $++JjJj 	 &1%<k$++B]B]
 ||L|D>>,>J,,U` ' 
  $ 1 1/!2D E !#->\ R !KK(9:M / 2 "->\ R M;H;T'7[lZnF %%,;AB,?),1 2@YZ[@\2 -) .G,H)99 333  !!ao^_N`!aaM+/'+?-
 	
 FUEbEb
 	
r#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bP  [        R                  " U R                  R
                  5         U R                  R                  / SQ5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       GN'= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr  r  r  NNNNr  )
rZ   r[   r\   r]   r  r2   r^   r  r  r  r`   s     r!   r^   TFMobileViTMainLayer.buildy  s4   ::
4d+7t~~223$$T* 44D)5t||001""4( 244(4t{{//0!!":; 14.:t00556!!''- 76 ; 43 21 10 76s0   F.F 
F1(G
F 
F.1
F?
G)rZ   rI   r  r  r  r  r  TrI   r   r  rc   NNNF
r  tf.Tensor | Nonerw  bool | Nonerx  r  rT   rc   rd   z/tuple[tf.Tensor] | TFBaseModelOutputWithPoolingri   )r<   rk   rl   rm   r   config_classr8   r  r   rW   r^   rn   ro   rp   s   @r!   r  r    sv    "Lg g6"  *.,0#';
&;
 *;
 !	;

 ;
 
9;
 ;
z. .r#   r  c                  $    \ rS rSrSr\rSrSrSr	g)TFMobileViTPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
	mobilevitr  r6   N)
r<   rk   rl   rm   r   r   r  base_model_prefixmain_input_namern   r6   r#   r!   r  r    s    
 #L#$Or#   r  a	  
    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `pixel_values` only and nothing else: `model(pixel_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([pixel_values, attention_mask])` or `model([pixel_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"pixel_values": pixel_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileViTConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        pixel_values (`np.ndarray`, `tf.Tensor`, `list[tf.Tensor]`, `dict[str, tf.Tensor]` or `dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`MobileViTImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
zWThe bare MobileViT model outputting raw hidden-states without any specific head on top.c            
         ^  \ rS rSrSS	U 4S jjjr\\" \5      \" \	\
\S\S9    S
         SS jj5       5       5       rSS jrSrU =r$ )TFMobileViTModeli  c                d   > [         TU ]  " U/UQ70 UD6  Xl        X l        [	        XSS9U l        g )Nr  r  r2   )r7   r8   rI   r  r  r  )rH   rI   r  inputsrO   r;   s        r!   r8   TFMobileViTModel.__init__  s4    3&3F3*-fXcdr#   vision)
checkpointoutput_typer  modalityexpected_outputc                &    U R                  XX4S9nU$ rR   )r  )rH   r  rw  rx  rT   r   s         r!   rW   TFMobileViTModel.call  s      Kcr#   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTr  )rZ   r[   r\   r]   r  r2   r^   r`   s     r!   r^   TFMobileViTModel.build  s^    ::
4d+7t~~223$$T* 43 833s   A88
B)rZ   rI   r  r  r  r  r  r  ri   )r<   rk   rl   rm   r8   r   r   MOBILEVIT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPErW   r^   rn   ro   rp   s   @r!   r  r    s    
e e *+EF&0$. *.,0#'& * !	
  
9 G + +r#   r  z
    MobileViT model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c            	         ^  \ rS rSrSU 4S jjr\\" \5      \" \	\
\\S9     S           S	S jj5       5       5       rS
S jrSrU =r$ )!TFMobileViTForImageClassificationi  c                p  > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USS9U l        [
        R                  R                  UR                  5      U l	        UR                  S:  a(  [
        R                  R                  UR                  SS9O[        R                  U l        Xl        g )Nr  r   r   
classifier)r7   r8   
num_labelsr  r  r   r=   r   classifier_dropout_probr   r   r\   identityr  rI   )rH   rI   r  rO   r;   s       r!   r8   *TFMobileViTForImageClassification.__init__  s    3&3F3 ++-f;G ||++F,J,JKHNHYHY\]H]ELLv00|Dcecncn 	 r#   )r  r  r  r  c                J   Ub  UOU R                   R                  nU R                  XXES9nU(       a  UR                  OUS   nU R	                  U R                  XuS95      nUc  SOU R                  X8S9n	U(       d  U4USS -   n
U	b  U	4U
-   $ U
$ [        XUR                  S9$ )a[  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   rS   )labelslogitsr   lossr  r   )	rI   r  r  r  r  r   hf_compute_lossr   r   )rH   r  rw  r  rx  rT   outputsr  r  r  r   s              r!   rW   &TFMobileViTForImageClassification.call  s    , &1%<k$++B]B]..Q\ ! 
 2=--'!*m!OP~t4+?+?v+?+]Y,F)-)9TGf$EvE54^e^s^sttr#   c                X   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b  [        U R                  S5      (       ai  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  S   /5        S S S 5        g g g ! , (       d  f       N= f! , (       d  f       g = f)NTr  r  r2   r   )rZ   r[   r\   r]   r  r2   r^   r_   r  rI   rd  r`   s     r!   r^   'TFMobileViTForImageClassification.build3  s    ::
4d+7t~~223$$T* 44t,8t//]]4??#7#78OO))4t{{7T7TUW7X*YZ 98 0 9 43 98s   D
	6D

D
D))rZ   r  rI   r   r  r  r}  NNNNF)r  r  rw  r  r  r  rx  r  rT   r  rd   z.tuple | TFImageClassifierOutputWithNoAttentionri   )r<   rk   rl   rm   r8   r   r   r  r   _IMAGE_CLASS_CHECKPOINTr   r  _IMAGE_CLASS_EXPECTED_OUTPUTrW   r^   rn   ro   rp   s   @r!   r  r    s     *+EF*:$4	 *.,0#'#' %u&u *u !	u
 !u u 
8u G u>
[ 
[r#   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFMobileViTASPPPoolingi@  c                   > [         TU ]  " S0 UD6  [        R                  R	                  SSS9U l        [        UUUSSSSSS9U l        g )	NTglobal_pool)keepdimsr2   r   relur-  )rF   rG   r,   rJ   rM   rN   r2   r6   )r7   r8   r   r=   r  r  r%   r-  )rH   rI   rF   rG   rO   r;   s        r!   r8   TFMobileViTASPPPooling.__init__A  sT    "6" <<>>S`>a,#%"!	
r#   c                    [        U5      SS nU R                  U5      nU R                  XS9n[        R                  R                  XSS9nU$ )Nr   r   rS   r9  r:  )r   r  r-  r\   rF  rG  )rH   rU   rT   spatial_sizes       r!   rW   TFMobileViTASPPPooling.callQ  sN    !(+Ab1##H-====88??8z?Rr#   c                   U R                   (       a  g SU l         [        U SS 5      bP  [        R                  " U R                  R
                  5         U R                  R                  / SQ5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r  r-  )rZ   r[   r\   r]   r  r2   r^   r-  r`   s     r!   r^   TFMobileViTASPPPooling.buildX  s    ::
4-9t//445  &&'?@ 64T*6t}}112##D) 32 7 65 32s   C0C'
C$'
C5)rZ   r-  r  )rI   r   rF   r   rG   r   rd   re   rf   rg   ri   rj   rp   s   @r!   r  r  @  s    
 	* 	*r#   r  c                  H   ^  \ rS rSrSrSU 4S jjrSS	S jjrS
S jrSrU =r	$ )TFMobileViTASPPid  z{
ASPP module defined in DeepLab papers: https://huggingface.co/papers/1606.00915, https://huggingface.co/papers/1706.05587
c                  > [         T	U ]  " S0 UD6  UR                  S   nUR                  n[	        UR
                  5      S:w  a  [        S5      e/ U l        [        UUUSSSS9nU R                  R                  U5        U R                  R                  [        UR
                  5       VVs/ s H  u  pg[        UUUSUSSUS-    3S	9PM     snn5        [        XUS[	        UR
                  5      S-    3S
9nU R                  R                  U5        [        USU-  USSSS9U l        [        R                  R!                  UR"                  5      U l        g s  snnf )Nr   z"Expected 3 values for atrous_ratesr   r  zconvs.0rz   zconvs.)rF   rG   r,   rL   rN   r2   r   rb  projectr6   )r7   r8   rd  aspp_out_channelslenatrous_ratesr?   convsr%   r   extendru  r  r  r   r=   r   aspp_dropout_probr   )
rH   rI   rO   rF   rG   in_projectionr   rate
pool_layerr;   s
            r!   r8   TFMobileViTASPP.__init__i  so   "6"..r2//v""#q(ABB
,#%!
 	

-(

  ))<)<=  >GA % +!- !!#)!!a%)  >	
 ,fSATAT=UXY=Y<Z4[

 	

*%+L(%!
 ||++F,D,DE9s   .!E
c                    [         R                  " U/ SQS9n/ nU R                   H  nUR                  U" XS95        M     [         R                  " USS9nU R                  X2S9nU R                  XRS9nU$ )NrT  r   rS   r   r   )r\   r   r  r   rX  r  r   )rH   rU   rT   pyramidconvpooled_featuress         r!   rW   TFMobileViTASPP.call  sn     <<|<JJDNN4<= ))G"-,,w,B,,,Jr#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       Nk= f! , (       d  f       Mc  = f)NTr  r  )rZ   r[   r\   r]   r  r2   r^   r  )rH   ra   r  s      r!   r^   TFMobileViTASPP.build  s    ::
4D)5t||001""4( 24$'3

]]499-JJt$ .- # 4 21 .-s   C3C$
C!$
C3	)rZ   r  r   r  r}  rf   rg   ri   r   rp   s   @r!   r  r  d  s    2Fh
% 
%r#   r  c                  H   ^  \ rS rSrSrSU 4S jjrSS	S jjrS
S jrSrU =r	$ )TFMobileViTDeepLabV3i  zB
DeepLabv3 architecture: https://huggingface.co/papers/1706.05587
c                   > [         TU ]  " S0 UD6  [        USS9U l        [        R
                  R                  UR                  5      U l        [        UUR                  UR                  SSSSSS9U l        g )	Nasppr   r   FTr  )rF   rG   r,   rM   rN   rK   r2   r6   )r7   r8   r  r  r   r=   r   r  r   r%   r  r  r  rH   rI   rO   r;   s      r!   r8   TFMobileViTDeepLabV3.__init__  sm    "6"#F8	||++F,J,JK.00**# 	
r#   c                h    U R                  US   US9nU R                  X2S9nU R                  X2S9nU$ )Nr   rS   )r  r   r  )rH   r   rT   rU   s       r!   rW   TFMobileViTDeepLabV3.call  s>    99]2.9B<<<<??8??r#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r  )rZ   r[   r\   r]   r  r2   r^   r  r`   s     r!   r^   TFMobileViTDeepLabV3.build  s    ::
4&2tyy~~.		% /4t,8t334%%d+ 54 9 /. 54r   )r  rZ   r  r   r}  rf   r   ri   r   rp   s   @r!   r  r    s    
"	, 	,r#   r  zX
    MobileViT model with a semantic segmentation head on top, e.g. for Pascal VOC.
    c                     ^  \ rS rSrSU 4S jjrS r\\" \5      \	" \
\S9     S	           S
S jj5       5       5       rSS jrSrU =r$ )"TFMobileViTForSemanticSegmentationi  c                   > [         TU ]  " U40 UD6  UR                  U l        [        USSS9U l        [        USS9U l        g )NFr  r  segmentation_headr   )r7   r8   r  r  r  r  r  r  s      r!   r8   +TFMobileViTForSemanticSegmentation.__init__  sC    *6* ++-fEP[\!5fCV!Wr#   c                   ^ ^ [        U5      SS  n[        R                  R                  XSS9n[        R
                  R                  SSS9mUU 4S jnU" X$5      $ )Nr   r9  r:  Tnone)from_logits	reductionc                  > T" X5      n[         R                  " U TR                  R                  :g  UR                  S9nX#-  n[         R
                  " U5      [         R
                  " U5      -  n[         R                  " US5      $ )Nr   r   )r\   r   rI   semantic_loss_ignore_indexr   
reduce_sumr   )realpredunmasked_lossmaskmasked_lossreduced_masked_lossloss_fctrH   s         r!   r  GTFMobileViTForSemanticSegmentation.hf_compute_loss.<locals>.masked_loss  sl    $T0M7744;;#I#IIQ^QdQdeD'.K #%--"<r}}T?R"R::1488r#   )r   r\   rF  rG  r   lossesSparseCategoricalCrossentropy)rH   r  r  label_interp_shapeupsampled_logitsr  r  s   `     @r!   r  2TFMobileViTForSemanticSegmentation.hf_compute_loss  s[     (/388??6S]?^<<==$Z`=a	9 644r#   )r  r  c                   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb%  U R                   R                  S:  d  [	        S5      eU R                  USUUS9nU(       a  UR                  OUS   nU R                  XuS9nSn	Ub  U R                  XS9n	[        R                  " U/ SQS	9nU(       d%  U(       a
  U4USS -   n
O	U4US
S -   n
U	b  U	4U
-   $ U
$ [        U	UU(       a  UR                  S9$ SS9$ )a  
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
    Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

Returns:

Examples:

```python
>>> from transformers import AutoImageProcessor, TFMobileViTForSemanticSegmentation
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> image_processor = AutoImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> model = TFMobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")

>>> inputs = image_processor(images=image, return_tensors="tf")

>>> outputs = model(**inputs)

>>> # logits are of shape (batch_size, num_labels, height, width)
>>> logits = outputs.logits
```Nr   z/The number of labels should be greater than oneTr  rS   )r  r  r=  r   r   r  )rI   rw  r  r  r?   r  r   r  r  r\   r   r   )rH   r  r  rw  rx  rT   r  encoder_hidden_statesr  r  r   s              r!   rW   'TFMobileViTForSemanticSegmentation.call  sE   N %9$D $++JjJj 	 &1%<k$++B]B]dkk&<&<q&@NOO..!%#	 ! 
 :E 5 5'RS*''(='Q''v'ED f<8# WQR[0 WQR[0)-)9TGf$EvE73G'//
 	
 NR
 	
r#   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r  )rZ   r[   r\   r]   r  r2   r^   r  r`   s     r!   r^   (TFMobileViTForSemanticSegmentation.buildO  s    ::
4d+7t~~223$$T* 44,d3?t55::;&&,,T2 <; @ 43 <;r   )rZ   r  r  r  r}  r  )r  r  r  r  rw  r  rx  r  rT   rc   rd   z0tuple | TFSemanticSegmenterOutputWithNoAttentionri   )r<   rk   rl   rm   r8   r  r   r   r  r	   r   r  rW   r^   rn   ro   rp   s   @r!   r  r    s    X5( *+EF+Sbqr *.#',0#'I
&I
 !I
 *	I

 !I
 I
 
:I
 s G I
V	3 	3r#   r  )r  r  r  r  )r   N)r   r   r   r   r   z
int | Nonerd   r   )@r   
__future__r   
tensorflowr\   activations_tfr   
file_utilsr   r   r   r	   modeling_tf_outputsr
   r   r   r   modeling_tf_utilsr   r   r   r   r   tf_utilsr   r   utilsr   configuration_mobilevitr   
get_loggerr<   r9   r  r  r  r  r  r"   r=   Layerr%   rr   r   r   r   r   r   r  r  r  r)  r^  r  r  MOBILEVIT_START_DOCSTRINGr  r  r  r  r  r  r  __all__r6   r#   r!   <module>r0     s  " & "  /    3  4 
		H	% $ . '  2 1 JT5<<-- JTZ=,%,,"4"4 =,@$- 2 2 $-N@Au||11 @AFAELL.. A*.5<<-- .4Aell00 A0G** G,%K%,,"4"4 %KP-U\\// -:4u||)) 4Dj-++ j-Z r.5<<-- r. r.j%!2 %' R   ]!+1 !+	!+H  ?[(BD` ?[?[D!*U\\// !*HP%ell(( P%f%,5<<-- %,P  	s3)C s3s3lr#   