
    bCi                    $   S r SSKJr  SSKrSSKrSSKJr  SSKr	SSK
Jr  SSKJrJrJrJr  SSKJrJrJrJrJrJr  SS	KJrJr  SS
KJrJrJrJrJ r J!r!  SSK"J#r#  \ RH                  " \%5      r&Sr'Sr(/ SQr)Sr*Sr+\ " S S\5      5       r, " S S\RZ                  R\                  5      r/ " S S\RZ                  R\                  5      r0 " S S\RZ                  R\                  5      r1 " S S\RZ                  R\                  5      r2 " S S\RZ                  R\                  5      r3 " S S\RZ                  R\                  5      r4 " S S \RZ                  R\                  5      r5 " S! S"\RZ                  R\                  5      r6 " S# S$\RZ                  R\                  5      r7\ " S% S&\RZ                  R\                  5      5       r8 " S' S(\5      r9S)r:S*r;\" S+\:5       " S, S-\95      5       r< " S. S/\RZ                  R\                  5      r= " S0 S1\RZ                  R\                  5      r> " S2 S3\RZ                  R\                  5      r?\" S4\:5       " S5 S6\95      5       r@\" S7\:5       " S8 S9\9\5      5       rA\" S:\:5       " S; S<\95      5       rB/ S=QrCg)>zTensorFlow DeiT model.    )annotationsN)	dataclass   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPoolingTFImageClassifierOutputTFMaskedImageModelingOutput)TFPreTrainedModelTFSequenceClassificationLossget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
DeiTConfigr   z(facebook/deit-base-distilled-patch16-224)r      i   ztabby, tabby catc                  `    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S\S
'   Srg)-TFDeiTForImageClassificationWithTeacherOutputB   a  
Output type of [`DeiTForImageClassificationWithTeacher`].

Args:
    logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Prediction scores as the average of the cls_logits and distillation logits.
    cls_logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
        class token).
    distillation_logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
        Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
        distillation token).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus
        the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
Ntf.Tensor | Nonelogits
cls_logitsdistillation_logitsztuple[tf.Tensor] | Nonehidden_states
attentions )__name__
__module____qualname____firstlineno____doc__r    __annotations__r!   r"   r#   r$   __static_attributes__r%       c/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/deit/modeling_tf_deit.pyr   r   B   sA    ,  $F##'J ',0)0-1M*1*.J'.r-   r   c                  n   ^  \ rS rSrSrSS	U 4S jjjrS
S jrSS jr   S         SS jjrSr	U =r
$ )TFDeiTEmbeddingsa   zn
Construct the CLS token, distillation token, position and patch embeddings. Optionally, also the mask token.
c                   > [         TU ]  " S0 UD6  Xl        X l        [	        USS9U l        [        R                  R                  UR                  SS9U l
        g )Npatch_embeddings)confignamedropoutr5   r%   )super__init__r4   use_mask_tokenTFDeiTPatchEmbeddingsr3   r   layersDropouthidden_dropout_probr6   )selfr4   r:   kwargs	__class__s       r.   r9   TFDeiTEmbeddings.__init__f   sO    "6", 5VJ\ ]||++F,F,FY+Wr-   c                   U R                  SSU R                  R                  4[        R                  R                  5       SSS9U l        U R                  SSU R                  R                  4[        R                  R                  5       SSS9U l        S U l        U R                  (       aJ  U R                  SSU R                  R                  4[        R                  R                  5       SSS9U l        U R                  R                  nU R                  SUS-   U R                  R                  4[        R                  R                  5       SSS9U l        U R                  (       a  g SU l        [        U S	S 5      bN  [        R                   " U R                  R"                  5         U R                  R%                  S 5        S S S 5        [        U S
S 5      bO  [        R                   " U R&                  R"                  5         U R&                  R%                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)Nr   T	cls_token)shapeinitializer	trainabler5   distillation_token
mask_token   position_embeddingsr3   r6   )
add_weightr4   hidden_sizer   initializerszerosrD   rH   rI   r:   r3   num_patchesrK   builtgetattrtf
name_scoper5   buildr6   )r?   input_shaperP   s      r.   rU   TFDeiTEmbeddings.buildm   s   a001**002	 ) 
 #'//a001**002%	 #2 #
 "oo!T[[445!..446!	 . DO ++77#'??kAot{{'>'>?**002&	 $3 $
  ::
4+T2>t4499:%%++D1 ;4D)5t||001""4( 21 6 ;: 21s   +H-H>-
H;>
Ic           
        UR                   S   S-
  nU R                  R                   S   S-
  nXE:X  a  X#:X  a  U R                  $ U R                  S S 2SS S 24   nU R                  S S 2SS S 24   nU R                  S S 2SS 2S S 24   nUR                   S   n	X R                  R                  -  n
X0R                  R                  -  nU
S-   US-   p[        R
                  " US[        [        R                  " U5      5      [        [        R                  " U5      5      U	45      n[        R                  R                  U[        U
5      [        U5      4SS9n[        R                  " U/ SQS	9n[        R
                  " USSU	45      n[        R                  " [        R                  " USS
9[        R                  " USS
9U/SS
9$ )Nr   rJ   r   g?bicubic)sizemethodr   rJ   r   r   permaxis)rE   rK   r4   
patch_sizerS   reshapeintmathsqrtimageresize	transposeconcatexpand_dims)r?   
embeddingsheightwidthrP   num_positionsclass_pos_embeddist_pos_embedpatch_pos_embeddimh0w0s               r.   interpolate_pos_encoding)TFDeiTEmbeddings.interpolate_pos_encoding   s    &&q)A-0066q9A='FO+++221a7;11!Q':221ab!8<r"{{---kk,,, c28B**aTYY}%=!>DIImD\@]_bc
 ((///R#b'@R[d/e,,\J**_q"clCyy^^O!4bnn^Z[6\^mnuv
 	
r-   c                L   UR                   u  pVpuU R                  U5      n[        U5      u  pnUbc  [        R                  " U R
                  XS/5      n[        R                  " USS9n[        R                  " XR                  S9nUSU-
  -  X-  -   n[        R                  " U R                  U	SS9n[        R                  " U R                  U	SS9n[        R                  " XU4SS9nU R                  nU(       a  U R                  XU5      nX-   nU R                  XS9nU$ )	Nr   rY   r`   dtypeg      ?r   )repeatsra   training)rE   r3   r   rS   tilerI   rk   castrz   repeatrD   rH   rj   rK   rv   r6   )r?   pixel_valuesbool_masked_posr}   rv   _rm   rn   rl   
batch_size
seq_lengthmask_tokensmask
cls_tokensdistillation_tokensposition_embeddings                   r.   callTFDeiTEmbeddings.call   s    +005**<8
$.z$:!
&''$//JA3NOK>>/;D774'8'89D#sTz2[5GGJYYt~~zJ
 ii(?(?Z[\YY
LSTU
!55#!%!>!>zSX!Y4
\\*\@
r-   )	rQ   rD   r4   rH   r6   rI   r3   rK   r:   F)r4   r   r:   boolreturnNoneN)rl   	tf.Tensorrm   rd   rn   rd   r   r   )NFF)
r   r   r   r   r}   r   rv   r   r   r   )r&   r'   r(   r)   r*   r9   rU   rv   r   r,   __classcell__rA   s   @r.   r0   r0   a   sd    X X%)N
< -1). * 	
 #' 
 r-   r0   c                  D   ^  \ rS rSrSrSU 4S jjrSS jrS	S jrSrU =r	$ )
r;      z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                  > [         TU ]  " S0 UD6  UR                  UR                  pCUR                  UR
                  pe[        U[        R                  R                  5      (       a  UOX34n[        U[        R                  R                  5      (       a  UOXD4nUS   US   -  US   US   -  -  nX0l        X@l        XPl        Xpl
        [        R                  R                  XdUSS9U l        g )Nr   r   
projection)kernel_sizestridesr5   r%   )r8   r9   
image_sizerb   num_channelsrM   
isinstancecollectionsabcIterablerP   r   r<   Conv2Dr   )	r?   r4   r@   r   rb   r   rM   rP   rA   s	           r.   r9   TFDeiTPatchEmbeddings.__init__   s    "6"!'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&,,--, . 
r-   c                    [        U5      u  p#pE[        R                  " 5       (       a  XPR                  :w  a  [	        S5      eU R                  U5      n[        U5      u  p#pE[        R                  " XbX4-  U45      nU$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   rS   executing_eagerlyr   
ValueErrorr   rc   )r?   r   r   rm   rn   r   xs          r.   r   TFDeiTPatchEmbeddings.call   su    2<\2J/
E!!l6G6G&Gw  OOL)2<Q-/
EJJqv~|DEr-   c                .   U R                   (       a  g SU l         [        U SS 5      b]  [        R                  " U R                  R
                  5         U R                  R                  S S S U R                  /5        S S S 5        g g ! , (       d  f       g = f)NTr   )rQ   rR   rS   rT   r   r5   rU   r   r?   rV   s     r.   rU   TFDeiTPatchEmbeddings.build   sm    ::
4t,8t334%%tT49J9J&KL 54 944s   *B
B)rQ   r   r   rP   rb   r   r4   r   r   r   )r   r   r   r   r   
r&   r'   r(   r)   r*   r9   r   rU   r,   r   r   s   @r.   r;   r;      s    
"
M Mr-   r;   c                  b   ^  \ rS rSrSU 4S jjrSS jr S	         S
S jjrSS jrSrU =r	$ )TFDeiTSelfAttention   c                  > [         TU ]  " S
0 UD6  UR                  UR                  -  S:w  a&  [	        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " U R                  5      U l
        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R'                  UR(                  S	9U l        Xl        g )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()queryunitskernel_initializerr5   keyvaluerater%   )r8   r9   rM   num_attention_headsr   rd   attention_head_sizeall_head_sizere   rf   sqrt_att_head_sizer   r<   Denser   initializer_ranger   r   r   r=   attention_probs_dropout_probr6   r4   r?   r4   r@   rA   s      r.   r9   TFDeiTSelfAttention.__init__   s   "6" : ::a?#F$6$6#7 8''-'A'A&B!E 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP"&))D,D,D"E\\''$$IaIa9bip ( 

 <<%%$$IaIa9bin & 
 \\''$$IaIa9bip ( 

 ||++1T1T+Ur-   c                    [         R                  " XSU R                  U R                  4S9n[         R                  " U/ SQS9$ )NrY   tensorrE   r   rJ   r   r   r^   )rS   rc   r   r   ri   )r?   r   r   s      r.   transpose_for_scores(TFDeiTSelfAttention.transpose_for_scores  s9    6b$BZBZ\`\t\t1uv ||F66r-   c                   [        U5      S   nU R                  US9nU R                  US9nU R                  US9nU R	                  Xe5      n	U R	                  Xu5      n
U R	                  X5      n[
        R                  " XSS9n[
        R                  " U R                  UR                  S9n[
        R                  " X5      n[        USS9nU R                  XS9nUb  [
        R                  " X5      n[
        R                  " X5      n[
        R                  " U/ S	QS
9n[
        R                  " XSU R                   4S9nU(       a  X4nU$ U4nU$ )Nr   inputsT)transpose_bry   rY   )r    ra   r   r}   r   r^   r   )r   r   r   r   r   rS   matmulr   r   rz   divider   r6   multiplyri   rc   r   )r?   r#   	head_maskoutput_attentionsr}   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresdkattention_probsattention_outputoutputss                    r.   r   TFDeiTSelfAttention.call  sQ     .q1
 JJmJ<((-(8 JJmJ<//0AN--oJ	//0AN 99[NWWT,,4D4J4JK99%5: )0@rJ ,,o,Q   kk/EO99_B<<(8|L ::-=RTVZVhVhEij9J#5 RbPcr-   c                P   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r   r   )rQ   rR   rS   rT   r   r5   rU   r4   rM   r   r   r   s     r.   rU   TFDeiTSelfAttention.buildG  s   ::
4$'3tzz/

  $dkk.E.E!FG 04%1txx}}-dDKK,C,CDE .4$'3tzz/

  $dkk.E.E!FG 0/ 4 0/ .- 0/s$   3E53F83F5
F
F
F%)
r   r   rQ   r4   r6   r   r   r   r   r   r4   r   )r   r   r   rd   r   r   r   
r#   r   r   r   r   r   r}   r   r   tuple[tf.Tensor]r   )
r&   r'   r(   r)   r9   r   r   rU   r,   r   r   s   @r.   r   r      sT    47 ' ' '  	'
 ' 
'RH Hr-   r   c                  H   ^  \ rS rSrSrSU 4S jjrSS	S jjrS
S jrSrU =r	$ )TFDeiTSelfOutputiW  z
The residual connection is defined in TFDeiTLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
c                  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        R                  R                  UR                  S9U l        Xl        g Ndenser   r   r%   r8   r9   r   r<   r   rM   r   r   r   r=   r>   r6   r4   r   s      r.   r9   TFDeiTSelfOutput.__init__]  m    "6"\\''$$IaIa9bip ( 

 ||++1K1K+Lr-   c                B    U R                  US9nU R                  XS9nU$ Nr   r   r   r6   r?   r#   input_tensorr}   s       r.   r   TFDeiTSelfOutput.callf  s(    

-
8MMr-   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fNTr   	rQ   rR   rS   rT   r   r5   rU   r4   rM   r   s     r.   rU   TFDeiTSelfOutput.buildl  m    ::
4$'3tzz/

  $dkk.E.E!FG 0/ 4//   3B
BrQ   r4   r   r6   r   r   r#   r   r   r   r}   r   r   r   r   r   r   s   @r.   r   r   W  s    
H Hr-   r   c                  ^   ^  \ rS rSrSU 4S jjrS r S         S	S jjrS
S jrSrU =r	$ )TFDeiTAttentioniv  c                b   > [         TU ]  " S0 UD6  [        USS9U l        [	        USS9U l        g )N	attentionr7   outputr%   )r8   r9   r   self_attentionr   dense_outputr   s      r.   r9   TFDeiTAttention.__init__w  s1    "6"1&{K,V(Cr-   c                    [         er   NotImplementedError)r?   headss     r.   prune_headsTFDeiTAttention.prune_heads}  s    !!r-   c                ^    U R                  XX4S9nU R                  US   XS9nU4USS  -   nU$ )Nr#   r   r   r}   r   r#   r   r}   r   )r   r   )r?   r   r   r   r}   self_outputsr   r   s           r.   r   TFDeiTAttention.call  s[     **&O` + 
  ,,&q/ - 
 $%QR(88r-   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr   r   )rQ   rR   rS   rT   r   r5   rU   r   r   s     r.   rU   TFDeiTAttention.build  s    ::
4)40<t22778##))$/ 94.:t00556!!''- 76 ; 98 76   C.C%
C"%
C3)rQ   r   r   r   r   )
r   r   r   r   r   r   r}   r   r   r   r   )
r&   r'   r(   r)   r9   r  r   rU   r,   r   r   s   @r.   r   r   v  sR    D"    	
  
"	. 	.r-   r   c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFDeiTIntermediatei  c                J  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        UR                  [        5      (       a  [        UR                  5      U l        OUR                  U l        Xl        g )Nr   r   r%   )r8   r9   r   r<   r   intermediate_sizer   r   r   r   
hidden_actstrr   intermediate_act_fnr4   r   s      r.   r9   TFDeiTIntermediate.__init__  s    "6"\\''**vOgOg?hov ( 

 f''--'89J9J'KD$'-'8'8D$r-   c                F    U R                  US9nU R                  U5      nU$ )Nr   )r   r  )r?   r#   s     r.   r   TFDeiTIntermediate.call  s(    

-
800?r-   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr   r   r   s     r.   rU   TFDeiTIntermediate.build  r   r   )rQ   r4   r   r  r   r#   r   r   r   r   	r&   r'   r(   r)   r9   r   rU   r,   r   r   s   @r.   r  r    s    H Hr-   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFDeiTOutputi  c                  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        R                  R                  UR                  S9U l        Xl        g r   r   r   s      r.   r9   TFDeiTOutput.__init__  r   r-   c                J    U R                  US9nU R                  XS9nX-   nU$ r   r   r   s       r.   r   TFDeiTOutput.call  s0    

-
8MM%4r-   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr   )	rQ   rR   rS   rT   r   r5   rU   r4   r  r   s     r.   rU   TFDeiTOutput.build  sm    ::
4$'3tzz/

  $dkk.K.K!LM 0/ 4//r   r   r   r   r   r   r  r   s   @r.   r  r    s    N Nr-   r  c                  \   ^  \ rS rSrSrSU 4S jjr S         S	S jjrS
S jrSrU =r	$ )TFDeiTLayeri  z?This corresponds to the Block class in the timm implementation.c                @  > [         TU ]  " S0 UD6  [        USS9U l        [	        USS9U l        [        USS9U l        [        R                  R                  UR                  SS9U l        [        R                  R                  UR                  SS9U l        Xl        g )	Nr   r7   intermediater   layernorm_beforeepsilonr5   layernorm_afterr%   )r8   r9   r   r   r  r)  r  deit_outputr   r<   LayerNormalizationlayer_norm_epsr*  r-  r4   r   s      r.   r9   TFDeiTLayer.__init__  s    "6"(kB.vNK'X> % ? ?H]H]dv ? w$||>>vG\G\ct>ur-   c                    U R                  U R                  XS9UUUS9nUS   nXa-   nU R                  XS9nU R                  XtS9nU R	                  XUS9nU4USS  -   n	U	$ )Nr   )r   r   r   r}   r   )r#   r}   r
  r   )r   r*  r-  r)  r.  )
r?   r#   r   r   r}   attention_outputsr   layer_outputintermediate_outputr   s
             r.   r   TFDeiTLayer.call  s     !NN..m.W/ + 
 -Q/ )8 ++=+T"//l/^ ''-T\ ( 
  /$5ab$99r-   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GN= f! , (       d  f       GNe= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r)  r.  r*  r-  )rQ   rR   rS   rT   r   r5   rU   r)  r.  r*  r4   rM   r-  r   s     r.   rU   TFDeiTLayer.build  s   ::
4d+7t~~223$$T* 44.:t00556!!''- 74-9t//445  &&t, 64+T2>t4499:%%++T49P9P,QR ;4*D1=t33889$$**D$8O8O+PQ :9 > 43 76 65 ;: :9s<   H.H(
H:&3I3I
H%(
H7:
I	
I
I+)r   rQ   r4   r.  r)  r-  r*  r   r   r   r   r   r   s   @r.   r'  r'    sR    I	      	
  
@R Rr-   r'  c                  `   ^  \ rS rSrSU 4S jjr S             SS jjrS	S jrSrU =r$ )
TFDeiTEncoderi  c                   > [         TU ]  " S0 UD6  [        UR                  5       Vs/ s H  n[	        USU 3S9PM     snU l        g s  snf )Nzlayer_._r7   r%   )r8   r9   rangenum_hidden_layersr'  layer)r?   r4   r@   irA   s       r.   r9   TFDeiTEncoder.__init__  sF    "6"HMfNfNfHghHg1k&!~>Hgh
hs   A	c                &   U(       a  SOS nU(       a  SOS n[        U R                  5       H2  u  pU(       a  Xq4-   nU
" UX)   UUS9nUS   nU(       d  M*  XS   4-   nM4     U(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        XUS9$ )Nr%   r	  r   r   c              3  .   #    U  H  oc  M  Uv   M     g 7fr   r%   ).0vs     r.   	<genexpr>%TFDeiTEncoder.call.<locals>.<genexpr>@  s     h$Vq$Vs   	)last_hidden_stater#   r$   )	enumerater>  tupler   )r?   r#   r   r   output_hidden_statesreturn_dictr}   all_hidden_statesall_attentionsr?  layer_modulelayer_outputss               r.   r   TFDeiTEncoder.call   s     #7BD0d(4OA#$58H$H!(+#,"3!	M *!,M  !/3C2E!E  5    14D Dh]~$Vhhh +Yg
 	
r-   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = f)NTr>  )rQ   rR   r>  rS   rT   r5   rU   )r?   rV   r>  s      r.   rU   TFDeiTEncoder.buildF  s`    ::
4$'3]]5::.KK% /. $ 4..s   A77
B	)rQ   r>  r   r   )r#   r   r   r   r   r   rJ  r   rK  r   r}   r   r   z$TFBaseModelOutput | tuple[tf.Tensor]r   r  r   s   @r.   r:  r:    sb    i $
 $
 $
  	$

 #$
 $
 $
 
.$
L& &r-   r:  c                     ^  \ rS rSr\r S	       S
U 4S jjjrSS jrS rS r	\
        S                 SS jj5       rSS jrSrU =r$ )TFDeiTMainLayeriP  c                  > [         TU ]  " S0 UD6  Xl        [        XSS9U l        [        USS9U l        [        R                  R                  UR                  SS9U l        U(       a  [        USS9U l        g S U l        g )	Nrl   )r:   r5   encoderr7   	layernormr+  poolerr%   )r8   r9   r4   r0   rl   r:  rV  r   r<   r/  r0  rW  TFDeiTPoolerrX  r?   r4   add_pooling_layerr:   r@   rA   s        r.   r9   TFDeiTMainLayer.__init__T  sp     	"6"*6Wcd$V)<88AVAV]h8i=Nl69TXr-   c                .    U R                   R                  $ r   )rl   r3   )r?   s    r.   get_input_embeddings$TFDeiTMainLayer.get_input_embeddings`  s    ///r-   c                    [         e)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
r  )r?   heads_to_prunes     r.   _prune_headsTFDeiTMainLayer._prune_headsc  s
    
 "!r-   c                L    Ub  [         eS /U R                  R                  -  nU$ r   )r  r4   r=  )r?   r   s     r.   get_head_maskTFDeiTMainLayer.get_head_maskj  s*     %%!>!>>Ir-   c	           	     .   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      e[
        R                  " US5      nU R                  U5      nU R                  UUUUS9n	U R                  U	UUUUUS9n
U
S   nU R                  XS9nU R                  b  U R                  XS9OS nU(       d  Ub  X4OU4nXSS  -   $ [        UUU
R                  U
R                  S9$ )	Nz You have to specify pixel_valuesr]   )r   r}   rv   )r   r   rJ  rK  r}   r   r|   r   )rG  pooler_outputr#   r$   )r4   r   rJ  use_return_dictr   rS   ri   re  rl   rV  rW  rX  r   r#   r$   )r?   r   r   r   r   rJ  rK  rv   r}   embedding_outputencoder_outputssequence_outputpooled_outputhead_outputss                 r.   r   TFDeiTMainLayer.callr  sR    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@ ||L,? &&y1	??+%=	 + 
 ,,/!5# ' 
 *!,...LKO;;KbOGhl?L?XO;_n^pL!""555+-')77&11	
 	
r-   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       GN<= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTrl   rV  rW  rX  )rQ   rR   rS   rT   rl   r5   rU   rV  rW  r4   rM   rX  r   s     r.   rU   TFDeiTMainLayer.build  s@   ::
4t,8t334%%d+ 54D)5t||001""4( 24d+7t~~223$$dD$++2I2I%JK 444(4t{{//0!!$' 10 5 54 21 43 10s0   F#.F5
3G=G#
F25
G
G
G%)rQ   r4   rl   rV  rW  rX  TFr4   r   r[  r   r:   r   r   r   )r   r;   NNNNNNFF)r   r   r   r   r   r   r   bool | NonerJ  ru  rK  ru  rv   r   r}   r   r   z4TFBaseModelOutputWithPooling | tuple[tf.Tensor, ...]r   )r&   r'   r(   r)   r   config_classr9   r^  rb  re  r   r   rU   r,   r   r   s   @r.   rT  rT  P  s    L Z_
Y 
Y59
YRV
Y	
Y 
Y0"  *.,0&*)-,0#').;
&;
 *;
 $	;

 ';
 *;
 !;
 #';
 ;
 
>;
 ;
z( (r-   rT  c                  $    \ rS rSrSr\rSrSrSr	g)TFDeiTPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
deitr   r%   N)
r&   r'   r(   r)   r*   r   rv  base_model_prefixmain_input_namer,   r%   r-   r.   rx  rx    s    
 L$Or-   rx  aR  
    This model is a TensorFlow
    [keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer). Use it as a regular
    TensorFlow Module and refer to the TensorFlow documentation for all matter related to general usage and behavior.

    Parameters:
        config ([`DeiTConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`DeiTImageProcessor.__call__`] for details.

        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        interpolate_pos_encoding (`bool`, *optional*, defaults to `False`):
            Whether to interpolate the pre-trained position encodings.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z^The bare DeiT Model transformer outputting raw hidden-states without any specific head on top.c            
         ^  \ rS rSr S       S	U 4S jjjr\\" \5      \" \	\
\S\S9        S
                 SS jj5       5       5       rSS jrSrU =r$ )TFDeiTModeli  c                H   > [         TU ]  " U40 UD6  [        XUSS9U l        g )Nry  r[  r:   r5   )r8   r9   rT  ry  rZ  s        r.   r9   TFDeiTModel.__init__  s+     	*6*#]c
	r-   vision)
checkpointoutput_typerv  modalityexpected_outputc	                2    U R                  UUUUUUUUS9n	U	$ )N)r   r   r   r   rJ  rK  rv   r}   )ry  )
r?   r   r   r   r   rJ  rK  rv   r}   r   s
             r.   r   TFDeiTModel.call   s6    ( ))%+/!5#%=  	
 r-   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTry  )rQ   rR   rS   rT   ry  r5   rU   r   s     r.   rU   TFDeiTModel.build   sZ    ::
4&2tyy~~.		% /. 3..s   A88
B)rQ   ry  rr  rs  rt  )r   r   r   r   r   r   r   ru  rJ  ru  rK  ru  rv   r   r}   r   r   z$tuple | TFBaseModelOutputWithPoolingr   )r&   r'   r(   r)   r9   r   r   DEIT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   rU   r,   r   r   s   @r.   r}  r}    s     Z_
 
59
RV
	
 
 *+@A&0$. *.,0&*)-,0#').& * $	
 ' * ! #'  
. B .& &r-   r}  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	rY  i*  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      UR                  SS9U l	        Xl
        g )Nr   )r   r   
activationr5   r%   )r8   r9   r   r<   r   pooler_output_sizer   r   
pooler_actr   r4   r   s      r.   r9   TFDeiTPooler.__init__+  sX    "6"\\''++.v/G/GH((	 ( 

 r-   c                6    US S 2S4   nU R                  US9nU$ )Nr   r   )r   )r?   r#   first_token_tensorrm  s       r.   r   TFDeiTPooler.call6  s*     +1a40

*<
=r-   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr   r   r   s     r.   rU   TFDeiTPooler.build>  r   r   )rQ   r4   r   r   r  r   r  r   s   @r.   rY  rY  *  s    	H Hr-   rY  c                  :   ^  \ rS rSrSrSU 4S jjrSS jrSrU =r$ )TFDeitPixelShuffleiG  z0TF layer implementation of torch.nn.PixelShufflec                   > [         TU ]  " S0 UD6  [        U[        5      (       a  US:  a  [	        SU 35      eXl        g )NrJ   z1upscale_factor must be an integer value >= 2 got r%   )r8   r9   r   rd   r   upscale_factor)r?   r  r@   rA   s      r.   r9   TFDeitPixelShuffle.__init__J  sB    "6".#...12DPQ_P`abb,r-   c           
        Un[        U5      u  n  pEU R                  S-  n[        XV-  5      n[        R                  " [        U5       VV	s/ s H  n[        U5        H	  oX-  -   PM     M     sn	n/5      n
[        R                  " U[        R                  " XS/5      SS9n[        R                  R                  X R                  SS9nU$ s  sn	nf )NrJ   r   rY   )paramsindices
batch_dimsNHWC)
block_sizedata_format)
r   r  rd   rS   constantr<  gatherr~   nndepth_to_space)r?   r   r#   r   r   num_input_channelsblock_size_squaredoutput_depthr?  jpermutations              r.   r   TFDeitPixelShuffle.callP  s    /9-/H,
Aq!00!3-BC
 kk278J2Ki2KQUZ[gUhPQ!((Uh(2Kij
 		bcUd@ertu,,]GZGZhn,o	 js   #C
)r  )r  rd   r   r   )r   r   r   r   )	r&   r'   r(   r)   r*   r9   r   r,   r   r   s   @r.   r  r  G  s    :- r-   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFDeitDecoderia  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  S-  UR                  -  SSS9U l        [        UR
                  SS9U l	        Xl
        g )NrJ   r   0)filtersr   r5   1r7   r%   )r8   r9   r   r<   r   encoder_strider   conv2dr  pixel_shuffler4   r   s      r.   r9   TFDeitDecoder.__init__b  si    "6"ll))))1,v/B/BBPQX[ * 
 00E0ECPr-   c                N    UnU R                  U5      nU R                  U5      nU$ r   )r  r  )r?   r   r}   r#   s       r.   r   TFDeitDecoder.callj  s+    M2**=9r-   c                   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r  )
rQ   rR   rS   rT   r  r5   rU   r4   rM   r  r   s     r.   rU   TFDeitDecoder.buildp  s    ::
44(4t{{//0!!4tT[[5L5L"MN 14$/;t11667""((. 87 < 10 87s   4C,C=,
C:=
D)rQ   r4   r  r  r   r   )r   r   r}   r   r   r   r   r  r   s   @r.   r  r  a  s    	/ 	/r-   r  z~DeiT Model with a decoder on top for masked image modeling, as proposed in [SimMIM](https://huggingface.co/papers/2111.09886).c                     ^  \ rS rSrSU 4S jjr\\" \5      \" \	\
S9        S                 S	S jj5       5       5       rS
S jrSrU =r$ )TFDeiTForMaskedImageModelingi|  c                d   > [         TU ]  U5        [        USSSS9U l        [	        USS9U l        g )NFTry  r  decoderr7   )r8   r9   rT  ry  r  r  r?   r4   rA   s     r.   r9   %TFDeiTForMaskedImageModeling.__init__  s2     #FeTX_ef	$V)<r-   r  rv  c	                   Ub  UOU R                   R                  nU R                  UUUUUUUUS9n	U	S   n
U
SS2SS24   n
[        U
5      u  pn[	        US-  5      =p[
        R                  " XXU45      n
U R                  XS9n[
        R                  " US5      nSnUGb  U R                   R                  U R                   R                  -  n[
        R                  " USUU45      n[
        R                  " X R                   R                  S5      n[
        R                  " UU R                   R                  S	5      n[
        R                  " US5      n[
        R                  " U[
        R                  5      n[        R                   R#                  [
        R                  " US
5      [
        R                  " US
5      5      n[
        R                  " US5      n[
        R$                  " UU-  5      n[
        R$                  " U5      S-   U R                   R&                  -  nUU-  n[
        R                  " US5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [)        UUU	R*                  U	R,                  S9$ )a  
bool_masked_pos (`tf.Tensor` of type bool and shape `(batch_size, num_patches)`):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

Returns:

Examples:
```python
>>> from transformers import AutoImageProcessor, TFDeiTForMaskedImageModeling
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> image_processor = AutoImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = TFDeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")

>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
>>> pixel_values = image_processor(images=image, return_tensors="tf").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = tf.cast(tf.random.uniform((1, num_patches), minval=0, maxval=2, dtype=tf.int32), tf.bool)

>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
>>> loss, reconstructed_pixel_values = outputs.loss, outputs.reconstruction
>>> list(reconstructed_pixel_values.shape)
[1, 3, 224, 224]
```N)r   r   r   rJ  rK  rv   r}   r   r   rY   g      ?r|   )r   r   r   rJ   rJ   )r   rJ   r   r   gh㈵>)r   )lossreconstructionr#   r$   )r4   ri  ry  r   rd   rS   rc   r  ri   r   rb   r   rk   r   float32r   lossesmean_absolute_error
reduce_sumr   r
   r#   r$   )r?   r   r   r   r   rJ  rK  rv   r}   r   rl  r   sequence_lengthr   rm   rn   reconstructed_pixel_valuesmasked_im_lossr[   r   reconstruction_loss
total_lossnum_masked_pixelsr   s                           r.   r   !TFDeiTForMaskedImageModeling.call  sI   V &1%<k$++B]B]))+/!5#%=  	
 "!* *!QrT'24>4O1
\_c122**_6R^6_` &*\\/\%U" &(\\2Ll%["&;;))T[[-C-CCD jj2tT:JKO99_kk.D.DaHD99T4;;#9#91=D>>$*D774,D"',,"B"B\<87F#
 #%..1Da"H':T'ABJ!#t!4t!;t{{?W?W W'*;;NZZ=N02WQR[@F3A3M^%.YSYY*5!//))	
 	
r-   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTry  r  )rQ   rR   rS   rT   ry  r5   rU   r  r   s     r.   rU   "TFDeiTForMaskedImageModeling.build  s    ::
4&2tyy~~.		% /4D)5t||001""4( 21 6 /. 21r  )rQ   r  ry  r   rt  )r   r   r   r   r   r   r   ru  rJ  ru  rK  ru  rv   r   r}   r   r   z#tuple | TFMaskedImageModelingOutputr   )r&   r'   r(   r)   r9   r   r   r  r   r
   r  r   rU   r,   r   r   s   @r.   r  r  |  s    = *+@A+FUde *.,0&*)-,0#').a
&a
 *a
 $	a

 'a
 *a
 !a
 #'a
 a
 
-a
 f B a
F	) 	)r-   r  z
    DeiT Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                     ^  \ rS rSrSU 4S jjr\\" \5      \" \	\
S9        S                 S	S jj5       5       5       rS
S jrSrU =r$ )TFDeiTForImageClassificationi  c                (  > [         TU ]  U5        UR                  U l        [        USSS9U l        UR                  S:  a(  [
        R                  R                  UR                  SS9O[
        R                  R                  SSS9U l	        Xl
        g )NFry  r[  r5   r   
classifierr7   linear)r8   r9   
num_labelsrT  ry  r   r<   r   
Activationr  r4   r  s     r.   r9   %TFDeiTForImageClassification.__init__  s      ++#Fe&Q	
   1$ LLv00|D(((E 	
 r-   r  c	           
     D   Ub  UOU R                   R                  nU R                  UUUUUUUS9n	U	S   n
U R                  U
SS2SSS24   5      nUc  SOU R	                  X;5      nU(       d  U4U	SS -   nUb  U4U-   $ U$ [        UUU	R                  U	R                  S9$ )a  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Returns:

Examples:

```python
>>> from transformers import AutoImageProcessor, TFDeiTForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests

>>> keras.utils.set_random_seed(3)  # doctest: +IGNORE_RESULT
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> # note: we are loading a TFDeiTForImageClassificationWithTeacher from the hub here,
>>> # so the head will be randomly initialized, hence the predictions will be random
>>> image_processor = AutoImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = TFDeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")

>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
>>> predicted_class_idx = tf.math.argmax(logits, axis=-1)[0]
>>> print("Predicted class:", model.config.id2label[int(predicted_class_idx)])
Predicted class: little blue heron, Egretta caerulea
```Nr   r   rJ  rK  rv   r}   r   r   )r  r    r#   r$   )r4   ri  ry  r  hf_compute_lossr	   r#   r$   )r?   r   r   labelsr   rJ  rK  rv   r}   r   rl  r    r  r   s                 r.   r   !TFDeiTForImageClassification.call  s    ^ &1%<k$++B]B]))/!5#%=  
 "!*Aq!9: ~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
r-   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTry  r  )
rQ   rR   rS   rT   ry  r5   rU   r  r4   rM   r   s     r.   rU   "TFDeiTForImageClassification.build]  s    ::
4&2tyy~~.		% /4t,8t334%%tT4;;3J3J&KL 54 9 /. 54s   C+.3C<+
C9<
D
)rQ   r  r4   ry  r  r   rt  )r   r   r   r   r  r   r   ru  rJ  ru  rK  ru  rv   r   r}   r   r   z#tf.Tensor | TFImageClassifierOutputr   )r&   r'   r(   r)   r9   r   r   r  r   r	   r  r   rU   r,   r   r   s   @r.   r  r    s     *+@A+BQ`a *.&*#')-,0#').H
&H
 $H
 !	H

 'H
 *H
 !H
 #'H
 H
 
-H
 b B H
T	M 	Mr-   r  a  
    DeiT Model transformer with image classification heads on top (a linear layer on top of the final hidden state of
    the [CLS] token and a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet.

    .. warning::

            This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
            supported.
    c            	         ^  \ rS rSrSU 4S jjr\\" \5      \" \	\
\\S9       S               S	S jj5       5       5       rS
S jrSrU =r$ )'TFDeiTForImageClassificationWithTeacherii  c                  > [         TU ]  U5        UR                  U l        [        USSS9U l        UR                  S:  a(  [
        R                  R                  UR                  SS9O[
        R                  R                  SSS9U l	        UR                  S:  a(  [
        R                  R                  UR                  SS9O[
        R                  R                  SSS9U l
        Xl        g )	NFry  r  r   cls_classifierr7   r  distillation_classifier)r8   r9   r  rT  ry  r   r<   r   r  r  r  r4   r  s     r.   r9   0TFDeiTForImageClassificationWithTeacher.__init__v  s      ++#Fe&Q	
   1$ LLv007GH((8H(I 	   1$ LLv007PQ((8Q(R 	$
 r-   )r  r  rv  r  c           
     N   Ub  UOU R                   R                  nU R                  UUUUUUUS9nUS   n	U R                  U	S S 2SS S 24   5      n
U R	                  U	S S 2SS S 24   5      nX-   S-  nU(       d  XU4USS  -   nU$ [        UU
UUR                  UR                  S9$ )Nr  r   r   rJ   )r    r!   r"   r#   r$   )r4   ri  ry  r  r  r   r#   r$   )r?   r   r   r   rJ  rK  rv   r}   r   rl  r!   r"   r    r   s                 r.   r   ,TFDeiTForImageClassificationWithTeacher.call  s    $ &1%<k$++B]B]))/!5#%=  
 "!*((Aq)AB
"::?1aQR7;ST 2a7*=>LFM<! 3!//))
 	
r-   c                "   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTry  r  r  )rQ   rR   rS   rT   ry  r5   rU   r  r4   rM   r  r   s     r.   rU   -TFDeiTForImageClassificationWithTeacher.build  s   ::
4&2tyy~~.		% /4)40<t22778##))4t{{7N7N*OP 942D9Et;;@@A,,22D$@W@W3XY BA F /. 98 BAs$   E.3E/!3F 
E,/
E= 
F)rQ   r  r4   ry  r  r  r   )NNNNNFF)r   r   r   r   r   ru  rJ  ru  rK  ru  rv   r   r}   r   r   z5tuple | TFDeiTForImageClassificationWithTeacherOutputr   )r&   r'   r(   r)   r9   r   r   r  r   _IMAGE_CLASS_CHECKPOINTr   r  _IMAGE_CLASS_EXPECTED_OUTPUTr   rU   r,   r   r   s   @r.   r  r  i  s    & *+@A*A$4	 *.&*)-,0#').(
&(
 $(
 '	(

 *(
 !(
 #'(
 (
 
?(
 B (
TZ Zr-   r  )r  r  r  r}  rx  )Dr*   
__future__r   collections.abcr   re   dataclassesr   
tensorflowrS   activations_tfr   modeling_tf_outputsr   r   r	   r
   modeling_tf_utilsr   r   r   r   r   r   tf_utilsr   r   utilsr   r   r   r   r   r   configuration_deitr   
get_loggerr&   loggerr  r  r  r  r  r   r<   Layerr0   r;   r   r   r   r  r  r'  r:  rT  rx  DEIT_START_DOCSTRINGr  r}  rY  r  r  r  r  r  __all__r%   r-   r.   <module>r     s    "   !  /   3  + 
		H	%  A &  E 1  /K / /<ju||)) jZ*MELL.. *M\WH%,,,, WHvHu||)) H>$.ell(( $.PH++ H<N5<<%% N4@R%,,$$ @RH3&ELL&& 3&l n(ell(( n( n(d%- %	  2 d0&' 0&	0&hH5<<%% H:++ 4/ELL&& /6 ;
v)#8 v)
v)r  eM#8:V eMeMP  RZ.C RZRZjr-   