
    cCib                     <   S r SSKrSSKJr  SSKJrJr  SSKrSSKJr  SSK	J
r
JrJrJr  SSKJr  SS	KJrJr  S
SKJr  \R*                  " \5      r\\" SS9 " S S\5      5       5       r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r  " S S\R2                  5      r! " S  S!\R2                  5      r" " S" S#\R2                  5      r# " S$ S%\R2                  5      r$\ " S& S'\5      5       r%\ " S( S)\%5      5       r&\" S*S9 " S+ S,\%5      5       r'\" S-S9 " S. S/\%5      5       r(/ S0Qr)g)1zPyTorch LeViT model.    N)	dataclass)OptionalUnion)nn   )BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel)auto_docstringlogging   )LevitConfigzD
    Output type of [`LevitForImageClassificationWithTeacher`].
    )custom_introc                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Srg)	,LevitForImageClassificationWithTeacherOutput&   aJ  
logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores as the average of the `cls_logits` and `distillation_logits`.
cls_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
    class token).
distillation_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
    distillation token).
Nlogits
cls_logitsdistillation_logitshidden_states )__name__
__module____qualname____firstlineno____doc__r   r   torchFloatTensor__annotations__r   r   r   tuple__static_attributes__r       b/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/levit/modeling_levit.pyr   r   &   sc    	 +/FHU&&'..2J**+27;%"3"34;8<M8E%"3"345<r$   r   c                   8   ^  \ rS rSrSr SU 4S jjrS rSrU =r$ )LevitConvEmbeddings>   zS
LeViT Conv Embeddings with Batch Norm, used in the initial patch embedding layer.
c	                    > [         T	U ]  5         [        R                  " XX4XVUSS9U l        [        R
                  " U5      U l        g )NF)dilationgroupsbias)super__init__r   Conv2dconvolutionBatchNorm2d
batch_norm)
selfin_channelsout_channelskernel_sizestridepaddingr*   r+   bn_weight_init	__class__s
            r%   r.   LevitConvEmbeddings.__init__C   s@     	99{G_elq
 ..6r$   c                 J    U R                  U5      nU R                  U5      nU$ N)r0   r2   )r3   
embeddingss     r%   forwardLevitConvEmbeddings.forwardL   s&    %%j1
__Z0
r$   )r2   r0   )r   r   r   	r   r   r   r   r   r.   r?   r#   __classcell__r:   s   @r%   r'   r'   >   s    
 mn7 r$   r'   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitPatchEmbeddingsR   z
LeViT patch embeddings, for final embeddings to be passed to transformer blocks. It consists of multiple
`LevitConvEmbeddings`.
c                 ^  > [         TU ]  5         [        UR                  UR                  S   S-  UR
                  UR                  UR                  5      U l        [        R                  " 5       U l        [        UR                  S   S-  UR                  S   S-  UR
                  UR                  UR                  5      U l        [        R                  " 5       U l        [        UR                  S   S-  UR                  S   S-  UR
                  UR                  UR                  5      U l        [        R                  " 5       U l        [        UR                  S   S-  UR                  S   UR
                  UR                  UR                  5      U l        UR                  U l        g )Nr            )r-   r.   r'   num_channelshidden_sizesr6   r7   r8   embedding_layer_1r   	Hardswishactivation_layer_1embedding_layer_2activation_layer_2embedding_layer_3activation_layer_3embedding_layer_4r3   configr:   s     r%   r.   LevitPatchEmbeddings.__init__X   so   !4!4!4Q!71!<f>P>PRXR_R_agaoao"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?ASASU[UbUbdjdrdr"
 #//r$   c                    UR                   S   nX R                  :w  a  [        S5      eU R                  U5      nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUR                  S5      R                  SS5      $ )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rJ   )shaperK   
ValueErrorrM   rO   rP   rQ   rR   rS   rT   flatten	transpose)r3   pixel_valuesrK   r>   s       r%   r?   LevitPatchEmbeddings.forwardn   s    #))!,,,,w  ++L9
,,Z8
++J7
,,Z8
++J7
,,Z8
++J7
!!!$..q!44r$   )rO   rQ   rS   rM   rP   rR   rT   rK   rA   rC   s   @r%   rE   rE   R   s    
0,5 5r$   rE   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )MLPLayerWithBN~   c                    > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " U5      U l        g )NF)in_featuresout_featuresr,   )r-   r.   r   LinearlinearBatchNorm1dr2   )r3   	input_dim
output_dimr9   r:   s       r%   r.   MLPLayerWithBN.__init__   s1    iiIUZ[..4r$   c                     U R                  U5      nU R                  UR                  SS5      5      R                  U5      nU$ )Nr   r   )rf   r2   r[   
reshape_asr3   hidden_states     r%   r?   MLPLayerWithBN.forward   s<    {{<0|';';Aq'ABMMl[r$   r2   rf   )r   r   r   r   r   r.   r?   r#   rB   rC   s   @r%   r`   r`   ~   s    5
 r$   r`   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )LevitSubsample   c                 :   > [         TU ]  5         Xl        X l        g r=   )r-   r.   r7   
resolution)r3   r7   rv   r:   s      r%   r.   LevitSubsample.__init__   s    $r$   c                     UR                   u  p#nUR                  X R                  U R                  U5      S S 2S S U R                  2S S U R                  24   R	                  USU5      nU$ )N)rY   viewrv   r7   reshape)r3   rn   
batch_size_channelss        r%   r?   LevitSubsample.forward   sg    "."4"4
x#((__dooW_`~$++~~$++~-

'*b(
+ 	 r$   )rv   r7   rq   rC   s   @r%   rs   rs      s    %
 r$   rs   c                   l   ^  \ rS rSrU 4S jr\R                  " 5       SU 4S jj5       rS rS r	Sr
U =r$ )LevitAttention   c                 n  > [         TU ]  5         X0l        US-  U l        X l        X@l        XB-  U-  X#-  S-  -   U l        XB-  U-  U l        [        XR                  5      U l	        [        R                  " 5       U l        [        U R                  USS9U l        [        [        R                   " [#        U5      [#        U5      5      5      n[%        U5      n0 / pU HY  n
U HP  n['        U
S   US   -
  5      ['        U
S   US   -
  5      4nX;  a  [%        U5      X'   U	R)                  X   5        MR     M[     0 U l        [,        R                  R/                  [,        R0                  " U[%        U5      5      5      U l        U R5                  S[,        R6                  " U	5      R9                  Xw5      SS9  g )	N      rJ   r   )r9   r   attention_bias_idxsF
persistent)r-   r.   num_attention_headsscalekey_dimattention_ratioout_dim_keys_valuesout_dim_projectionr`   queries_keys_valuesr   rN   
activation
projectionlist	itertoolsproductrangelenabsappendattention_bias_cacher   	Parameterzerosattention_biasesregister_buffer
LongTensorrz   )r3   rL   r   r   r   rv   points
len_pointsattention_offsetsindicesp1p2offsetr:   s                r%   r.   LevitAttention.__init__   s   #6 d]
.#2#<?R#RU\UruvUv#v "1";>Q"Q#1,@X@X#Y ,,.()@)@,_`ai''j(95;LMN[
%'7Bbebem,c"Q%"Q%-.@A2034E0F%-089	   %'! % 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A*#Yfk 	 	
r$   c                 f   > [         TU ]  U5        U(       a  U R                  (       a  0 U l        g g g r=   r-   trainr   r3   moder:   s     r%   r   LevitAttention.train   )    dD--(*D% .4r$   c                    U R                   (       a  U R                  S S 2U R                  4   $ [        U5      nX R                  ;  a*  U R                  S S 2U R                  4   U R                  U'   U R                  U   $ r=   trainingr   r   strr   r3   device
device_keys      r%   get_attention_biases#LevitAttention.get_attention_biases   r    ==((D,D,D)DEEVJ!:!::8<8M8MaQUQiQiNi8j))*5,,Z88r$   c                    UR                   u  p#nU R                  U5      nUR                  X#U R                  S5      R	                  U R
                  U R
                  U R                  U R
                  -  /SS9u  pgnUR                  SSSS5      nUR                  SSSS5      nUR                  SSSS5      nXgR                  SS5      -  U R                  -  U R                  UR                  5      -   n	U	R                  SS9n	X-  R                  SS5      R                  X#U R                  5      nU R                  U R!                  U5      5      nU$ Nry   r   dimr   rJ   r   )rY   r   rz   r   splitr   r   permuter\   r   r   r   softmaxr{   r   r   r   )
r3   rn   r|   
seq_lengthr}   r   querykeyvalue	attentions
             r%   r?   LevitAttention.forward   sB   $0$6$6!
"66|D/44ZTMeMegijpp\\4<<)=)=)LMST q 
E aAq)kk!Q1%aAq)MM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!)44Q:BB:[_[r[rst|'DEr$   )r   r   r   r   r   r   r   r   r   r   r   Tr   r   r   r   r.   r   no_gradr   r   r?   r#   rB   rC   s   @r%   r   r      s1    
: ]]_+ +
9 r$   r   c                   l   ^  \ rS rSrU 4S jr\R                  " 5       SU 4S jj5       rS rS r	Sr
U =r$ )LevitAttentionSubsample   c	                 n  > [         TU ]  5         X@l        US-  U l        X0l        XPl        XS-  U-  X4-  -   U l        XS-  U-  U l        Xl        [        XR                  5      U l
        [        Xg5      U l        [        XU-  5      U l        [        R                  " 5       U l        [        U R                  U5      U l        0 U l        ['        [(        R*                  " [-        U5      [-        U5      5      5      n	['        [(        R*                  " [-        U5      [-        U5      5      5      n
[/        U	5      [/        U
5      p0 / pU
 Hv  nU	 Hm  nSn[1        US   U-  US   -
  US-
  S-  -   5      [1        US   U-  US   -
  US-
  S-  -   5      4nUU;  a  [/        U5      UU'   UR3                  UU   5        Mo     Mx     [4        R                  R7                  [4        R8                  " U[/        U5      5      5      U l        U R=                  S[4        R>                  " U5      RA                  X5      SS9  g )Nr   r   r   rJ   r   Fr   )!r-   r.   r   r   r   r   r   r   resolution_outr`   keys_valuesrs   queries_subsamplequeriesr   rN   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rz   )r3   rh   ri   r   r   r   r7   resolution_inr   r   points_r   len_points_r   r   r   r   sizer   r:   s                      r%   r.    LevitAttentionSubsample.__init__   s    	#6 d]
.#2#<?R#RU\Ur#r "1";>Q"Q,))5M5MN!/!F%i;N1NO,,.()@)@*M$&!i''m(<eM>RSTy((~)>n@UVW"%f+s7|K%'7Bbefnr!u4qA~EFBqETZN]_`a]bLbfjmnfnrsesLsHtu!22034E0F%f-089   !& 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A+#Zgl 	 	
r$   c                 f   > [         TU ]  U5        U(       a  U R                  (       a  0 U l        g g g r=   r   r   s     r%   r   LevitAttentionSubsample.train  r   r$   c                    U R                   (       a  U R                  S S 2U R                  4   $ [        U5      nX R                  ;  a*  U R                  S S 2U R                  4   U R                  U'   U R                  U   $ r=   r   r   s      r%   r   ,LevitAttentionSubsample.get_attention_biases
  r   r$   c                 4   UR                   u  p#nU R                  U5      R                  X#U R                  S5      R	                  U R
                  U R                  U R
                  -  /SS9u  pVUR                  SSSS5      nUR                  SSSS5      nU R                  U R                  U5      5      nUR                  X R                  S-  U R                  U R
                  5      R                  SSSS5      nXuR                  SS5      -  U R                  -  U R                  UR                  5      -   nUR                  SS9nX-  R                  SS5      R!                  USU R"                  5      nU R%                  U R'                  U5      5      nU$ r   )rY   r   rz   r   r   r   r   r   r   r   r   r\   r   r   r   r   r{   r   r   r   )	r3   rn   r|   r   r}   r   r   r   r   s	            r%   r?   LevitAttentionSubsample.forward  sr   $0$6$6!
\*T*$*B*BBGUDLL$"6"6"EFAUN 	
 kk!Q1%aAq)T33LAB

:':':A'=t?W?WY]YeYefnnq!Q
 MM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!)44Q:BB:rSWSjSjkt|'DEr$   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rC   s   @r%   r   r      s2    +
Z ]]_+ +
9 r$   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitMLPLayeri)  z=
MLP Layer with `2X` expansion in contrast to ViT with `4X`.
c                    > [         TU ]  5         [        X5      U l        [        R
                  " 5       U l        [        X!5      U l        g r=   )r-   r.   r`   	linear_upr   rN   r   linear_down)r3   rh   
hidden_dimr:   s      r%   r.   LevitMLPLayer.__init__.  s4    '	>,,.)*@r$   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r=   )r   r   r   rm   s     r%   r?   LevitMLPLayer.forward4  s4    ~~l3|4''5r$   )r   r   r   rA   rC   s   @r%   r   r   )  s    A r$   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitResidualLayeri;  z
Residual Block for LeViT
c                 :   > [         TU ]  5         Xl        X l        g r=   )r-   r.   module	drop_rate)r3   r   r   r:   s      r%   r.   LevitResidualLayer.__init__@  s    "r$   c                    U R                   (       a  U R                  S:  a  [        R                  " UR	                  S5      SSUR
                  S9nUR                  U R                  5      R                  SU R                  -
  5      R                  5       nXR                  U5      U-  -   nU$ XR                  U5      -   nU$ )Nr   r   )r   )
r   r   r   randr   r   ge_divdetachr   )r3   rn   rnds      r%   r?   LevitResidualLayer.forwardE  s    ==T^^a/**\..q11a@S@STC''$..)--a$...@AHHJC'++l*Cc*IIL'++l*CCLr$   )r   r   rA   rC   s   @r%   r   r   ;  s    #
   r$   r   c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )
LevitStageiP  zH
LeViT Stage consisting of `LevitMLPLayer` and `LevitAttention` layers.
c                 
  > [         TU ]  5         / U l        Xl        Xl        [        U5       H  nU R                  R                  [        [        X4XgU
5      U R                  R                  5      5        US:  d  MO  X8-  nU R                  R                  [        [        X<5      U R                  R                  5      5        M     U	S   S:X  Ga  U R                  S-
  U	S   -  S-   U l        U R                  R                  [        U R                  R                  X"S-    U	S   U	S   U	S   U	S   U
U R                  S.65        U R                  U l        U	S   S:  a  U R                  R                  US-      U	S   -  nU R                  R                  [        [        U R                  R                  US-      U5      U R                  R                  5      5        [        R                  " U R                  5      U l        g )	Nr   	Subsampler      rJ   r   )r   r   r   r7   r   r   rI   )r-   r.   layersrV   r   r   r   r   r   drop_path_rater   r   r   rL   r   
ModuleList)r3   rV   idxrL   r   depthsr   r   	mlp_ratiodown_opsr   r}   r   r:   s                r%   r.   LevitStage.__init__U  s    	*vAKK""<:M`mnKK.. 1})5
""&}\'NPTP[P[PjPjk  A;+%#'#5#5#9hqk"IA"MDKK'[[--c!G<$QK(0$,QK#A;"/#'#6#6
 "&!4!4D{Q![[55cAg>!L
""&%dkk&>&>sQw&GTVZVaVaVpVp mmDKK0r$   c                     U R                   $ r=   )r   )r3   s    r%   get_resolutionLevitStage.get_resolution  s    !!!r$   c                 <    U R                    H  nU" U5      nM     U$ r=   )r   )r3   rn   layers      r%   r?   LevitStage.forward  s     [[E .L !r$   )rV   r   r   r   )
r   r   r   r   r   r.   r   r?   r#   rB   rC   s   @r%   r   r   P  s    51n" r$   r   c                   6   ^  \ rS rSrSrU 4S jrSS jrSrU =r$ )LevitEncoderi  z;
LeViT Encoder consisting of multiple `LevitStage` stages.
c                   > [         TU ]  5         Xl        U R                  R                  U R                  R                  -  n/ U l        U R                  R                  R                  S/5        [        [        UR                  5      5       H  n[        UUUR                  U   UR                  U   UR                  U   UR                  U   UR                  U   UR                   U   UR                  U   U5
      nUR#                  5       nU R
                  R                  U5        M     [$        R&                  " U R
                  5      U l        g )N )r-   r.   rV   
image_size
patch_sizestagesr   r   r   r   r   r   rL   r   r   r   r   r   r   r   )r3   rV   rv   	stage_idxstager:   s        r%   r.   LevitEncoder.__init__  s   [[++t{{/E/EE
##RD)s6==12I##I.y)i(**95&&y1  +	*E --/JKKu% 3  mmDKK0r$   c                     U(       a  SOS nU R                    H  nU(       a  XA4-   nU" U5      nM     U(       a  XA4-   nU(       d  [        S X4 5       5      $ [        XS9$ )Nr   c              3   .   #    U  H  oc  M  Uv   M     g 7fr=   r   ).0vs     r%   	<genexpr>'LevitEncoder.forward.<locals>.<genexpr>  s     W$Eq$Es   	)last_hidden_stater   )r
  r"   r   )r3   rn   output_hidden_statesreturn_dictall_hidden_statesr  s         r%   r?   LevitEncoder.forward  sd    "6BD[[E#$5$G! .L !
   1O CW\$EWWW-nnr$   )rV   r
  )FTrA   rC   s   @r%   r  r    s    12o or$   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitClassificationLayeri  z
LeViT Classification Layer
c                    > [         TU ]  5         [        R                  " U5      U l        [        R
                  " X5      U l        g r=   )r-   r.   r   rg   r2   re   rf   )r3   rh   ri   r:   s      r%   r.   !LevitClassificationLayer.__init__  s.    ..3ii	6r$   c                 J    U R                  U5      nU R                  U5      nU$ r=   rp   )r3   rn   r   s      r%   r?    LevitClassificationLayer.forward  s#    |4\*r$   rp   rA   rC   s   @r%   r  r    s    7
 r$   r  c                   4    \ rS rSr% \\S'   SrSrS/rS r	Sr
g)	LevitPreTrainedModeli  rV   levitr]   r   c                 *   [        U[        R                  [        R                  45      (       ak  UR                  R
                  R                  SU R                  R                  S9  UR                  b%  UR                  R
                  R                  5         gg[        U[        R                  [        R                  45      (       aJ  UR                  R
                  R                  5         UR                  R
                  R                  S5        gg)zInitialize the weightsg        )meanstdNg      ?)
isinstancer   re   r/   weightdatanormal_rV   initializer_ranger,   zero_rg   r1   fill_)r3   r   s     r%   _init_weights"LevitPreTrainedModel._init_weights  s    fryy"))455 MM&&CT[[5R5R&S{{&  &&( ' @AAKK""$MM$$S) Br$   r   N)r   r   r   r   r   r!   base_model_prefixmain_input_name_no_split_modulesr,  r#   r   r$   r%   r   r     s!    $O-.
*r$   r   c                      ^  \ rS rSrU 4S jr\   S	S\\R                     S\\	   S\\	   S\
\\4   4S jj5       rSrU =r$ )

LevitModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U R                  5         g r=   )r-   r.   rV   rE   patch_embeddingsr  encoder	post_initrU   s     r%   r.   LevitModel.__init__  s8      4V <#F+r$   r]   r  r  returnc                 @   Ub  UOU R                   R                  nUb  UOU R                   R                  nUc  [        S5      eU R	                  U5      nU R                  UUUS9nUS   nUR                  SS9nU(       d	  Xg4USS  -   $ [        UUUR                  S9$ )Nz You have to specify pixel_valuesr  r  r   r   r   )r  pooler_outputr   )	rV   r  use_return_dictrZ   r4  r5  r#  r	   r   )r3   r]   r  r  r>   encoder_outputsr  pooled_outputs           r%   r?   LevitModel.forward  s     %9$D $++JjJj 	 &1%<k$++B]B]?@@**<8
,,!5# ' 
 ,A. *..1.5%58KKK7/')77
 	
r$   )rV   r5  r4  NNN)r   r   r   r   r.   r   r   r   r    boolr   r"   r	   r?   r#   rB   rC   s   @r%   r2  r2    sk      59/3&*	!
u001!
 'tn!
 d^	!

 
u>>	?!
 !
r$   r2  z
    Levit Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                      ^  \ rS rSrU 4S jr\    S
S\\R                     S\\R                     S\\
   S\\
   S\\\4   4
S jj5       rS	rU =r$ )LevitForImageClassificationi  c                 >  > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        UR                  S:  a#  [        UR                  S   UR                  5      O[        R                  R                  5       U l        U R                  5         g Nr   ry   )r-   r.   rV   
num_labelsr2  r!  r  rL   r   r   Identity
classifierr6  rU   s     r%   r.   $LevitForImageClassification.__init__  s~      ++'

   1$ %V%8%8%<f>O>OP""$ 	 	r$   r]   labelsr  r  r8  c                 H   Ub  UOU R                   R                  nU R                  XUS9nUS   nUR                  S5      nU R	                  U5      nSnUb  U R                  X'U R                   5      nU(       d  U4USS -   n	Ub  U4U	-   $ U	$ [        UUUR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr:  r   r   rJ   )lossr   r   )rV   r<  r!  r#  rH  loss_functionr
   r   )
r3   r]   rJ  r  r  outputssequence_outputr   rL  outputs
             r%   r?   #LevitForImageClassification.forward-  s     &1%<k$++B]B]**\bm*n!!*)..q11%%fdkkBDY,F)-)9TGf$EvE3!//
 	
r$   )rH  rV   r!  rF  )NNNN)r   r   r   r   r.   r   r   r   r    r   rA  r   r"   r
   r?   r#   rB   rC   s   @r%   rC  rC    s       59-1/3&*!
u001!
 ))*!
 'tn	!

 d^!
 
u::	;!
 !
r$   rC  ap  
    LeViT Model transformer with image classification heads on top (a linear layer on top of the final hidden state and
    a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet. .. warning::
           This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
           supported.
    c                      ^  \ rS rSrU 4S jr\   S	S\\R                     S\\	   S\\	   S\
\\4   4S jj5       rSrU =r$ )
&LevitForImageClassificationWithTeacheriR  c                   > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        UR                  S:  a#  [        UR                  S   UR                  5      O[        R                  R                  5       U l        UR                  S:  a#  [        UR                  S   UR                  5      O[        R                  R                  5       U l        U R                  5         g rE  )r-   r.   rV   rF  r2  r!  r  rL   r   r   rG  rH  classifier_distillr6  rU   s     r%   r.   /LevitForImageClassificationWithTeacher.__init__[  s      ++'

   1$ %V%8%8%<f>O>OP""$ 	   1$ %V%8%8%<f>O>OP""$ 	 	r$   r]   r  r  r8  c                 &   Ub  UOU R                   R                  nU R                  XUS9nUS   nUR                  S5      nU R	                  U5      U R                  U5      pvXg-   S-  nU(       d  XU4USS  -   n	U	$ [        UUUUR                  S9$ )Nr:  r   r   rJ   )r   r   r   r   )rV   r<  r!  r#  rH  rU  r   r   )
r3   r]   r  r  rN  rO  r   distill_logitsr   rP  s
             r%   r?   .LevitForImageClassificationWithTeacher.forwardp  s     &1%<k$++B]B]**\bm*n!!*)..q1%)___%EtG^G^_nGoN-2.9GABKGFM;! .!//	
 	
r$   )rH  rU  rV   r!  rF  r@  )r   r   r   r   r.   r   r   r   r    rA  r   r"   r   r?   r#   rB   rC   s   @r%   rS  rS  R  sk    *  59/3&*	
u001
 'tn
 d^	

 
uBB	C
 
r$   rS  )rC  rS  r2  r   )*r   r   dataclassesr   typingr   r   r   r   modeling_outputsr   r	   r
   r   modeling_utilsr   utilsr   r   configuration_levitr   
get_loggerr   loggerr   Moduler'   rE   r`   rs   r   r   r   r   r   r  r  r   r2  rC  rS  __all__r   r$   r%   <module>rd     s     ! "    . , , 
		H	% 
=; = =$")) ()5299 )5X	RYY 	RYY ;RYY ;|Pbii PfBII $   *B BJ+o299 +o\ryy   *? * *& +
% +
 +
\ 3
"6 3
3
l /
-A /
/
dr$   