
    cCin`              	          S r SSKrSSKrSSKJr  SSKJrJr  SSKrSSK	J
s  Jr  SSKJ
r
  SSKJr  SSKJrJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJr  \R8                  " \5      rS,S\R>                  S\ S\!S\R>                  4S jjr" " S S\
RF                  5      r$ " S S\
RF                  5      r% " S S\
RF                  5      r& " S S\
RF                  5      r' " S S\
RF                  5      r( " S S\
RF                  5      r) " S S \
RF                  5      r* " S! S"\
RF                  5      r+\ " S# S$\5      5       r,\ " S% S&\,5      5       r-\" S'S(9 " S) S*\,5      5       r./ S+Qr/g)-zPyTorch PVT model.    N)Iterable)OptionalUnion)nn   )ACT2FN)BaseModelOutputImageClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	PvtConfiginput	drop_probtrainingreturnc                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
        r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          ^/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/pvt/modeling_pvt.py	drop_pathr$   (   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )PvtDropPath=   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 .   > [         TU ]  5         Xl        g N)super__init__r   )selfr   	__class__s     r#   r,   PvtDropPath.__init__@   s    "r%   hidden_statesc                 B    [        XR                  U R                  5      $ r*   )r$   r   r   r-   r0   s     r#   forwardPvtDropPath.forwardD   s    FFr%   c                      SU R                    3$ )Nzp=r   )r-   s    r#   
extra_reprPvtDropPath.extra_reprG   s    DNN#$$r%   r6   r*   )__name__
__module____qualname____firstlineno____doc__r   floatr,   r   Tensorr3   strr7   __static_attributes____classcell__r.   s   @r#   r'   r'   =   sQ    b#(5/ #T # #GU\\ Gell G%C % %r%   r'   c                      ^  \ rS rSrSr SS\S\\\\   4   S\\\\   4   S\S\S\S	\	4U 4S
 jjjr
S\R                  S\S\S\R                  4S jrS\R                  S\\R                  \\4   4S jrSrU =r$ )PvtPatchEmbeddingsK   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
config
image_size
patch_sizestridenum_channelshidden_size	cls_tokenc                   > [         T	U ]  5         Xl        [        U[        R
                  R                  5      (       a  UOX"4n[        U[        R
                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        XPl	        Xl
        [        R                  " [        R                  " SU(       a  US-   OUU5      5      U l        U(       a,  [        R                  " [        R                   " SSU5      5      OS U l        [        R$                  " XVXCS9U l        [        R(                  " XaR*                  S9U l        [        R.                  " UR0                  S9U l        g )Nr   r   kernel_sizerJ   eps)p)r+   r,   rG   
isinstancecollectionsabcr   rH   rI   rK   num_patchesr   	Parameterr   randnposition_embeddingszerosrM   Conv2d
projection	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropout)
r-   rG   rH   rI   rJ   rK   rL   rM   rW   r.   s
            r#   r,   PvtPatchEmbeddings.__init__R   s    	#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&#%<<KKi;?[+V$
  JSekk!Q&DEX\))L6e,,{8M8MNzzF$>$>?r%   
embeddingsheightwidthr   c                    X#-  n[         R                  R                  5       (       d<  X@R                  R                  U R                  R                  -  :X  a  U R
                  $ UR                  SX#S5      R                  SSSS5      n[        R                  " XU4SS9nUR                  SSX#-  5      R                  SSS5      nU$ )Nr   r   r      bilinear)sizemode)
r   jit
is_tracingrG   rH   rZ   reshapepermuteFinterpolate)r-   re   rf   rg   rW   interpolated_embeddingss         r#   interpolate_pos_encoding+PvtPatchEmbeddings.interpolate_pos_encodingn   s    n yy##%%+9O9ORVR]R]RhRh9h*h+++''6"=EEaAqQ
"#--
%Wa"b"9"A"A!R"X"`"`abdegh"i&&r%   pixel_valuesc                 ~   UR                   u  p#pEX0R                  :w  a  [        S5      eU R                  U5      nUR                   Gt ptnUR	                  S5      R                  SS5      nU R                  U5      nU R                  b  U R                  R                  USS5      n	[        R                  " X4SS9nU R                  U R                  S S 2SS 24   XE5      n
[        R                  " U R                  S S 2S S24   U
4SS9n
OU R                  U R                  XE5      n
U R                  X-   5      nXU4$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rj   r   ri   dim)r   rK   
ValueErrorr]   flatten	transposer`   rM   expandr   catru   rZ   rc   )r-   rw   
batch_sizerK   rf   rg   patch_embed_re   rM   rZ   s              r#   r3   PvtPatchEmbeddings.forwardy   s8   2>2D2D/
&,,,w  ool3'--E!))!,66q!<__[1
>>%--j"bAII#:BJ"&"?"?@X@XYZ\]\^Y^@_ag"o"'))T-E-Ea!e-LNa,bhi"j"&"?"?@X@XZ`"h\\*"BC
5((r%   )
rM   rG   rc   rH   r`   rK   rW   rI   rZ   r]   F)r9   r:   r;   r<   r=   r   r   intr   boolr,   r   r?   ru   tupler3   rA   rB   rC   s   @r#   rE   rE   K   s      @@ #x},-@ #x},-	@
 @ @ @ @ @8	'5<< 	' 	'UX 	']b]i]i 	')ELL )U5<<c;Q5R ) )r%   rE   c                   n   ^  \ rS rSrS\S\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	PvtSelfOutput   rG   rL   c                    > [         TU ]  5         [        R                  " X"5      U l        [        R
                  " UR                  5      U l        g r*   )r+   r,   r   Lineardensera   rb   rc   )r-   rG   rL   r.   s      r#   r,   PvtSelfOutput.__init__   s4    YY{8
zz&"<"<=r%   r0   r   c                 J    U R                  U5      nU R                  U5      nU$ r*   r   rc   r2   s     r#   r3   PvtSelfOutput.forward   s$    

=1]3r%   r   )r9   r:   r;   r<   r   r   r,   r   r?   r3   rA   rB   rC   s   @r#   r   r      s6    >y >s >
U\\ ell  r%   r   c                      ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\S	\	R                  4S
 jr SS\	R                  S\S\S\S	\\	R                     4
S jjrSrU =r$ )PvtEfficientSelfAttention   zxEfficient self-attention mechanism with reduction of the sequence [PvT paper](https://huggingface.co/papers/2102.12122).rG   rL   num_attention_headssequences_reduction_ratioc                 ~  > [         TU ]  5         X l        X0l        U R                  U R                  -  S:w  a&  [	        SU R                   SU R                   S35      e[        U R                  U R                  -  5      U l        U R                  U R                  -  U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " UR                  5      U l        X@l        US:  a>  [        R$                  " X"XDS9U l        [        R(                  " X!R*                  S9U l        g g )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ())biasr   rO   rQ   )r+   r,   rL   r   r{   r   attention_head_sizeall_head_sizer   r   qkv_biasquerykeyvaluera   attention_probs_dropout_probrc   r   r\   sequence_reductionr^   r_   r`   r-   rG   rL   r   r   r.   s        r#   r,   "PvtEfficientSelfAttention.__init__   se    	&#6 d666!;#D$4$4#5 622316 
 $'t'7'7$:R:R'R#S !558P8PPYYt//1C1C&//Z
99T--t/A/AXYYt//1C1C&//Z
zz&"E"EF)B&$q(&(ii6O'D# !ll;<Q<QRDO	 )r%   r0   r   c                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nri   r   rj   r   r   )rl   r   r   viewrq   )r-   r0   	new_shapes      r#   transpose_for_scores.PvtEfficientSelfAttention.transpose_for_scores   sT    !&&("-1I1I4KcKc0dd	%**95$$Q1a00r%   rf   rg   output_attentionsc                    U R                  U R                  U5      5      nU R                  S:  aw  UR                  u  pgnUR	                  SSS5      R                  XhX#5      nU R                  U5      nUR                  XhS5      R	                  SSS5      nU R                  U5      nU R                  U R                  U5      5      n	U R                  U R                  U5      5      n
[        R                  " XYR                  SS5      5      nU[        R                  " U R                  5      -  n[         R"                  R%                  USS9nU R'                  U5      n[        R                  " X5      nUR	                  SSSS5      R)                  5       nUR+                  5       S S U R,                  4-   nUR/                  U5      nU(       a  X4nU$ U4nU$ )Nr   r   rj   ri   ry   r   )r   r   r   r   rq   rp   r   r`   r   r   r   matmulr}   mathsqrtr   r   
functionalsoftmaxrc   
contiguousrl   r   r   )r-   r0   rf   rg   r   query_layerr   seq_lenrK   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                   r#   r3   !PvtEfficientSelfAttention.forward   s    //

=0IJ))A-0=0C0C-J)11!Q:BB:]ckM 33MBM)11*BOWWXY[\^_`M OOM:M--dhh}.EF	//

=0IJ !<<5H5HR5PQ+dii8P8P.QQ --//0@b/I ,,7_B%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=2 O\M]r%   )r   r   rc   rL   r   r`   r   r   r   r   r   r   )r9   r:   r;   r<   r=   r   r   r>   r,   r   r?   r   r   r   r3   rA   rB   rC   s   @r#   r   r      s     CSS.1SHKShmS:1# 1%,, 1 #(*||* * 	*
  * 
u||	* *r%   r   c                      ^  \ rS rSrS\S\S\S\4U 4S jjrS r SS\	R                  S	\S
\S\S\\	R                     4
S jjrSrU =r$ )PvtAttention   rG   rL   r   r   c                 ~   > [         TU ]  5         [        UUUUS9U l        [	        XS9U l        [        5       U l        g )N)rL   r   r   )rL   )r+   r,   r   r-   r   r"   setpruned_headsr   s        r#   r,   PvtAttention.__init__   s@     	-# 3&?	
	 $FDEr%   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   ry   )lenr   r-   r   r   r   r   r   r   r   r"   r   r   union)r-   headsindexs      r#   prune_headsPvtAttention.prune_heads   s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r%   r0   rf   rg   r   r   c                 d    U R                  XX45      nU R                  US   5      nU4USS  -   nU$ )Nr   r   )r-   r"   )r-   r0   rf   rg   r   self_outputsattention_outputr   s           r#   r3   PvtAttention.forward  s@     yyQ;;|A7#%QR(88r%   )r"   r   r-   r   )r9   r:   r;   r<   r   r   r>   r,   r   r   r?   r   r   r3   rA   rB   rC   s   @r#   r   r      ss    "".1"HK"hm";& _d"\\36?BW[	u||	 r%   r   c            
          ^  \ rS rSr  SS\S\S\\   S\\   4U 4S jjjrS\R                  S\R                  4S	 jr
S
rU =r$ )PvtFFNi  rG   in_featureshidden_featuresout_featuresc                 x  > [         TU ]  5         Ub  UOUn[        R                  " X#5      U l        [        UR                  [        5      (       a  [        UR                     U l	        OUR                  U l	        [        R                  " X45      U l
        [        R                  " UR                  5      U l        g r*   )r+   r,   r   r   dense1rT   
hidden_actr@   r   intermediate_act_fndense2ra   rb   rc   )r-   rG   r   r   r   r.   s        r#   r,   PvtFFN.__init__  s     	'3'?|[ii=f''--'-f.?.?'@D$'-'8'8D$ii>zz&"<"<=r%   r0   r   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r*   )r   r   rc   r   r2   s     r#   r3   PvtFFN.forward)  sP    M200?]3M2]3r%   )r   r   rc   r   )NN)r9   r:   r;   r<   r   r   r   r,   r   r?   r3   rA   rB   rC   s   @r#   r   r     sc    
 *.&*>> > "#	>
 sm> >"U\\ ell  r%   r   c                   v   ^  \ rS rSrS\S\S\S\S\S\4U 4S jjrSS	\R                  S
\S\S\
4S jjrSrU =r$ )PvtLayeri2  rG   rL   r   r$   r   	mlp_ratioc                 ^  > [         TU ]  5         [        R                  " X!R                  S9U l        [        UUUUS9U l        US:  a  [        U5      O[        R                  " 5       U l
        [        R                  " X!R                  S9U l        [        X&-  5      n[        XUS9U l        g )NrQ   )rG   rL   r   r   r   )rG   r   r   )r+   r,   r   r^   r_   layer_norm_1r   	attentionr'   Identityr$   layer_norm_2r   r   mlp)	r-   rG   rL   r   r$   r   r   mlp_hidden_sizer.   s	           r#   r,   PvtLayer.__init__3  s     	LL:O:OP%# 3&?	
 4=s?Y/LL:O:OPk56Rabr%   r0   rf   rg   r   c                     U R                  U R                  U5      UUUS9nUS   nUSS  nU R                  U5      nXa-   nU R                  U R	                  U5      5      nU R                  U5      nX-   n	U	4U-   nU$ )N)r0   rf   rg   r   r   r   )r   r   r$   r   r   )
r-   r0   rf   rg   r   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r#   r3   PvtLayer.forwardI  s    !%++M:/	 "0 "
 2!4(,>>*:;(8XXd//>?
^^J/
$1/G+r%   )r   r$   r   r   r   r   )r9   r:   r;   r<   r   r   r>   r,   r   r?   r   r3   rA   rB   rC   s   @r#   r   r   2  st    cc c !	c
 c $)c c,U\\ 3 s _c  r%   r   c                      ^  \ rS rSrS\4U 4S jjr   SS\R                  S\\	   S\\	   S\\	   S\
\\4   4
S	 jjrS
rU =r$ )
PvtEncoderi`  rG   c                   > [         T	U ]  5         Xl        [        R                  " SUR
                  [        UR                  5      SS9R                  5       n/ n[        UR                  5       H  nUR                  [        UUS:X  a  UR                  OU R                  R                  SUS-   -  -  UR                  U   UR                  U   US:X  a  UR                   OUR"                  US-
     UR"                  U   XAR                  S-
  :H  S95        M     [$        R&                  " U5      U l        / nSn[        UR                  5       H  n/ nUS:w  a  XaR                  US-
     -  n[        UR                  U   5       HY  nUR                  [+        UUR"                  U   UR,                  U   X&U-      UR.                  U   UR0                  U   S95        M[     UR                  [$        R&                  " U5      5        M     [$        R&                  " U5      U l        [$        R4                  " UR"                  S   UR6                  S	9U l        g )
Nr   cpu)r   rj   r   )rG   rH   rI   rJ   rK   rL   rM   )rG   rL   r   r$   r   r   ri   rQ   )r+   r,   rG   r   linspacedrop_path_ratesumdepthstolistrangenum_encoder_blocksappendrE   rH   patch_sizesstridesrK   hidden_sizesr   
ModuleListpatch_embeddingsr   r   sequence_reduction_ratios
mlp_ratiosblockr^   r_   r`   )
r-   rG   drop_path_decaysre   iblockscurlayersjr.   s
            r#   r,   PvtEncoder.__init__a  s    !>>!V-B-BCDV_delln 
v001A"!45Fv00@V@V[\abefaf[g@h%11!4!>>!,89Q!4!4FDWDWXY\]X]D^ & 3 3A 6#<#<q#@@
 2 !#j 9 v001AFAv}}QU++6==+,%$*$7$7$:,2,F,Fq,I"27";282R2RST2U"("3"3A"6	 - MM"--/0! 2$ ]]6*
 ,,v':':2'>FDYDYZr%   rw   r   output_hidden_statesreturn_dictr   c                 d   U(       a  SOS nU(       a  SOS nUR                   S   n[        U R                  5      nUn	[        [	        U R
                  U R                  5      5       H  u  n
u  pU" U	5      u  pnU H/  nU" XX5      nUS   n	U(       a	  UUS   4-   nU(       d  M*  XY4-   nM1     XS-
  :w  d  MR  U	R                  X}US5      R                  SSSS5      R                  5       n	M     U R                  U	5      n	U(       a  XY4-   nU(       d  [        S XU4 5       5      $ [        U	UUS9$ )	N r   r   ri   r   rj   c              3   .   #    U  H  oc  M  Uv   M     g 7fr*   r	  ).0vs     r#   	<genexpr>%PvtEncoder.forward.<locals>.<genexpr>  s     m$[q$[s   	last_hidden_stater0   
attentions)r   r   r   	enumeratezipr   rp   rq   r   r`   r   r	   )r-   rw   r   r  r  all_hidden_statesall_self_attentionsr   
num_blocksr0   idxembedding_layerblock_layerrf   rg   r   layer_outputss                    r#   r3   PvtEncoder.forward  sK    #7BD$5b4!''*
_
$3<SAVAVX\XbXb=c3d/C//+:=+I(M5$ %mU V -a 0$*=qAQ@S*S'''(9<L(L% % 1n$ - 5 5j%QS T \ \]^`acdfg h s s u 4e 6 14D Dm]GZ$[mmm++*
 	
r%   )r   rG   r`   r   )FFT)r9   r:   r;   r<   r   r,   r   FloatTensorr   r   r   r   r	   r3   rA   rB   rC   s   @r#   r   r   `  ss    0[y 0[j -2/4&*#
''#
 $D>#
 'tn	#

 d^#
 
uo%	&#
 #
r%   r   c                   R    \ rS rSr% \\S'   SrSr/ rS\	R                  SS4S jrS	rg)
PvtPreTrainedModeli  rG   pvtrw   moduler   Nc                 |   U R                   R                  n[        U[        R                  [        R
                  45      (       af  [        R                  R                  UR                  R                  SUS9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a  [        R                  R                  UR                  R                  SUS9UR                  l	        UR                   bC  [        R                  R                  UR                   R                  SUS9UR                   l	        ggg)zInitialize the weightsr   )meanstdNg      ?)rG   initializer_rangerT   r   r   r\   inittrunc_normal_weightdatar   zero_r^   fill_rE   rZ   rM   )r-   r   r#  s      r#   _init_weights PvtPreTrainedModel._init_weights  sI   kk++fryy"))455 GG!!&--"4"43C!H{{&  &&( '--KK""$MM$$S) 233.0gg.C.C**// /D /F&&+
 +(*(=(=$$)) )> )  % , 4r%   r	  )r9   r:   r;   r<   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   Moduler+  rA   r	  r%   r#   r  r    s0    $OBII $ r%   r  c                      ^  \ rS rSrS\4U 4S jjrS r\   SS\R                  S\
\   S\
\   S\
\   S	\\\4   4
S
 jj5       rSrU =r$ )PvtModeli  rG   c                 p   > [         TU ]  U5        Xl        [        U5      U l        U R                  5         g r*   )r+   r,   rG   r   encoder	post_initr-   rG   r.   s     r#   r,   PvtModel.__init__  s/      "&) 	r%   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr5  layerr   r   )r-   heads_to_pruner;  r   s       r#   _prune_headsPvtModel._prune_heads  s<    
 +002LELLu%//;;EB 3r%   rw   r   r  r  r   c                 0   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  UUUUS9nUS   nU(       d	  U4USS  -   $ [        UUR                  UR                  S9$ )Nrw   r   r  r  r   r   r  )rG   r   r  use_return_dictr5  r	   r0   r  )r-   rw   r   r  r  encoder_outputssequence_outputs          r#   r3   PvtModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B],,%/!5#	 ' 
 *!,#%(;;;-)77&11
 	
r%   )rG   r5  )NNN)r9   r:   r;   r<   r   r,   r=  r   r   r  r   r   r   r   r	   r3   rA   rB   rC   s   @r#   r3  r3    s    y C  -1/3&*
''
 $D>
 'tn	

 d^
 
uo%	&
 
r%   r3  z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    )custom_introc                      ^  \ rS rSrS\SS4U 4S jjr\    SS\\R                     S\\R                     S\\
   S	\\
   S
\\
   S\\\4   4S jj5       rSrU =r$ )PvtForImageClassificationi  rG   r   Nc                 6  > [         TU ]  U5        UR                  U l        [        U5      U l        UR                  S:  a.  [
        R                  " UR                  S   UR                  5      O[
        R                  " 5       U l	        U R                  5         g )Nr   ri   )r+   r,   
num_labelsr3  r  r   r   r   r   
classifierr6  r7  s     r#   r,   "PvtForImageClassification.__init__  sy      ++F# FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r%   rw   labelsr   r  r  c                 T   Ub  UOU R                   R                  nU R                  UUUUS9nUS   nU R                  USS2SSS24   5      nSn	Ub  U R	                  X(U R                   5      n	U(       d  U4USS -   n
U	b  U	4U
-   $ U
$ [        U	UUR                  UR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr@  r   r   )losslogitsr0   r  )rG   rA  r  rJ  loss_functionr
   r0   r  )r-   rw   rL  r   r  r  r   rC  rO  rN  r"   s              r#   r3   !PvtForImageClassification.forward#  s     &1%<k$++B]B]((%/!5#	  
 "!*Aq!9:%%fdkkBDY,F)-)9TGf$EvE$!//))	
 	
r%   )rJ  rI  r  )NNNN)r9   r:   r;   r<   r   r,   r   r   r   r?   r   r   r   r
   r3   rA   rB   rC   s   @r#   rG  rG    s    y T   *.,0/3&*(
u||,(
 &(
 $D>	(

 'tn(
 d^(
 
u++	,(
 (
r%   rG  )rG  r3  r  )r   F)0r=   rU   r   collections.abcr   typingr   r   r   torch.nn.functionalr   r   rr   activationsr   modeling_outputsr	   r
   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_pvtr   
get_loggerr9   loggerr?   r>   r   r$   r1  r'   rE   r   r   r   r   r   r   r  r3  rG  __all__r	  r%   r#   <module>r^     se  "    $ "     ! F - Q , ( 
		H	%U\\ e T V[VbVb *%")) %A) A)H	BII 	O		 Od'299 'TRYY 6+ryy +\V
 V
r   @ 0
! 0
 0
f 8
 2 8
8
v Jr%   