
    cCi[                     R   S SK rS SKJrJrJr  S SKrS SKJr  SSK	J
r
  SSKJr  SSKJrJrJr  SSKJrJr  SSKJr  SS	KJrJr  SS
KJrJrJr  SSKJrJr  SSK J!r!   " S S\RD                  5      r# " S S\RD                  5      r$ S3S\RD                  S\RJ                  S\RJ                  S\RJ                  S\\RJ                     S\&S\&4S jjr' " S S\RD                  5      r( " S S\RD                  5      r) " S S\RD                  5      r* " S  S!\RD                  5      r+ " S" S#\RD                  5      r, " S$ S%\5      r-\ " S& S'\5      5       r. " S( S)\RD                  5      r/ " S* S+\RD                  5      r0\ " S, S-\.5      5       r1\" S.S/9 " S0 S1\.5      5       r2/ S2Qr3g)4    N)CallableOptionalUnion   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingImageClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack) find_pruneable_heads_and_indicesprune_linear_layer)TransformersKwargsauto_docstring	torch_int)can_return_tuplecheck_model_inputs   )IJepaConfigc                   v   ^  \ rS rSrSrS\4U 4S jjrS
S\R                  S\	S\R                  4S jjr
S	rU =r$ )IJepaPatchEmbeddings   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
configc                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   )kernel_sizestride)super__init__
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterablenum_patchesnnConv2d
projection)selfr   r!   r"   r#   r$   r)   	__class__s          b/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/ijepa/modeling_ijepa.pyr    IJepaPatchEmbeddings.__init__   s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:i    pixel_valuesinterpolate_pos_encodingreturnc                    UR                   u  p4pVX@R                  :w  a  [        SU R                   SU S35      eU(       dV  XPR                  S   :w  d  X`R                  S   :w  a2  [        SU SU SU R                  S    SU R                  S    S	3	5      eU R	                  U5      R                  S
5      R                  SS
5      nU$ )NzoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r   r   zInput image size (*z) doesn't match model (z).   )shaper#   
ValueErrorr!   r,   flatten	transpose)r-   r2   r3   
batch_sizer#   heightwidth
embeddingss           r/   forwardIJepaPatchEmbeddings.forward.   s    2>2D2D/
&,,,!../yaI  (++u8J/J (% 9+,Adooa.@-AE  __\2::1=GG1M
r1   )r!   r#   r)   r"   r,   F)__name__
__module____qualname____firstlineno____doc__r   r    torchTensorboolrA   __static_attributes____classcell__r.   s   @r/   r   r      s@    j{ jELL D ]b]i]i  r1   r   c            	          ^  \ rS rSrSrSS\S\SS4U 4S jjjrS\R                  S	\
S
\
S\R                  4S jr  SS\R                  S\\R                     S\S\R                  4S jjrSrU =r$ )IJepaEmbeddings?   zZ
Construct the CLS token, position and patch embeddings. Optionally, also the mask token.
r   use_mask_tokenr4   Nc                   > [         TU ]  5         U(       a6  [        R                  " [        R
                  " SSUR                  5      5      OS U l        [        U5      U l	        U R                  R                  n[        R                  " [        R                  " SX1R                  5      5      U l        [        R                  " UR                  5      U l        UR                   U l        Xl        g )Nr   )r   r    r*   	ParameterrI   zerosr$   
mask_tokenr   patch_embeddingsr)   randnposition_embeddingsDropouthidden_dropout_probdropoutr"   r   )r-   r   rR   r)   r.   s       r/   r    IJepaEmbeddings.__init__D   s    Q_",,u{{1a9K9K'LMei 4V <++77#%<<A{L^L^0_#` zz&"<"<= ++r1   r@   r>   r?   c                 ,   UR                   S   nU R                  R                   S   n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  nUR                   S   nX R
                  -  nX0R
                  -  n	[        US-  5      n
UR                  SXU5      nUR                  SSSS5      n[        R                  R                  UX4SSS	9nUR                  SSSS5      R                  SSU5      nU$ )
a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   g      ?r   r   r8   bicubicF)sizemodealign_corners)r9   rY   rI   jit
is_tracingr"   r   reshapepermuter*   
functionalinterpolateview)r-   r@   r>   r?   r)   num_positionspatch_pos_embeddim
new_height	new_widthsqrt_num_positionss              r/   r3   (IJepaEmbeddings.interpolate_pos_encodingN   s    !&&q)0066q9 yy##%%+*F6?+++22r".
__,	&}c'9:)11!5G]`a)11!Q1=--33(	 4 
 *11!Q1=BB1b#Nr1   r2   bool_masked_posr3   c                 n   UR                   u  pEpgU R                  XS9nUbX  UR                   S   n	U R                  R                  XIS5      n
UR	                  S5      R                  U
5      nUSU-
  -  X-  -   nU(       a  XR                  XU5      -   nOXR                  -   nU R                  U5      nU$ )N)r3   r   r_         ?)	r9   rW   rV   expand	unsqueezetype_asr3   rY   r\   )r-   r2   rr   r3   r=   _r>   r?   r@   
seq_lengthmask_tokensmasks               r/   rA   IJepaEmbeddings.forwardu   s     (4'9'9$
v**<*k
&#))!,J//00LK",,R088ED#sTz2[5GGJ $#&C&CJX]&^^J#&>&>>J\\*-
r1   )r   r\   rV   rW   r"   rY   rC   NF)rD   rE   rF   rG   rH   r   rK   r    rI   rJ   intr3   r   
BoolTensorrA   rL   rM   rN   s   @r/   rP   rP   ?   s    { D T  %5<< % %UX %]b]i]i %T 7;).	ll "%"2"23 #'	
 
 r1   rP   modulequerykeyvalueattention_maskscalingr\   c                    [         R                  " XR                  SS5      5      U-  n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9nUb  X-  n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr_   )rm   dtype)ptrainingr   r8   )rI   matmulr<   r*   rh   softmaxfloat32tor   r\   r   
contiguous)
r   r   r   r   r   r   r\   kwargsattn_weightsattn_outputs
             r/   eager_attention_forwardr      s     <<}}R'<=GL ==((2U]](SVVW\WbWbcL ==((6??([L !#4,,|3K''1-88:K$$r1   c            	          ^  \ rS rSrS\4U 4S jjr S	S\R                  S\\R                     S\	\R                  \R                  4   4S jjr
SrU =r$ )
IJepaSelfAttention   r   c                 0  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        UR                  U l        U R                  S-  U l        SU l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        g )	Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads r6   g      F)bias)r   r    r$   num_attention_headshasattrr:   r   r~   attention_head_sizeall_head_sizeattention_probs_dropout_probdropout_probr   	is_causalr*   Linearqkv_biasr   r   r   r-   r   r.   s     r/   r    IJepaSelfAttention.__init__   sG    : ::a?PVXhHiHi"6#5#5"6 7334A7 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PP"??//5YYv1143E3EFOO\
99V//1C1C&//ZYYv1143E3EFOO\
r1   hidden_states	head_maskr4   c                    UR                   S   nUSU R                  U R                  4nU R                  U5      R                  " U6 R                  SS5      nU R                  U5      R                  " U6 R                  SS5      nU R                  U5      R                  " U6 R                  SS5      n[        nU R                  R                  S:w  a  [        U R                  R                     nU" U UUUUU R                  U R                  U R                  (       d  SOU R                  S9u  pU	R!                  5       S S U R"                  4-   nU	R%                  U5      n	X4$ )	Nr   r_   r   r8   eager        )r   r   r\   r   )r9   r   r   r   rj   r<   r   r   r   r   _attn_implementationr   r   r   r   r   ra   r   rf   )r-   r   r   r=   	new_shape	key_layervalue_layerquery_layerattention_interfacecontext_layerattention_probsnew_context_layer_shapes               r/   rA   IJepaSelfAttention.forward   sH    #((+
D$<$<d>V>VV	HH]+00)<FFq!L	jj/44i@JJ1aPjj/44i@JJ1aP(?;;++w6"9$++:Z:Z"[)<nnLL#}}C$2C2C	*
& #0"4"4"6s";t?Q?Q>S"S%--.EF--r1   )
r   r   r   r   r   r   r   r   r   r   N)rD   rE   rF   rG   r   r    rI   rJ   r   tuplerA   rL   rM   rN   s   @r/   r   r      sY    ]{ ]* PT."\\.6>u||6L.	u||U\\)	*. .r1   r   c                      ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jr	S	r
U =r$ )
IJepaSelfOutput   z
The residual connection is defined in IJepaLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
r   c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        g r   )	r   r    r*   r   r$   denserZ   r[   r\   r   s     r/   r    IJepaSelfOutput.__init__   sB    YYv1163E3EF
zz&"<"<=r1   r   input_tensorr4   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r\   r-   r   r   s      r/   rA   IJepaSelfOutput.forward   s$    

=1]3r1   r   )rD   rE   rF   rG   rH   r   r    rI   rJ   rA   rL   rM   rN   s   @r/   r   r      sB    
>{ >
U\\  RWR^R^  r1   r   c                      ^  \ rS rSrS\4U 4S jjrS\\   4S jrSS\	R                  S\\	R                     S\	R                  4S	 jjrS
rU =r$ )IJepaAttention   r   c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g r   )r   r    r   	attentionr   outputsetpruned_headsr   s     r/   r    IJepaAttention.__init__   s0    +F3%f-Er1   headsc                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rm   )lenr   r   r   r   r   r   r   r   r   r   r   r   union)r-   r   indexs      r/   prune_headsIJepaAttention.prune_heads   s   u:?7>>55t~~7Y7Y[_[l[l

  2$..2F2FN/0B0BEJ1$..2F2FN.t{{/@/@%QO .2^^-O-ORUV[R\-\*'+~~'I'IDNNLnLn'n$ --33E:r1   r   r   r4   c                 N    U R                  X5      u  p4U R                  X15      nU$ r   )r   r   )r-   r   r   self_attn_outputrx   r   s         r/   rA   IJepaAttention.forward  s(    "nn]F-=r1   )r   r   r   r   )rD   rE   rF   rG   r   r    r   r~   r   rI   rJ   r   rA   rL   rM   rN   s   @r/   r   r      sR    "{ ";S ;$U\\ hu||>T `e`l`l  r1   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )IJepaIntermediatei  r   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r   r    r*   r   r$   intermediate_sizer   r%   
hidden_actstrr   intermediate_act_fnr   s     r/   r    IJepaIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r1   r   r4   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r-   r   s     r/   rA   IJepaIntermediate.forward  s&    

=100?r1   r   rD   rE   rF   rG   r   r    rI   rJ   rA   rL   rM   rN   s   @r/   r   r     s/    9{ 9U\\ ell  r1   r   c                      ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jrSr	U =r
$ )	IJepaOutputi!  r   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  5      U l	        g r   )
r   r    r*   r   r   r$   r   rZ   r[   r\   r   s     r/   r    IJepaOutput.__init__"  sB    YYv779K9KL
zz&"<"<=r1   r   r   r4   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r   r   r   s      r/   rA   IJepaOutput.forward'  s,    

=1]3%4r1   r   r   rN   s   @r/   r   r   !  s=    >{ >
U\\  RWR^R^  r1   r   c                      ^  \ rS rSrSrS\4U 4S jjrS
S\R                  S\	\R                     S\R                  4S jjr
S	rU =r$ )
IJepaLayeri.  z?This corresponds to the Block class in the timm implementation.r   c                 j  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        g )Nr   eps)r   r    chunk_size_feed_forwardseq_len_dimr   r   r   intermediater   r   r*   	LayerNormr$   layer_norm_epslayernorm_beforelayernorm_afterr   s     r/   r    IJepaLayer.__init__1  s    '-'E'E$'/-f5!&) "V-?-?VEZEZ [!||F,>,>FDYDYZr1   r   r   r4   c                     U R                  U5      nU R                  X25      nXA-   nU R                  U5      nU R                  U5      nU R	                  XQ5      nU$ r   )r   r   r   r   r   )r-   r   r   hidden_states_normattention_outputlayer_outputs         r/   rA   IJepaLayer.forward;  se    !22=A>>*<H )8 ++M:((6 {{<?r1   )r   r   r   r   r   r   r   r   )rD   rE   rF   rG   rH   r   r    rI   rJ   r   rA   rL   rM   rN   s   @r/   r   r   .  sG    I[{ [U\\ hu||>T `e`l`l  r1   r   c                       \ rS rSr% \\S'   SrSrSrSS/r	Sr
SrSrSr\\S.rS	\\R&                  \R(                  \R*                  4   S
S4S jrSrg)IJepaPreTrainedModeliL  r   ijepar2   TrP   r   )r   
attentionsr   r4   Nc                    [        U[        R                  [        R                  45      (       a  [        R                  R                  UR                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR                  R                  5      UR                  l        UR                  b%  UR                  R                  R                  5         gg[        U[        R                   5      (       aJ  UR                  R                  R                  5         UR                  R                  R#                  S5        g[        U[$        5      (       a  [        R                  R                  UR&                  R                  R                  [        R                  5      SU R                  R                  S9R                  UR&                  R                  5      UR&                  l        UR(                  b%  UR(                  R                  R                  5         ggg)zInitialize the weightsr   )meanstdNrt   )r%   r*   r   r+   inittrunc_normal_weightdatar   rI   r   r   initializer_ranger   r   zero_r   fill_rP   rY   rV   )r-   r   s     r/   _init_weights"IJepaPreTrainedModel._init_weights\  s   fryy"))455 "$!6!6""%%emm43DKKDaDa "7 "b$$% MM {{&  &&( '--KK""$MM$$S)00.0gg.C.C**//225==AKK11 /D / b++112	 &&+
   ,!!&&,,. - 1r1    )rD   rE   rF   rG   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   r   _can_record_outputsr   r*   r   r+   r   r  rL   r	  r1   r/   r   r   L  su    $O&*#*L9N"&#(
/E"))RYY*L$M /RV /r1   r   c                   x   ^  \ rS rSrS\4U 4S jjrS	S\R                  S\\R                     S\	4S jjr
SrU =r$ )
IJepaEncoderis  r   c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf r}   )
r   r    r   r*   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r-   r   rx   r.   s      r/   r    IJepaEncoder.__init__t  sR    ]]fF^F^@_#`@_1Jv$6@_#`a
&+# $as   A&r   r   r4   c                 r    [        U R                  5       H  u  p4Ub  X#   OS nU" X5      nM     [        US9$ )N)last_hidden_state)	enumerater  r	   )r-   r   r   ilayer_modulelayer_head_masks         r/   rA   IJepaEncoder.forwardz  s<    (4OA.7.CilO(HM  5 ??r1   )r   r  r  r   )rD   rE   rF   rG   r   r    rI   rJ   r   r	   rA   rL   rM   rN   s   @r/   r  r  s  sA    ,{ ,@U\\ @hu||>T @`o @ @r1   r  c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )IJepaPooleri  r   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        g r   )
r   r    r*   r   r$   pooler_output_sizer   r   
pooler_act
activationr   s     r/   r    IJepaPooler.__init__  s>    YYv1163L3LM
 !2!23r1   r   r4   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r)  )r-   r   first_token_tensorpooled_outputs       r/   rA   IJepaPooler.forward  s6     +1a40

#566r1   )r)  r   r   rN   s   @r/   r%  r%    s/    4{ 4
U\\ ell  r1   r%  c                     ^  \ rS rSrSS\S\S\4U 4S jjjrS\4S jrS	\	\
\\
   4   4S
 jr\" SS9\    SS\\R"                     S\\R$                     S\\R"                     S\\   S\\   S\4S jj5       5       rSrU =r$ )
IJepaModeli  Fr   add_pooling_layerrR   c                   > [         TU ]  U5        Xl        [        XS9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U(       a  [        U5      OSU l        U R                  5         g)z
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
use_mask_token (`bool`, *optional*, defaults to `False`):
    Whether to use a mask token for masked image modeling.
)rR   r   N)r   r    r   rP   r@   r  encoderr*   r   r$   r   	layernormr%  pooler	post_init)r-   r   r1  rR   r.   s       r/   r    IJepaModel.__init__  si     	 )&P#F+f&8&8f>S>ST->k&)D 	r1   r4   c                 .    U R                   R                  $ r   )r@   rW   )r-   s    r/   get_input_embeddingsIJepaModel.get_input_embeddings  s    ///r1   heads_to_prunec                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr3  r  r   r   )r-   r;  r  r   s       r/   _prune_headsIJepaModel._prune_heads  s<    
 +002LELLu%//;;EB 3r1   )tie_last_hidden_statesr2   rr   r   r3   r   c                    Uc  [        S5      eU R                  X0R                  R                  5      nU R                  R
                  R                  R                  R                  nUR                  U:w  a  UR                  U5      nU R	                  XUS9nU R                  XsS9nUR                  n	U R                  U	5      n	U R                  b  U R                  U	5      OSn
[        XS9$ )z
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
Nz You have to specify pixel_values)rr   r3   )r   )r  pooler_output)r:   get_head_maskr   r  r@   rW   r,   r  r   r   r3  r  r4  r5  r
   )r-   r2   rr   r   r3   r   expected_dtypeembedding_outputencoder_outputssequence_outputr-  s              r/   rA   IJepaModel.forward  s     ?@@ &&y++2O2OP	 99DDKKQQ/'??>:L??Tl + 
 ,0<<8H<+^);;..98<8OO4UY)Oiir1   )r   r@   r3  r4  r5  )FFNNNN)rD   rE   rF   rG   r   rK   r    r   r9  dictr~   listr>  r   r   r   rI   rJ   r   r   r   r
   rA   rL   rM   rN   s   @r/   r0  r0    s    { t ]a  $0&: 0C4T#Y+? C u5 046:,037&ju||,&j "%"2"23&j ELL)	&j
 #+4.&j +,&j 
$&j  6&jr1   r0  a  
    IJepa Model transformer with an image classification head on top (a linear layer on top of the final hidden states)
    e.g. for ImageNet.

    <Tip>

        Note that it's possible to fine-tune IJepa on higher resolution images than the ones it has been trained on, by
        setting `interpolate_pos_encoding` to `True` in the forward of the model. This will interpolate the pre-trained
        position embeddings to the higher resolution.

    </Tip>
    )custom_introc                      ^  \ rS rSrS\4U 4S jjr\\    SS\\	R                     S\\	R                     S\\	R                     S\\   S\\   S	\4S
 jj5       5       rSrU =r$ )IJepaForImageClassificationi  r   c                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l	        U R                  5         g )NF)r1  r   )r   r    
num_labelsr0  r   r*   r   r$   Identity
classifierr6  r   s     r/   r    $IJepaForImageClassification.__init__  ss      ++%@
 OUN_N_bcNc"))F$6$68I8IJikititiv 	r1   r2   r   labelsr3   r   r4   c                    U R                   " U4UUS.UD6nUR                  nU R                  UR                  SS95      nSn	Ub  U R                  " X8U R
                  40 UD6n	[        U	UUR                  UR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
)r   r3   r   r   N)losslogitsr   r   )	r   r  rR  r   loss_functionr   r   r   r   )
r-   r2   r   rT  r3   r   outputsrG  rW  rV  s
             r/   rA   #IJepaForImageClassification.forward  s    " /3jj/
%=/
 	/
 "33!5!5!!5!<=%%fdkkLVLD$!//))	
 	
r1   )rR  r   rP  rI  )rD   rE   rF   rG   r   r    r   r   r   rI   rJ   rK   r   r   r   rA   rL   rM   rN   s   @r/   rN  rN    s    
{ 
  04,0)-37!
u||,!
 ELL)!
 &	!

 #+4.!
 +,!
 
!
  !
r1   rN  )r   r0  rN  )r   )4collections.abcr&   typingr   r   r   rI   torch.nnr*   activationsr   modeling_layersr   modeling_outputsr	   r
   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   r   utilsr   r   r   utils.genericr   r   configuration_ijepar   Moduler   rP   rJ   floatr   r   r   r   r   r   r   r   r  r%  r0  rN  __all__r	  r1   r/   <module>rj     s    , ,   ! 9 b b F & Q B B A ,$299 $NNbii Np %II%<<% 
% <<	%
 U\\*% % %<1. 1.hbii "RYY >		 
")) 
+ < #/? #/ #/L@299 @"))  Fj% Fj FjR 0
"6 0
0
f Pr1   