
    bCi                        S SK JrJr  S SKrS SKJr  S SKrS SKJ	r
  S SK	rS SKJrJrJr  S SKJr  S SKJrJr  SSKJrJrJrJr  SSKJrJrJrJr  SS	KJ r J!r!  S
SK"J#r#  \RH                  RJ                   " S S\5      5       r&Sr'Sr(S\)\*\*4   S\
RV                  4S jr,\
RZ                  4S jr. " S S\R^                  5      r0 " S S\R^                  5      r1 " S S\R^                  5      r2 " S S\R^                  5      r3 " S S\R^                  5      r4 " S S\R^                  5      r5 " S  S!\R^                  5      r6 " S" S#\R^                  5      r7 " S$ S%\R^                  5      r8 " S& S'\R^                  5      r9 " S( S)\R^                  5      r: " S* S+\R^                  5      r; " S, S-\5      r< " S. S/\R^                  5      r= " S0 S1\R^                  5      r>\ " S2\'5       " S3 S4\<5      5       r?S5r@\" \?\@5        \" \?\&\#S69   " S7 S8\R^                  5      rA\ " S9\'5       " S: S;\<5      5       rBS<rC\" \B\C5        \" \B\\#S69   " S= S>\R^                  5      rD\ " S?\'5       " S@ SA\<5      5       rESBrF\" \E\F5        \" \E\\#S69  / SCQrGg)D    )CallableOptionalN)
FrozenDictfreezeunfreeze)dot_product_attention_weights)flatten_dictunflatten_dict   )FlaxBaseModelOutputFlaxBaseModelOutputWithPoolingFlaxMaskedLMOutputFlaxSequenceClassifierOutput)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forward   )
BeitConfigc                       \ rS rSrSrSrg)FlaxBeitModelOutputWithPooling,   aB  
Class for outputs of [`FlaxBeitModel`].

Args:
    last_hidden_state (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    pooler_output (`jnp.ndarray` of shape `(batch_size, hidden_size)`):
        Average of the last layer hidden states of the patch tokens (excluding the *[CLS]* token) if
        *config.use_mean_pooling* is set to True. If set to False, then the final hidden state of the *[CLS]* token
        will be returned.
    hidden_states (`tuple(jnp.ndarray)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `jnp.ndarray` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus
        the initial embedding outputs.
    attentions (`tuple(jnp.ndarray)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `jnp.ndarray` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
 N)__name__
__module____qualname____firstlineno____doc____static_attributes__r       e/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/beit/modeling_flax_beit.pyr   r   ,   s    r"   r   a  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`BeitConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
a  
    Args:
        pixel_values (`numpy.ndarray` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`AutoImageProcessor.__call__`] for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
window_sizereturnc                    SU S   -  S-
  SU S   -  S-
  -  S-   n[         R                  " U S   5      n[         R                  " U S   5      n[         R                  " [         R                  " X#SS95      n[         R                  " US5      nUSS2SS2S4   USS2SSS24   -
  n[         R
                  " US	5      nUSS2SS2S4==   U S   S-
  -  ss'   USS2SS2S4==   U S   S-
  -  ss'   USS2SS2S4==   SU S   -  S-
  -  ss'   [         R                  " U S   U S   -  S-   4S-  UR                  S
9nUR                  S5      USS2SS24'   US-
  USSS24'   US-
  USS2S4'   US-
  US'   [        R                  " U5      $ )zH
get pair-wise relative position index for each token inside the window
   r   r   r   ij)indexing)r'   N)r   r'   r   shapedtyper*   )r   r   )nparangestackmeshgridreshape	transposezerosr-   sumjnparray)r$   num_relative_distancecoords_hcoords_wcoordscoords_flattenrelative_coordsrelative_position_indexs           r#   relative_position_index_initr?   w   s    Q/!3KN8JQ8NORSSyyQ(HyyQ(HXXbkk(tDEFZZ0N$Q4Z0>!T1*3MMOll?I>OAq!GA 22Aq!GA 22Aq!GKN 2Q 66 hhk!n{1~.MPQ.Q-SVW-W_n_t_tu&5&9&9"&=ABF#%:Q%>AqrE"%:Q%>ABE"$9A$=D!99,--r"   c                 4    [         R                  " X5      U-  $ N)r6   ones)keyr,   scaler-   s       r#   ones_with_scalerE      s    88E!E))r"   c                   n    \ rS rSr% Sr\\S'   \R                  R                  SS\
\   4S jj5       rSrg)	FlaxBeitDropPath   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).ratedeterministicc                 R   U R                   S:X  a  U$ SU R                   -
  nU(       a  U$ UR                  S   4SUR                  S-
  -  -   nU R                  S5      nU[        R
                  R                  XTUR                  S9-   n[        R                  " U5      nX-  U-  nU$ )N        g      ?r   )r   r   droppathr+   )
rI   r,   ndimmake_rngjaxrandomuniformr-   r6   floor)	selfinputsrJ   	keep_probr,   rngrandom_tensorbinary_tensoroutputs	            r#   __call__FlaxBeitDropPath.__call__   s    99M$))O	M\\!_&q)AAE--
+C%

(:(:3SYS_S_(:(``MIIm4M'-7FMr"   r   NT)r   r   r   r   r    float__annotations__nnmodulecompactr   boolr[   r!   r   r"   r#   rG   rG      s1    b
KYYhtn  r"   rG   c                   b    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	S r
Srg)FlaxBeitPatchEmbeddings   configr-   c           
         U R                   R                  U l        U R                   R                  nU R                   R                  nX-  X-  -  nX-  X-  4nX0l        X@l        [        R                  " U R                   R                  X"4X"4SU R                  [        R                  R                  R                  U R                   R                  5      S9U l        g )NVALID)kernel_sizestridespaddingr-   kernel_init)rg   num_channels
image_size
patch_sizenum_patchespatch_shaper`   Convhidden_sizer-   rP   initializersnormalinitializer_range
projection)rT   ro   rp   rq   rr   s        r#   setupFlaxBeitPatchEmbeddings.setup   s     KK44[[++
[[++
!/J4LM!/1IJ&&''KK###0,**++224;;3P3PQ
r"   c                     UR                   S   nX R                  :w  a  [        S5      eU R                  U5      nUR                   u  n  pV[        R
                  " X4SU45      $ )Nr*   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r,   rn   
ValueErrorrx   r6   r2   )rT   pixel_valuesrn   
embeddings
batch_size_channelss          r#   r[    FlaxBeitPatchEmbeddings.__call__   sf    #))"-,,,w  __\2
%/%5%5"
Aq{{:B'ABBr"   )rn   rq   rr   rx   Nr   r   r   r   r   r_   r6   float32r-   ry   r[   r!   r   r"   r#   re   re      s%    {{E399"
"Cr"   re   c                   j    \ rS rSr% Sr\\S'   \R                  r	\R                  \S'   S r
S	S jrSrg)
FlaxBeitEmbeddings   z7Construct the CLS token, position and patch embeddings.rg   r-   c                    U R                  S[        R                  R                  SSU R                  R
                  45      U l        U R                  R                  (       aG  U R                  S[        R                  R                  SSU R                  R
                  45      U l        [        U R                  U R                  S9U l        U R                  R                  nU R                  R                  (       aJ  U R                  S[        R                  R                  SUS-   U R                  R
                  45      U l        [        R                  " U R                  R                   S9U l        g )N	cls_tokenr   
mask_tokenr-   position_embeddingsrI   )paramr`   ru   r4   rg   rt   r   use_mask_tokenr   re   r-   patch_embeddingsrq    use_absolute_position_embeddingsr   Dropouthidden_dropout_probdropout)rT   rq   s     r#   ry   FlaxBeitEmbeddings.setup   s    K1F1FAt{{OfOfHgh;;%%"jjr7L7LqRSUYU`U`UlUlNmnDO 74:: V++77;;77'+zz%r'<'<q+PQ/SWS^S^SjSj>k(D$ zzt{{'F'FGr"   Nc                    U R                  U5      nUR                  u  pVn[        R                  " U R                  USU R
                  R                  45      nUR                  UR                  5      nUbt  [        R                  " U R                  XVU R
                  R                  45      n	U	R                  UR                  5      n	[        R                  " USS9n
USU
-
  -  X-  -   n[        R                  " X4SS9nU R
                  R                  (       a'  X@R                  R                  UR                  5      -   nU R                  XCS9nU$ )Nr   r*   axisrJ   )r   r,   r6   broadcast_tor   rg   rt   astyper-   r   expand_dimsconcatenater   r   r   )rT   r}   bool_masked_posrJ   r~   r   seq_lenr   
cls_tokensmask_tokensws              r#   r[   FlaxBeitEmbeddings.__call__   s   **<8
!+!1!1
Q%%dnnz1dkkF]F]6^_
&&z'7'78
&**4??ZRVR]R]RiRi<jkK%,,Z-=-=>Kb9A#q1u-?J__j%=AF
;;77#&>&>&E&EjFVFV&WWJ\\*\J
r"   )r   r   r   r   r   )NT)r   r   r   r   r    r   r_   r6   r   r-   ry   r[   r!   r   r"   r#   r   r      s(    A{{E399"
Hr"   r   c                   v    \ rS rSr% \\S'   \\\4   \S'   \R                  r
\R                  \S'   S rS rSrg)	FlaxBeitRelativePositionBias   rg   r$   r-   c                    SU R                   S   -  S-
  SU R                   S   -  S-
  -  S-   nU R                  S[        R                  R                  XR
                  R                  45      U l        [        U R                   5      U l	        g )Nr'   r   r   r   relative_position_bias_table)
r$   r   r`   ru   r4   rg   num_attention_headsr   r?   r>   )rT   r8   s     r#   ry   "FlaxBeitRelativePositionBias.setup   s    !"T%5%5a%8!81!<TEUEUVWEXAX[\A\ ]`a a,0JJ*OO!!"KK$C$CD-
) (DDDTDT'U$r"   c                 ,   U R                   R                  S5      nU R                  S   U R                  S   -  S-   U R                  S   U R                  S   -  S-   S4nU R                  U   R                  U5      n[        R
                  " US5      $ )Nr*   r   r   )r'   r   r   )r>   r2   r$   r   r6   r3   )rT   indexr,   relative_position_biass       r#   r[   %FlaxBeitRelativePositionBias.__call__  s    ,,44R8!!!$t'7'7'::Q>@P@PQR@SVZVfVfghVi@ilm@moqr!%!B!B5!I!Q!QRW!X}}3Y??r"   )r   r>   N)r   r   r   r   r   r_   tupleintr6   r   r-   ry   r[   r!   r   r"   r#   r   r      s4    sCx {{E399"	V@r"   r   c                       \ rS rSr% \\S'   \\\4   \S'   \R                  r
\R                  \S'   S r SS\S\4S	 jjrS
rg)FlaxBeitSelfAttentioni  rg   r$   r-   c                 X   U R                   R                  U R                   R                  -  S:w  aU  [        U R                   S5      (       d:  [	        SU R                   R                   SU R                   R                   S35      e[
        R                  " U R                   R                  U R                  [        R
                  R                  R                  U R                   R                  5      S9U l        [
        R                  " U R                   R                  U R                  [        R
                  R                  R                  U R                   R                  5      SS9U l        [
        R                  " U R                   R                  U R                  [        R
                  R                  R                  U R                   R                  5      S9U l        U R                  (       a/  [!        U R                   U R                  U R                  S	9U l        g S U l        g )
Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads .)r-   rm   F)r-   rm   use_biasr$   r-   )rg   rt   r   hasattrr|   r`   Denser-   rP   ru   rv   rw   queryrC   valuer$   r   r   rT   s    r#   ry   FlaxBeitSelfAttention.setup  s   ;;""T[[%D%DDIRYKK)S
 S
 "4;;#:#:"; <889< 
 XXKK##**++224;;3P3PQ


 88KK##**++224;;3P3PQ	
 XXKK##**++224;;3P3PQ

  )$BRBRZ^ZdZde 	#  	#r"   NrJ   output_attentionsc                 p   U R                   R                  U R                   R                  -  nU R                  U5      R	                  UR
                  S S U R                   R                  U4-   5      nU R                  U5      R	                  UR
                  S S U R                   R                  U4-   5      nU R                  U5      R	                  UR
                  S S U R                   R                  U4-   5      nS n	U(       d+  U R                   R                  S:  a  U R                  S5      n	[        R                  " SU R                  S9n
U R                  b@  [        R                  " U R                  5       S5      n
U
R                  UR                  5      n
Ub  XR                  U
R                  5      -   n
[!        UUU
U	U R                   R                  SUU R                  S S9	n[        R"                  " SX5      nUR	                  UR
                  S S S	-   5      nU(       a  X4nU$ U4nU$ )
Nr'   rL   r   r   r   T)biasdropout_rngdropout_ratebroadcast_dropoutrJ   r-   	precisionz...hqk,...khd->...qhd)r*   )rg   rt   r   r   r2   r,   r   rC   attention_probs_dropout_probrO   r6   r7   r-   r   r   r   r   einsum)rT   hidden_statesr   rJ   r   head_dimquery_statesvalue_states
key_statesr   attention_biasattn_weightsattn_outputoutputss                 r#   r[   FlaxBeitSelfAttention.__call__-  s    ;;**dkk.M.MMzz-088#t{{'F'F&QQ
 zz-088#t{{'F'F&QQ
 XXm,44#t{{'F'F&QQ

 !I!IC!O--	2K3djj9&&2 __T-H-H-JANN+22<3E3EFN "-+.K.KNL`L`.aaN4#AA"'**

 jj!8,U!))+*;*;BQ*?%*GH1B;- JUr"   )rC   r   r   r   NTFr   r   r   r   r   r_   r   r   r6   r   r-   ry   rc   r[   r!   r   r"   r#   r   r     sO    sCx {{E399"
B qv-IM-im- -r"   r   c                   n    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	S	S\
4S jjrSrg)
FlaxBeitSelfOutputi]  rg   r-   c                 F   [         R                  " U R                  R                  [        R                   R
                  R                  U R                  R                  5      U R                  S9U l	        [         R                  " U R                  R                  S9U l        g Nrm   r-   r   r`   r   rg   rt   rP   ru   rv   rw   r-   denser   r   r   r   s    r#   ry   FlaxBeitSelfOutput.setupa  d    XXKK##++224;;3P3PQ**


 zzt{{'F'FGr"   rJ   c                 F    U R                  U5      nU R                  XS9nU$ Nr   r   r   rT   r   rJ   s      r#   r[   FlaxBeitSelfOutput.__call__i  s&    

=1]Pr"   r   Nr]   r   r   r   r   r   r_   r6   r   r-   ry   rc   r[   r!   r   r"   r#   r   r   ]  s1    {{E399"HT  r"   r   c                       \ rS rSr% \\S'   \\\4   \S'   \R                  r
\R                  \S'   S r S
S\4S jjrS	rg)FlaxBeitAttentionio  rg   r$   r-   c                     [        U R                  U R                  U R                  S9U l        [        U R                  U R                  S9U l        g )Nr   )r   rg   r$   r-   	attentionr   rZ   r   s    r#   ry   FlaxBeitAttention.setupt  s9    .t{{D<L<LTXT^T^_(DJJGr"   Nr   c                 r    U R                  XX4S9nUS   nU R                  XcS9nU4nU(       a  XuS   4-  nU$ NrJ   r   r   r   r   r   rZ   )rT   r   r   rJ   r   attn_outputsr   r   s           r#   r[   FlaxBeitAttention.__call__x  sU     ~~ & 
 #1okk+kK.Q))Gr"   r   r   r   r   r"   r#   r   r   o  sG    sCx {{E399"H
 inae r"   r   c                   b    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	S r
Srg)FlaxBeitIntermediatei  rg   r-   c                 0   [         R                  " U R                  R                  [        R                   R
                  R                  U R                  R                  5      U R                  S9U l	        [        U R                  R                     U l        g )Nr   )r`   r   rg   intermediate_sizerP   ru   rv   rw   r-   r   r   
hidden_act
activationr   s    r#   ry   FlaxBeitIntermediate.setup  s`    XXKK))++224;;3P3PQ**


 !!7!78r"   c                 J    U R                  U5      nU R                  U5      nU$ rA   )r   r   )rT   r   s     r#   r[   FlaxBeitIntermediate.__call__  s$    

=16r"   )r   r   Nr   r   r"   r#   r   r     s$    {{E399"9r"   r   c                   n    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	S	S\
4S jjrSrg)
FlaxBeitOutputi  rg   r-   c                 F   [         R                  " U R                  R                  [        R                   R
                  R                  U R                  R                  5      U R                  S9U l	        [         R                  " U R                  R                  S9U l        g r   r   r   s    r#   ry   FlaxBeitOutput.setup  r   r"   rJ   c                 F    U R                  U5      nU R                  XS9nU$ r   r   r   s      r#   r[   FlaxBeitOutput.__call__  s&    

=1]Pr"   r   Nr]   r   r   r"   r#   r   r     s1    {{E399"HT  r"   r   c                       \ rS rSr% \\S'   \\\4   \S'   \\S'   \	R                  r\	R                  \S'   S r SS\S	\4S
 jjrSrg)FlaxBeitLayeri  rg   r$   drop_path_rater-   c                 \   [        U R                  U R                  U R                  S9U l        [        U R                  U R                  S9U l        [        U R                  U R                  S9U l        [        R                  " U R                  R                  U R                  S9U l        [        U R                  S9U l        [        R                  " U R                  R                  U R                  S9U l        U R                  R"                  U l        U R$                  S:  aw  U R'                  S[(        U R                  R*                  U R$                  5      U l        U R'                  S[(        U R                  R*                  U R$                  5      U l        g S U l        S U l        g )Nr   epsilonr-   r   r   lambda_1lambda_2)r   rg   r$   r-   r   r   intermediater   rZ   r`   	LayerNormlayer_norm_epslayernorm_beforerG   r   	drop_pathlayernorm_afterlayer_scale_init_valueinit_valuesr   rE   rt   r  r  r   s    r#   ry   FlaxBeitLayer.setup  s&   *4;;8H8HPTPZPZ[0DJJO$T[[

C "T[[5O5OW[WaWa b)t/B/BC!||DKK4N4NVZV`V`a;;==a JJz?T[[E\E\_c_o_opDM JJz?T[[E\E\_c_o_opDM DM DMr"   NrJ   r   c                    U R                  U R                  U5      UUUS9nUS   nU R                  b(  U R                  R                  UR                  5      U-  nU R                  XcS9U-   nU R                  U5      nU R                  U5      nU R                  XsS9nU R                  b(  U R                  R                  UR                  5      U-  nU R                  XsS9U-   nU4nU(       a  XS   4-  nU$ r   )
r   r  r  r   r-   r  r	  r  rZ   r  )	rT   r   r   rJ   r   self_attention_outputsattention_outputlayer_outputr   s	            r#   r[   FlaxBeitLayer.__call__  s    "&!!-0"'/	 "0 "
 2!4 ==$#}}334D4J4JKN^^ '7UXee ++M:((6{{<{M ==$==//0B0BClRL ~~l~PS``/q133Gr"   )	r   r  r  r  r  r  r	  r  rZ   r   )r   r   r   r   r   r_   r   r   r^   r6   r   r-   ry   rc   r[   r!   r   r"   r#   r   r     sT    sCx {{E399"!" qv$IM$im$ $r"   r   c            	           \ rS rSr% \\S'   \\\4   \S'   \\	   \S'   \
/ \R                  4   \S'   \R                  r\R                  \S'   S r    SS\S	\S
\S\4S jjrSrg)FlaxBeitLayerCollectioni  rg   r$   drop_path_ratesr   r-   c                 0   [        U R                  R                  5       Vs/ s Ha  n[        U R                  U R                  R                  (       a  U R
                  OS U R                  U   [        U5      U R                  S9PMc     snU l	        g s  snf )N)r$   r   namer-   )
rangerg   num_hidden_layersr   use_relative_position_biasr$   r  strr-   layers)rT   is     r#   ry   FlaxBeitLayerCollection.setup  s{     4;;889	
 : 040V0VD,,\`#33A6Vjj :	
 	
s   A(BrJ   r   output_hidden_statesreturn_dictc                 ^   U(       a  SOS nU(       a  SOS n[        U R                  5       HM  u  pU(       a  Xq4-  nU R                  b  U R                  5       OS n
U	" XX#S9nUS   nU(       d  ME  XkS   4-  nMO     U(       a  Xq4-  nU4nU(       d  [        S U 5       5      $ [	        XUS9$ )Nr   r   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7frA   r   ).0vs     r#   	<genexpr>3FlaxBeitLayerCollection.__call__.<locals>.<genexpr>  s     =GqGs   	)last_hidden_stater   
attentions)	enumerater  r   r   r   )rT   r   rJ   r   r  r  all_attentionsall_hidden_statesr  layerr   layer_outputsr   s                r#   r[    FlaxBeitLayerCollection.__call__  s      1d"6BD!$++.HA#!%55!FJFaFaFmT%@%@%Bsw"!]M *!,M  #3"55 /  !11 "=G==="+Yg
 	
r"   )r  NTFFT)r   r   r   r   r   r_   r   r   listr^   r   r6   ndarrayr   r-   ry   rc   r[   r!   r   r"   r#   r  r    s    sCx %[ $R_55{{E399"

 #"'%* !
 !
  	!

 #!
 !
 !
r"   r  c            	           \ rS rSr% \\S'   \\\4   \S'   \R                  r
\R                  \S'   S r    SS\S\S\S	\4S
 jjrSrg)FlaxBeitEncoderi#  rg   r$   r-   c                    U R                   R                  (       a.  [        U R                   U R                  U R                  S9U l        [        [        R                  " SU R                   R                  U R                   R                  5      5      n[        U R                   U R                  UU R                   R                  (       a  U R
                  OS U R                  S9U l        g )N)rg   r$   r-   r   )r$   r  r   r-   )rg   !use_shared_relative_position_biasr   r$   r-   r   r/  r.   linspacer   r  r  r+  )rT   r  s     r#   ry   FlaxBeitEncoder.setup(  s    ;;88*F{{0@0@

+D'
 r{{1dkk.H.H$++JgJghi,KK((+{{<< $(#>#>**

r"   rJ   r   r  r  c                 (    U R                  UUUUUS9$ )NrJ   r   r  r  )r+  )rT   r   rJ   r   r  r  s         r#   r[   FlaxBeitEncoder.__call__:  s)     zz'/!5#  
 	
r"   )r+  r   Nr.  r   r   r"   r#   r2  r2  #  sm    sCx {{E399"
* #"'%* 
 
  	

 #
 
 
r"   r2  c                     ^  \ rS rSr% Sr\rSrSrSr	\
R                  \S'   SS\R                  S4S	\S
\S\R                   S\4U 4S jjjrSS\R(                  R*                  S\S\S\4S jjr\" \R7                  S5      5             SS\\   S\R(                  R*                  S\S\\   S\\   S\\   4S jj5       rSrU =r $ )FlaxBeitPreTrainedModeliK  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
beitr}   Nmodule_classr   Trg   seedr-   _do_initc           	         > U R                   " SXS.UD6nUc$  SUR                  UR                  UR                  4n[        TU ]  XX#XES9  g )N)rg   r-   r   )input_shaper>  r-   r?  r   )r=  ro   rn   super__init__)	rT   rg   rA  r>  r-   r?  kwargsra   	__class__s	           r#   rC   FlaxBeitPreTrainedModel.__init__V  sX     ""H&HHf//1B1BFDWDWXK[SXlr"   rW   rA  paramsr%   c                    [         R                  " X R                  S9n[        R                  R                  U5      u  pV[        R                  R                  U5      u  pgXVUS.nU R                  R                  XSS9S   n	Ubd  [        [        U	5      5      n	[        [        U5      5      nU R                   H	  n
X   X:'   M     [        5       U l
        [        [        U5      5      $ U	$ )Nr   )rG  r   rM   F)r  rG  )r6   r4   r-   rP   rQ   splitra   initr	   r   _missing_keyssetr   r
   )rT   rW   rA  rG  r}   
params_rngr   droppath_rngrngsrandom_paramsmissing_keys              r#   init_weights$FlaxBeitPreTrainedModel.init_weightsd  s    yyJJ?"%**"2"23"7
$'JJ$4$4[$A!$,W(((OPXY(-)@AM!(6"23F#11&3&@#  2!$D.011  r"   zbatch_size, sequence_lengthr   trainr   r  r  c	                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  n[        R
                  " US5      n0 n	Ub)  [        R                  R                  U5      u  pJXIS'   XS'   U R                  R                  SU=(       d    U R                  0[        R                  " U[        R                  S9UU(       + UUUU	S9$ )N)r   r'   r   r   r   rM   rG  r   )rO  )rg   r   r  r  r6   r3   rP   rQ   rI  ra   applyrG  r7   r   )rT   r}   r   rG  r   rT  r   r  r  rO  rN  s              r#   r[    FlaxBeitPreTrainedModel.__call__x  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY}}\<@"(+

(8(8(E%K)O+{{  v,-IIl#++6I  ! 	
 		
r"   )rK  rA   )NNNFNNN)!r   r   r   r   r    r   config_classbase_model_prefixmain_input_namer=  r`   Moduler_   r6   r   r   r-   rc   rC  rP   rQ   PRNGKeyr   r   rR  r   BEIT_INPUTS_DOCSTRINGformatr   dictr[   r!   __classcell__)rE  s   @r#   r;  r;  K  s5   
 L$O"L"))"
 ;;mm 	m
 yym m m!

 2 2 ! !PZ !fp !( ++@+G+GHe+fg !%*.,0/3&*"
 	"

 ZZ''"
 "
 $D>"
 'tn"
 d^"
 h"
r"   r;  c                   b    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	S r
Srg)FlaxBeitPooleri  rg   r-   c                     U R                   R                  (       a9  [        R                  " U R                   R                  U R
                  S9U l        g g )Nr   )rg   use_mean_poolingr`   r  r  r-   	layernormr   s    r#   ry   FlaxBeitPooler.setup  s7    ;;''\\$++2L2LTXT^T^_DN (r"   c                     U R                   R                  (       a4  US S 2SS 2S S 24   nU R                  [        R                  " USS95      nU$ US S 2S4   nU$ )Nr   r   r   )rg   rd  re  r6   mean)rT   r   patch_tokenspooled_outputs       r#   r[   FlaxBeitPooler.__call__  sX    ;;''(AB2L NN388Lq+IJM
  *!Q$/Mr"   )re  Nr   r   r"   r#   rb  rb    s%    {{E399"`	r"   rb  c            	           \ rS rSr% \\S'   \R                  r\R                  \S'   Sr	\
\S'   S r     SS\
S	\
S
\
S\
4S jjrSrg)FlaxBeitModulei  rg   r-   Tadd_pooling_layerc                    [        U R                  U R                  S9U l        [	        U R                  U R                  R
                  R                  U R                  S9U l        U R                  R                  (       d8  [        R                  " U R                  R                  U R                  S9U l        U R                  (       a$  [        U R                  U R                  S9U l        g S U l        g )Nr   r   r   )r   rg   r-   r~   r2  r   rr   encoderrd  r`   r  r  re  rn  rb  poolerr   s    r#   ry   FlaxBeitModule.setup  s    ,T[[

K&KKT__%E%E%Q%QY]YcYc
 {{++\\$++2L2LTXT^T^_DNGKG]G]nT[[

Ccgr"   NrJ   r   r  r  c                 j   U R                  XUS9nU R                  UUUUUS9nUS   nU R                  R                  (       d  U R	                  U5      nU R
                  (       a  U R                  U5      OS n	U(       d  U	c	  U4USS  -   $ Xy4USS  -   $ [        UU	UR                  UR                  S9$ )Nr   r8  r   r   )r&  pooler_outputr   r'  )
r~   rp  rg   rd  re  rn  rq  r   r   r'  )
rT   r}   r   rJ   r   r  r  r   r   pooleds
             r#   r[   FlaxBeitModule.__call__  s     Ubc,,'/!5#  
  
{{++ NN=9M/3/E/E]+4~%''!"+55!*WQR[88-+ !//))	
 	
r"   )r~   rp  re  rq  )NTFFT)r   r   r   r   r   r_   r6   r   r-   rn  rc   ry   r[   r!   r   r"   r#   rm  rm    sn    {{E399""t"h ""'%* "
 	"

  "
 #"
 "
 "
r"   rm  z^The bare Beit Model transformer outputting raw hidden-states without any specific head on top.c                       \ rS rSr\rSrg)FlaxBeitModeli  r   N)r   r   r   r   rm  r=  r!   r   r"   r#   rx  rx    s	    
 "Lr"   rx  a  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxBeitModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k-ft22k")
    >>> model = FlaxBeitModel.from_pretrained("microsoft/beit-base-patch16-224-pt22k-ft22k")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typerX  c                   z    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	      S	S\
4S jjrSrg)
$FlaxBeitForMaskedImageModelingModulei  rg   r-   c                    [        U R                  SU R                  S9U l        [        R
                  " U R                  R                  U R                  S9U l        [        R                  " U R                  R                  [        R                  R                  R                  U R                  R                  5      U R                  S9U l        g )NF)rn  r-   r   r   )rm  rg   r-   r<  r`   r  r  re  r   
vocab_sizerP   ru   rv   rw   lm_headr   s    r#   ry   *FlaxBeitForMaskedImageModelingModule.setup  s    "4;;%tzzZ	 dkk.H.HPTPZPZ[xxKK""++224;;3P3PQ**
r"   NrJ   c           	      "   Ub  UOU R                   R                  nU R                  UUUUUUS9nUS   nU R                  U5      nU R	                  US S 2SS 24   5      n	U(       d  U	4USS  -   n
U
$ [        U	UR                  UR                  S9$ )Nr8  r   r   r'   logitsr   r'  )rg   use_return_dictr<  re  r~  r   r   r'  )rT   r}   r   rJ   r   r  r  r   sequence_outputprediction_scoresrZ   s              r#   r[   -FlaxBeitForMaskedImageModelingModule.__call__  s     &1%<k$++B]B]))'/!5#  
 "!*..9 LLAB)?@')GABK7FM!$!//))
 	
r"   )r<  re  r~  NNTNNNr   r   r"   r#   r{  r{    sG    {{E399"	
 "! 
 	 
  
r"   r{  zYBeit Model transformer with a 'language' modeling head on top (to predict visual tokens).c                       \ rS rSr\rSrg)FlaxBeitForMaskedImageModelingi9  r   N)r   r   r   r   r{  r=  r!   r   r"   r#   r  r  9  s	    
 8Lr"   r  a?  
    bool_masked_pos (`numpy.ndarray` of shape `(batch_size, num_patches)`):
        Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, BeitForMaskedImageModeling
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
    >>> model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
c                   z    \ rS rSr% \\S'   \R                  r\R                  \S'   S r	      S	S\
4S jjrSrg)
$FlaxBeitForImageClassificationModulei`  rg   r-   c                 4   [        U R                  U R                  SS9U l        [        R
                  " U R                  R                  [        R                  R                  R                  U R                  R                  5      U R                  S9U l        g )NT)rg   r-   rn  r   )rm  rg   r-   r<  r`   r   
num_labelsrP   ru   rv   rw   
classifierr   s    r#   ry   *FlaxBeitForImageClassificationModule.setupd  sa    "$++TZZ[_`	((KK""++224;;3P3PQ**
r"   NrJ   c                     Ub  UOU R                   R                  nU R                  UUUUUS9nUS   nU R                  U5      n	U(       d  U	4USS  -   n
U
$ [	        U	UR
                  UR                  S9$ )Nr8  r   r'   r  )rg   r  r<  r  r   r   r'  )rT   r}   r   rJ   r   r  r  r   rj  r  rZ   s              r#   r[   -FlaxBeitForImageClassificationModule.__call__l  s     &1%<k$++B]B]))'/!5#  
  
/Y,FM+!//))
 	
r"   )r<  r  r  r   r   r"   r#   r  r  `  sG    {{E399"
 "!
 	
 
r"   r  z
    Beit Model transformer with an image classification head on top (a linear layer on top of the average of the final
    hidden states of the patch tokens) e.g. for ImageNet.
    c                       \ rS rSr\rSrg)FlaxBeitForImageClassificationi  r   N)r   r   r   r   r  r=  r!   r   r"   r#   r  r    s	     8Lr"   r  aM  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxBeitForImageClassification
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")
    >>> model = FlaxBeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = logits.argmax(-1).item()
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx])
    ```
)r  r  rx  r;  )Htypingr   r   flax
flax.linenlinenr`   rP   	jax.numpynumpyr6   r.   flax.core.frozen_dictr   r   r   flax.linen.attentionr   flax.traverse_utilr	   r
   modeling_flax_outputsr   r   r   r   modeling_flax_utilsr   r   r   r   utilsr   r   configuration_beitr   struct	dataclassr   BEIT_START_DOCSTRINGr]  r   r   r0  r?   r   rE   r[  rG   re   r   r   r   r   r   r   r   r   r  r2  r;  rb  rm  rx  FLAX_BEIT_MODEL_DOCSTRINGr{  r  FLAX_BEIT_MLM_DOCSTRINGr  r  FLAX_BEIT_CLASSIF_DOCSTRING__all__r   r"   r#   <module>r     s  " &   
   > > > ;   Q * %C  ,! F ".eCHo .#++ .0 .1[[ *ryy *Cbii C@& &R@299 @.RBII Rj $		 4299 &RYY &:BII :z4
bii 4
n%
bii %
PP
1 P
fRYY (0
RYY 0
f d"+ "	" , (A B  <Zis t/
299 /
d _8%< 8	8 2 79P Q  "0BQ[
*
299 *
Z  8%< 88 2 79T U  "0L[e
r"   