
    cCiG                   >   S r SSKJr  SSKrSSKJr  SSKrSSKr	SSK
Jr  SSKJrJrJrJrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJ r J!r!  SS	K"J#r#J$r$J%r%  SS
K&J'r'J(r(J)r)J*r*J+r+J,r,  SSK-J.r.  \+R^                  " \05      r1Sr2Sr3Sr4Sr5Sr6Sr7Sr8Sr9Sr:Sr;Sr<Sr=Sr> " S S5      r? " S S\R                  R                  5      rB " S S\R                  R                  5      rD " S  S!\R                  R                  5      rE\D\ES".rF " S# S$\R                  R                  5      rG " S% S&\R                  R                  5      rH " S' S(\R                  R                  5      rI " S) S*\R                  R                  5      rJ " S+ S,\R                  R                  5      rK " S- S.\R                  R                  5      rL " S/ S0\R                  R                  5      rM " S1 S2\R                  R                  5      rN " S3 S4\R                  R                  5      rO " S5 S6\R                  R                  5      rP " S7 S8\R                  R                  5      rQ " S9 S:\R                  R                  5      rR " S; S<\R                  R                  5      rS " S= S>\R                  R                  5      rT " S? S@\R                  R                  5      rU " SA SB\R                  R                  5      rV\  " SC SD\R                  R                  5      5       rW " SE SF\5      rX\ " SG SH\'5      5       rYSIrZSJr[\)" SK\Z5       " SL SM\X5      5       r\\)" SN\Z5       " SO SP\X\?5      5       r]\)" SQ\Z5       " SR SS\X\5      5       r^ " ST SU\R                  R                  5      r_\)" SV\Z5       " SW SX\X\5      5       r`\)" SY\Z5       " SZ S[\X\5      5       ra\)" S\\Z5       " S] S^\X\5      5       rb\)" S_\Z5       " S` Sa\X\5      5       rc\)" Sb\Z5       " Sc Sd\X\5      5       rd/ SeQreg)fzTF 2.0 MobileBERT model.    )annotationsN)	dataclass   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPoolingTFMaskedLMOutputTFMultipleChoiceModelOutputTFNextSentencePredictorOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFMaskedLanguageModelingLossTFModelInputTypeTFMultipleChoiceLossTFNextSentencePredictionLossTFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds
shape_liststable_softmax)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )MobileBertConfigzgoogle/mobilebert-uncasedr%   z"vumichien/mobilebert-finetuned-nerzK['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']gQ?z%vumichien/mobilebert-uncased-squad-v2z'a nice puppet'gףp=
@      zvumichien/emo-mobilebertz'others'z4.72c                  "    \ rS rSrSrSS jrSrg)TFMobileBertPreTrainingLossW   z
Loss function suitable for BERT-like pretraining, that is, the task of pretraining a language model by combining
NSP + MLM. .. note:: Any label of -100 will be ignored (along with the corresponding logits) in the loss
computation.
c                   [         R                  R                  S[         R                  R                  R                  S9nU" [
        R                  R                  US   5      US   S9n[
        R                  " US   S:g  UR                  S9nXE-  n[
        R                  " U5      [
        R                  " U5      -  nU" [
        R                  R                  US   5      US	   S9n[
        R                  " US   S:g  UR                  S9n	X-  n
[
        R                  " U
5      [
        R                  " U	5      -  n[
        R                  " X{-   S
5      $ )NT)from_logits	reductionlabelsr   )y_truey_predidtypenext_sentence_labelr$   )r$   )r   lossesSparseCategoricalCrossentropy	ReductionNONEtfnnrelucastr2   
reduce_sumreshape)selfr.   logitsloss_fnunmasked_lm_losseslm_loss_maskmasked_lm_lossesreduced_masked_lm_lossunmasked_ns_lossns_loss_maskmasked_ns_lossreduced_masked_ns_losss               o/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/mobilebert/modeling_tf_mobilebert.pyhf_compute_loss+TFMobileBertPreTrainingLoss.hf_compute_loss^   s&   ,,<<Y^YeYeYoYoYtYt<u %BEEJJvh7G,HQWXYQZ[ wwvh/47?Q?W?WX-<!#/?!@2==Q]C^!^ #"%%**V<Q5R*S\bcd\efwwv&;<DL\LbLbc)8!#~!>|A\!\zz0I4PP     N)r.   	tf.Tensorr?   rN   returnrN   )__name__
__module____qualname____firstlineno____doc__rJ   __static_attributes__rM   rL   rI   r)   r)   W   s    QrL   r)   c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFMobileBertIntermediates   c                "  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        UR                  [        5      (       a  [        UR                  5      U l        OUR                  U l        Xl        g )NdensenamerM   )super__init__r   layersDenseintermediate_sizerZ   
isinstance
hidden_actstrr   intermediate_act_fnconfigr>   rf   kwargs	__class__s      rI   r^   !TFMobileBertIntermediate.__init__t   sm    "6"\\''(@(@w'O
f''--'89J9J'KD$'-'8'8D$rL   c                J    U R                  U5      nU R                  U5      nU$ N)rZ   re   r>   hidden_statess     rI   callTFMobileBertIntermediate.call   s&    

=100?rL   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fNTrZ   )	builtgetattrr8   
name_scoperZ   r\   buildrf   true_hidden_sizer>   input_shapes     rI   rv   TFMobileBertIntermediate.build   sm    ::
4$'3tzz/

  $dkk.J.J!KL 0/ 4//   3B
B)rs   rf   rZ   re   rl   	rP   rQ   rR   rS   r^   ro   rv   rU   __classcell__ri   s   @rI   rW   rW   s   s    	M MrL   rW   c                  8   ^  \ rS rSrU 4S jrSU 4S jjrSrU =r$ )TFLayerNorm   c                2   > Xl         [        TU ]  " U0 UD6  g rl   )	feat_sizer]   r^   )r>   r   argsrh   ri   s       rI   r^   TFLayerNorm.__init__   s    "$)&)rL   c                >   > [         TU ]  S S U R                  /5        g rl   )r]   rv   r   r>   ry   ri   s     rI   rv   TFLayerNorm.build   s    tT4>>23rL   )r   rl   )rP   rQ   rR   rS   r^   rv   rU   r}   r~   s   @rI   r   r      s    *4 4rL   r   c                  B   ^  \ rS rSrSU 4S jjrU 4S jrSS jrSrU =r$ )TFNoNorm   c                2   > [         TU ]  " S0 UD6  Xl        g )NrM   )r]   r^   r   )r>   r   epsilonrh   ri   s       rI   r^   TFNoNorm.__init__   s    "6""rL   c                   > U R                  SU R                  /SS9U l        U R                  SU R                  /SS9U l        [        TU ]  U5        g )Nbiaszeros)shapeinitializerweightones)
add_weightr   r   r   r]   rv   r   s     rI   rv   TFNoNorm.build   sK    OOF4>>2BPWOX	ooht~~6FTZo[k"rL   c                8    XR                   -  U R                  -   $ rl   )r   r   )r>   inputss     rI   ro   TFNoNorm.call   s    #dii//rL   )r   r   r   rl   )r   rN   )	rP   rQ   rR   rS   r^   rv   ro   rU   r}   r~   s   @rI   r   r      s    ##
0 0rL   r   )
layer_normno_normc                  @   ^  \ rS rSrSrU 4S jrSS jrSS jrSrU =r	$ )	TFMobileBertEmbeddings   zGConstruct the embeddings from word, position and token_type embeddings.c                @  > [         TU ]  " S0 UD6  UR                  U l        UR                  U l        Xl        UR
                  U l        UR                  U l        UR                  U l        [        R                  R                  UR
                  SS9U l        [        UR                     " UR
                  UR                  SS9U l        [        R                  R!                  UR"                  S9U l        U R                  U R                  (       a  SOS-  U l        g )	Nembedding_transformationr[   	LayerNormr   r\   )rater   r$   rM   )r]   r^   trigram_inputembedding_sizerf   hidden_sizemax_position_embeddingsinitializer_ranger   r_   r`   r   NORM2FNnormalization_typelayer_norm_epsr   Dropouthidden_dropout_probdropoutembedded_input_sizerg   s      rI   r^   TFMobileBertEmbeddings.__init__   s    "6"#11$33!--'-'E'E$!'!9!9(-(:(:6;M;MTn(:(o% !!:!:;(=(=K
 ||++1K1K+L#'#6#6t?Q?Q!WX#Y rL   c           	        [         R                  " S5         U R                  SU R                  R                  U R
                  /[        U R                  S9S9U l        S S S 5        [         R                  " S5         U R                  SU R                  R                  U R                  /[        U R                  S9S9U l        S S S 5        [         R                  " S5         U R                  SU R                  U R                  /[        U R                  S9S9U l        S S S 5        U R                  (       a  g SU l        [        U S	S 5      b[  [         R                  " U R                   R"                  5         U R                   R%                  S S U R&                  /5        S S S 5        [        U S
S 5      bO  [         R                  " U R(                  R"                  5         U R(                  R%                  S 5        S S S 5        g g ! , (       d  f       GN= f! , (       d  f       GN]= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)Nword_embeddingsr   )r   )r\   r   r   token_type_embeddings
embeddingsposition_embeddingsTr   r   )r8   ru   r   rf   
vocab_sizer   r   r   r   type_vocab_sizer   r   r   r   rs   rt   r   r\   rv   r   r   rx   s     rI   rv   TFMobileBertEmbeddings.build   s   ]],-//{{--t/B/BC+d>T>TU * DK . ]]23)-!{{22D4D4DE+d>T>TU *9 *D& 4 ]]01'+!33T5E5EF+d>T>TU (7 (D$ 2 ::
43T:Ft<<AAB--33T4AYAY4Z[ C4d+7t~~223$$T* 43 87 .- 43 21 CB 43s=   AH	=AH#>H-:)H?#I	
H
H*-
H<?
I
Ic           
     H   Uc  Uc   eUb>  [        XR                  R                  5        [        R                  " U R
                  US9n[        U5      SS nUc  [        R                  " USS9nU R                  (       aT  [        R                  " [        R                  " USS2SS24   S5      U[        R                  " USS2SS24   S5      /S	S
9nU R                  (       d  U R                  U R                  :w  a  U R                  U5      nUc+  [        R                  " [        R                  " SUS   S9SS
9n[        R                  " U R                   US9n[        R                  " U R"                  US9nXG-   U-   n	U R%                  U	S9n	U R'                  XS9n	U	$ )zr
Applies embedding based on inputs tensor.

Returns:
    final_embeddings (`tf.Tensor`): output embedding tensor.
N)paramsindicesr   )dimsvaluer$   )r   r   )r   r$   r   )r   )r$   r   r      axis)startlimit)r   )r   training)r   rf   r   r8   gatherr   r   fillr   concatpadr   r   r   expand_dimsranger   r   r   r   )
r>   	input_idsposition_idstoken_type_idsinputs_embedsr   ry   position_embedstoken_type_embedsfinal_embeddingss
             rI   ro   TFMobileBertEmbeddings.call   s    %-*?@@ *9kk6L6LMIIT[[)LM /4!WW+Q?N IIFF=AB/1IJ!FF=CRC02JK
 M !4!48H8H!H 99-HM>>"((+b/*RYZ[L))4+C+C\ZIIT-G-GQ_`(:=NN>>1A>B<</?<SrL   )r   rs   rf   r   r   r   r   r   r   r   r   r   r   r   rl   )NNNNF)
rP   rQ   rR   rS   rT   r^   rv   ro   rU   r}   r~   s   @rI   r   r      s    QZ&+@/  / rL   r   c                  D   ^  \ rS rSrU 4S jrS r SS jrSS jrSrU =r	$ )	TFMobileBertSelfAttentioni  c                  > [         TU ]  " S0 UD6  UR                  UR                  -  S:w  a%  [	        SUR                   SUR                   35      eUR                  U l        UR
                  U l        UR                  UR                  -  S:X  d   e[        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R%                  UR&                  5      U l        Xl        g )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads (querykernel_initializerr\   keyr   rM   )r]   r^   r   num_attention_heads
ValueErroroutput_attentionsintrw   attention_head_sizeall_head_sizer   r_   r`   r   r   r   r   r   r   attention_probs_dropout_probr   rf   rg   s      rI   r^   "TFMobileBertSelfAttention.__init__  s   "6" : ::a?#F$6$6#7 8 4457 
 $*#=#= !'!9!9!!F$>$>>!CCC#&v'>'>A[A['[#\ !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 ||++F,O,OPrL   c                    [         R                  " XSU R                  U R                  45      n[         R                  " U/ SQS9$ )Nr   r   r   r$   r   perm)r8   r=   r   r   	transpose)r>   x
batch_sizes      rI   transpose_for_scores.TFMobileBertSelfAttention.transpose_for_scores,  s6    JJqr4+C+CTE]E]^_||AL11rL   c                   [        U5      S   nU R                  U5      n	U R                  U5      n
U R                  U5      nU R	                  X5      nU R	                  X5      nU R	                  X5      n[
        R                  " XSS9n[
        R                  " [        U5      S   UR                  S9nU[
        R                  R                  U5      -  nUb"  [
        R                  " XOR                  S9nX-   n[        USS9nU R                  UUS9nUb  UU-  n[
        R                  " UU5      n[
        R                  " U/ SQS	9n[
        R                  " UUSU R                  45      nU(       a  UU4nU$ U4nU$ )
Nr   T)transpose_br   r1   r   r   r   r   )r   r   r   r   r   r8   matmulr;   r2   mathsqrtr   r   r   r=   r   )r>   query_tensor
key_tensorvalue_tensorattention_mask	head_maskr   r   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresdkattention_probscontext_layeroutputss                       rI   ro   TFMobileBertSelfAttention.call1  s     /2
 JJ|4((:. JJ|4//0AN--oJ	//0AN 99
 WWZ	*2.6F6L6LM+bggll2.>>%WW^;Q;QRN/@ ))9C ,,,J  -	9O		/;?]F

JD,>,>?
 7H=/2 O\M]rL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      b  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  (       a  U R                  R                  OU R                  R                  /5        S S S 5        g g ! , (       d  f       GN(= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r   r   )rs   rt   r8   ru   r   r\   rv   rf   rw   r   r   use_bottleneck_attentionr   rx   s     rI   rv   TFMobileBertSelfAttention.build^  s8   ::
4$'3tzz/

  $dkk.J.J!KL 04%1txx}}-dDKK,H,HIJ .4$'3tzz/

  ;;?? 44![[44 0/ 4 0/ .- 0/s%   3F&3F88A$G	&
F58
G	
G)
r   r   rs   rf   r   r   r   r   r   r   Frl   )
rP   rQ   rR   rS   r^   r   ro   rv   rU   r}   r~   s   @rI   r   r     s$    62 ns+Z rL   r   c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFMobileBertSelfOutputiu  c                  > [         TU ]  " S0 UD6  UR                  U l        [        R                  R                  UR                  [        UR                  5      SS9U l	        [        UR                     " UR                  UR                  SS9U l        U R                  (       d.  [        R                  R                  UR                  5      U l        Xl        g )NrZ   r   r   r   rM   )r]   r^   use_bottleneckr   r_   r`   rw   r   r   rZ   r   r   r   r   r   r   r   rf   rg   s      rI   r^   TFMobileBertSelfOutput.__init__v  s    "6"$33\\''##H`H`8aho ( 

 !!:!:;##V-B-B
 "" <<//0J0JKDLrL   c                    U R                  U5      nU R                  (       d  U R                  XS9nU R                  X-   5      nU$ Nr   )rZ   r  r   r   )r>   rn   residual_tensorr   s       rI   ro   TFMobileBertSelfOutput.call  s@    

=1"" LLLJM}'FGrL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fNTrZ   r   
rs   rt   r8   ru   rZ   r\   rv   rf   rw   r   rx   s     rI   rv   TFMobileBertSelfOutput.build      ::
4$'3tzz/

  $dkk.J.J!KL 04d+7t~~223$$T* 43 8 0/ 43   3C+C<+
C9<
D
)r   rs   rf   rZ   r   r  r  rl   r|   r~   s   @rI   r	  r	  u  s    	+ 	+rL   r	  c                  D   ^  \ rS rSrU 4S jrS r SS jrSS jrSrU =r	$ )	TFMobileBertAttentioni  c                b   > [         TU ]  " S0 UD6  [        USS9U l        [	        USS9U l        g )Nr>   r[   outputrM   )r]   r^   r   r>   r	  mobilebert_outputrg   s      rI   r^   TFMobileBertAttention.__init__  s0    "6"-f6B	!7X!NrL   c                    [         erl   NotImplementedError)r>   headss     rI   prune_heads!TFMobileBertAttention.prune_heads  s    !!rL   c	           
     b    U R                  XX5XgUS9n	U R                  U	S   XHS9n
U
4U	SS  -   nU$ )Nr   r   r$   )r>   r  )r>   r   r   r   layer_inputr   r   r   r   self_outputsattention_outputr  s               rI   ro   TFMobileBertAttention.call  sV     yylIks ! 
  11,q/;1b#%QR(88rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr>   r  )rs   rt   r8   ru   r>   r\   rv   r  rx   s     rI   rv   TFMobileBertAttention.build  s    ::
4&2tyy~~.		% /4,d3?t55::;&&,,T2 <; @ /. <;   C.C%
C"%
C3)rs   r  r>   r  rl   )
rP   rQ   rR   rS   r^   r!  ro   rv   rU   r}   r~   s   @rI   r  r    s"    O
" &	3 	3rL   r  c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFOutputBottlenecki  c                L  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        UR                     " UR
                  UR                  SS9U l
        [        R                  R                  UR                  5      U l        Xl        g NrZ   r[   r   r   rM   )r]   r^   r   r_   r`   r   rZ   r   r   r   r   r   r   r   rf   rg   s      rI   r^   TFOutputBottleneck.__init__  s    "6"\\''(:(:'I
 !:!:;(=(=K
 ||++F,F,FGrL   c                l    U R                  U5      nU R                  XCS9nU R                  XB-   5      nU$ r  )rZ   r   r   )r>   rn   r  r   layer_outputss        rI   ro   TFOutputBottleneck.call  s7    

=1]F}'FGrL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fr  r  rx   s     rI   rv   TFOutputBottleneck.build  r  r  )r   rs   rf   rZ   r   r  rl   r|   r~   s   @rI   r,  r,    s    	+ 	+rL   r,  c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFMobileBertOutputi  c                  > [         TU ]  " S0 UD6  UR                  U l        [        R                  R                  UR                  [        UR                  5      SS9U l	        [        UR                     " UR                  UR                  SS9U l        U R                  (       d/  [        R                  R                  UR                  5      U l        O[#        USS9U l        Xl        g )NrZ   r   r   r   
bottleneckr[   rM   )r]   r^   r  r   r_   r`   rw   r   r   rZ   r   r   r   r   r   r   r   r,  r8  rf   rg   s      rI   r^   TFMobileBertOutput.__init__  s    "6"$33\\''##H`H`8aho ( 

 !!:!:;##V-B-B
 "" <<//0J0JKDL0lKDOrL   c                    U R                  U5      nU R                  (       d$  U R                  XS9nU R                  X-   5      nU$ U R                  X-   5      nU R	                  X5      nU$ r  )rZ   r  r   r   r8  )r>   rn   residual_tensor_1residual_tensor_2r   s        rI   ro   TFMobileBertOutput.call  si    

=1"" LLLJM NN=+LMM  !NN=+LMM OOMMMrL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)NTrZ   r   r8  )rs   rt   r8   ru   rZ   r\   rv   rf   ra   r   r8  rx   s     rI   rv   TFMobileBertOutput.build  s    ::
4$'3tzz/

  $dkk.K.K!LM 04d+7t~~223$$T* 44t,8t334%%d+ 54 9 0/ 43 54s$   3EE!E)
E
E&)
E7)r   r8  rs   rf   rZ   r   r  r  rl   r|   r~   s   @rI   r6  r6    s    , ,rL   r6  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFBottleneckLayeri  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        UR                     " UR
                  UR                  SS9U l
        Xl        g r.  )r]   r^   r   r_   r`   intra_bottleneck_sizerZ   r   r   r   r   rf   rg   s      rI   r^   TFBottleneckLayer.__init__  se    "6"\\''(D(D7'S
 !:!:;((&2G2Gk
 rL   c                J    U R                  U5      nU R                  U5      nU$ rl   rZ   r   )r>   r   rn   s      rI   ro   TFBottleneckLayer.call  s$    

6*}5rL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fr  
rs   rt   r8   ru   rZ   r\   rv   rf   r   r   rx   s     rI   rv   TFBottleneckLayer.build      ::
4$'3tzz/

  $dkk.E.E!FG 04d+7t~~223$$T* 43 8 0/ 43r  r   rs   rf   rZ   rl   r|   r~   s   @rI   rA  rA        
	+ 	+rL   rA  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFBottlenecki  c                   > [         TU ]  " S0 UD6  UR                  U l        UR                  U l        [	        USS9U l        U R                  (       a  [	        USS9U l        g g )Ninputr[   	attentionrM   )r]   r^   key_query_shared_bottleneckr  rA  bottleneck_inputrR  rg   s      rI   r^   TFBottleneck.__init__  sZ    "6"+1+M+M((.(G(G% 1&w G++.vKHDN ,rL   c                    U R                  U5      nU R                  (       a  U4S-  $ U R                  (       a  U R                  U5      nX3X4$ XX4$ )N   )rT  r  rS  rR  )r>   rn   bottlenecked_hidden_statesshared_attention_inputs       rI   ro   TFBottleneck.call&  sZ    " &*%:%:=%I"((.0144--%)^^M%B"*Mnn!-\\rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTrT  rR  )rs   rt   r8   ru   rT  r\   rv   rR  rx   s     rI   rv   TFBottleneck.build@  s    ::
4+T2>t4499:%%++D1 ;4d+7t~~223$$T* 43 8 ;: 43r*  )rR  rT  rs   rS  r  rl   r|   r~   s   @rI   rO  rO    s    I]4	+ 	+rL   rO  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFFFNOutputiL  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        UR                     " UR
                  UR                  SS9U l
        Xl        g r.  )r]   r^   r   r_   r`   rw   rZ   r   r   r   r   rf   rg   s      rI   r^   TFFFNOutput.__init__M  se    "6"\\''(?(?g'N
 !:!:;##V-B-B
 rL   c                N    U R                  U5      nU R                  X-   5      nU$ rl   rF  )r>   rn   r  s      rI   ro   TFFFNOutput.callU  s'    

=1}'FGrL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fr  )
rs   rt   r8   ru   rZ   r\   rv   rf   ra   r   rx   s     rI   rv   TFFFNOutput.buildZ  s    ::
4$'3tzz/

  $dkk.K.K!LM 04d+7t~~223$$T* 43 8 0/ 43r  rL  rl   r|   r~   s   @rI   r^  r^  L  rM  rL   r^  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )
TFFFNLayerif  c                b   > [         TU ]  " S0 UD6  [        USS9U l        [	        USS9U l        g )Nintermediater[   r  rM   )r]   r^   rW   rh  r^  r  rg   s      rI   r^   TFFFNLayer.__init__g  s1    "6"4V.Q!,V(!CrL   c                J    U R                  U5      nU R                  X!5      nU$ rl   )rh  r  )r>   rn   intermediate_outputr1  s       rI   ro   TFFFNLayer.calll  s*    "//>../BRrL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTrh  r  )rs   rt   r8   ru   rh  r\   rv   r  rx   s     rI   rv   TFFFNLayer.buildq  s    ::
4.:t00556!!''- 74,d3?t55::;&&,,T2 <; @ 76 <;r*  )rs   rh  r  rl   r|   r~   s   @rI   rf  rf  f  s    D

	3 	3rL   rf  c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFMobileBertLayeri}  c                  > [         TU ]  " S0 UD6  UR                  U l        UR                  U l        [	        USS9U l        [        USS9U l        [        USS9U l	        U R                  (       a  [        USS9U l        UR                  S:  a:  [        UR                  S-
  5       Vs/ s H  n[        USU 3S9PM     snU l        g g s  snf )	NrR  r[   rh  r  r8  r$   zffn.rM   )r]   r^   r  num_feedforward_networksr  rR  rW   rh  r6  r  rO  r8  r   rf  ffnr>   rf   rh   iri   s       rI   r^   TFMobileBertLayer.__init__~  s    "6"$33(.(G(G%.vKH4V.Q!3F!J*6EDO**Q.EJ6KjKjmnKnEopEo
6$qc
;EopDH /ps   +Cc                   U R                   (       a  U R                  U5      u  pgpO	U/S-  u  pgpU R                  UUUU	UUUUS9n
U
S   nU4nU R                  S:w  a+  [	        U R
                  5       H  u  pU" U5      nX4-  nM     U R                  U5      nU R                  XXS9nU4U
SS  -   [        R                  " S5      UUUU	UU4-   U-   nU$ )NrW  r   r   r$   )
r  r8  rR  rr  	enumeraters  rh  r  r8   constant)r>   rn   r   r   r   r   r   r   r   r$  attention_outputsr&  sru  
ffn_modulerk  layer_outputr  s                     rI   ro   TFMobileBertLayer.call  s.   BF//R_B`?LlKCP/TUBU?Ll NN + 	
 -Q/((A-!*488!4#-.>#? (( "5 #//0@A--.AUb-v O#$ A #
  	 rL   c                ~   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       GN= f! , (       d  f       GN6= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       M  = f)NTrR  rh  r  r8  rs  )rs   rt   r8   ru   rR  r\   rv   rh  r  r8  rs  r>   ry   layers      rI   rv   TFMobileBertLayer.build  s   ::
4d+7t~~223$$T* 44.:t00556!!''- 74,d3?t55::;&&,,T2 <4t,8t334%%d+ 54%1]]5::.KK% /. " 2 43 76 <; 54 /.s<   G'.G9
H&HH-'
G69
H
H
H*-
H<	)rR  r8  rs   rs  rh  r  rr  r  r  rl   r|   r~   s   @rI   rp  rp  }  s    q+Z& &rL   rp  c                  >   ^  \ rS rSrU 4S jr SS jrSS jrSrU =r$ )TFMobileBertEncoderi  c                   > [         TU ]  " S0 UD6  UR                  U l        UR                  U l        [	        UR
                  5       Vs/ s H  n[        USU 3S9PM     snU l        g s  snf )Nzlayer_._r[   rM   )r]   r^   r   output_hidden_statesr   num_hidden_layersrp  r  rt  s       rI   r^   TFMobileBertEncoder.__init__  sd    "6"!'!9!9$*$?$?!NSTZTlTlNmnNm'xs^DNmn
ns   A+c           	     $   U(       a  SOS nU(       a  SOS n	[        U R                  5       H1  u  pU(       a  X4-   nU" XX:   XGS9nUS   nU(       d  M)  XS   4-   n	M3     U(       a  X4-   nU(       d  [        S XU	4 5       5      $ [        XU	S9$ )NrM   r   r   r$   c              3  .   #    U  H  oc  M  Uv   M     g 7frl   rM   ).0vs     rI   	<genexpr>+TFMobileBertEncoder.call.<locals>.<genexpr>  s     h$Vq$Vs   	)last_hidden_statern   
attentions)rx  r  tupler   )r>   rn   r   r   r   r  return_dictr   all_hidden_statesall_attentionsru  layer_moduler1  s                rI   ro   TFMobileBertEncoder.call  s     #7BD0d(4OA#$58H$H!(y|=NM *!,M  !/3C2E!E  5   14D Dh]~$Vhhh +Yg
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = f)NTr  )rs   rt   r  r8   ru   r\   rv   r  s      rI   rv   TFMobileBertEncoder.build  s`    ::
4$'3]]5::.KK% /. $ 4..s   A77
B	)rs   r  r   r  r  rl   r|   r~   s   @rI   r  r    s    o !
F& &rL   r  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFMobileBertPooleri  c                   > [         TU ]  " S0 UD6  UR                  U l        U R                  (       aB  [        R
                  R                  UR                  [        UR                  5      SSS9U l
        Xl        g )NtanhrZ   )r   
activationr\   rM   )r]   r^   classifier_activationdo_activater   r_   r`   r   r   r   rZ   rf   rg   s      rI   r^   TFMobileBertPooler.__init__  si    "6"!77++""#263K3K#L!	 , DJ rL   c                `    US S 2S4   nU R                   (       d  U$ U R                  U5      nU$ Nr   )r  rZ   )r>   rn   first_token_tensorpooled_outputs       rI   ro   TFMobileBertPooler.call  s7     +1a40%% JJ'9:M  rL   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = frr   )	rs   rt   r8   ru   rZ   r\   rv   rf   r   rx   s     rI   rv   TFMobileBertPooler.build  sm    ::
4$'3tzz/

  $dkk.E.E!FG 0/ 4//r{   )rs   rf   rZ   r  rl   r|   r~   s   @rI   r  r    s    
!H HrL   r  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )#TFMobileBertPredictionHeadTransformi"  c                  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        UR                  [        5      (       a  [        UR                  5      U l        OUR                  U l        [        S   " UR
                  UR                  SS9U l        Xl        g )NrZ   r   r   r   r   rM   )r]   r^   r   r_   r`   r   r   r   rZ   rb   rc   rd   r   transform_act_fnr   r   r   rf   rg   s      rI   r^   ,TFMobileBertPredictionHeadTransform.__init__#  s    "6"\\''?6C[C[3\cj ( 

 f''--$5f6G6G$HD!$*$5$5D! .v/A/A6K`K`grsrL   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rl   )rZ   r  r   rm   s     rI   ro   (TFMobileBertPredictionHeadTransform.call/  s4    

=1--m<}5rL   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fr  rI  rx   s     rI   rv   )TFMobileBertPredictionHeadTransform.build5  rK  r  )r   rs   rf   rZ   r  rl   r|   r~   s   @rI   r  r  "  s    
	+ 	+rL   r  c                  P   ^  \ rS rSrU 4S jrS
S jrS rS rS rS r	S r
S	rU =r$ )TFMobileBertLMPredictionHeadiA  c                P   > [         TU ]  " S0 UD6  [        USS9U l        Xl        g )N	transformr[   rM   )r]   r^   r  r  rf   rg   s      rI   r^   %TFMobileBertLMPredictionHead.__init__B  s&    "6"<V+VrL   c                   U R                  U R                  R                  4SSSS9U l        U R                  U R                  R                  U R                  R
                  -
  U R                  R                  4SSSS9U l        U R                  U R                  R                  U R                  R
                  4SSSS9U l        U R                  (       a  g SU l        [        U SS 5      bO  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)Nr   Tr   )r   r   	trainabler\   zdense/weightzdecoder/weightr  )r   rf   r   r   r   r   rZ   decoderrs   rt   r8   ru   r  r\   rv   rx   s     rI   rv   "TFMobileBertLMPredictionHead.buildG  s   OO4;;+A+A*CQXdhouOv	__;;**T[[-G-GGI_I_`	 % 

 ;;))4;;+E+EF!	 ' 
 ::
4d+7t~~223$$T* 43 833s   D==
Ec                    U $ rl   rM   r>   s    rI   get_output_embeddings2TFMobileBertLMPredictionHead.get_output_embeddings]  s    rL   c                J    Xl         [        U5      S   U R                  l        g r  )r  r   rf   r   r>   r   s     rI   set_output_embeddings2TFMobileBertLMPredictionHead.set_output_embeddings`  s    !+E!21!5rL   c                    SU R                   0$ )Nr   )r   r  s    rI   get_bias%TFMobileBertLMPredictionHead.get_biasd  s    		""rL   c                X    US   U l         [        US   5      S   U R                  l        g )Nr   r   )r   r   rf   r   r  s     rI   set_bias%TFMobileBertLMPredictionHead.set_biasg  s'    &M	!+E&M!:1!=rL   c                    U R                  U5      n[        R                  " U[        R                  " [        R                  " U R
                  5      U R                  /SS95      nXR                  -   nU$ )Nr   r   )r  r8   r   r   r   r  rZ   r   rm   s     rI   ro   !TFMobileBertLMPredictionHead.callk  sW    }5		-BLL<VX\XbXb;cjk1lm%		1rL   )r   rs   rf   r  rZ   r  rl   )rP   rQ   rR   rS   r^   rv   r  r  r  r  ro   rU   r}   r~   s   @rI   r  r  A  s+    
+,6#> rL   r  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFMobileBertMLMHeadir  c                D   > [         TU ]  " S0 UD6  [        USS9U l        g )Npredictionsr[   rM   )r]   r^   r  r  rg   s      rI   r^   TFMobileBertMLMHead.__init__s  s"    "6"7]SrL   c                (    U R                  U5      nU$ rl   r  )r>   sequence_outputprediction_scoress      rI   ro   TFMobileBertMLMHead.callw  s     ,,_=  rL   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTr  )rs   rt   r8   ru   r  r\   rv   rx   s     rI   rv   TFMobileBertMLMHead.build{  sb    ::
4-9t//445  &&t, 65 :55   A88
B)rs   r  rl   r|   r~   s   @rI   r  r  r  s    T!- -rL   r  c                  t   ^  \ rS rSr\rS	U 4S jjrS rS rS r	\
          S
S j5       rSS jrSrU =r$ )TFMobileBertMainLayeri  c                2  > [         TU ]  " S0 UD6  Xl        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        [        USS9U l	        [        USS9U l        U(       a  [        USS9U l        g S U l        g )Nr   r[   encoderpoolerrM   )r]   r^   rf   r  r   r  use_return_dictr  r   r   r  r  r  r  )r>   rf   add_pooling_layerrh   ri   s       rI   r^   TFMobileBertMainLayer.__init__  s    "6"!'!9!9!'!9!9$*$?$?!!110lK*6	BCT(h?Z^rL   c                    U R                   $ rl   )r   r  s    rI   get_input_embeddings*TFMobileBertMainLayer.get_input_embeddings  s    rL   c                ^    XR                   l        [        U5      S   U R                   l        g r  )r   r   r   r   r  s     rI   set_input_embeddings*TFMobileBertMainLayer.set_input_embeddings  s"    !&%/%6q%9"rL   c                    [         e)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
r  )r>   heads_to_prunes     rI   _prune_heads"TFMobileBertMainLayer._prune_heads  s
    
 "!rL   c           
     \   Ub  Ub  [        S5      eUb  [        U5      nOUb  [        U5      S S nO[        S5      eUc  [        R                  " US5      nUc  [        R                  " US5      nU R	                  XX6U
S9n[        R
                  " X+S   SSUS   45      n[        R                  " XR                  S9n[        R                  " SUR                  S9n[        R                  " S	UR                  S9n[        R                  " [        R                  " X5      U5      nUb  [        eS /U R                  -  nU R                  UUUUUU	U
S9nUS   nU R                  b  U R                  U5      OS nU	(       d
  UU4USS  -   $ [        UUUR                   UR"                  S
9$ )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr$   r   r   r1   g      ?g     )r  pooler_outputrn   r  )r   r   r8   r   r   r=   r;   r2   ry  multiplysubtractr  r  r  r  r   rn   r  )r>   r   r   r   r   r   r   r   r  r  r   ry   embedding_outputextended_attention_maskone_cstten_thousand_cstencoder_outputsr  r  s                      rI   ro   TFMobileBertMainLayer.call  s     ]%>cdd"$Y/K&$]3CR8KTUU!WW[!4N!WW[!4N??9Nlt?u #%**^!naQRT_`aTb=c"d #%''*AI_I_"`++c)9)?)?@;;x7G7M7MN"$++bkk'.[]m"n  %%!7!77I,,#  ' 
 *!,8<8OO4UY  #$ $
 ,-')77&11	
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)NTr   r  r  )	rs   rt   r8   ru   r   r\   rv   r  r  rx   s     rI   rv   TFMobileBertMainLayer.build  s    ::
4t,8t334%%d+ 54D)5t||001""4( 244(4t{{//0!!$' 10 5 54 21 10$   D0.E
E0
D>
E
E )	rs   rf   r   r  r  r   r  r  r  )T
NNNNNNNNNFrl   )rP   rQ   rR   rS   r%   config_classr^   r  r  r  r   ro   rv   rU   r}   r~   s   @rI   r  r    s^    #L_:"  !Q
 Q
f( (rL   r  c                       \ rS rSrSr\rSrSrg)TFMobileBertPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.

mobilebertrM   N)	rP   rQ   rR   rS   rT   r%   r  base_model_prefixrU   rM   rL   rI   r  r    s    
 $L$rL   r  c                  `    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S\S
'   Srg) TFMobileBertForPreTrainingOutputi  a  
Output type of [`TFMobileBertForPreTraining`].

Args:
    prediction_logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    seq_relationship_logits (`tf.Tensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Nztf.Tensor | Nonelossprediction_logitsseq_relationship_logitsztuple[tf.Tensor] | Nonern   r  rM   )rP   rQ   rR   rS   rT   r  __annotations__r  r   rn   r  rU   rM   rL   rI   r  r    sB    , "D
!*.'.04-4-1M*1*.J'.rL   r  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileBertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`Numpy array` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False`):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zdThe bare MobileBert Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrU 4S jr\\" \R                  S5      5      \	" \
\\S9          S                     S	S jj5       5       5       rS
S jrSrU =r$ )TFMobileBertModeli  c                L   > [         TU ]  " U/UQ70 UD6  [        USS9U l        g )Nr  r[   )r]   r^   r  r  r>   rf   r   rh   ri   s       rI   r^   TFMobileBertModel.__init__  s(    3&3F3/\JrL   batch_size, sequence_length
checkpointoutput_typer  c                6    U R                  UUUUUUUUU	U
S9
nU$ )N)
r   r   r   r   r   r   r   r  r  r   )r  )r>   r   r   r   r   r   r   r   r  r  r   r  s               rI   ro   TFMobileBertModel.call  s<    ( //))%'/!5# " 
 rL   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTr  )rs   rt   r8   ru   r  r\   rv   rx   s     rI   rv   TFMobileBertModel.build  s^    ::
4t,8t334%%d+ 54 944r  )rs   r  r  )r   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r  r   r  r   r  r   r  r   bool | Noner  r  r  r  r   r  rO   z$tuple | TFBaseModelOutputWithPoolingrl   )rP   rQ   rR   rS   r^   r   r!   MOBILEBERT_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCro   rv   rU   r}   r~   s   @rI   r  r    s    
K *+F+M+MNk+lm&0$ .28<8<6:377;)-,0#' %* 6 6	
 4 1 5 ' * !  
. n 8, ,rL   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    c                     ^  \ rS rSrU 4S jrS rS r\\" \	R                  S5      5      \" \\S9            S                         SS jj5       5       5       rSS jrS	 rS
rU =r$ )TFMobileBertForPreTrainingi  c                   > [         TU ]  " U/UQ70 UD6  [        USS9U l        [	        USS9U l        [        USS9U l        g )Nr  r[   predictions___clsseq_relationship___cls)r]   r^   r  r  r  r  TFMobileBertOnlyNSPHeadseq_relationshipr  s       rI   r^   #TFMobileBertForPreTraining.__init__  sH    3&3F3/\J.v<OP 7E] ^rL   c                .    U R                   R                   $ rl   r  r  s    rI   get_lm_head&TFMobileBertForPreTraining.get_lm_head      +++rL   c                    [         R                  " S[        5        U R                  S-   U R                  R                  -   S-   U R                  R                  R                  -   $ NzMThe method get_prefix_bias_name is deprecated. Please use `get_bias` instead./)warningswarnFutureWarningr\   r  r  s    rI   get_prefix_bias_name/TFMobileBertForPreTraining.get_prefix_bias_name  sM    egtuyy3!1!1!6!66<t?O?O?[?[?`?```rL   r  r
  r  c                D   U R                  UUUUUUUUU	US9
nUSS u  pU R                  U5      nU R                  U5      nSnU
b  Ub  SU
0nUUS'   U R                  UUU4S9nU	(       d  UU4USS -   nUb  U4U-   $ U$ [	        UUUUR
                  UR                  S9$ )a  
Return:

Examples:

```python
>>> import tensorflow as tf
>>> from transformers import AutoTokenizer, TFMobileBertForPreTraining

>>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = TFMobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
>>> outputs = model(input_ids)
>>> prediction_scores, seq_relationship_scores = outputs[:2]
```	r   r   r   r   r   r   r  r  r   Nr   r.   r3   r.   r?   )r  r  r   rn   r  )r  r  r  rJ   r  rn   r  )r>   r   r   r   r   r   r   r   r  r  r.   r3   r   r  r  r  r  seq_relationship_score
total_lossd_labelsr  s                        rI   ro   TFMobileBertForPreTraining.call  s   B //))%'/!5# " 
 *1!& ,,_=!%!6!6}!E
"5"A &)H.AH*+--XGXZpFq-rJ')?@712;NF/9/EZMF*Q6Q//$:!//))
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)NTr  r  r  )	rs   rt   r8   ru   r  r\   rv   r  r  rx   s     rI   rv    TFMobileBertForPreTraining.build  s    ::
4t,8t334%%d+ 54-9t//445  &&t, 64+T2>t4499:%%++D1 ;: ? 54 65 ;:r  c                    US:X  a  US4$ U4$ Nzcls.predictions.decoder.weightz,mobilebert.embeddings.word_embeddings.weightrM   r>   	tf_weights     rI   tf_to_pt_weight_rename1TFMobileBertForPreTraining.tf_to_pt_weight_rename-      88LLL<rL   )rs   r  r  r  NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r.   r  r3   r  r   r  rO   z(tuple | TFMobileBertForPreTrainingOutputrl   )rP   rQ   rR   rS   r^   r  r(  r   r!   r  r  r#   r  r  ro   rv   r8  rU   r}   r~   s   @rI   r  r    s   _,a *+F+M+MNk+lm+KZij .28<8<6:377;)-,0#'04=A %?
*?
 6?
 6	?

 4?
 1?
 5?
 '?
 *?
 !?
 .?
 ;?
 ?
 
2?
 k n ?
B2   rL   r  z8MobileBert Model with a `language modeling` head on top.c            
         ^  \ rS rSr/ SQrU 4S jrS rS r\\	" \
R                  S5      5      \" \\\SSS	9           S                       SS
 jj5       5       5       rSS jrS rSrU =r$ )TFMobileBertForMaskedLMi4  )r  r  cls.seq_relationshipc                l   > [         TU ]  " U/UQ70 UD6  [        USSS9U l        [	        USS9U l        g )NFr  r  r\   r  r[   )r]   r^   r  r  r  r  r  s       rI   r^    TFMobileBertForMaskedLM.__init__=  s;    3&3F3/%Vbc.v<OPrL   c                .    U R                   R                   $ rl   r  r  s    rI   r  #TFMobileBertForMaskedLM.get_lm_headC  r!  rL   c                    [         R                  " S[        5        U R                  S-   U R                  R                  -   S-   U R                  R
                  R                  -   $ r#  )r%  r&  r'  r\   mlmr  r  s    rI   r(  ,TFMobileBertForMaskedLM.get_prefix_bias_nameF  sG    egtuyy3.4txx7K7K7P7PPPrL   r  z'paris'g=
ףp=?r	  r
  r  expected_outputexpected_lossc                    U R                  UUUUUUUUU	US9
nUS   nU R                  XS9nU
c  SOU R                  X5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR
                  S9$ )aR  
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels
r,  r   r   Nr   r  r?   rn   r  )r  r  rJ   r	   rn   r  )r>   r   r   r   r   r   r   r   r  r  r.   r   r  r  r  r  r  s                    rI   ro   TFMobileBertForMaskedLM.callJ  s    : //))%'/!5# " 
 "!* ,,_,P~t4+?+?+Z')GABK7F)-)9TGf$EvE$!//))	
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r  )rs   rt   r8   ru   r  r\   rv   r  rx   s     rI   rv   TFMobileBertForMaskedLM.build  s    ::
4t,8t334%%d+ 54-9t//445  &&t, 65 : 54 65r*  c                    US:X  a  US4$ U4$ r5  rM   r6  s     rI   r8  .TFMobileBertForMaskedLM.tf_to_pt_weight_rename  r:  rL   )rs   r  r  NNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r.   r  r   r  rO   ztuple | TFMaskedLMOutputrl   )rP   rQ   rR   rS   "_keys_to_ignore_on_load_unexpectedr^   r  r(  r   r!   r  r  r   r  r	   r  ro   rv   r8  rU   r}   r~   s   @rI   r=  r=  4  s   *&Q,Q *+F+M+MNk+lm&$$! .28<8<6:377;)-,0#'04 %.
*.
 6.
 6	.

 4.
 1.
 5.
 '.
 *.
 !.
 ..
 .
 
".
 n .
`	-   rL   r=  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )r  i  c                x   > [         TU ]  " S0 UD6  [        R                  R	                  SSS9U l        Xl        g )Nr   r  r[   rM   )r]   r^   r   r_   r`   r  rf   rg   s      rI   r^    TFMobileBertOnlyNSPHead.__init__  s5    "6" % 2 21;M 2 NrL   c                (    U R                  U5      nU$ rl   )r  )r>   r  r.  s      rI   ro   TFMobileBertOnlyNSPHead.call  s    !%!6!6}!E%%rL   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = f)NTr  )	rs   rt   r8   ru   r  r\   rv   rf   r   rx   s     rI   rv   TFMobileBertOnlyNSPHead.build  st    ::
4+T2>t4499:%%++T49P9P,QR ;: ?::r{   )rs   rf   r  rl   r|   r~   s   @rI   r  r    s    
&S SrL   r  zPMobileBert Model with a `next sentence prediction (classification)` head on top.c                     ^  \ rS rSrSS/rU 4S jr\\" \R                  S5      5      \
" \\S9           S
                       SS jj5       5       5       rSS jrS	rU =r$ )%TFMobileBertForNextSentencePredictioni  r  cls.predictionsc                j   > [         TU ]  " U/UQ70 UD6  [        USS9U l        [	        USS9U l        g )Nr  r[   r  )r]   r^   r  r  r  clsr  s       rI   r^   .TFMobileBertForNextSentencePrediction.__init__  s7    3&3F3/\J*68PQrL   r  r*  c                    U R                  UUUUUUUUU	US9
nUS   nU R                  U5      nU
c  SOU R                  XS9nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR
                  S9$ )a  
Return:

Examples:

```python
>>> import tensorflow as tf
>>> from transformers import AutoTokenizer, TFMobileBertForNextSentencePrediction

>>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = TFMobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors="tf")

>>> logits = model(encoding["input_ids"], token_type_ids=encoding["token_type_ids"])[0]
```r,  r$   Nr-  r   rK  )r  r^  rJ   r   rn   r  )r>   r   r   r   r   r   r   r   r  r  r3   r   r  r  seq_relationship_scoresnext_sentence_lossr  s                    rI   ro   *TFMobileBertForNextSentencePrediction.call  s    F //))%'/!5# " 
  
"&((="9 #* %%-@%a 	 -/'!"+=F7I7U')F2a[aa,#*!//))	
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r^  )rs   rt   r8   ru   r  r\   rv   r^  rx   s     rI   rv   +TFMobileBertForNextSentencePrediction.build  s    ::
4t,8t334%%d+ 54%1txx}}-t$ .- 2 54 .-r*  )rs   r^  r  rQ  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r3   r  r   r  rO   z%tuple | TFNextSentencePredictorOutputrl   )rP   rQ   rR   rS   rR  r^   r   r!   r  r  r#   r   r  ro   rv   rU   r}   r~   s   @rI   r[  r[    s     +?@R)S&R *+F+M+MNk+lm+HWfg .28<8<6:377;)-,0#'=A %>
*>
 6>
 6	>

 4>
 1>
 5>
 '>
 *>
 !>
 ;>
 >
 
/>
 h n >
@	% 	%rL   r[  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c            
         ^  \ rS rSr/ SQrS/rU 4S jr\\" \	R                  S5      5      \" \\\\\S9           S
                       SS jj5       5       5       rSS jrS	rU =r$ )%TFMobileBertForSequenceClassificationi  r  r  r\  r>  r   c                  > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  R                  U5      U l
        [        R                  R                  UR                  [        UR                  5      SS9U l        Xl        g )Nr  r[   
classifierr   r]   r^   
num_labelsr  r  classifier_dropoutr   r   r_   r   r   r`   r   r   rj  rf   r>   rf   r   rh   rm  ri   s        rI   r^   .TFMobileBertForSequenceClassification.__init__  s    3&3F3 ++/\J)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 rL   r  rG  c                   U R                  UUUUUUUUU	US9
nUS   nU R                  XS9nU R                  U5      nU
c  SOU R                  X5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [	        UUUR
                  UR                  S9$ )a^  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
r,  r$   r   Nr   rK  )r  r   rj  rJ   r   rn   r  )r>   r   r   r   r   r   r   r   r  r  r.   r   r  r  r?   r  r  s                    rI   ro   *TFMobileBertForSequenceClassification.call$  s    : //))%'/!5# " 
  
]F/~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fNTr  rj  
rs   rt   r8   ru   r  r\   rv   rj  rf   r   rx   s     rI   rv   +TFMobileBertForSequenceClassification.build_      ::
4t,8t334%%d+ 54t,8t334%%tT4;;3J3J&KL 54 9 54 54   C+.3C<+
C9<
D
rs   rj  rf   r   r  rl  rQ  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r.   r  r   r  rO   z"tuple | TFSequenceClassifierOutputrl   )rP   rQ   rR   rS   rR  _keys_to_ignore_on_load_missingr^   r   r!   r  r  r   '_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATIONr   r  _SEQ_CLASS_EXPECTED_OUTPUT_SEQ_CLASS_EXPECTED_LOSSro   rv   rU   r}   r~   s   @rI   rg  rg    s   *& (2l# *+F+M+MNk+lm:.$2. .28<8<6:377;)-,0#'04 %0
*0
 60
 6	0

 40
 10
 50
 '0
 *0
 !0
 .0
 0
 
,0
 n 0
d	M 	MrL   rg  z
    MobileBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a
    linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                     ^  \ rS rSr/ SQrU 4S jr\\" \R                  S5      5      \
" \\\\\\\S9            S	                         S
S jj5       5       5       rSS jrSrU =r$ ) TFMobileBertForQuestionAnsweringik  r  r  r  r\  r>  c                   > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USSS9U l        [
        R                  R                  UR                  [        UR                  5      SS9U l
        Xl        g )NFr  r@  
qa_outputsr   )r]   r^   rl  r  r  r   r_   r`   r   r   r  rf   r  s       rI   r^   )TFMobileBertForQuestionAnswering.__init__|  ss    3&3F3 ++/%Vbc,,,,/&BZBZ2[bn - 
 rL   r  )r	  r
  r  qa_target_start_indexqa_target_end_indexrH  rI  c                   U R                  UUUUUUUUU	US9
nUS   nU R                  U5      n[        R                  " USSS9u  nn[        R                  " USS9n[        R                  " USS9nSnU
b  Ub  XS.nU R                  UUU45      nU	(       d  UU4USS -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	a  
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
r,  r   r   r   r   N)start_positionend_position)r  start_logits
end_logitsrn   r  )	r  r  r8   splitsqueezerJ   r   rn   r  )r>   r   r   r   r   r   r   r   r  r  start_positionsend_positionsr   r  r  r?   r  r  r  r.   r  s                        rI   ro   %TFMobileBertForQuestionAnswering.call  s   H //))%'/!5# " 
 "!*1#%88FAB#? jzz,R8ZZ
4
&=+D(7WF''z0JKD"J/'!"+=F)-)9TGf$EvE-%!!//))
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr  r  )
rs   rt   r8   ru   r  r\   rv   r  rf   r   rx   s     rI   rv   &TFMobileBertForQuestionAnswering.build  rv  rw  )rs   rf   r  rl  r  r;  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r  r  r  r  r   r  rO   z&tuple | TFQuestionAnsweringModelOutputrl   )rP   rQ   rR   rS   rR  r^   r   r!   r  r  r   _CHECKPOINT_FOR_QAr   r  _QA_TARGET_START_INDEX_QA_TARGET_END_INDEX_QA_EXPECTED_OUTPUT_QA_EXPECTED_LOSSro   rv   rU   r}   r~   s   @rI   r~  r~  k  s   *& *+F+M+MNk+lm%2$40+' .28<8<6:377;)-,0#'9=7; %;
*;
 6;
 6	;

 4;
 1;
 5;
 ';
 *;
 !;
 7;
 5;
 ;
 
0;
 n ;
z	M 	MrL   r~  z
    MobileBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
    a softmax) e.g. for RocStories/SWAG tasks.
    c                     ^  \ rS rSr/ SQrS/rU 4S jr\\" \	R                  S5      5      \" \\\S9           S
                       SS jj5       5       5       rSS jrS	rU =r$ )TFMobileBertForMultipleChoicei  rh  r   c                "  > [         TU ]  " U/UQ70 UD6  [        USS9U l        [        R
                  R                  UR                  5      U l        [        R
                  R                  S[        UR                  5      SS9U l        Xl        g )Nr  r[   r$   rj  r   )r]   r^   r  r  r   r_   r   r   r   r`   r   r   rj  rf   r  s       rI   r^   &TFMobileBertForMultipleChoice.__init__  sy    3&3F3/\J||++F,F,FG,,,,/&2J2J"KR^ - 
 rL   z(batch_size, num_choices, sequence_lengthr  c                   Ub  [        U5      S   n[        U5      S   nO[        U5      S   n[        U5      S   nUb  [        R                  " USU45      OSnUb  [        R                  " USU45      OSnUb  [        R                  " USU45      OSnUb  [        R                  " USU45      OSnUb&  [        R                  " USU[        U5      S   45      OSnU R                  UUUUUUUUU	US9
nUS   nU R	                  UUS9nU R                  U5      n[        R                  " USU45      nU
c  SOU R                  U
U5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )	a  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
    where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
Nr$   r   r   r   )r  r   r   rK  )
r   r8   r=   r  r   rj  rJ   r
   rn   r  )r>   r   r   r   r   r   r   r   r  r  r.   r   num_choices
seq_lengthflat_input_idsflat_attention_maskflat_token_type_idsflat_position_idsflat_inputs_embedsr  r  r?   reshaped_logitsr  r  s                            rI   ro   "TFMobileBertForMultipleChoice.call  s   8  $Y/2K#I.q1J$]3A6K#M215JDMDYIJ/?@_cN\Nhbjj"j9IJnrN\Nhbjj"j9IJnrJVJbBJJ|b*5EFhl ( JJ}r:z-7PQR7S&TU 	
 // # " 
  
]XF/**Vb+->?~t4+?+?+X%''!"+5F)-)9TGf$EvE*"!//))	
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = frs  rt  rx   s     rI   rv   #TFMobileBertForMultipleChoice.build?  rv  rw  )rs   rj  rf   r   r  rQ  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r.   r  r   r  rO   z#tuple | TFMultipleChoiceModelOutputrl   )rP   rQ   rR   rS   rR  ry  r^   r   r!   r  r  r   r  r
   r  ro   rv   rU   r}   r~   s   @rI   r  r    s   *& (2l# *#**+UV  &/$ .28<8<6:377;)-,0#'04 %?
*?
 6?
 6	?

 4?
 1?
 5?
 '?
 *?
 !?
 .?
 ?
 
-?
 ?
B	M 	MrL   r  z
    MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
    for Named-Entity-Recognition (NER) tasks.
    c            
         ^  \ rS rSr/ SQrS/rU 4S jr\\" \	R                  S5      5      \" \\\\\S9           S
                       SS jj5       5       5       rSS jrS	rU =r$ )"TFMobileBertForTokenClassificationiK  r  r   c                  > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USSS9U l        UR
                  b  UR
                  OUR                  n[        R                  R                  U5      U l
        [        R                  R                  UR                  [        UR                  5      SS9U l        Xl        g )NFr  r@  rj  r   rk  rn  s        rI   r^   +TFMobileBertForTokenClassification.__init__]  s    3&3F3 ++/%Vbc)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 rL   r  rG  c                   U R                  UUUUUUUUU	US9
nUS   nU R                  XS9nU R                  U5      nU
c  SOU R                  X5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [	        UUUR
                  UR                  S9$ )z
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
r,  r   r   Nr   rK  )r  r   rj  rJ   r   rn   r  )r>   r   r   r   r   r   r   r   r  r  r.   r   r  r  r?   r  r  s                    rI   ro   'TFMobileBertForTokenClassification.callk  s    6 //))%'/!5# " 
 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
rL   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = frs  rt  rx   s     rI   rv   (TFMobileBertForTokenClassification.build  rv  rw  rx  rQ  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r.   r  r   r  rO   ztuple | TFTokenClassifierOutputrl   )rP   rQ   rR   rS   rR  ry  r^   r   r!   r  r  r   $_CHECKPOINT_FOR_TOKEN_CLASSIFICATIONr   r  _TOKEN_CLASS_EXPECTED_OUTPUT_TOKEN_CLASS_EXPECTED_LOSSro   rv   rU   r}   r~   s   @rI   r  r  K  s   *& (2l# *+F+M+MNk+lm7+$40 .28<8<6:377;)-,0#'04 %.
*.
 6.
 6	.

 4.
 1.
 5.
 '.
 *.
 !.
 ..
 .
 
).
 n .
`	M 	MrL   r  )
r=  r  r[  r  r~  rg  r  r  r  r  )frT   
__future__r   r%  dataclassesr   numpynp
tensorflowr8   activations_tfr   modeling_tf_outputsr   r   r	   r
   r   r   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   r   r   r   tf_utilsr   r   r   utilsr   r   r    r!   r"   r#   configuration_mobilebertr%   
get_loggerrP   loggerr  r  r  r  r  r  r  r  r  r  rz  r{  r|  r)   r_   LayerrW   LayerNormalizationr   r   r   r   r   r	  r  r,  r6  rA  rO  r^  rf  rp  r  r  r  r  r  r  r  r  MOBILEBERT_START_DOCSTRINGr  r  r  r=  r  r[  rg  r~  r  r  __all__rM   rL   rI   <module>r     s     "  !   /	 	 	    S R  7 
		H	%1 $ (L $l !  = '     +E '' ! Q Q8Mu||11 M64%,,11 40u||!! 0 %
:e U\\// e Pb 2 2 bJ+U\\// +B%3ELL.. %3P+++ +8&,++ &,R+** +4,+5<<%% ,+^+%,,$$ +43## 3.N&** N&b1&%,,,, 1&hH++ H@+%,,*<*< +>.5<<#5#5 .b-%,,,, -$ ~(ELL.. ~( ~(B%"3 % /{ / /<( T5 p j.,3 .,	.,b  d !<>Y d d N TVpq] 9;W ]  r] @Sell00 S& ZV%,GIe V%	V%r  \M,GIe \M\M~  eM'BD[ eMeMP  gM$?AU gMgMT  [M)DF_ [M[M|rL   