
    bCi/                       S r SSKJr  SSKrSSKrSSKJr  SSK	J
r
JrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJrJ r   SS	K!J"r"J#r#J$r$J%r%  S
SK&J'r'  \%RP                  " \)5      r*Sr+Sr, " S S\RZ                  R\                  5      r/ " S S\RZ                  R\                  5      r0 " S S\RZ                  R\                  5      r1 " S S\RZ                  R\                  5      r2 " S S\RZ                  R\                  5      r3 " S S\RZ                  R\                  5      r4 " S S\RZ                  R\                  5      r5 " S S\RZ                  R\                  5      r6 " S S\RZ                  R\                  5      r7 " S  S!\RZ                  R\                  5      r8\ " S" S#\RZ                  R\                  5      5       r9 " S$ S%\5      r:S&r;S'r<\#" S(\;5       " S) S*\:5      5       r= " S+ S,\RZ                  R\                  5      r> " S- S.\RZ                  R\                  5      r?\#" S/\;5       " S0 S1\:\5      5       r@ " S2 S3\RZ                  R\                  5      rA\#" S4\;5       " S5 S6\:\5      5       rB\#" S7\;5       " S8 S9\:\5      5       rC\#" S:\;5       " S; S<\:\5      5       rD\#" S=\;5       " S> S?\:\5      5       rE/ S@QrFg)AzTF 2.0 ConvBERT model.    )annotationsN   )get_tf_activation)TFBaseModelOutputTFMaskedLMOutputTFMultipleChoiceModelOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFMaskedLanguageModelingLossTFModelInputTypeTFMultipleChoiceLossTFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFSequenceSummaryTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds
shape_liststable_softmax)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging   )ConvBertConfigzYituTech/conv-bert-baser    c                  j   ^  \ rS rSrSrSU 4S jjrSS jr      S	           S
S jjrSrU =r	$ )TFConvBertEmbeddings>   zGConstruct the embeddings from word, position and token_type embeddings.c                J  > [         TU ]  " S0 UD6  Xl        UR                  U l        UR                  U l        UR
                  U l        [        R                  R                  UR                  SS9U l
        [        R                  R                  UR                  S9U l        g )N	LayerNormepsilonname)rate )super__init__configembedding_sizemax_position_embeddingsinitializer_ranger   layersLayerNormalizationlayer_norm_epsr%   Dropouthidden_dropout_probdropoutselfr-   kwargs	__class__s      k/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/convbert/modeling_tf_convbert.pyr,   TFConvBertEmbeddings.__init__A   s    "6"$33'-'E'E$!'!9!988AVAV]h8i||++1K1K+L    c                   [         R                  " S5         U R                  SU R                  R                  U R
                  /[        U R                  5      S9U l        S S S 5        [         R                  " S5         U R                  SU R                  R                  U R
                  /[        U R                  5      S9U l
        S S S 5        [         R                  " S5         U R                  SU R                  U R
                  /[        U R                  5      S9U l        S S S 5        U R                  (       a  g SU l        [        U SS 5      bf  [         R                  " U R                  R                   5         U R                  R#                  S S U R                  R
                  /5        S S S 5        g g ! , (       d  f       GNc= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)	Nword_embeddingsweight)r(   shapeinitializertoken_type_embeddings
embeddingsposition_embeddingsTr%   )tf
name_scope
add_weightr-   
vocab_sizer.   r   r0   r@   type_vocab_sizerC   r/   rE   builtgetattrr%   r(   buildr8   input_shapes     r;   rM   TFConvBertEmbeddings.buildK   s   ]],-//{{--t/B/BC+D,B,BC * DK . ]]23)-!{{22D4G4GH+D,B,BC *9 *D& 4 ]]01'+!33T5H5HI+D,B,BC (7 (D$ 2 ::
4d+7t~~223$$dD$++2L2L%MN 43 81 .- 43 21 43s2   A
F=?A
G'A G! 3G2=
G
G!
G/2
H c                   Uc  Uc  [        S5      eUb>  [        XR                  R                  5        [        R
                  " U R                  US9n[        U5      SS nUc  [        R                  " USS9nUc-  [        R                  " [        R                  " XWS   U-   S9SS	9n[        R
                  " U R                  US9n[        R
                  " U R                  US9n	XH-   U	-   n
U R                  U
S
9n
U R                  XS9n
U
$ )zr
Applies embedding based on inputs tensor.

Returns:
    final_embeddings (`tf.Tensor`): output embedding tensor.
Nz5Need to provide either `input_ids` or `input_embeds`.)paramsindicesr   )dimsvaluer   )startlimitaxis)inputs)r[   training)
ValueErrorr   r-   rI   rF   gatherr@   r   fillexpand_dimsrangerE   rC   r%   r6   )r8   	input_idsposition_idstoken_type_idsinputs_embedspast_key_values_lengthr\   rO   position_embedstoken_type_embedsfinal_embeddingss              r;   callTFConvBertEmbeddings.calli   s    !6TUU *9kk6L6LMIIT[[)LM /4!WW+Q?N>>5^Nd=delmL ))4+C+C\ZIIT-G-GQ_`(:=NN>>1A>B<</?<Sr=   )
r%   rK   r-   r6   r.   r0   r/   rE   rC   r@   )r-   r    N)NNNNr   F)rb   tf.Tensor | Nonerc   rm   rd   rm   re   rm   r\   boolreturnz	tf.Tensor)
__name__
__module____qualname____firstlineno____doc__r,   rM   rj   __static_attributes____classcell__r:   s   @r;   r"   r"   >   sk    QMO@ '+)-+/*. & #&  '&  )	& 
 (&  &  
&  & r=   r"   c                  B   ^  \ rS rSrU 4S jrS rSS jrSS jrSrU =r	$ )	TFConvBertSelfAttention   c           
       > [         TU ]  " S0 UD6  UR                  UR                  -  S:w  a&  [	        SUR                   SUR                   S35      e[        UR                  UR                  -  5      nUS:  a  UR                  U l        SnOUnUR                  U l        X@l        UR                  U l        UR                  U R                  -  S:w  a  [	        S5      eUR                  UR                  -  U l        U R                  U R                  -  U l	        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R                  U R                  [        UR                  5      S	S9U l        [        R                  R                  U R                  [        UR                  5      S
S9U l        [        R                  R%                  U R                  U R                  SS [        SU R                  -  5      [        UR                  5      SS9U l        [        R                  R                  U R                  U R                  -  S S[        UR                  5      S9U l        [        R                  R                  U R                  S S[        UR                  5      S9U l        [        R                  R-                  UR.                  5      U l        Xl        g )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()r   z6hidden_size should be divisible by num_attention_headsquerykernel_initializerr(   keyrV   samekey_conv_attn_layer)padding
activationdepthwise_initializerpointwise_initializerr(   conv_kernel_layer)r   r(   r   conv_out_layerr*   )r+   r,   hidden_sizenum_attention_headsr]   int
head_ratioconv_kernel_sizeattention_head_sizeall_head_sizer   r1   Denser   r0   r}   r   rV   SeparableConv1Dr   r   r   r4   attention_probs_dropout_probr6   r-   )r8   r-   r9   new_num_attention_headsr   r:   s        r;   r,    TFConvBertSelfAttention.__init__   s   "6" : ::a?#F$6$6#7 8 445Q8 
 #&f&@&@6CTCT&T"U"Q&$88DO"#"9$//DO#6  & 7 7 8 88A=UVV#)#5#59S9S#S !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 $)<<#?#?!!"1!d6K6K2K"L"1&2J2J"K& $@ $
  "'!3!3$$t'<'<<$.v/G/GH	 "4 "
 $ll00!.v/G/GH	 1 
 ||++F,O,OPr=   c                    [         R                  " XSU R                  U R                  45      n[         R                  " U/ SQS9$ )NrT   r      r   r   perm)rF   reshaper   r   	transpose)r8   x
batch_sizes      r;   transpose_for_scores,TFConvBertSelfAttention.transpose_for_scores   s6    JJqr4+C+CTE]E]^_||AL11r=   c                   [        U5      S   nU R                  U5      nU R                  U5      nU R                  U5      n	U R	                  U5      n
U R                  Xv5      nU R                  X5      n[        R                  " X5      nU R                  U5      n[        R                  " USU R                  S/5      n[        USS9n[        R                  " SS/[        U R                  S-
  S-  5      [        U R                  S-
  S-  5      /SS//5      nU R                  U5      n[        R                  " UUSU R                  /5      n[        R                   " UUS5      n[        R"                  " [%        U R                  5       Vs/ s H7  n[        R&                  " USUS/U[        U5      S   U R                  /5      PM9     snSS9n[        R                  " USU R(                  U R                  /5      n[        R*                  " UU5      n[        R                  " USU R                  /5      n[        R*                  " XSS9n[        R,                  " [        U5      S   UR.                  5      nU[        R0                  R3                  U5      -  nUb  UU-   n[        USS9nU R5                  UUS	9nUb  UU-  n[        R                  " XSU R6                  U R(                  /5      n[        R8                  " U/ S
Q5      n[        R*                  " UU5      n[        R8                  " U/ S
QS9n[        R                  " UUSU R6                  U R(                  /5      n[        R:                  " UU/S5      n[        R                  " UUSU R<                  U R                  -  45      nU(       a  UU4nU$ U4nU$ s  snf )Nr   rT   r   rY   r   CONSTANTT)transpose_br\   r   r   )r   r}   r   rV   r   r   rF   multiplyr   r   r   r   constantr   r   r   padstackra   slicer   matmulcastdtypemathsqrtr6   r   r   concatr   )r8   hidden_statesattention_mask	head_maskoutput_attentionsr\   r   mixed_query_layermixed_key_layermixed_value_layermixed_key_conv_attn_layerquery_layer	key_layerconv_attn_layerr   paddingsr   iunfold_conv_out_layerattention_scoresdkattention_probsvalue_layercontext_layerconv_outoutputss                             r;   rj   TFConvBertSelfAttention.call   s   .q1
 JJ}5((=1 JJ}5$($<$<]$K!//0AN--oJ	++&?S 22?CJJ'82t?T?TVW:XY*+<1E;;  d++a/145sD<Q<QTU<UYZ;Z7[\A	
 ,,];NZTEWEW4XY*E " t4455A !QZL]A^_`Aacgcucu4vw5 !
 $9B@X@XZ^ZoZo;pq>3DENR9K9K4LM 99
 WWZ	*2.0@0F0FG+bggll2.>>%/.@ ))9C ,,,J  -	9OjjB0H0H$JbJbc
 ll;=		/;?]F::nz2t?W?WY]YqYq.rs		=(";Q?

JDOOd>P>P,PQ
 7H=/2 O\M]cs   >O	c                \   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GNF= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN#= f! , (       d  f       N= f! , (       d  f       g = f)NTr}   r   rV   r   r   r   )rK   rL   rF   rG   r}   r(   rM   r-   r   r   rV   r   r   r   r   rN   s     r;   rM   TFConvBertSelfAttention.build)  s"   ::
4$'3tzz/

  $dkk.E.E!FG 04%1txx}}-dDKK,C,CDE .4$'3tzz/

  $dkk.E.E!FG 04.5At77<<=((..dDKK<S<S/TU >4,d3?t55::;&&,,dD$:L:L-MN <4)40<t22778##))4t{{7N7N*OP 98 = 0/ .- 0/ >= <; 98sH   3K3K83K(+3K:)L3L
K
K%(
K7:
L	
L
L+)r   r   rK   r-   r   r   r   r6   r   r   r   r   r}   rV   Frl   )
rp   rq   rr   rs   r,   r   rj   rM   ru   rv   rw   s   @r;   ry   ry      s!    <|2
QfQ Qr=   ry   c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFConvBertSelfOutputiA  c                j  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        R                  R                  UR                  SS9U l        [        R                  R                  UR                  5      U l        Xl        g Ndenser~   r%   r&   r*   )r+   r,   r   r1   r   r   r   r0   r   r2   r3   r%   r4   r5   r6   r-   r7   s      r;   r,   TFConvBertSelfOutput.__init__B  s    "6"\\''?6C[C[3\cj ( 

 88AVAV]h8i||++F,F,FGr=   c                l    U R                  U5      nU R                  XS9nU R                  X-   5      nU$ Nr   r   r6   r%   r8   r   input_tensorr\   s       r;   rj   TFConvBertSelfOutput.callL  7    

=1]F}'CDr=   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fNTr   r%   
rK   rL   rF   rG   r   r(   rM   r-   r   r%   rN   s     r;   rM   TFConvBertSelfOutput.buildS      ::
4$'3tzz/

  $dkk.E.E!FG 04d+7t~~223$$dD$++2I2I%JK 43 8 0/ 43   3D3D
D
D!r%   rK   r-   r   r6   r   rl   	rp   rq   rr   rs   r,   rj   rM   ru   rv   rw   s   @r;   r   r   A  s    	L 	Lr=   r   c                  B   ^  \ rS rSrU 4S jrS rSS jrSS jrSrU =r	$ )	TFConvBertAttentioni_  c                b   > [         TU ]  " S0 UD6  [        USS9U l        [	        USS9U l        g )Nr8   r(   outputr*   )r+   r,   ry   self_attentionr   dense_outputr7   s      r;   r,   TFConvBertAttention.__init__`  s1    "6"5f6J0hGr=   c                    [         erl   NotImplementedError)r8   headss     r;   prune_headsTFConvBertAttention.prune_headsf  s    !!r=   c                `    U R                  XX4US9nU R                  US   XS9nU4USS  -   nU$ Nr   r   r   )r   r   )	r8   r   r   r   r   r\   self_outputsattention_outputr   s	            r;   rj   TFConvBertAttention.calli  sT    **)QY + 
  ,,\!_l,^#%QR(88r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr   r   )rK   rL   rF   rG   r   r(   rM   r   rN   s     r;   rM   TFConvBertAttention.buildr  s    ::
4)40<t22778##))$/ 94.:t00556!!''- 76 ; 98 76   C.C%
C"%
C3)rK   r   r   r   rl   )
rp   rq   rr   rs   r,   r   rj   rM   ru   rv   rw   s   @r;   r   r   _  s    H"	. 	.r=   r   c                  >   ^  \ rS rSrU 4S jrSU 4S jjrS rSrU =r$ )GroupedLinearLayeri~  c                   > [         TU ]  " S0 UD6  Xl        X l        X0l        X@l        U R                  U R                  -  U l        U R                  U R                  -  U l        g Nr*   )r+   r,   
input_sizeoutput_size
num_groupsr   group_in_dimgroup_out_dim)r8   r   r   r   r   r9   r:   s         r;   r,   GroupedLinearLayer.__init__  sU    "6"$&$"4 OOt>!--@r=   c                  > U R                  SU R                  U R                  U R                  /U R                  SS9U l        U R                  SU R                  /U R                  U R                  SS9U l        [        TU ])  U5        g )NkernelT)rA   rB   	trainablebias)rA   rB   r   r   )rH   r   r   r   r   r   r   r   r   r+   rM   r8   rO   r:   s     r;   rM   GroupedLinearLayer.build  s    oo%%t'8'8$//J//	 & 
 OO4++,$:Q:QY]YcYcos $ 
	 	k"r=   c                   [        U5      S   n[        R                  " [        R                  " USU R                  U R
                  /5      / SQ5      n[        R                  " U[        R                  " U R                  / SQ5      5      n[        R                  " U/ SQ5      n[        R                  " X2SU R                  /5      n[        R                  R                  X0R                  S9nU$ )Nr   rT   )r   r   r   )r   r   r   rV   r   )r   rF   r   r   r   r   r   r   r   nnbias_addr   )r8   r   r   r   s       r;   rj   GroupedLinearLayer.call  s    .q1
LLMBIZIZ3[\^ghIIadkk9=>LLI&JJqr4+;+;<=EENNN3r=   )r   r   r   r   r   r   r   r   rl   )	rp   rq   rr   rs   r,   rM   rj   ru   rv   rw   s   @r;   r   r   ~  s    A# r=   r   c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )TFConvBertIntermediatei  c                  > [         TU ]  " S0 UD6  UR                  S:X  aB  [        R                  R                  UR                  [        UR                  5      SS9U l	        OC[        UR                  UR                  UR                  [        UR                  5      SS9U l	        [        UR                  [        5      (       a  [        UR                  5      U l        OUR                  U l        Xl        g )Nr   r   r~   r   r   r(   r*   )r+   r,   r   r   r1   r   intermediate_sizer   r0   r   r   r   
isinstance
hidden_actstrr   intermediate_act_fnr-   r7   s      r;   r,   TFConvBertIntermediate.__init__  s    "6"!++((_VMeMe=fmt , DJ ,""((!,,#263K3K#LDJ f''--'89J9J'KD$'-'8'8D$r=   c                J    U R                  U5      nU R                  U5      nU$ rl   )r   r  r8   r   s     r;   rj   TFConvBertIntermediate.call  s&    

=100?r=   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = f)NTr   )	rK   rL   rF   rG   r   r(   rM   r-   r   rN   s     r;   rM   TFConvBertIntermediate.build  sm    ::
4$'3tzz/

  $dkk.E.E!FG 0/ 4//s   3B
B)rK   r-   r   r  rl   r   rw   s   @r;   r	  r	    s    *H Hr=   r	  c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFConvBertOutputi  c                  > [         TU ]  " S0 UD6  UR                  S:X  aB  [        R                  R                  UR                  [        UR                  5      SS9U l	        OC[        UR                  UR                  UR                  [        UR                  5      SS9U l	        [        R                  R                  UR                  SS9U l        [        R                  R                  UR                   5      U l        Xl        g )Nr   r   r~   r  r%   r&   r*   )r+   r,   r   r   r1   r   r   r   r0   r   r   r  r2   r3   r%   r4   r5   r6   r-   r7   s      r;   r,   TFConvBertOutput.__init__  s    "6"!++""vG_G_7`gn , DJ ,((""!,,#263K3K#LDJ 88AVAV]h8i||++F,F,FGr=   c                l    U R                  U5      nU R                  XS9nU R                  X-   5      nU$ r   r   r   s       r;   rj   TFConvBertOutput.call  r   r=   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fNTr%   r   )rK   rL   rF   rG   r%   r(   rM   r-   r   r   r  rN   s     r;   rM   TFConvBertOutput.build  s    ::
4d+7t~~223$$dD$++2I2I%JK 44$'3tzz/

  $dkk.K.K!LM 0/ 4 43 0/r   r   r   rl   r   rw   s   @r;   r  r    s    &	N 	Nr=   r  c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFConvBertLayeri  c                   > [         TU ]  " S0 UD6  [        USS9U l        [	        USS9U l        [        USS9U l        g )N	attentionr   intermediater   r*   )r+   r,   r   r#  r	  r$  r  bert_outputr7   s      r;   r,   TFConvBertLayer.__init__  s?    "6",V+F26O+FBr=   c                    U R                  XX4US9nUS   nU R                  U5      nU R                  XUS9n	U	4USS  -   n
U
$ r   )r#  r$  r%  )r8   r   r   r   r   r\   attention_outputsr   intermediate_outputlayer_outputr   s              r;   rj   TFConvBertLayer.call  sm     NN9RZ + 
 -Q/"//0@A''(;X`'a/$5ab$99r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)NTr#  r$  r%  )	rK   rL   rF   rG   r#  r(   rM   r$  r%  rN   s     r;   rM   TFConvBertLayer.build  s    ::
4d+7t~~223$$T* 44.:t00556!!''- 74-9t//445  &&t, 65 : 43 76 65$   D0.E
E0
D>
E
E )r#  r%  rK   r$  r   rl   r   rw   s   @r;   r!  r!    s    C	- -r=   r!  c                  >   ^  \ rS rSrU 4S jr SS jrSS jrSrU =r$ )TFConvBertEncoderi  c                   > [         TU ]  " S0 UD6  [        UR                  5       Vs/ s H  n[	        USU 3S9PM     snU l        g s  snf )Nzlayer_._r   r*   )r+   r,   ra   num_hidden_layersr!  layer)r8   r-   r9   r   r:   s       r;   r,   TFConvBertEncoder.__init__  sG    "6"LQRXRjRjLklLkqofXaS>BLkl
ls   A	c           	     $   U(       a  SOS nU(       a  SOS n	[        U R                  5       H1  u  pU(       a  X4-   nU" XX:   XGS9nUS   nU(       d  M)  XS   4-   n	M3     U(       a  X4-   nU(       d  [        S XU	4 5       5      $ [        XU	S9$ )Nr*   r   r   r   c              3  .   #    U  H  oc  M  Uv   M     g 7frl   r*   ).0vs     r;   	<genexpr>)TFConvBertEncoder.call.<locals>.<genexpr>1  s     h$Vq$Vs   	)last_hidden_stater   
attentions)	enumerater3  tupler   )r8   r   r   r   r   output_hidden_statesreturn_dictr\   all_hidden_statesall_attentionsr   layer_modulelayer_outputss                r;   rj   TFConvBertEncoder.call  s     #7BD0d(4OA#$58H$H!(y|=NM *!,M  !/3C2E!E  5   14D Dh]~$Vhhh +Yg
 	
r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = f)NTr3  )rK   rL   r3  rF   rG   r(   rM   )r8   rO   r3  s      r;   rM   TFConvBertEncoder.build7  s`    ::
4$'3]]5::.KK% /. $ 4..s   A77
B	)rK   r3  r   rl   r   rw   s   @r;   r0  r0    s    m "
H& &r=   r0  c                  8   ^  \ rS rSrU 4S jrS rSS jrSrU =r$ )!TFConvBertPredictionHeadTransformiA  c                  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        UR                  [        5      (       a  [        UR                  5      U l        OUR                  U l        [        R                  R                  UR                  SS9U l        Xl        g r   )r+   r,   r   r1   r   r.   r   r0   r   r  r  r  r   transform_act_fnr2   r3   r%   r-   r7   s      r;   r,   *TFConvBertPredictionHeadTransform.__init__B  s    "6"\\''!!ofF^F^6_fm ( 

 f''--$5f6G6G$HD!$*$5$5D!88AVAV]h8ir=   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rl   )r   rK  r%   r  s     r;   rj   &TFConvBertPredictionHeadTransform.callQ  s4    

=1--m<}5r=   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fr   r   rN   s     r;   rM   'TFConvBertPredictionHeadTransform.buildX  r   r   )r%   rK   r-   r   rK  rl   r   rw   s   @r;   rI  rI  A  s    	L 	Lr=   rI  c                  |   ^  \ rS rSr\rU 4S jrS rS rS r	S r
S r\          SS j5       rSS	 jrS
rU =r$ )TFConvBertMainLayerid  c                   > [         TU ]  " S0 UD6  [        USS9U l        UR                  UR
                  :w  a-  [        R                  R                  UR
                  SS9U l	        [        USS9U l        Xl        g )NrD   r   embeddings_projectencoderr*   )r+   r,   r"   rD   r.   r   r   r1   r   rT  r0  rU  r-   r7   s      r;   r,   TFConvBertMainLayer.__init__h  sk    "6".vLI  F$6$66&+ll&8&89K9KRf&8&gD#(i@r=   c                    U R                   $ rl   )rD   r8   s    r;   get_input_embeddings(TFConvBertMainLayer.get_input_embeddingss  s    r=   c                `    XR                   l        UR                  S   U R                   l        g Nr   )rD   r@   rA   rI   r8   rV   s     r;   set_input_embeddings(TFConvBertMainLayer.set_input_embeddingsv  s     !&%*[[^"r=   c                    [         e)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
r   )r8   heads_to_prunes     r;   _prune_heads TFConvBertMainLayer._prune_headsz  s
    
 "!r=   c                    Uc  [         R                  " US5      n[         R                  " XS   SSUS   45      n[         R                  " XC5      nSU-
  S-  nU$ )Nr   r   g      ?g     )rF   r_   r   r   )r8   r   rO   r   extended_attention_masks        r;   get_extended_attention_mask/TFConvBertMainLayer.get_extended_attention_mask  sg    !WW[!4N #%**^!naQRT_`aTb=c"d #%''*A"I#&)@#@H"L&&r=   c                L    Ub  [         eS /U R                  R                  -  nU$ rl   )r   r-   r2  )r8   r   s     r;   get_head_mask!TFConvBertMainLayer.get_head_mask  s*     %%!>!>>Ir=   c           
        Ub  Ub  [        S5      eUb  [        U5      nOUb  [        U5      S S nO[        S5      eUc  [        R                  " US5      nUc  [        R                  " US5      nU R	                  XX6U
S9nU R                  X+UR                  5      nU R                  U5      n[        U S5      (       a  U R                  XS9nU R                  UUUUUU	U
S9nU$ )NzDYou cannot specify both input_ids and inputs_embeds at the same timerT   z5You have to specify either input_ids or inputs_embedsr   r   r   rT  )r]   r   rF   r_   rD   rf  r   ri  hasattrrT  rU  )r8   rb   r   rd   rc   r   re   r   r?  r@  r\   rO   r   re  s                 r;   rj   TFConvBertMainLayer.call  s
     ]%>cdd"$Y/K&$]3CR8KTUU!WW[!4N!WW[!4N	iqr"&"B"B>`m`s`s"t&&y1	4-.. 33M3UM#  % 
 r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTrD   rU  rT  )rK   rL   rF   rG   rD   r(   rM   rU  rT  r-   r.   rN   s     r;   rM   TFConvBertMainLayer.build  s   ::
4t,8t334%%d+ 54D)5t||001""4( 24-t4@t66;;<''--tT4;;;U;U.VW =< A 54 21 =<$   E.E
3E)
E
E&)
E7)rK   r-   rD   rT  rU  
NNNNNNNNNFrl   )rp   rq   rr   rs   r    config_classr,   rY  r^  rb  rf  ri  r   rj   rM   ru   rv   rw   s   @r;   rR  rR  d  sg    !L	4"'*  !- -^X Xr=   rR  c                       \ rS rSrSr\rSrSrg)TFConvBertPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
convbertr*   N)	rp   rq   rr   rs   rt   r    rr  base_model_prefixru   r*   r=   r;   rt  rt    s    
 "L"r=   rt  ax	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Args:
        config ([`ConvBertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`Numpy array` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False`):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zbThe bare ConvBERT Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrU 4S jr\\" \R                  S5      5      \	" \
\\S9          S                     S	S jj5       5       5       rS
S jrSrU =r$ )TFConvBertModeliI  c                L   > [         TU ]  " U/UQ70 UD6  [        USS9U l        g )Nru  r   )r+   r,   rR  ru  r8   r-   r[   r9   r:   s       r;   r,   TFConvBertModel.__init__N  s(    3&3F3+FDr=   batch_size, sequence_length
checkpointoutput_typerr  c                6    U R                  UUUUUUUUU	U
S9
nU$ )N
rb   r   rd   rc   r   re   r   r?  r@  r\   )ru  )r8   rb   r   rd   rc   r   re   r   r?  r@  r\   r   s               r;   rj   TFConvBertModel.callS  s<    ( --))%'/!5#   
 r=   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTru  )rK   rL   rF   rG   ru  r(   rM   rN   s     r;   rM   TFConvBertModel.buildv  s^    ::
4T*6t}}112##D) 32 722s   A88
B)rK   ru  rq  )rb   TFModelInputType | Noner   np.array | tf.Tensor | Nonerd   r  rc   r  r   r  re   rm   r   bool | Noner?  r  r@  r  r\   rn   ro   z$TFBaseModelOutput | tuple[tf.Tensor]rl   )rp   rq   rr   rs   r,   r   r   CONVBERT_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCrj   rM   ru   rv   rw   s   @r;   rx  rx  I  s    
E
 *+D+K+KLi+jk&%$ .26:6:4815*.)-,0#'* 4 4	
 2 / ( ' * !  
. l 8* *r=   rx  c                  R   ^  \ rS rSrU 4S jrU 4S jrS rS rS rS r	S r
S	rU =r$ )
TFConvBertMaskedLMHeadi  c                `   > [         TU ]  " S0 UD6  Xl        UR                  U l        X l        g r   )r+   r,   r-   r.   input_embeddings)r8   r-   r  r9   r:   s       r;   r,   TFConvBertMaskedLMHead.__init__  s,    "6"$33 0r=   c                |   > U R                  U R                  R                  4SSSS9U l        [        TU ]  U5        g )NzerosTr   )rA   rB   r   r(   )rH   r-   rI   r   r+   rM   r  s     r;   rM   TFConvBertMaskedLMHead.build  s7    OO4;;+A+A*CQXdhouOv	k"r=   c                    U R                   $ rl   )r  rX  s    r;   get_output_embeddings,TFConvBertMaskedLMHead.get_output_embeddings  s    $$$r=   c                ^    XR                   l        [        U5      S   U R                   l        g r\  )r  r@   r   rI   r]  s     r;   set_output_embeddings,TFConvBertMaskedLMHead.set_output_embeddings  s&    ',$+5e+<Q+?(r=   c                    SU R                   0$ )Nr   )r   rX  s    r;   get_biasTFConvBertMaskedLMHead.get_bias  s    		""r=   c                X    US   U l         [        US   5      S   U R                  l        g )Nr   r   )r   r   r-   rI   r]  s     r;   set_biasTFConvBertMaskedLMHead.set_bias  s'    &M	!+E&M!:1!=r=   c                V   [        US9S   n[        R                  " USU R                  /S9n[        R                  " XR
                  R                  SS9n[        R                  " USX R                  R                  /S9n[        R                  R                  XR                  S9nU$ )N)tensorr   rT   )r  rA   T)abr   r  )r   rF   r   r.   r   r  r@   r-   rI   r  r  r   )r8   r   
seq_lengths      r;   rj   TFConvBertMaskedLMHead.call  s    }5a8


-DDWDW?XY		M5J5J5Q5Q_cd

-JP[P[PfPf?gh]Kr=   )r   r-   r.   r  )rp   rq   rr   rs   r,   rM   r  r  r  r  rj   ru   rv   rw   s   @r;   r  r    s,    1#
%@#> r=   r  c                  <   ^  \ rS rSrU 4S jrSS jrSS jrSrU =r$ )TFConvBertGeneratorPredictionsi  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        R                  R                  UR                  SS9U l	        Xl
        g )Nr%   r&   r   r   r*   )r+   r,   r   r1   r2   r3   r%   r   r.   r   r-   r7   s      r;   r,   'TFConvBertGeneratorPredictions.__init__  s[    "6"88AVAV]h8i\\''(=(=G'L
r=   c                l    U R                  U5      n[        S5      " U5      nU R                  U5      nU$ )Ngelu)r   r   r%   )r8   generator_hidden_statesr\   r   s       r;   rj   #TFConvBertGeneratorPredictions.call  s4    

#:;)&1-@}5r=   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fr  )rK   rL   rF   rG   r%   r(   rM   r-   r.   r   r   rN   s     r;   rM   $TFConvBertGeneratorPredictions.build  s    ::
4d+7t~~223$$dD$++2L2L%MN 44$'3tzz/

  $dkk.E.E!FG 0/ 4 43 0/r   )r%   rK   r-   r   r   rl   r   rw   s   @r;   r  r    s    	H 	Hr=   r  z6ConvBERT Model with a `language modeling` head on top.c                     ^  \ rS rSrU 4S jrS rS r\\" \	R                  S5      5      \" \\\S9           S
                       SS jj5       5       5       rSS jrS	rU =r$ )TFConvBertForMaskedLMi  c                L  > [         TU ]  " U40 UD6  Xl        [        USS9U l        [        USS9U l        [        UR                  [        5      (       a  [        UR                  5      U l        OUR                  U l        [        XR                  R                  SS9U l        g )Nru  r   generator_predictionsgenerator_lm_head)r+   r,   r-   rR  ru  r  r  r  r  r  r   r   r  rD   r  rz  s       r;   r,   TFConvBertForMaskedLM.__init__  s    *6*+FD%CFQh%i"f''--/0A0ABDO$//DO!7@X@X_r!sr=   c                    U R                   $ rl   )r  rX  s    r;   get_lm_head!TFConvBertForMaskedLM.get_lm_head  s    %%%r=   c                N    U R                   S-   U R                  R                   -   $ )N/)r(   r  rX  s    r;   get_prefix_bias_name*TFConvBertForMaskedLM.get_prefix_bias_name  s!    yy3!7!7!<!<<<r=   r|  r}  c                   U R                  UUUUUUUUU	US9
nUS   nU R                  XS9nU R                  XS9nU
c  SOU R                  X5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [	        UUUR
                  UR                  S9$ )as  
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
r  r   r   Nr   losslogitsr   r<  )ru  r  r  hf_compute_lossr   r   r<  )r8   rb   r   rd   rc   r   re   r   r?  r@  labelsr\   r  generator_sequence_outputprediction_scoresr  r   s                    r;   rj   TFConvBertForMaskedLM.call  s    6 #'--))%'/!5# #0 #
 %<A$>! 667P6d 223D2X~t4+?+?+Z'),CAB,GGF)-)9TGf$EvE$1??.99	
 	
r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)NTru  r  r  )	rK   rL   rF   rG   ru  r(   rM   r  r  rN   s     r;   rM   TFConvBertForMaskedLM.build  s    ::
4T*6t}}112##D) 340$7Ct99>>?**006 @4,d3?t55::;&&,,T2 <; @ 32 @? <;r.  )r   rK   r-   ru  r  r  NNNNNNNNNNF)rb   r  r   np.ndarray | tf.Tensor | Nonerd   r  rc   r  r   r  re   rm   r   r  r?  r  r@  r  r  rm   r\   r  ro   ztuple | TFMaskedLMOutputrl   )rp   rq   rr   rs   r,   r  r  r   r   r  r  r   r  r   r  rj   rM   ru   rv   rw   s   @r;   r  r    s    t&= *+D+K+KLi+jk&$$ .28<8<6:37*.)-,0#'#' %/
*/
 6/
 6	/

 4/
 1/
 (/
 '/
 */
 !/
 !/
 /
 
"/
 l /
b3 3r=   r  c                  <   ^  \ rS rSrSrU 4S jrS rSS jrSrU =r	$ )TFConvBertClassificationHeadi  z-Head for sentence-level classification tasks.c                  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        UR                  b  UR                  OUR                  n[        R                  R                  U5      U l        [        R                  R	                  UR                  [        UR                  5      SS9U l        Xl        g )Nr   r~   out_projr*   )r+   r,   r   r1   r   r   r   r0   r   classifier_dropoutr5   r4   r6   
num_labelsr  r-   )r8   r-   r9   r  r:   s       r;   r,   %TFConvBertClassificationHead.__init__  s    "6"\\''?6C[C[3\cj ( 

 *0)B)B)NF%%TZTnTn 	 ||++,>?**/&BZBZ2[bl + 
 r=   c                    US S 2SS S 24   nU R                  U5      nU R                  U5      n[        U R                  R                  5      " U5      nU R                  U5      nU R                  U5      nU$ r\  )r6   r   r   r-   r  r  )r8   r   r9   r   s       r;   rj   !TFConvBertClassificationHead.call/  se    !Q'"LLOJJqMdkk445a8LLOMM!r=   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr   r  )
rK   rL   rF   rG   r   r(   rM   r-   r   r  rN   s     r;   rM   "TFConvBertClassificationHead.build9  s    ::
4$'3tzz/

  $dkk.E.E!FG 04T*6t}}112##T41H1H$IJ 32 7 0/ 32r   )rK   r-   r   r6   r  rl   )
rp   rq   rr   rs   rt   r,   rj   rM   ru   rv   rw   s   @r;   r  r    s    7 	K 	Kr=   r  zp
    ConvBERT Model transformer with a sequence classification/regression head on top e.g., for GLUE tasks.
    c                     ^  \ rS rSrU 4S jr\\" \R                  S5      5      \	" \
\\S9           S                       S	S jj5       5       5       rS
S jrSrU =r$ )#TFConvBertForSequenceClassificationiE  c                   > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USS9U l        [        USS9U l        g )Nru  r   
classifier)r+   r,   r  rR  ru  r  r  rz  s       r;   r,   ,TFConvBertForSequenceClassification.__init__L  sC    3&3F3 +++FD6vLQr=   r|  r}  c                    U R                  UUUUUUUUU	US9
nU R                  US   US9nU
c  SOU R                  X5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR
                  S9$ )a^  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
	r   rd   rc   r   re   r   r?  r@  r\   r   r   Nr   r  )ru  r  r  r
   r   r<  )r8   rb   r   rd   rc   r   re   r   r?  r@  r  r\   r   r  r  r   s                   r;   rj   (TFConvBertForSequenceClassification.callR  s    6 --))%'/!5#   
 h?~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fNTru  r  )rK   rL   rF   rG   ru  r(   rM   r  rN   s     r;   rM   )TFConvBertForSequenceClassification.build  s    ::
4T*6t}}112##D) 34t,8t334%%d+ 54 9 32 54r   )rK   r  ru  r  r  )rb   r  r   r  rd   r  rc   r  r   r  re   rm   r   r  r?  r  r@  r  r  rm   r\   r  ro   z"tuple | TFSequenceClassifierOutputrl   )rp   rq   rr   rs   r,   r   r   r  r  r   r  r
   r  rj   rM   ru   rv   rw   s   @r;   r  r  E  s    R *+D+K+KLi+jk&.$ .28<8<6:37*.)-,0#'#' %-
*-
 6-
 6	-

 4-
 1-
 (-
 '-
 *-
 !-
 !-
 -
 
,-
 l -
^	, 	,r=   r  z
    ConvBERT Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
    softmax) e.g. for RocStories/SWAG tasks.
    c                     ^  \ rS rSrU 4S jr\\" \R                  S5      5      \	" \
\\S9           S                       S	S jj5       5       5       rS
S jrSrU =r$ )TFConvBertForMultipleChoicei  c                   > [         TU ]  " U/UQ70 UD6  [        USS9U l        [	        XR
                  SS9U l        [        R                  R                  S[        UR
                  5      SS9U l        Xl        g )Nru  r   sequence_summary)r0   r(   r   r  r~   )r+   r,   rR  ru  r   r0   r  r   r1   r   r   r  r-   rz  s       r;   r,   $TFConvBertForMultipleChoice.__init__  sx    3&3F3+FD 1&>&>EW!
  ,,,,/&2J2J"KR^ - 
 r=   z(batch_size, num_choices, sequence_lengthr}  c                   Ub  [        U5      S   n[        U5      S   nO[        U5      S   n[        U5      S   nUb  [        R                  " USU45      OSnUb  [        R                  " USU45      OSnUb  [        R                  " USU45      OSnUb  [        R                  " USU45      OSnUb&  [        R                  " USU[        U5      S   45      OSnU R                  UUUUUUUUU	US9
nU R	                  US   US9nU R                  U5      n[        R                  " USU45      nU
c  SOU R                  U
U5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S	9$ )
a  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
    where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
Nr   r   rT   r   )r@  r\   r   r   r  )
r   rF   r   ru  r  r  r  r   r   r<  )r8   rb   r   rd   rc   r   re   r   r?  r@  r  r\   num_choicesr  flat_input_idsflat_attention_maskflat_token_type_idsflat_position_idsflat_inputs_embedsr   r  reshaped_logitsr  r   s                           r;   rj    TFConvBertForMultipleChoice.call  s   8  $Y/2K#I.q1J$]3A6K#M215JDMDYIJ/?@_cN\Nhbjj"j9IJnrN\Nhbjj"j9IJnrJVJbBJJ|b*5EFhl ( JJ}r:z-7PQR7S&TU 	
 -- #   
 &&wqzH&E(**Vb+->?~t4+?+?+X%''!"+5F)-)9TGf$EvE*"!//))	
 	
r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTru  r  r  )rK   rL   rF   rG   ru  r(   rM   r  r  r-   r   rN   s     r;   rM   !TFConvBertForMultipleChoice.build  s    ::
4T*6t}}112##D) 34+T2>t4499:%%++D1 ;4t,8t334%%tT4;;3J3J&KL 54 9 32 ;: 54rp  )rK   r  r-   ru  r  r  )rb   r  r   r  rd   r  rc   r  r   r  re   rm   r   r  r?  r  r@  r  r  rm   r\   r  ro   z#tuple | TFMultipleChoiceModelOutputrl   )rp   rq   rr   rs   r,   r   r   r  r  r   r  r   r  rj   rM   ru   rv   rw   s   @r;   r  r    s    
 *!(()ST  &/$ .28<8<6:37*.)-,0#'#' %>
*>
 6>
 6	>

 4>
 1>
 (>
 '>
 *>
 !>
 !>
 >
 
->
 >
@M Mr=   r  z
    ConvBERT Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                     ^  \ rS rSrU 4S jr\\" \R                  S5      5      \	" \
\\S9           S                       S	S jj5       5       5       rS
S jrSrU =r$ ) TFConvBertForTokenClassificationi   c                  > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USS9U l        UR
                  b  UR
                  OUR                  n[        R                  R                  U5      U l
        [        R                  R                  UR                  [        UR                  5      SS9U l        Xl        g )Nru  r   r  r~   )r+   r,   r  rR  ru  r  r5   r   r1   r4   r6   r   r   r0   r  r-   )r8   r-   r[   r9   r  r:   s        r;   r,   )TFConvBertForTokenClassification.__init__  s    3&3F3 +++FD)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 r=   r|  r}  c                   U R                  UUUUUUUUU	US9
nUS   nU R                  XS9nU R                  U5      nU
c  SOU R                  X5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [	        UUUR
                  UR                  S9$ )z
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
r  r   r   Nr   r  )ru  r6   r  r  r   r   r<  )r8   rb   r   rd   rc   r   re   r   r?  r@  r  r\   r   sequence_outputr  r  r   s                    r;   rj   %TFConvBertForTokenClassification.call  s    2 --))%'/!5#   
 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fr  )
rK   rL   rF   rG   ru  r(   rM   r  r-   r   rN   s     r;   rM   &TFConvBertForTokenClassification.buildK      ::
4T*6t}}112##D) 34t,8t334%%tT4;;3J3J&KL 54 9 32 54   C+.3C<+
C9<
D
)rK   r  r-   ru  r6   r  r  )rb   r  r   r  rd   r  rc   r  r   r  re   rm   r   r  r?  r  r@  r  r  rm   r\   r  ro   ztuple | TFTokenClassifierOutputrl   )rp   rq   rr   rs   r,   r   r   r  r  r   r  r   r  rj   rM   ru   rv   rw   s   @r;   r  r     s     *+D+K+KLi+jk&+$ .28<8<6:37*.)-,0#'#' %,
*,
 6,
 6	,

 4,
 1,
 (,
 ',
 *,
 !,
 !,
 ,
 
),
 l ,
\	M 	Mr=   r  z
    ConvBERT Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
    layer on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                     ^  \ rS rSrU 4S jr\\" \R                  S5      5      \	" \
\\S9            S                         S	S jj5       5       5       rS
S jrSrU =r$ )TFConvBertForQuestionAnsweringiW  c                   > [         TU ]  " U/UQ70 UD6  UR                  U l        [        USS9U l        [
        R                  R                  UR                  [        UR                  5      SS9U l
        Xl        g )Nru  r   
qa_outputsr~   )r+   r,   r  rR  ru  r   r1   r   r   r0   r  r-   rz  s       r;   r,   'TFConvBertForQuestionAnswering.__init___  sp    3&3F3 +++FD,,,,/&BZBZ2[bn - 
 r=   r|  r}  c                   U R                  UUUUUUUUU	US9
nUS   nU R                  U5      n[        R                  " USSS9u  nn[        R                  " USS9n[        R                  " USS9nSnU
b   Ub  SU
0nUUS'   U R                  UUU45      nU	(       d  UU4US	S -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S
9$ )a  
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
r  r   r   rT   rY   Nstart_positionend_positionr   )r  start_logits
end_logitsr   r<  )	ru  r  rF   splitsqueezer  r	   r   r<  )r8   rb   r   rd   rc   r   re   r   r?  r@  start_positionsend_positionsr\   r   r  r  r  r  r  r  r   s                        r;   rj   #TFConvBertForQuestionAnswering.calli  s   @ --))%'/!5#   
 "!*1#%88FAB#? jzz,R8ZZ
4
&=+D&8F%2F>"''z0JKD"J/'!"+=F)-)9TGf$EvE-%!!//))
 	
r=   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTru  r  )
rK   rL   rF   rG   ru  r(   rM   r  r-   r   rN   s     r;   rM   $TFConvBertForQuestionAnswering.build  r  r  )rK   r-   ru  r  r  )NNNNNNNNNNNF)rb   r  r   r  rd   r  rc   r  r   r  re   rm   r   r  r?  r  r@  r  r  rm   r  rm   r\   r  ro   z&tuple | TFQuestionAnsweringModelOutputrl   )rp   rq   rr   rs   r,   r   r   r  r  r   r  r	   r  rj   rM   ru   rv   rw   s   @r;   r  r  W  s     *+D+K+KLi+jk&2$ .28<8<6:37*.)-,0#',0*. %;
*;
 6;
 6	;

 4;
 1;
 (;
 ';
 *;
 !;
 *;
 (;
 ;
 
0;
 l ;
z	M 	Mr=   r  )r  r  r  r  r  r!  rx  rt  )Grt   
__future__r   numpynp
tensorflowrF   activations_tfr   modeling_tf_outputsr   r   r   r	   r
   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   r   r   r   tf_utilsr   r   r   utilsr   r   r   r   configuration_convbertr    
get_loggerrp   loggerr  r  r1   Layerr"   ry   r   r   r   r	  r  r!  r0  rI  rR  rt  CONVBERT_START_DOCSTRINGr  rx  r  r  r  r  r  r  r  r  __all__r*   r=   r;   <module>r     s    "   /     S R  3 
		H	%/ "Q 5<<-- Q hlQell00 lQ^L5<<-- L<.%,,,, .>++ B"HU\\// "HJ$Nu||)) $NN-ell(( -D1&** 1&h L(:(:  LF uX%,,,, uX uXp# 1 #( T5 p h/*/ /*	/*d"U\\// "JHU\\%7%7 H6 RTlmY357S Y3 nY3x&K5<<#5#5 &KR  	F,*CEa F,F,R  bM";=Q bMbMJ  MM'@B[ MMMM`  XM%>@W XMXMv	r=   