
    cCiy                   ^   S r SSKJr  SSKrSSKrSSKrSSKJ	r	J
r
Jr  SSKJrJrJrJrJr  SSKJrJrJrJrJrJrJrJrJr  SSKJrJr  SS	KJ r   S
SK!J"r"  \ RF                  " \$5      r%Sr&Sr'S r(S r)S r*S r+ " S S\RX                  RZ                  5      r. " S S\RX                  RZ                  5      r/ " S S\RX                  RZ                  5      r0 " S S\RX                  RZ                  5      r1 " S S\RX                  RZ                  5      r2 " S S\RX                  RZ                  5      r3 " S S\RX                  RZ                  5      r4 " S  S!\RX                  RZ                  5      r5 " S" S#\RX                  RZ                  5      r6 " S$ S%\RX                  RZ                  5      r7 " S& S'\RX                  RZ                  5      r8 " S( S)\5      r9S*r:S+r;\
" S,\:5       " S- S.\RX                  RZ                  5      5       r<\
" S,\:5       " S/ S0\95      5       r=\
" S1\:5       " S2 S3\9\5      5       r> " S4 S5\RX                  RZ                  5      r?\
" S6\:5       " S7 S8\9\5      5       r@\
" S9\:5       " S: S;\9\5      5       rA " S< S=\RX                  RZ                  5      rBS@S> jrC/ S?QrDg)AzPyTorch ESM model.    )annotationsN   )add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)+TFBaseModelOutputWithPastAndCrossAttentions.TFBaseModelOutputWithPoolingAndCrossAttentionsTFMaskedLMOutputTFSequenceClassifierOutputTFTokenClassifierOutput)	TFMaskedLanguageModelingLossTFModelInputTypeTFPreTrainedModelTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeras
shape_listunpack_inputs)check_embeddings_within_boundsstable_softmax)logging   )	EsmConfigzfacebook/esm2_t6_8M_UR50Dr   c                b    [         R                  " U SSS9u  p[         R                  " U* U4SS9$ )N   axis)tfsplitconcat)xx1x2s      a/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/esm/modeling_tf_esm.pyrotate_halfr'   6   s-    XXa$FB99rc2YR((    c                    US S 2S S 2S [         R                  " U 5      S   2S S 24   nUS S 2S S 2S [         R                  " U 5      S   2S S 24   nX-  [        U 5      U-  -   $ )N)r    shaper'   )r#   cossins      r&   apply_rotary_pos_embr.   ;   sb    
a%bhhqk"o%q(
)C
a%bhhqk"o%q(
)CGA,--r(   c                F    U [         R                  R                  U 5      -   $ )zJMake layer symmetric in final two dimensions, used for contact prediction.)r    linalgmatrix_transpose)r#   s    r&   
symmetrizer2   B   s    ryy))!,,,r(   c                    [         R                  " U SSS9n[         R                  " U SSS9n[         R                  " U SSS9nX-  nXC-  nX-
  nU$ )z=Perform average product correct, used for contact prediction.r   T)keepdimsr*   )r   r*   )r    
reduce_sum)r#   a1a2a12avg
normalizeds         r&   average_product_correctr;   G   sS    	q"t	,B	q"t	,B
--8d
3C
'C
)CJr(   c                  T   ^  \ rS rSrSrSS	U 4S jjjrU 4S jrS
S jrSS jrSr	U =r
$ )TFRotaryEmbeddingS   z
Rotary position embeddings based on those in
[RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
matrices which depend on their relative positions.
c                ,   > [         TU ]  US9  Xl        g )Nname)super__init__dim)selfrD   rA   	__class__s      r&   rC   TFRotaryEmbedding.__init__Z   s    d# r(   c                T  > [         TU ]  U5        U R                  SU R                  S-  4[        R
                  [        S5      SS9U l        U R                  R                  SS[        R                  " SU R                  S[        R
                  S9U R                  -  -  -  5        g )	Ninv_freqr         ?F)r+   dtypeinitializer	trainablei'  r   )startlimitdeltarK   )
rB   build
add_weightrD   r    float32r   rI   assignrange)rE   input_shaperF   s     r&   rQ   TFRotaryEmbedding.buildd   s    k"txx1}.bjjo^aNbns ( 
 	5RXXATXXQbjjY\`\d\ddef	
r(   c                `   [         R                  " U5      U   n[         R                  " X0R                  R                  S9n[         R
                  " SX@R                  5      n[         R                  " XU4SS9S S S S 2S S 24   n[         R                  " U5      [         R                  " U5      4$ )NrK   z
i, j -> ijr   r   )	r    r+   rU   rI   rK   einsumr"   r,   r-   )rE   r#   seq_dimensionseq_lentfreqsembs          r&   _compute_cos_sin"TFRotaryEmbedding._compute_cos_sinm   s{    ((1+m,HHWMM$7$78		,==9iiR0tQ1ABvvc{BFF3K''r(   c                V    U R                  USS9u  p4[        XU5      [        X#U5      4$ )Nr*   )r[   )r`   r.   )rE   qkcos_embsin_embs        r&   callTFRotaryEmbedding.callv   s:    00"0E !W5 W5
 	
r(   )rD   rI   N)rD   int)r   )rc   	tf.Tensorrd   rk   returnztuple[tf.Tensor, tf.Tensor])__name__
__module____qualname____firstlineno____doc__rC   rQ   r`   rg   __static_attributes____classcell__rF   s   @r&   r=   r=   S   s&     
(
 
r(   r=   c                  P   ^  \ rS rSrSr   S   SU 4S jjjrS	S jrS rSrU =r	$ )
TFEsmContactPredictionHead   zWPerforms symmetrization, apc, and computes a logistic regression on the output featuresc                   > [         TU ]  US9  X0l        Xl        [        R
                  R                  SUSSS9U l        g )Nr@   r   sigmoid
regression)use_bias
activationrA   )rB   rC   eos_idxin_featuresr   layersDenserz   )rE   r~   biasr}   rA   rF   s        r&   rC   #TFEsmContactPredictionHead.__init__   s@     	d#&,,,,Q)Zf,gr(   c                *   U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S U R                  45        S S S 5        g g ! , (       d  f       g = f)NTrz   )builtgetattrr    
name_scoperz   rA   rQ   r~   rE   rV   s     r&   rQ    TFEsmContactPredictionHead.build   si    ::
4t,8t334%%tT-=-=&>? 54 944s   (B
Bc                    [         R                  " XR                  :g  UR                  5      n[         R                  " US5      [         R                  " US5      -  nX#S S 2S S S S 2S S 24   -  nUSS S2S S24   nUSSS 2SS 24   n[        U5      u  pEpgn[         R                  " X$XV-  Xw45      n[        [        U5      5      n[         R                  " USS9n[         R                  " U R                  U5      S5      $ )Nr   r   .r   )r   r   r   r   permr   )r    castr}   rK   expand_dimsr   reshaper;   r2   	transposesqueezerz   )	rE   tokens
attentionseos_mask
batch_sizer   headsseqlen_s	            r&   rg   TFEsmContactPredictionHead.call   s    776\\1:3C3CD>>(A.!1LL1dD!Q+>"??
SbS#2#.
QR,
/9*/E,
E1ZZ
,XY
 -Z
-CD
\\*<@
zz$//*5q99r(   )r   r}   r~   rz   )Tr   N)r~   rj   r}   rj   ri   )
rm   rn   ro   rp   rq   rC   rQ   rg   rr   rs   rt   s   @r&   rv   rv      sA    a
 
h
h 	
h 
h@: :r(   rv   c                  L   ^  \ rS rSrSrSU 4S jjr S	S jrS rSS jrSr	U =r
$ )
TFEsmEmbeddings   zN
Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
c                  > [         TU ]  US9  [        R                  R	                  UR
                  UR                  [        UR                  5      SS9U l	        [        R                  R	                  UR                  UR                  [        UR                  5      SS9U l        UR                  (       a.  [        R                  R                  UR                  SS9U l        OS U l        [!        USS5      U l        [$        R&                  " UR                  5      S S S 24   U l        UR*                  U l        UR.                  U l        UR0                  U l        Xl        g )	Nr@   word_embeddings)embeddings_initializerrA   position_embeddings
layer_normepsilonrA   position_embedding_typeabsolute)rB   rC   r   r   	Embedding
vocab_sizehidden_sizer   initializer_ranger   max_position_embeddingsr   emb_layer_norm_beforeLayerNormalizationlayer_norm_epsr   r   r   r    rU   position_idspad_token_idpadding_idxtoken_dropoutmask_token_idconfigrE   r   rA   rF   s      r&   rC   TFEsmEmbeddings.__init__   s+   d#$||55#263K3K#L"	  6  
 $)<<#9#9**#263K3K#L&	 $: $
  ''#ll==fF[F[bn=oDO"DO (/v7PR\']$HHV%C%CDT1WM!..#11#11r(   c                d   Uc+  Ub  [        XR                  U5      nOU R                  U5      nUc0  [        XR                  R
                  5        U R                  U5      nUnU R                  (       a  [        R                  " XR                  :H  S S 2S S 2S 4   SU5      nSn[        R                  " [        R                  " USS9[        R                  5      nXR                  :H  n	[        R                  R                  U	[        R                  SS9U-  n
USU-
  -  SU
-
  S S 2S S 4   -  nU R                   S:X  a  U R#                  U5      nXk-  nU R$                  b  U R%                  U5      nUb9  U[        R                  " [        R&                  " US5      UR(                  5      -  nU$ )Ng        gQ?r   r   )rK   r   r   r   )"create_position_ids_from_input_idsr   &create_position_ids_from_inputs_embedsr   r   r   r   r   r    wherer   r   r5   rS   mathcount_nonzeror   r   r   r   rK   )rE   	input_idsattention_maskr   inputs_embedspast_key_values_length
embeddingsmask_ratio_trainsrc_lengthsmasked_tokensmask_ratio_observedr   s               r&   rg   TFEsmEmbeddings.call   s    $A)M]M]_uv#JJ=Y *9kk6L6LM 00;M #
 90B0B#BAq$J"OQTV`aJ)''"--R"H"**UK%););;M"$''"7"7RZZ^`"7"ado"o#q+;';<DW@WYZ\`bfYf?ggJ'':5"&":":<"H-J??&4J%#bggbnn^R.PR\RbRb&ccJ r(   c                    [        U5      SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  S9n[        R
                  " [        R                  " US5      U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: tf.Tensor

Returns: tf.Tensor
Nr   r   )rN   rO   rK   r   )r   r    rU   r   int64broadcast_tor   )rE   r   rV   sequence_lengthr   s        r&   r   6TFEsmEmbeddings.create_position_ids_from_inputs_embeds   sr     !/4%a.xx""Q&o@P@P.PST.T\^\d\d
 r~~lA>LLr(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r   r   )r   r   r    r   r   rA   rQ   r   r   r   r   r   s     r&   rQ   TFEsmEmbeddings.build  s   ::
4*D1=t33889$$**40 :4.5At77<<=((..t4 >4t,8t334%%tT4;;3J3J&KL 54 9 :9 >= 54$   E.E
3E)
E
E&)
E7)
r   r   r   r   r   r   r   r   r   r   ri   )NNNNr   )rm   rn   ro   rp   rq   rC   rg   r   rQ   rr   rs   rt   s   @r&   r   r      s,    > rs+ZM"M Mr(   r   c                  ~   ^  \ rS rSrSU 4S jjrSS jr       S	                 S
S jjrSS jrSrU =r	$ )TFEsmSelfAttentioni  c                   > [         TU ]  US9  UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  R                  U R                  [        UR                  5      SS9U l        [        R                  R                  U R                  [        UR                  5      S	S9U l        [        R                  R                  U R                  [        UR                  5      S
S9U l        [        R                  R#                  UR$                  5      U l        U=(       d    [)        USS5      U l        S U l        U R*                  S:X  d  U R*                  S:X  ac  UR.                  U l        [        R                  R1                  SUR.                  -  S-
  U R                  [        UR                  5      S9U l        O)U R*                  S:X  a  [5        U R                  SS9U l        UR6                  U l        Xl        g )Nr@   r   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()querykernel_initializerrA   keyvaluer   r   relative_keyrelative_key_queryr   r   )r   rotaryrotary_embeddings)rD   rA   )rB   rC   r   num_attention_headshasattr
ValueErrorrj   attention_head_sizeall_head_sizer   r   r   r   r   r   r   r   Dropoutattention_probs_dropout_probdropoutr   r   r   r   r   distance_embeddingr=   
is_decoderr   )rE   r   r   rA   rF   s       r&   rC   TFEsmSelfAttention.__init__  sA   d# : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 ||++F,O,OP'> (
'-zC
$ "&''>9T=Y=Y]q=q+1+I+ID(&+ll&<&<F222Q6(('6v7O7O'P '= 'D#
 ))X5%64;S;SZm%nD" ++r(   c                    [        U5      S S U R                  U R                  /-   n[        R                  " X5      n[        R
                  " USS9$ )Nr   r   r   r   r   r   )r   r   r   r    r   r   )rE   r#   new_x_shapes      r&   transpose_for_scores'TFEsmSelfAttention.transpose_for_scores@  sG     mCR(D,D,DdF^F^+__JJq&||AL11r(   c	                .   U R                  U5      n	US Ln
U
(       a  Ub  US   nUS   nUnGOU
(       aC  U R                  U R                  U5      5      nU R                  U R                  U5      5      nUnOUbu  U R                  U R                  U5      5      nU R                  U R                  U5      5      n[        R
                  " US   U/SS9n[        R
                  " US   U/SS9nO@U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U	5      nXR                  S-  -  nU R                  (       a  X4nU R                  S:X  a  U R                  X5      u  p[        R                  " XSS9nU R                  S	:X  d  U R                  S
:X  Ga;  [        U5      S   n[        R                  " [        R                  " U[        R                  S9S5      n[        R                  " [        R                  " U[        R                  S9S5      nUU-
  nU R                  UU R                   -   S-
  5      n[        R"                  " UUR$                  5      nU R                  S	:X  a  [        R&                  " SUU5      nUU-   nOHU R                  S
:X  a8  [        R&                  " SUU5      n[        R&                  " SUU5      nUU-   U-   nUb  X-   n[)        USS9nU R+                  UUS9nUb  UU-  nUU-  n[        R,                  " USS9n[        U5      S S U R.                  /-   n[        R0                  " UU5      nU(       a  UU4OU4nU R                  (       a  UU4-   nU$ )Nr   r   r   r   g      r   Ttranspose_br   r   rY   r   zbhld,lrd->bhlrzbhrd,lrd->bhlrtrainingr   r   r*   )r   r   r   r   r    r"   r   r   r   r   matmulr   r   rU   r   r   r   r   rK   rZ   r   r   r   r   r   )rE   hidden_statesr   	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scores
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                              r&   rg   TFEsmSelfAttention.callE  sh    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@qII))^A%6$D1MK11$((=2IJI33DJJ}4MNK//0AB "$<$<d$BB?? (5N''83%)%;%;K%S"K 99[N''>9T=Y=Y]q=q#M215J^^BHHZrxx,PRTUN^^BHHZrxx,PRSTN%6H#'#:#:8dFbFb;bef;f#g #%77+?ARAR#S ++~=+-995E{Th+i(#36N#N --1EE13;K[Zn1o./1yy9I9Vj/k,#36T#TWs#s %/@ ))9C ,,,J  -	9O'+5]F",]";CR"@DDVDVCW"W

=2IJ6G=/2mM]?? 11Gr(   c                ,   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       GNS= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r   r   r   )r   r   r    r   r   rA   rQ   r   r   r   r   r   r   s     r&   rQ   TFEsmSelfAttention.build  s`   ::
4$'3tzz/

  $dkk.E.E!FG 04%1txx}}-dDKK,C,CDE .4$'3tzz/

  $dkk.E.E!FG 04,d3?t55::;&&,,T2 <; @ 0/ .- 0/ <;s0   3G3G#83G4+H
G #
G14
H
H)r   r   r   r   r   r   r   r   r   r   r   r   r   r   NN)r#   rk   rl   rk   NNNNNFF)r   rk   r   tf.Tensor | Noner   r  r   r  r   r  r   ztuple[tuple[tf.Tensor]] | Noner   bool | Noner   boolrl   ztuple[tf.Tensor]ri   )
rm   rn   ro   rp   rC   r   rg   rQ   rr   rs   rt   s   @r&   r   r     s    &P2 ,0&*26379=).e e )e $	e
  0e !1e 7e 'e e 
eN3 3r(   r   c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )TFEsmSelfOutputi  c                
  > [         TU ]  US9  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        R                  R                  UR                  5      U l        Xl        g Nr@   denser   rB   rC   r   r   r   r   r   r   r  r   hidden_dropout_probr   r   r   s      r&   rC   TFEsmSelfOutput.__init__  j    d#\\''?6C[C[3\cj ( 

 ||++F,F,FGr(   c                N    U R                  U5      nU R                  XS9nX-  nU$ Nr   r  r   rE   r   input_tensorr   s       r&   rg   TFEsmSelfOutput.call  .    

=1]F%r(   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fNTr  	r   r   r    r   r  rA   rQ   r   r   r   s     r&   rQ   TFEsmSelfOutput.build  m    ::
4$'3tzz/

  $dkk.E.E!FG 0/ 4//   3B
Br   r   r  r   ri   F	rm   rn   ro   rp   rC   rg   rQ   rr   rs   rt   s   @r&   r  r    s    H Hr(   r  c                  T   ^  \ rS rSrSU 4S jjrS r       SS jrSS jrSrU =r	$ )	TFEsmAttentioni  c                   > [         TU ]  US9  [        USS9U l        [	        USS9U l        [        5       U l        [        R                  R                  UR                  SS9U l        Xl        g )Nr@   rE   output	LayerNormr   )rB   rC   r   rE   r  output_layersetpruned_headsr   r   r   r   r.  r   r   s      r&   rC   TFEsmAttention.__init__  sa    d#&vF;	+FBE88AVAV]h8ir(   c                    [         eri   NotImplementedError)rE   r   s     r&   prune_headsTFEsmAttention.prune_heads      !!r(   c	           
         U R                  U5      n	U R                  U	UUUUUUU5      n
U R                  U
S   U5      nU4U
SS  -   nU$ )Nr   r   )r.  rE   r/  )rE   r   r   r   r   r   r   r   r   hidden_states_lnself_outputsattention_outputr  s                r&   rg   TFEsmAttention.call  sk      >>-8yy!"	
  ,,\!_mL#%QR(88r(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTrE   r/  r.  )r   r   r    r   rE   rA   rQ   r/  r.  r   r   r   s     r&   rQ   TFEsmAttention.build  s    ::
4&2tyy~~.		% /4.:t00556!!''- 74d+7t~~223$$dD$++2I2I%JK 43 8 /. 76 43r   )r.  r   r   r/  r1  rE   ri   r  )
rm   rn   ro   rp   rC   r6  rg   rQ   rr   rs   rt   s   @r&   r+  r+    s5    " "#4L Lr(   r+  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFEsmIntermediatei  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SS9U l        Xl	        g )Nr  )unitsr   rA    )
rB   rC   r   r   r   intermediate_sizer   r   r  r   rE   r   kwargsrF   s      r&   rC   TFEsmIntermediate.__init__  sO    "6"\\''**.v/G/GH ( 


 r(   c                b    U R                  US9n[        R                  R                  U5      nU$ )Ninputs)r  r    nngelu)rE   r   s     r&   rg   TFEsmIntermediate.call  s*    

-
8

=1r(   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr"  r#  r   s     r&   rQ   TFEsmIntermediate.build  r%  r&  r   r   r  r   r   r   rk   rl   rk   ri   r)  rt   s   @r&   rA  rA    s    
H Hr(   rA  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )TFEsmOutputi$  c                
  > [         TU ]  US9  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        R                  R                  UR                  5      U l        Xl        g r  r  r   s      r&   rC   TFEsmOutput.__init__%  r  r(   c                N    U R                  U5      nU R                  XS9nX-  nU$ r  r  r  s       r&   rg   TFEsmOutput.call-  r   r(   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr"  )	r   r   r    r   r  rA   rQ   r   rE  r   s     r&   rQ   TFEsmOutput.build3  sm    ::
4$'3tzz/

  $dkk.K.K!LM 0/ 4//r&  r'  ri   r(  r)  rt   s   @r&   rU  rU  $  s    N Nr(   rU  c                  N   ^  \ rS rSrSU 4S jjr       SS jrSS jrSrU =r$ )
TFEsmLayeri<  c                  > [         TU ]  US9  UR                  U l        SU l        [	        USS9U l        UR                  U l        UR                  U l        U R                  (       a/  U R                  (       d  [        U  S35      e[	        U5      U l	        [        USS9U l        [        USS9U l        [        R                  R!                  UR"                  SS9U l        Xl        g )	Nr@   r   	attentionz> should be used as a decoder model if cross attention is addedintermediater-  r.  r   )rB   rC   chunk_size_feed_forwardseq_len_dimr+  r_  r   add_cross_attentionRuntimeErrorcrossattentionrA  r`  rU  r/  r   r   r   r   r.  r   r   s      r&   rC   TFEsmLayer.__init__=  s    d#'-'E'E$'[A ++#)#=#= ##??"dV+i#jkk"0"8D-f>J'X>88AVAV]h8ir(   c	                   Ub  US S OS n	U R                  UUUUU	US9n
U
S   nU R                  (       a  U
SS nU
S   nOU
SS  nS nU R                  (       aZ  UbW  [        U S5      (       d  [        SU  S35      eUb  US	S  OS nU R	                  UUUUUUUUS
9nUS   nUUSS -   nUS   nWU-   nU R                  U5      nU R                  US9nU R                  UXS9nU4U-   nU R                  (       a  UW4-   nU$ )Nr   )r   r   r   r   r   r   re  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r*   r   r   )r   r  r   )r_  r   r   AttributeErrorre  r.  r`  r/  )rE   r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr<  r  present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayernorm_outputintermediate_outputlayer_outputs                       r&   rg   TFEsmLayer.callM  s    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!122$=dV D` `  @N?Yrs(;_c%&*&9&9 %&)!! ': 	'#  7q9 7" ==G ,C2+F( 14P P>>*:;"//>N/O((-<L ) 
  /G+ ??!2 44Gr(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GN<= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr_  r`  r/  r.  )r   r   r    r   r_  rA   rQ   r`  r/  r.  r   r   r   s     r&   rQ   TFEsmLayer.build  sH   ::
4d+7t~~223$$T* 44.:t00556!!''- 74.:t00556!!''- 74d+7t~~223$$dD$++2I2I%JK 43 8 43 76 76 43s0   F#.F5
G&3G#
F25
G
G
G%)r.  rc  r_  r   ra  r   re  r`  r   r/  rb  ri   r  r)  rt   s   @r&   r]  r]  <  s2    & "#DLL Lr(   r]  c                  T   ^  \ rS rSrSU 4S jjr          SS jrSS jrSrU =r$ )TFEsmEncoderi  c                   > [         TU ]  US9  Xl        [        UR                  5       Vs/ s H  n[        USU 3S9PM     snU l        [        R                  R                  UR                  SS9U l        g s  snf )Nr@   zlayer_._emb_layer_norm_afterr   )rB   rC   r   rU   num_hidden_layersr]  layerr   r   r   r   ry  )rE   r   rA   irF   s       r&   rC   TFEsmEncoder.__init__  sw    d#GLVMeMeGfgGf!jn=Gfg
$)LL$C$C))0F %D %
! hs   A9c                x   U	(       a  SOS nU(       a  SOS nU(       a  U R                   R                  (       a  SOS nU(       a  SOS n[        U R                  5       H  u  nnU	(       a  X4-   nUb  UU   OS nUb  UU   OS nU" UUUUUUUU5      nUS   nU(       a	  UUS   4-  nU(       d  MS  UUS   4-   nU R                   R                  (       d  My  UUS   4-   nM     U R                  (       a  U R	                  U5      nU	(       a  X4-   nU
(       d  [        S UUUUU4 5       5      $ [        UUUUUS9$ )NrD  r   r   r   r   c              3  0   #    U  H  nUc  M  Uv   M     g 7fri   rD  ).0vs     r&   	<genexpr>$TFEsmEncoder.call.<locals>.<genexpr>  s"      
A  s   	)last_hidden_statepast_key_valuesr   r   cross_attentions)r   rc  	enumerater{  ry  tupler   )rE   r   r   r   r   r   r  	use_cacher   output_hidden_statesreturn_dictr   all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacher|  layer_modulelayer_head_maskr   layer_outputss                        r&   rg   TFEsmEncoder.call  sz    #7BD$5b4%64;;;Z;Zr`d#,R$(4OA|#$58H$H!.7.CilO3B3N_Q/TXN(%&!	M *!,M"}R'8&::"  &9]1=M<O&O#;;222+?=QRCSBU+U(1  54 $$ 55mDM 14D D 
 "&%'(
 
 
 ;+.+*1
 	
r(   c                   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       Nk= f! , (       d  f       Mc  = f)NTry  r{  )
r   r   r    r   ry  rA   rQ   r   r   r{  )rE   rV   r{  s      r&   rQ   TFEsmEncoder.build  s    ::
4/6Bt88==>))//tT[[=T=T0UV ?4$'3]]5::.KK% /. $ 4 ?> /.s   3C*
C;*
C8;
D
	)r   r   ry  r{  ri   )
NNNNNNFFTFr)  rt   s   @r&   rw  rw    s9    
 "#"E
N
& 
&r(   rw  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFEsmPooleri  c                   > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  [        UR                  5      SSS9U l        Xl	        g )Ntanhr  )rC  r   r|   rA   rD  )
rB   rC   r   r   r   r   r   r   r  r   rF  s      r&   rC   TFEsmPooler.__init__  sR    "6"\\''$$.v/G/GH	 ( 

 r(   c                6    US S 2S4   nU R                  US9nU$ )Nr   rJ  )r  )rE   r   first_token_tensorpooled_outputs       r&   rg   TFEsmPooler.call  s*     +1a40

*<
=r(   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr"  r#  r   s     r&   rQ   TFEsmPooler.build  r%  r&  rQ  rR  rS  ri   r)  rt   s   @r&   r  r    s    	H Hr(   r  c                       \ rS rSrSr\rSrSrg)TFEsmPreTrainedModeli   zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
esmrD  N)	rm   rn   ro   rp   rq   r   config_classbase_model_prefixrr   rD  r(   r&   r  r     s    
 Lr(   r  a2  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a Keras [Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it as a
    regular Keras model and refer to the TF/Keras documentation for all matters related to general usage and behavior.

    Parameters:
        config ([`EsmConfig`]): Model configuration class with all the parameters of the
            model. Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        position_ids (`tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
z]The bare ESM Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrSrS/rSU 4S jjrSS jrS rSS jr	S r
             S                           SS	 jjrS
 rSrU =r$ )TFEsmMainLayeric  a  

The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
cross-attention is added between the self-attention layers, following the architecture described in [Attention is
all you need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
`add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
r   c                :  > [         TU ]  " S	SU0UD6  Xl        UR                  U l        [	        USS9U l        [        USS9U l        U(       a
  [        USS9OS U l	        [        U R                  R                  U R                  R                  -  SSS9U l        g )
NrA   r   r@   encoderpoolerTcontact_head)r~   r   rA   rD  )rB   rC   r   r   r   r   rw  r  r  r  rv   rz  r   r  )rE   r   add_pooling_layerrA   rG  rF   s        r&   rC   TFEsmMainLayer.__init__v  s    -d-f- ++)&|D#F;<Mk&x8SW6558W8WW^biw
r(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       GN%= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r  r  r  )
r   r   r    r   r   rA   rQ   r  r  r  r   s     r&   rQ   TFEsmMainLayer.build  s3   ::
4t,8t334%%d+ 54D)5t||001""4( 244(4t{{//0!!$' 14.:t00556!!''- 76 ; 54 21 10 76s0   F.F
F/&G 
F
F,/
F= 
Gc                .    U R                   R                  $ ri   )r   r   rE   s    r&   get_input_embeddings#TFEsmMainLayer.get_input_embeddings  s    ...r(   c                r    XR                   R                  l        [        U5      S   U R                   l        g )Nr   )r   r   weightr   r   )rE   r   s     r&   set_input_embeddings#TFEsmMainLayer.set_input_embeddings  s(    16''.%/%6q%9"r(   c                    [         eri   r4  )rE   heads_to_prunes     r&   _prune_headsTFEsmMainLayer._prune_heads  r8  r(   c                   U R                   R                  (       d  Sn	Ub  Ub  [        S5      eUb  [        U5      nOUb  [        U5      S S nO[        S5      eUu  nnUc&  SnS /[	        U R
                  R                  5      -  nO[        US   S   5      S   nUc  [        R                  " UUU-   4SS9nU R                  UUUUUUS	9n[        U5      nUU-   nU R                  (       a  [        R                  " U5      n[        R                  " [        R                  " US S S S 24   UUS45      US S S 2S 4   5      n[        R                  " UUR                  S
9nUUS S 2S S S 24   -  n[        U5      n[        R                  " UUS   SUS   US   45      nUS   b  US S 2S S 2U* S 2S S 24   nO![        R                  " UUS   SSUS   45      n[        R                  " UUR                  S
9n[        R                   " SUR                  S
9n[        R                   " SUR                  S
9n[        R"                  " [        R$                  " UU5      U5      nU R                  (       ag  Ubd  [        R                  " UUR                  S
9n[	        [        U5      5      nUS:X  a  US S 2S S S 2S S 24   nUS:X  a  US S 2S S S S 24   nSW-
  S-  nOS nUb  [&        eS /U R                   R(                  -  nU R                  UUUUUUU	U
UUUS9nUS   nU R*                  b  U R+                  US9OS nU(       d
  UU4USS  -   $ [-        UUUR.                  UR0                  UR2                  UR4                  S9$ )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr   r*   r   )dimsr   )r   r   r   r   r   r   rY   r   rJ   g     r   )r   r   r   r   r   r  r  r   r  r  r   rh  )r  pooler_outputr  r   r   r  )r   r   r   r   lenr  r{  r    fillr   rU   
less_equaltiler   rK   r   constantmultiplysubtractr5  rz  r  r	   r  r   r   r  )rE   r   r   r   r   r   r   r   r  r  r   r  r  r   rV   r   r   r   embedding_outputattention_mask_shapemask_seq_lengthseq_idscausal_maskextended_attention_maskone_cstten_thousand_cstnum_dims_encoder_attention_maskencoder_extended_attention_maskencoder_outputssequence_outputr  s                                  r&   rg   TFEsmMainLayer.call  s     {{%%I ]%>cdd"$Y/K&$]3CR8KTUU!,
J"%&"#fs4<<+=+='>>O%/0B10E%Fr%J"!WW:zDZ7Z*[cdeN??)%'#9 + 
  *.9$'==
 ??hh/G--dA._a0PQa&K ''+^5I5IJK&1N1dA:4N&N##-.E#F &(jj'*>q*A1FZ[\F]_stu_v)w'# q!-*A!QVWBW*X'&(jj!5a!8!Q@TUV@W X'# #%''*AIYI_I_"`++c)9)?)?@;;x7G7M7MN"$++bkk'CZ.[]m"n ??5A &(WW-CKbKhKh%i".1*=S2T.U+.!32HDRSUV2W/.!32HDRVXYIY2Z/ 035T/TX`.`+.2+  %%!>!>>I,,*2"7#B+/!5# ' 
 *!,FJkkF]/Bcg  #$ $
 >-'+;;)77&11,==
 	
r(   c                    U " XSSS9R                   n[        R                  " USS9n[        R                  " X#R                  5      nX2S S 2S S S 4   -  nX2S S 2S S S S 2S 4   -  nU R                  X5      $ )NT)r   r  r   r   r   )r   r    stackr   rK   r  )rE   r   r   attnss       r&   predict_contactsTFEsmMainLayer.predict_contacts8  sz    V`deppQ'
 =4t 3444q$ 677  //r(   )r   r   r  r   r  r   r  )TNri   )r   ztf.VariableNNNNNNNNNNNNF)r   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r  r   r  r   r  r   r  r   r  r  +tuple[tuple[np.ndarray | tf.Tensor]] | Noner  r  r   r  r  r  r  r  r   r  rl   ATFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor])rm   rn   ro   rp   rq   _keys_to_ignore_on_load_missingrC   rQ   r  r  r  rg   r  rr   rs   rt   s   @r&   r  r  c  s    

 (7&7#
."/:"
 .28<6:377;?C@DGK!%)-,0#'W
*W
 6W
 4	W

 1W
 5W
  =W
 !>W
 EW
 W
 'W
 *W
 !W
 W
 
KW
r
0 
0r(   r  c                     ^  \ rS rSrS	S
U 4S jjjr\\" \R                  S5      5      \	" \
\\S9             S                           SS jj5       5       5       rS rSS jrSrU =r$ )
TFEsmModeliE  c                L   > [         TU ]  " U/UQ70 UD6  [        XSS9U l        g )Nr  r  rA   )rB   rC   r  r  )rE   r   r  rK  rG  rF   s        r&   rC   TFEsmModel.__init__J  s)    3&3F3!&TYZr(   batch_size, sequence_length
checkpointoutput_typer  c                <    U R                  UUUUUUUUU	U
UUUS9nU$ )aK  
encoder_hidden_states  (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
    Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
    the model is configured as a decoder.
encoder_attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
    the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

past_key_values (`tuple[tuple[tf.Tensor]]` of length `config.n_layers`)
    contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
    If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
    don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
    `decoder_input_ids` of shape `(batch_size, sequence_length)`.
use_cache (`bool`, *optional*, defaults to `True`):
    If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
    `past_key_values`). Set to `False` during training, `True` during generation
)r   r   r   r   r   r   r   r  r  r   r  r  r   )r  )rE   r   r   r   r   r   r   r   r  r  r   r  r  r   r  s                  r&   rg   TFEsmModel.callO  sF    V (()%'"7#9+/!5#  
 r(   c                8    U R                   R                  X5      $ ri   r  r  rE   r   r   s      r&   r  TFEsmModel.predict_contacts      xx((@@r(   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTr  )r   r   r    r   r  rA   rQ   r   s     r&   rQ   TFEsmModel.build  sZ    ::
4%1txx}}-t$ .- 2--s   A88
B)r   r  )TrR  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r  r  r   r  r  r  r  r  r   r  rl   r  ri   )rm   rn   ro   rp   rC   r   r   ESM_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr	   _CONFIG_FOR_DOCrg   r  rQ   rr   rs   rt   s   @r&   r  r  E  s   
[ [
 *+?+F+FGd+ef&B$ .28<6:377;?C@DGK!%)-,0#' %3*3 63 4	3
 13 53  =3 !>3 E3 3 '3 *3 !3 3 
K3 g 3jA% %r(   r  z1ESM Model with a `language modeling` head on top.c            	        ^  \ rS rSrS/rS/rU 4S jrS rS rS r	\
\" \R                  S5      5      \" \\\S	S
9            S                         SS jj5       5       5       rS rSS jrSrU =r$ )TFEsmForMaskedLMi  r   r  c                d  > [         TU ]  U5        UR                  (       a  [        R	                  S5        [        USSS9U l        [        USS9U l        UR                  (       a  [        R                  " [        R                  R                  U R                  5       SSS5      5         U R                  R                   R"                  R%                  S	5        S S S 5        U R                  R                   R"                  R&                  S
   U R                  l        g g ! , (       d  f       NL= f)NzjIf you want to use `EsmForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.Fr  r  lm_headr@   r   r   r  r   )rB   rC   r   loggerwarningr  r  TFEsmLMHeadr  tie_word_embeddingsr    r   ospathjoin_name_scoper   r   rQ   weightsdecoderrE   r   rF   s     r&   rC   TFEsmForMaskedLM.__init__  s     NN1
 "&EN"6	:%%rww||D,<,<,>|Ufgh##3399,G i#'88#6#6#F#F#N#Nq#QDLL 	 &hhs   +0D!!
D/c                .    U R                   R                  $ ri   r  r  r  s    r&   get_output_embeddings&TFEsmForMaskedLM.get_output_embeddings  s    ||###r(   c                $    XR                   l        g ri   r  )rE   new_embeddingss     r&   set_output_embeddings&TFEsmForMaskedLM.set_output_embeddings  s    -r(   c                    U R                   $ ri   )r  r  s    r&   get_lm_headTFEsmForMaskedLM.get_lm_head  s    ||r(   r  z<mask>)r  r  r  maskc                4   Ub  UOU R                   R                  nU R                  UUUUUUUU	U
UUS9nUS   nU R                  U5      nSnUb  U R	                  XS9nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
kwargs (`dict[str, any]`, *optional*, defaults to `{}`):
    Used to hide legacy arguments that have been deprecated.
N)
r   r   r   r   r   r   r   r  r  r   r   )labelslogitsr   lossr  r   r   )r   use_return_dictr  r  hf_compute_lossr
   r   r   )rE   r   r   r   r   r   r   r   r  r   r  r  r   r  r  prediction_scoresmasked_lm_lossr-  s                     r&   rg   TFEsmForMaskedLM.call  s    > &1%<k$++B]B](()%'"7#9/!5#  
 "!* LL9!111ZN')GABK7F3A3M^%.YSYY$!//))	
 	
r(   c                8    U R                   R                  X5      $ ri   r  r  s      r&   r  !TFEsmForMaskedLM.predict_contacts  r  r(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTr  r  )r   r   r    r   r  rA   rQ   r  r   s     r&   rQ   TFEsmForMaskedLM.build  s    ::
4%1txx}}-t$ .4D)5t||001""4( 21 6 .- 21   C.C%
C"%
C3)r   r  r  )NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r   r  r  r  r  r  r   r  rl   z#TFMaskedLMOutput | tuple[tf.Tensor]ri   )rm   rn   ro   rp   r  "_keys_to_ignore_on_load_unexpectedrC   r  r
  r  r   r   r  r  r   r  r
   r  rg   r  rQ   rr   rs   rt   s   @r&   r  r    s(   '6&7#*3&R"$. *+?+F+FGd+ef&$$	 .28<6:377;?C@D04)-,0#'6
*6
 66
 4	6

 16
 56
  =6
 !>6
 .6
 '6
 *6
 !6
 6
 
-6
 g 6
pA	) 	)r(   r  c                  F   ^  \ rS rSrSrSU 4S jjrSS jrS rS rSr	U =r
$ )	r  i  z&ESM Head for masked language modeling.c                  > [         TU ]  US9  [        R                  R	                  UR
                  [        UR                  5      SS9U l        [        R                  R                  UR                  SS9U l        UR                  (       a  S U l        OB[        R                  R	                  UR                  [        UR                  5      SSS9U l        Xl        g )	Nr@   r  r   r   r   r  F)r   rA   r{   )rB   rC   r   r   r   r   r   r   r  r   r   r   r  r  r   r   r   s      r&   rC   TFEsmLMHead.__init__  s    d#\\''?6C[C[3\cj ( 

  ,,99&BWBW^j9k%%DL <<--!!#263K3K#L	 . DL r(   c                   U R                   (       a  g SU l         U R                  SU R                  R                  4SSS9U l        [        U SS 5      be  [        R                  " U R                  R                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      be  [        R                  " U R                  R                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      b  U R                  R                  (       df  [        R                  " U R                  R                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g g ! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)NTr   zeros)r+   rL   rM   r  r   r  )r   rR   r   r   r   r   r    r   r  rA   rQ   r   r   r  r  r   s     r&   rQ   TFEsmLMHead.build  sT    ::
OOF4;;3I3I2KY`lpOq	4$'3tzz/

  $dkk.E.E!FG 04t,8t334%%tT4;;3J3J&KL 54D)5dkk>]>]t||001""D$0G0G#HI 21 ?^5 0/ 54 21s$   >3F=13G?3G =
G
G 
G.c                    SU R                   0$ )Nr   )r   r  s    r&   get_biasTFEsmLMHead.get_bias+  s    		""r(   c                V   U R                  U5      n[        R                  R                  U5      nU R	                  U5      nU R
                  R                  (       a.  [        R                  " X R                  SS9U R                  -   nU$ U R                  U5      U R                  -   nU$ )NTr   )
r  r    rL  rM  r   r   r  r   r  r   )rE   featuresr#   s      r&   rg   TFEsmLMHead.call.  s    JJx EEJJqMOOA ;;**		!\\t<tyyHA  Q$))+Ar(   )r   r   r   r  r  r   ri   )rm   rn   ro   rp   rq   rC   rQ   r'  rg   rr   rs   rt   s   @r&   r  r    s     0$J"#
 
r(   r  z
    ESM Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                     ^  \ rS rSrS/rU 4S jr\\" \R                  S5      5      \
" \\\S9          S	                     S
S jj5       5       5       rSS jrSrU =r$ )TFEsmForSequenceClassificationi;  r   c                   > [         TU ]  U5        UR                  U l        Xl        [	        USSS9U l        [        USS9U l        g NFr  r  
classifierr@   )rB   rC   
num_labelsr   r  r  TFEsmClassificationHeadr0  r  s     r&   rC   'TFEsmForSequenceClassification.__init__E  s@      ++!&EN1&|Lr(   r  r  c                4   U	b  U	OU R                   R                  n	U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nUc  SOU R	                  Xm5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a^  
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr   r   r   r   r   r  r  r   r   r   r  )r   r  r  r0  r  r   r   r   rE   r   r   r   r   r   r  r   r  r  r   r  r  r  r  r-  s                   r&   rg   #TFEsmForSequenceClassification.callM  s    4 &1%<k$++B]B](()%'/!5#  

 "!*1~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
r(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = fNTr  r0  )r   r   r    r   r  rA   rQ   r0  r   s     r&   rQ   $TFEsmForSequenceClassification.build  s    ::
4%1txx}}-t$ .4t,8t334%%d+ 54 9 .- 54r  )r   r0  r   r  r1  
NNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r  r  r   r  r  r  r  r  r   r  rl   z-TFSequenceClassifierOutput | tuple[tf.Tensor]ri   )rm   rn   ro   rp   r  rC   r   r   r  r  r   r  r   r  rg   rQ   rr   rs   rt   s   @r&   r-  r-  ;  s     (7&7#M *+?+F+FGd+ef&.$ .28<6:377;04)-,0#'.
*.
 6.
 4	.

 1.
 5.
 ..
 '.
 *.
 !.
 .
 
7.
 g .
`	, 	,r(   r-  z
    ESM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                     ^  \ rS rSrS/rS/rU 4S jr\\" \	R                  S5      5      \" \\\S9          S
                     SS jj5       5       5       rSS jrS	rU =r$ )TFEsmForTokenClassificationi  r  r   c                (  > [         TU ]  U5        UR                  U l        [        USSS9U l        [
        R                  R                  UR                  5      U l	        [
        R                  R                  UR                  SS9U l        Xl        g r/  )rB   rC   r1  r  r  r   r   r   r  r   r   r0  r   r  s     r&   rC   $TFEsmForTokenClassification.__init__  so      ++!&EN||++F,F,FG,,,,V->->\,Rr(   r  r  c                R   U	b  U	OU R                   R                  n	U R                  UUUUUUUU	U
S9	nUS   nU R                  XS9nU R	                  U5      nUc  SOU R                  Xm5      nU	(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )z
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr5  r   r   r   r  )	r   r  r  r   r0  r  r   r   r   r6  s                   r&   rg    TFEsmForTokenClassification.call  s    0 &1%<k$++B]B](()%'/!5#  

 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
r(   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = fr9  )
r   r   r    r   r  rA   rQ   r0  r   r   r   s     r&   rQ   !TFEsmForTokenClassification.build  s    ::
4%1txx}}-t$ .4t,8t334%%tT4;;3J3J&KL 54 9 .- 54s   C+.3C<+
C9<
D
)r   r0  r   r   r  r1  r;  )r   r  r   r  r   r  r   r  r   r  r  r  r   r  r  r  r  r  r   r  rl   z*TFTokenClassifierOutput | tuple[tf.Tensor]ri   )rm   rn   ro   rp   r  r  rC   r   r   r  r  r   r  r   r  rg   rQ   rr   rs   rt   s   @r&   r=  r=    s     +4&'6&7# *+?+F+FGd+ef&+$ .28<6:377;04)-,0#'/
*/
 6/
 4	/

 1/
 5/
 ./
 '/
 */
 !/
 /
 
4/
 g /
b	M 	Mr(   r=  c                  D   ^  \ rS rSrSrSU 4S jjrSS jrSS jrSrU =r	$ )	r2  i  z-Head for sentence-level classification tasks.c                  > [         TU ]  US9  [        R                  R	                  UR
                  [        UR                  5      SSS9U l        [        R                  R                  UR                  5      U l        [        R                  R	                  UR                  [        UR                  5      SSS9U l        Xl        g )Nr@   r  r  )r   r|   rA   linearout_proj)rB   rC   r   r   r   r   r   r   r  r   r  r   r1  rG  r   r   s      r&   rC    TFEsmClassificationHead.__init__  s    d#\\''.v/G/GH	 ( 

 ||++F,F,FG**.v/G/GH	 + 
 r(   c                    US S 2SS S 24   nU R                  X2S9nU R                  U5      nU R                  X2S9nU R                  U5      nU$ )Nr   r   )r   r  rG  )rE   r*  r   r#   s       r&   rg   TFEsmClassificationHead.call  sR    Q1WLLL.JJqMLLL.MM!r(   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr  rG  )
r   r   r    r   r  rA   rQ   r   r   rG  r   s     r&   rQ   TFEsmClassificationHead.build  s    ::
4$'3tzz/

  $dkk.E.E!FG 04T*6t}}112##T41H1H$IJ 32 7 0/ 32s   3D3D
D
D!)r   r   r  r   rG  ri   r(  )
rm   rn   ro   rp   rq   rC   rg   rQ   rr   rs   rt   s   @r&   r2  r2    s    7"	K 	Kr(   r2  c                    [         R                  " X:g  [         R                  5      n[         R                  " USS9U-   U-  nXA-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`.

Args:
    x: tf.Tensor x:

Returns: tf.Tensor
r   r   )r    r   r   cumsum)r   r   r   r  incremental_indicess        r&   r   r     s@     779+RXX6D99T25KKtS,,r(   )r  r-  r=  r  r  )r   )Erq   
__future__r   r  numpynp
tensorflowr    
file_utilsr   r   r   modeling_tf_outputsr   r	   r
   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   tf_utilsr   r   utilsr   configuration_esmr   
get_loggerrm   r  r  r  r'   r.   r2   r;   r   Layerr=   rv   r   r   r  r+  rA  rU  r]  rw  r  r  ESM_START_DOCSTRINGr  r  r  r  r  r-  r=  r2  r   __all__rD  r(   r&   <module>r^     s    " 	   q q 
 
 
 G  ( 
		H	%1 )
.-
	)
** )
X%:!3!3 %:PmMell(( mM`d3++ d3NHell(( H02LU\\'' 2LjH** H2N%,,$$ N0fL## fLRZ&5<<%% Z&|H%,,$$ H:,  ' T c[0U\\'' [0	[0| cK%% K%	K%\ MObcj)+-I j) dj)Z3%,,$$ 3l  K,%9;W K,K,\  NM"68Q NMNMb%Kell00 %KP- r(   