
    cCiX                     ,   S SK r S SKJr  S SKJrJr  S SKrS SKJrJr  SSK	J
r
Jr  SSKJr  SSKJr  SS	KJrJrJrJr  SS
KJrJrJr  SSKJrJrJr  SSKJr  SSKJ r J!r!  SSK"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+  SSK,J-r-J.r.J/r/J0r0J1r1J2r2  SSK3J4r4J5r5  \Rl                  " \75      r8 " S S\$5      r9S r:S r; " S S\Rx                  5      r= " S S\*5      r> " S S\+5      r? " S S\#5      r@ " S S \&5      rA " S! S"\(5      rB " S# S$\'5      rC " S% S&\%5      rD " S' S(\)5      rE\ " S) S*\5      5       rF " S+ S,\F5      rG " S- S.\Rx                  5      rH " S/ S0\Rx                  5      rI " S1 S2\Rx                  5      rJ\\ " S3 S4\5      5       5       rK " S5 S6\Rx                  5      rL " S7 S8\Rx                  5      rM " S9 S:\15      rN " S; S<\25      rO " S= S>\/5      rP " S? S@\-5      rQ " SA SB\.5      rR " SC SD\05      rS " SE SF\S5      rT " SG SH\S\5      rU/ SIQrVg)J    N)	dataclass)OptionalUnion)Tensornn   )CacheDynamicCache)GenerationMixin)create_causal_mask)BaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)ModuleUtilsMixinPreTrainedModelget_parameter_dtype)auto_docstringcan_return_tuplelogging)deprecate_kwarg)OutputRecordercheck_model_inputs   )	EsmAttentionEsmEmbeddings
EsmEncoderEsmIntermediateEsmLayer	EsmOutput	EsmPoolerEsmSelfAttentionEsmSelfOutput)LlamaAttentionLlamaDecoderLayerLlamaMLPLlamaPreTrainedModelLlamaRMSNormLlamaRotaryEmbedding   )EvollaConfigSaProtConfigc                   (   ^  \ rS rSrU 4S jrSrU =r$ )EvollaSaProtEmbeddingsA   c                 2   > [         TU ]  U5        S U l        g N)super__init__position_idsselfconfig	__class__s     c/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/evolla/modular_evolla.pyr3   EvollaSaProtEmbeddings.__init__B   s          )r4   )__name__
__module____qualname____firstlineno__r3   __static_attributes____classcell__r8   s   @r9   r.   r.   A   s    ! !r;   r.   c                 V    U R                  SSS9u  p[        R                  " U* U4SS9$ )Nr   dim)chunktorchcat)xx1x2s      r9   rotate_half_esmrM   H   s-    WWQBWFB99rc2YB''r;   c                     US S 2S S 2S U R                   S   2S S 24   nUS S 2S S 2S U R                   S   2S S 24   nX-  [        U 5      U-  -   $ )N)shaperM   )rJ   cossins      r9   apply_rotary_pos_emb_esmrS   M   sW    
aMaggbkM1$
%C
aMaggbkM1$
%CG*S011r;   c                      ^  \ rS rSr% Sr\R                  \S'   S\4U 4S jjr	SS jr
S\R                  S\R                  S	\\R                  \R                  4   4S
 jrSrU =r$ )EvollaSaProtRotaryEmbeddingT   z
Rotary position embeddings based on those in
[RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
matrices which depend on their relative positions.
inv_freqrF   c           	         > [         TU ]  5         SS[        R                  " SUS[        R                  S9R                  5       U-  -  -  nU R                  SU5        S U l        S U l        S U l	        g )N      ?i'  r   r   dtyperW   )
r2   r3   rH   arangeint64floatregister_buffer_seq_len_cached_cos_cached_sin_cached)r6   rF   rW   r8   s      r9   r3   $EvollaSaProtRotaryEmbedding.__init__]   sg    %ELLC%++$N$T$T$VY\$\]^Z2#r;   c                 j   UR                   U   nX0R                  :w  d$  U R                  R                  UR                  :w  a  X0l        [        R
                  " UR                   U   UR                  S9R                  U R                  5      n[        R                  " X@R                  5      n[        R                  " XU4SS9R                  UR                  5      nUR                  5       S S S S 2S S 24   U l        UR                  5       S S S S 2S S 24   U l        U R                  U R                  4$ )NdevicerD   rE   )rP   r`   ra   rf   rH   r\   type_asrW   outerrI   torQ   rR   rb   )r6   rJ   seq_dimensionseq_lentfreqsembs          r9   _update_cos_sin_tables2EvollaSaProtRotaryEmbedding._update_cos_sin_tablesg   s    ''-( ***d.>.>.E.E.Q#* QWW]3AHHEMMdmm\AKK==1E))UN366qxx@C"wwytQ)9:D"wwytQ)9:D!1!111r;   qkreturnc                    U R                  USS9u  U l        U l        [        XR                  U R                  5      R	                  UR
                  S9[        X R                  U R                  5      R	                  UR
                  S94$ )NrO   )rj   rZ   )ro   ra   rb   rS   ri   r[   )r6   rq   rr   s      r9   forward#EvollaSaProtRotaryEmbedding.forwardw   s    -1-H-HZ\-H-]*$* %Q(8(8$:J:JKNNUVU\U\N]$Q(8(8$:J:JKNNUVU\U\N]
 	
r;   )ra   r`   rb   )r   )r<   r=   r>   r?   __doc__rH   r   __annotations__intr3   ro   tupleru   r@   rA   rB   s   @r9   rU   rU   T   s^     ll C  2 
 
%,, 
5u||A[;\ 
 
r;   rU   c                       \ rS rSrSS jrSrg)EvollaSaProtSelfAttention   Nc                    [         R                  R                  U 5        Xl        UR                  UR
                  -  S:w  a7  [        US5      (       d&  [        SUR                   SUR
                   S35      eUR
                  U l        [        UR                  UR
                  -  5      U l	        U R
                  U R                  -  U l
        [         R                  " UR                  U R                  5      U l        [         R                  " UR                  U R                  5      U l        [         R                  " UR                  U R                  5      U l        UR                  U l        U=(       d    [#        USS5      U l        S U l        U R$                  S:X  d  U R$                  S	:X  aH  UR(                  U l        [         R*                  " S
UR(                  -  S-
  U R                  5      U l        O(U R$                  S:X  a  [/        U R                  S9U l        UR0                  U l        X0l        SU l        U R0                  =(       a    U(       + U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_queryr   r*   rotaryrE   rY   )r   Moduler3   r7   hidden_sizenum_attention_headshasattr
ValueErrorry   attention_head_sizeall_head_sizeLinearquerykeyvalueattention_probs_dropout_probdropoutgetattrr   rotary_embeddingsmax_position_embeddings	Embeddingdistance_embeddingrU   
is_decoder	layer_idxscaling	is_causal)r6   r7   r   r   is_cross_attentions        r9   r3   "EvollaSaProtSelfAttention.__init__   s   
		4  : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
::'> (
'-zC
$ "&''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD#))X5%@TE]E]%^D" ++"C1C-Cr;   )r   r   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   )NNF)r<   r=   r>   r?   r3   r@    r;   r9   r|   r|      s     Dr;   r|   c                       \ rS rSrSrg)EvollaSaProtSelfOutput   r   Nr<   r=   r>   r?   r@   r   r;   r9   r   r          r;   r   c                       \ rS rSrSrg)EvollaSaProtAttention   r   Nr   r   r;   r9   r   r      r   r;   r   c                       \ rS rSrSrg)EvollaSaProtIntermediate   r   Nr   r   r;   r9   r   r      r   r;   r   c                       \ rS rSrSrg)EvollaSaProtOutput   r   Nr   r   r;   r9   r   r      r   r;   r   c                       \ rS rSrSrg)EvollaSaProtLayer   r   Nr   r   r;   r9   r   r      r   r;   r   c                       \ rS rSrSrg)EvollaSaProtEncoder   r   Nr   r   r;   r9   r   r      r   r;   r   c                       \ rS rSrSrg)EvollaSaProtPooler   r   Nr   r   r;   r9   r   r      r   r;   r   c                   `    \ rS rSr% \\S'   S/rSrSrSr	\
\" \SSS9/\" \SSS9/S	.rS
 rSrg)EvollaSaProtPreTrainedModel   r7   r   Tr*   	attention)index
layer_namecrossattention)hidden_states
attentionscross_attentionsc                    U R                   R                  n[        U[        R                  5      (       aW  UR
                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ad  UR
                  R                  R                  SUS9  UR                  b2  UR
                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR
                  R                  R                  S5        gg)zInitialize the weights        meanstdNrY   )r7   initializer_range
isinstancer   r   weightdatanormal_biaszero_r   padding_idx	LayerNormfill_r6   moduler   s      r9   _init_weights)EvollaSaProtPreTrainedModel._init_weights   s   kk++fbii((MM&&CS&9{{&  &&( '--MM&&CS&9!!-""6#5#56<<> .--KK""$MM$$S) .r;   r   N)r<   r=   r>   r?   r,   rx   _no_split_modules_supports_flash_attn_supports_sdpa_supports_attention_backendr   r   r|   _can_record_outputsr   r@   r   r;   r9   r   r      sX    ,-N"& +%&?qU`ab4AJZ[
*r;   r   c                   *  ^  \ rS rSrS\4U 4S jjrS rS rS r\	" 5        SS\
\R                     S\
\R                     S	\\\R                     \4   4S
 jj5       r  SS\S\\   S\
\R$                     S\
\R&                     S	\4
S jjrSrU =r$ )EvollaSaProtProteinEncoder   r7   c                 d   > [         TU ]  U5        [        U5      U l        [	        U5      U l        g r1   )r2   r3   r.   
embeddingsr   encoderr5   s     r9   r3   #EvollaSaProtProteinEncoder.__init__   s(     08*62r;   c                 .    U R                   R                  $ r1   r   word_embeddingsr6   s    r9   get_input_embeddings/EvollaSaProtProteinEncoder.get_input_embeddings   s    ...r;   c                 $    XR                   l        g r1   r   r6   r   s     r9   set_input_embeddings/EvollaSaProtProteinEncoder.set_input_embeddings   s    */'r;   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr   layerr   prune_heads)r6   heads_to_pruner   headss       r9   _prune_heads'EvollaSaProtProteinEncoder._prune_heads   s<    
 +002LELLu%//;;EB 3r;   	input_idsattention_maskrs   c                 0   UR                  5       nUu  pEUR                  nUc  [        R                  " XE4US9nU R	                  XS9nU R                  X#5      nU R                  XxS9n	U	S   n
[        U
U	R                  U	R                  U	R                  S9$ )Nre   r   r   )r   r   )last_hidden_stater   r   r   )sizerf   rH   onesr   get_extended_attention_maskr   r   r   r   r   )r6   r   r   input_shape
batch_size
seq_lengthrf   inputs_embedsextended_attention_maskencoder_outputssequence_outputs              r9   ru   "EvollaSaProtProteinEncoder.forward   s      nn&!,
!!!"ZZ*)A6RN)["&"B"B>"_,,},])!,;-)77&11,==	
 	
r;   r   rf   r[   c                 P   Uc  [        U 5      nUR                  5       S:X  a  U R                  R                  (       d  Ub  [        R
                  " S[        5        UR                  5       S:X  a  USS2SSS2SS24   nOqUR                  5       S:X  aA  U R                  R                  (       a  [        R                  " X!U5      nO*USS2SSSS24   nO[        SU SUR                   S35      eUR                  US9nS	U-
  [        R                  " U5      R                  -  nU$ )
a  
Makes broadcastable attention and causal masks so that future and masked tokens are ignored.

Arguments:
    attention_mask (`torch.Tensor`):
        Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
    input_shape (`Tuple[int]`):
        The shape of the input to the model.

Returns:
    `torch.Tensor` The extended attention mask, with a the same dtype as `attention_mask.dtype`.
Nr   zNThe `device` argument is deprecated and will be removed in v5 of Transformers.r   z!Wrong shape for input_ids (shape z) or attention_mask (shape r   rZ   rY   )r   rF   r7   r   warningswarnFutureWarningr   *create_extended_attention_mask_for_decoderr   rP   ri   rH   finfomin)r6   r   r   rf   r[   r   s         r9   r   6EvollaSaProtProteinEncoder.get_extended_attention_mask  s    & ='-E""$)dkk.D.D!dfs
 1$&4Qa]&C#!Q& {{%%*:*e*e+' +9D$9I*J'3K=@[\j\p\p[qqrs  #:"<"<5"<"I#&)@#@EKKPUDVDZDZ"Z&&r;   )r   r   r1   )NN)r<   r=   r>   r?   r,   r3   r   r   r   r   r   rH   r   r   rz   r   ru   ry   rf   r[   r   r@   rA   rB   s   @r9   r   r      s    3| 3
/0C  26
ELL)
 !.
 
uU\\"$PP	Q	
 
8 *.'+6'6' 3Z6' &	6'
 $6' 
6' 6'r;   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )!EvollaSequenceCompressorAttentioniF  c                 X  > [         TU ]  5         US-  U l        X0l        X#-  n[        R
                  " U5      U l        [        R
                  " U5      U l        [        R                  " XSS9U l	        [        R                  " XS-  SS9U l
        [        R                  " XASS9U l        g )N      Fr   r   )r2   r3   scaler   r   r   
norm_medianorm_latentsr   to_qto_kvto_out)r6   rF   dim_headr   	inner_dimr8   s        r9   r3   *EvollaSequenceCompressorAttention.__init__G  s    t^

$	,,s+LL-IIc59	YYsM>
ii	U;r;   c                 &   U R                  U5      nU R                  U5      nU R                  nU R                  U5      n[        R
                  " X4SS9nU R                  U5      R                  SSS9u  pxUR                  UR                  S5      UR                  S5      US5      R                  SSSS5      nUR                  UR                  S5      UR                  S5      US5      R                  SSSS5      nUR                  UR                  S5      UR                  S5      US5      R                  SSSS5      nXPR                  -  n[        R                  " XWR                  SS5      5      n	XR                  SSS	9R                  5       -
  n	U	R                   u  pp[        R"                  " X5      R%                  UR&                  5      nUS
S
2S
S
S
S
24   nUS
S
S
2S
S
2S
4   nUU-  nU	R)                  SU-
  R+                  5       S5      n	U	R-                  SS9n[        R                  " UU5      nUR                  SSSS5      nUR/                  UR                  S5      UR                  S5      S5      nU R1                  U5      $ )z
Args:
    x (torch.Tensor): image features
        shape (b, n1, D)
    latent (torch.Tensor): latent features
        shape (b, n2, D);  n2: num of latent tokens
rO   rE   r   rD   r   r*   r   TrF   keepdimNg     )r  r  r   r  rH   rI   r  rG   viewr   permuter  matmul	transposeamaxdetachrP   r   ri   rf   masked_fillboolsoftmaxreshaper  )r6   rJ   latentsmaskhrq   kv_inputrr   vsimbsnhskdokdr   mask_expones_expattnouts                      r9   ru   )EvollaSequenceCompressorAttention.forwardT  s2    OOA##G,JJIIg99a\r2zz(#))2 * 
 FF166!9affQiB/771aCFF166!9affQiB/771aCFF166!9affQiB/771aC

N ll1kk"b12HHTH299;;99zz""%%dkk24q()aD()("ooq4xoo/6{{r{"ll4#kk!Q1% kk#((1+sxx{B7{{3r;   )r   r  r  r  r  r  r  )@      r<   r=   r>   r?   r3   ru   r@   rA   rB   s   @r9   r
  r
  F  s    <)  ) r;   r
  c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )EvollaFeedForwardi  c                   > [         TU ]  5         [        X-  5      n[        R                  " U5      U l        [        R                  " XSS9U l        [        R                  " 5       U l	        [        R                  " X1SS9U l
        g NFr  )r2   r3   ry   r   r   normr   fc1GELU
activationfc2)r6   rF   multr  r8   s       r9   r3   EvollaFeedForward.__init__  sZ    
O	LL%	99S%8'')99Y%8r;   c           	      ~    U R                  U R                  U R                  U R                  U5      5      5      5      $ r1   )r>  r=  r;  r:  )r6   rJ   s     r9   ru   EvollaFeedForward.forward  s+    xx1(>?@@r;   )r=  r;  r>  r:  )   r5  rB   s   @r9   r7  r7    s    9A Ar;   r7  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )!EvollaSequenceCompressorResampleri  r7   c                   > [         TU ]  5         UR                  R                  nUR                  U l        [        R                  " [        R                  " U R
                  U5      SS9U l
        [        R                  " / 5      U l        [        UR                  5       Ha  nU R                  R                  [        R                  " [!        X!R"                  UR$                  S9['        X!R(                  S9/5      5        Mc     [        R*                  " UR                  5      U l        [        R.                  " X!R                  5      U l        g )NT)requires_grad)rF   r  r   )rF   r?  )r2   r3   protein_encoder_configr   resampler_num_latentsnum_latentsr   	ParameterrH   randnr$  
ModuleListlayersrangeresampler_depthappendr
  resampler_dim_headresampler_headsr7  resampler_ff_multr   r:  r   protein_projector)r6   r7   protein_repr_dim_r8   s       r9   r3   *EvollaSequenceCompressorResampler.__init__  s    !88DD!77||EKK0@0@BR$ScghmmB'v--.AKK9 0;T;T\b\r\r *.>E]E]^		 / LL!3!34	!#+;=O=O!Pr;   c                 d   UR                   S   nUR                   u  pE[        R                  " X@R                  5      R	                  UR
                  5      n[        R                  " X&4SS9n[        R                  " U5      R	                  U R                  R
                  5      nU R                  S    UR                  SSS5      -  nUR	                  UR                  5      nU R                   H  u  pU	" XU5      U-   nU
" U5      U-   nM     U R                  U5      nU R                  U5      $ )Nr   r*   rE   rD   )rP   rH   r   rJ  ri   rf   rI   r$  r  r[   rN  rU  r:  )r6   embedsr%  br*  rW  latent_maskr   r$  r0  fftransformed_features               r9   ru   )EvollaSequenceCompressorResampler.forward  s    LLO

jj%5%5699$++Fyy$,!4 zz!} 3 34,,t$tyyQ'::**V\\*HD6D1G;GkG+G $ #44W=yy,--r;   )r$  rN  r:  rJ  rU  )	r<   r=   r>   r?   r+   r3   ru   r@   rA   rB   s   @r9   rE  rE    s    Q| Q*. .r;   rE  c                       \ rS rSr% Sr\\R                     \S'   Sr	\\R                     \S'   Sr
\\\R                  S4      \S'   Sr\\\R                  S4      \S'   Srg)	EvollaProteinEncoderModelOutputi  Nsequence_compressor_outputr   .r   r   r   )r<   r=   r>   r?   rb  r   rH   FloatTensorrx   r   r   rz   r   r@   r   r;   r9   ra  ra    so     ?C):): ;B59x 1 129=AM8E%"3"3S"89:A:>Ju00#567>r;   ra  c                   t   ^  \ rS rSrS\4U 4S jjr\S\R                  S\R                  4S j5       r
SrU =r$ )EvollaProteinEncoderi  r7   c                 n   > [         TU ]  5         [        UR                  S9U l        [        US9U l        g )Nr7   )r2   r3   r   rH  modelrE  sequence_compressor_resamplerr5   s     r9   r3   EvollaProteinEncoder.__init__  s.    /v7T7TU
-NV\-]*r;   r   r   c                     U R                  XS9nUR                  nU R                  XR5      n[        UUR                  S9$ )Nr   )rb  r   )rh  r   ri  ra  )r6   r   r   kwargsprotein_outputprotein_embedssequence_reprs          r9   ru   EvollaProteinEncoder.forward  sF    iW'99::>Z.'4,>>
 	
r;   )rh  ri  )r<   r=   r>   r?   r+   r3   r   rH   
LongTensorrc  ru   r@   rA   rB   s   @r9   re  re    s?    ^| ^
 
!1!1 
5CTCT 
 
r;   re  c                      ^  \ rS rSr   SS\\   S\\   S\\   4U 4S jjjrS r\" SSS	S
9       SS j5       r	Sr
U =r$ )#EvollaSequenceAlignerCrossAttentioni  protein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                   > [         TU ]  5         UR                  U l        UR                  U l        U R                  S-  U l        [        U R                  U R                  -  5      U l        U R                  U R                  -  U l        UR                  nUR                  nUR                  n[        R                  " U R                  U R                  5      U l        UbK  [        R                  " X R                  5      U l        [        R                  " X R                  5      U l        OS U l        S U l        UbK  [        R                  " X0R                  5      U l        [        R                  " X0R                  5      U l        OS U l        S U l        UbK  [        R                  " X@R                  5      U l        [        R                  " X@R                  5      U l        OS U l        S U l        [)        U R                  5      U l        [        R,                  " U5      U l        [        R                  " U R                  U R                  US9U l        [3        U R                  U5      U l        [        R6                  " [8        R:                  " S/5      5      U l        [        R6                  " [8        R:                  " S/5      5      U l        g )Nr  r  r   ) r2   r3   r   r   r  ry   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normDropoutr   out_projr7  r]  rK  rH   tensorgate_attentiongate_ffw)	r6   r7   rt  ru  rv  r   enable_biasffn_multr8   s	           r9   r3   ,EvollaSequenceAlignerCrossAttention.__init__  s    	!--#)#=#= --t3
#&t'7'7$:R:R'R#S !558P8PP'-'R'R$00**YYt//1C1CD
*!yy)<>P>PQD!#+>@R@R!SD#D!%D ,!#+@BTBT!UD#%99-BDVDV#WD !%D#'D &99_6H6HIDLYY8J8JKDNDL!DN+D,<,<=zz">?		$"2"2D4D4D;W#D$4$4h? ll5<<+>?U\\3%%89r;   c	                    XgU/n	U	 V
s/ s H	  oc  M  U
PM     n	n
U	(       d  [        S5      e[        R                  " U	SS9n	U R                  U5      nU R	                  U5      nU R
                  bA  U R                  b4  UR                  U5      nU R                  U5      nU R                  U5      nOSnSnU R                  bA  U R                  b4  UR                  U5      nU R                  U5      nU R                  U5      nOSnSnU R                  bA  U R                  b4  UR                  U5      nU R                  U5      nU R                  U5      nOSnSnXU/nU V
s/ s H	  oc  M  U
PM     nn
[        R                  " USS9nXU/nU V
s/ s H	  oc  M  U
PM     nn
[        R                  " USS9nUR                  5       SS U R                  U R                  4-   nUR                  " U6 R!                  SSSS5      nUR                  5       SS U R                  U R                  4-   nUR                  " U6 R!                  SSSS5      nUR                  5       SS U R                  U R                  4-   nUR                  " U6 R!                  SSSS5      nXR"                  -  nUcN  [        R$                  " UR                  S5      UR                  S5      5      R                  UR&                  5      nUSS2SSS2S4   U	SS2SSSS24   -  n[        R(                  " UUR+                  SS	5      5      nUUR-                  SS
S9R/                  5       -
  nUR1                  SU-
  R3                  5       [        R4                  " UR6                  5      R8                  5      n[:        R<                  " SS9" U5      n[        R(                  " UU5      nUR!                  SSSS5      R?                  5       nUR                  5       SS	 U R@                  4-   nUR                  " U6 nU RC                  U5      nU$ s  sn
f s  sn
f s  sn
f )z
query_states: text
key_value_states: protein
query_states: [bs, query_seq_len, dim]
key_value_states: [bs, kv_seq_len, dim]
query_attn_mask: [bs, query_seq_len]
kv_attn_mask: [bs, kv_seq_len]
Nz=At least one modality should be provided for cross attention.r*   rE   rD   r   r   r   rO   Tr  )"r   rH   rI   r  r   r{  r|  ri   r}  r~  r  r  r   r   r   r  r  r  r   rf   r  r  r  r  r   r!  r  r[   r  r   Softmax
contiguousr   r  )r6   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskrW  query_layerkey_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msa	key_layervalue_layernew_query_layer_shapenew_key_layer_shapenew_value_layer_shaper   attn_weightsattention_scoresattention_probscontext_layernew_context_layer_shapes                                r9   cross_attention3EvollaSequenceAlignerCrossAttention.cross_attention  sK   * -FVW#/A<a<A\]]yy15)),7 jj-'D,>,>,J'?'B'B<'P$ $ 0 01I J"&"4"45M"N $"&)d.B.B.N)C)F)F|)T&"&"4"45O"P$($8$89S$T!"&$(!<<#(B#7#:#:<#H  LL)=>M"nn-ABO M"O&]K	 );	1Q		;IIiQ/	*?S"-?+Qq+?ii3 + 0 0 23B 7$$$$;
 !
 "&&(=>FFq!QPQR'nn.s3$$$$7
 
 NN$78@@Aq!L	 + 0 0 23B 7$$$$;
 !
 "&&(=>FFq!QPQR!JJ. "#jj):):1)=|?P?PQR?STWWXdXkXklO(D!T)9:\!TSWYZJZ=[[||K1D1DR1LM#l&7&7B&7&M&T&T&VV'33%%'\5G5G)H)L)L
 **,-=> _kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**,CDm4q BL < @s"   QQ"Q,QQQpast_key_valuepast_key_values4.58new_nameversionc                 z   Ubv  UR                   u  pnUcc  [        R                  " X5      R                  U	R                  5      U	R                  X4S9R                  -  R                  UR                  5      nOS nUby  UR                   u  nnnUce  [        R                  " UU5      R                  U	R                  5      U
R                  UU4S9R                  -  R                  UR                  5      nOS nUby  UR                   u  nnnUce  [        R                  " UU5      R                  U	R                  5      UR                  UU4S9R                  -  R                  UR                  5      nOS nUnUb  UR                  5       (       d0  Ub  UR                  5       (       d  Ub  UR                  5       (       ay  UnU R                  UUUUUUUUS9n[        R                  " U R                  5      U-  nUU-   nUnU R                  U5      [        R                  " U R                  5      -  nUU-   nU$ )N)r   )r  r  r  r  r  r  r  r  )rP   rH   r   ri   rf   expandTanyr  tanhr  r]  r  )r6   r  protein_kv_statesstructure_kv_statesmsa_kv_statesr  r  r  r  protein_batch_maskstructure_batch_maskmsa_batch_maskr  r*  protein_kv_seq_lenrF   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r9   ru   +EvollaSequenceAlignerCrossAttention.forwardw  sP     (*;*A*A'BC#+JJr699:L:S:ST(//6H5M/NPPQ"&--. %
 $( *,?,E,E)B$c%-JJr#78;;<N<U<UV*118Lb7Q1RTTU"(//0 '
 &*"$&3&9&9#B'JJr>2556H6O6OP$++."1E+FHHI"]))* !
  $$ */C/G/G/I/I#/4J4N4N4P4P).>.B.B.D.D$H 00*):+>%2 /%9'=!1 1 	M "JJt':':;mKM$}4M$H GGM2UZZ5NNM$}4Mr;   )r   r   r  r   r]  r  r  r   r  r{  r}  r   r  r   r  r  r|  r~  )NNNNNNNNNN)r<   r=   r>   r?   r   ry   r3   r  r   ru   r@   rA   rB   s   @r9   rs  rs    s     .2/3)-1: &c]1:  (}	1:
 "#1: 1:fn` %0A6R "#!G SGr;   rs  c                       \ rS rSrSrg)r  i  r   Nr   r   r;   r9   r  r    r   r;   r  c                       \ rS rSrSrg)EvollaRotaryEmbeddingi  r   Nr   r   r;   r9   r  r    r   r;   r  c                       \ rS rSrSrg)	EvollaMLPi  r   Nr   r   r;   r9   r  r    r   r;   r  c                       \ rS rSrSrg)EvollaAttentioni  r   Nr   r   r;   r9   r  r    r   r;   r  c                     ^  \ rS rSrS\S\4U 4S jjr\" SSSS9            SS	\R                  S
\
\R                  \R                  4   S\\R                     S\\R                     S\\   S\\   S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     4S jj5       rSrU =r$ )EvollaDecoderLayeri  r7   r   c                    > [         TU ]  X5        US-   [        UR                  UR                  -  S5      -  S:X  a  [        UUR                  S9U l        g g )Nr*   r   )rt  )r2   r3   maxnum_hidden_layersaligner_num_add_layersrs  r   adapterr6   r7   r   r8   s      r9   r3   EvollaDecoderLayer.__init__  sY    +MS!9!9V=Z=Z!Z\]^^bcc>$*$6$6DL dr;   r  r  r  r  r   position_embeddingsr   r4   	use_cachecache_positionr  r  r  r  r  r  r  c                    UnU R                  U5      nU R                  " SUUUUUUUS.UD6u  nnUU-   nUnU R                  U5      nU R                  U5      nUU-   n[	        U S5      (       a  U R                  UUU	U
UUUUS9nU$ )N)r   r   r4   r  r  r  r  r  )r  r  r  r  r  r  r  r  r   )input_layernorm	self_attnpost_attention_layernormmlpr   r  )r6   r   r  r   r4   r  r  r  r  r  r  r  r  r  r  rl  r  rW  s                     r9   ru   EvollaDecoderLayer.forward  s    & !,,];  >> 	
')%+) 3	
 	
q !=0 !55mD/ =04## LL*"3$7+ /#5%9- ) 	M r;   )r  )NNNFNNNNNNNN)r<   r=   r>   r?   r+   ry   r3   r   rH   r   rz   r   rq  r	   r!  ru   r@   rA   rB   s   @r9   r  r    sb   |   %0A6R
 2637+/$)59486:04597;15265||5 #5<<#=>5 !.	5
 u//05 "%5 D>5 !!1!125 $ELL15 &ell35  -5 %U\\25 'u||45 !.5 "%,,/5 S5r;   r  c                   .    \ rS rSrSrSrSr/ SQrS rSr	g)EvollaPreTrainedModeli  F)r  rE  rs  c                    U R                   R                  n[        R                  " X5        [	        U[
        5      (       ad  UR                  R                  5         UR                  R                  5         UR                  R                  R                  R                  S5        g [	        U[        5      (       a%  UR                  R                  R                  SUS9  g g )NrY   r   r   )r7   r   r   r   r   rs  r  r   r  r  r   r   r   rE  r$  r   r   s      r9   r   #EvollaPreTrainedModel._init_weights  s    kk++%%d3fABB!!'')OO!!#!!((--33C8 ABBNN''Sc': Cr;   r   N)
r<   r=   r>   r?   r   _supports_flex_attnr   r   r   r@   r   r;   r9   r  r    s!     "';r;   r  c            !         ^  \ rS rSrS\4U 4S jjrS rS r\\	" 5                    SS\
\R                     S\
\R                     S\
\R                     S	\
\   S
\
\R                     S\
\   S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\
\R                     S\\\4   4S jj5       5       rSrU =r$ )EvollaModeli)  r7   c           
      8  > [         TU ]  U5        UR                  U l        UR                  U l        [
        R                  " U R                  UR                  U R                  5      U l        [        US9U l
        [
        R                  " [        UR                  5       Vs/ s H  n[        UUS9PM     sn5      U l        [!        UR                  UR"                  S9U l        ['        US9U l        [+        USS5      U l        U R/                  5         g s  snf )Nrg  )r7   r   )epsgradient_checkpointingF)r2   r3   pad_token_idr   
vocab_sizer   r   r   embed_tokensre  protein_encoderrM  rO  r  r  rN  r  rms_norm_epsr:  r  
rotary_embr   r  	post_initr  s      r9   r3   EvollaModel.__init__*  s     !.. ++LL&:L:LdN^N^_36Bmm "'v'?'?!@
 "AI	 #!' "A
 "&"4"4&:M:MN	/v>&-f6NPU&V#s   #Dc                     U R                   $ r1   r  r   s    r9   r    EvollaModel.get_input_embeddings?  s       r;   c                     Xl         g r1   r  r   s     r9   r    EvollaModel.set_input_embeddingsB  s    !r;   r   r   r4   r  r   r  r  protein_input_idsprotein_attention_maskstructure_feats	msa_featsr  r  rs   c                    USL USL-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcD  Ub  UR	                  5       OSn[
        R                  " XUR                  S   -   UR                  S9nUc  UR                  S5      nSnSnUbO  U	bL  U R                  UU	S9nUR                  n[
        R                  " S/UR                  S   -  UR                  S9n[        U R                  UUUUS	9nUnU R                  UU5      nU R                   H  nU" U4UUUUUUUU
UUUUUS
.UD6nM     U R!                  U5      n[#        UUS9nU$ )a  
protein_input_ids (torch.LongTensor):
    The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
protein_attention_mask (torch.Tensor):
    The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
structure_feats (torch.FloatTensor):
    The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
msa_feats (torch.FloatTensor):
    The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
structure_batch_mask (torch.Tensor):
    The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
msa_batch_mask (torch.Tensor):
    The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
Nz:You must specify exactly one of input_ids or inputs_embedsrg  r   r*   re   r   T)r7   input_embedsr   r  r  )r   r4   r  r  r  r  r  r  r  r  r  r  r  )r   r  )r   r  r
   r7   get_seq_lengthrH   r\   rP   rf   	unsqueezer  rb  r  r   r  rN  r:  r   )r6   r   r   r4   r  r   r  r  r  r  r  r  r  r  rl  past_seen_tokensprotein_featsr  protein_outputscausal_maskr   r  decoder_layeroutputs                           r9   ru   EvollaModel.forwardE  s   B -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ ]5H5H5K"KTaThThN )33A6L!(-C-O"22+5 3 O ,FFM!&tf7H7N7Nq7Q.QZkZrZr!s(;;&))+
 & #oom\J![[M)*) /#-$7"/$3'#5%9- . M )& 		-0(++
 r;   )r  r  rN  r:  r   r  r  r  )NNNNNNNNNNNNN)r<   r=   r>   r?   r+   r3   r   r   r   r   r   rH   rq  r   r	   rc  r!  r   rz   r   ru   r@   rA   rB   s   @r9   r  r  )  s   | *!"  151537+/59$(598<9=7;157;15bE,,-b !.b u//0	b
 "%b   1 12b D>b !!1!12b $E$4$45b !) 6b "%"3"34b E--.b 'u||4b !.b  
u--	.!b  br;   r  c                   "  ^  \ rS rSrU 4S jrS rS r\\       SS\	\
R                     S\	\
R                     S\	\
R                     S\	\
R                     S	\	\
R                     S
\	\
R                     S\	\   4S jj5       5       rSrU =r$ )EvollaForProteinText2Texti  c                    > [         TU ]  U5        [        U5      U l        UR                  U l        [
        R                  " UR                  U R                  SS9U l        U R                  5         g r9  )
r2   r3   r  rh  r  r   r   r   lm_headr  r5   s     r9   r3   "EvollaForProteinText2Text.__init__  sQ      (
 ++yy!3!3T__5Qr;   c                 6    U R                   R                  5       $ r1   )rh  r   r   s    r9   r   .EvollaForProteinText2Text.get_input_embeddings  s    zz..00r;   c                 8    U R                   R                  U5      $ r1   )rh  r   r   s     r9   r   .EvollaForProteinText2Text.set_input_embeddings  s    zz..u55r;   r   r   r   labelsr  r  r  c           
         U R                   " SUUUUUUS.UD6n	U	S   n
U R                  U
5      nSnUb  U R                  " SXU R                  S.UD6n[	        UUU	R
                  U	R                  U	R                  S9nU$ )a|  
protein_input_ids (torch.LongTensor):
    The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
protein_attention_mask (torch.Tensor):
    The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

Example:

```python
>>> from transformers import EvollaProcessor, EvollaForProteinText2Text
>>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
>>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

>>> protein_information = {
    "aa_seq": "your amino acid sequence",
    "foldseek": "your foldseek sequence",
}
>>> question = "What is the function of this protein?"
>>> message = [
    {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
    {"role": "user", "content": question},
]

>>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
>>> outputs = model.generate(**inputs)

>>> print(processor.batch_decode(outputs, skip_special_tokens=True))
```)r   r   r   r  r  r  r   N)logitsr	  r  )lossr  r  r   r   r   )rh  r  loss_functionr  r   r  r   r   )r6   r   r   r   r	  r  r  r  rl  outputsr   r  r  
lm_outputss                 r9   ru   !EvollaForProteinText2Text.forward  s    T ** 
)'/#9
 
  
m,%%iVtibhiD+#33!//))

 r;   )r  rh  r  r  )r<   r=   r>   r?   r3   r   r   r   r   r   rH   rq  r   rc  r!  ru   r@   rA   rB   s   @r9   r  r    s    16  151559-18<9=$(?E,,-? !.?   1 12	?
 ))*? $E$4$45? !) 6? D>?  ?r;   r  )r  r  r  )Wr  dataclassesr   typingr   r   rH   r   r   cache_utilsr	   r
   
generationr   masking_utilsr   modeling_outputsr   r   r   r   modeling_utilsr   r   r   utilsr   r   r   utils.deprecationr   utils.genericr   r   esm.modeling_esmr   r   r   r   r   r    r!   r"   r#   llama.modeling_llamar$   r%   r&   r'   r(   r)   configuration_evollar+   r,   
get_loggerr<   loggerr.   rM   rS   r   rU   r|   r   r   r   r   r   r   r   r   r   r
  r7  rE  ra  re  rs  r  r  r  r  r  r  r  r  __all__r   r;   r9   <module>r!     s     ! "   . ) /  U T 
 1 ?
 
 
  = 
		H	%!] !(
2)
")) )
X!D 0 !DH	] 		L 		 		 		 		* 		 	 */ * *>c'!< c'L7 		 7 tA		 A'.		 '.T ?k ?  ?
299 
$l")) l^	L 		0 		 		n 	?* ?D;0 ;*@' @FP 5 Pf Pr;   