
    cCi,                        S r SSKrSSKrSSKrSSKJrJrJr  SSKJ	r	J
r
  SSKrSSKJr  \\\R                   4   r\\\	4   rSr\R(                  " SS	9 " S
 S5      5       rS\S\4S jrSS\S\S\\   4S jjrS\S\S\4S jrS\S\4S jrS\S\R                   4S jr     SS\S\S\
\R                      S\
\R                      S\
\   S\
\\      S\
\\      S\4S jjrg) zProtein data type.    N)IteratorMappingSequence)AnyOptional   )residue_constantsg{Gz?T)frozenc                       \ rS rSr% Sr\R                  \S'   \R                  \S'   \R                  \S'   \R                  \S'   \R                  \S'   Sr\	\R                     \S	'   Sr
\	\   \S
'   Sr\	\\      \S'   Sr\	\\      \S'   Srg)Protein"   z!Protein structure representation.atom_positionsaatype	atom_maskresidue_index	b_factorsNchain_indexremarkparentsparents_chain_index )__name__
__module____qualname____firstlineno____doc__npndarray__annotations__r   r   r   strr   r   r   int__static_attributes__r       h/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/esm/openfold_utils/protein.pyr   r   "   s    + JJ JJ zz ::
 zz )-K"**%, !FHSM  (,GXhsm$+ 48(3-07r#   r   proteinnet_strreturnc                    Sn[         R                  " X5       Vs/ s H$  n[        U5      S:  d  M  UR                  5       PM&     nn[	        USS S2   USS S2    Vs/ s H  oDR                  S5      PM     sn5      n/ SQnS nS nS n	U GH  n
SU
S   :X  a  U
S   S   R                  5       n[        [        U5      5       H  nX   [        R                  ;  d  M  SX'   M!     [        R                  " U Vs/ s H1  n[        R                  R                  U[        R                  5      PM3     sn5      nM  S	U
S   :X  Ga  / n[        S
5       H?  nUR                  [        [        [         U
S   U   R                  5       5      5      5        MA     [        R                  " U5      n[        R"                  " [        US   5      S
-  [        R$                  S
45      R'                  [        R(                  5      n[+        U5       HA  u  nn[        R,                  " US S 2US S
24   5      US S 2[        R.                  U   S S 24'   MC     U[0        -  nGM  SU
S   :X  d  GM  [        R                  " [        [        SSS.R                  U
S   S   R                  5       5      5      5      n[        R"                  " [        U5      [        R$                  45      R'                  [        R(                  5      n	[+        U5       H   u  nnSU	S S 2[        R.                  U   4'   M"     U	US   -  n	GM     Uc   e[3        UU	U[        R4                  " [        U5      5      S S9$ s  snf s  snf s  snf )Nz(\[[A-Z]+\]\n)r      r   
)NCACz	[PRIMARY]Xz
[TERTIARY]   z[MASK])-+).N)r   r   r   r   r   )resplitlenstripzipranger	   restypesr   arrayrestype_ordergetrestype_numappendlistmapfloatzerosatom_type_numastypefloat32	enumerate	transpose
atom_orderPICO_TO_ANGSTROMr   arange)r%   tag_retagtagslgroupsatomsr   r   r   gseqi
res_symboltertiaryaxistertiary_npatommasks                      r$   from_proteinnet_stringrX   H   s   F.0hhv.N_.NsRUVYRZ]^R^{syy{.ND_.1$qt!t*VZ[\[_^_[_V`>aV`QRwwt}V`>a.bF'EFNI!A$A$q'--/C3s8_6!2!;!;; CF % XXruvrudn"0044ZARA^A^_ruvF QqT!*,HaS!T
0@0@0B%C DE !((8,KXXs8A;'71'<>O>]>]_`&abiijljtjtuN$U+4KM<<XcdeghgkjkgkdkXlKmq"3">">t"DaGH ,..N188D11%5%9%91Q47==?!KLMDI%33
 fRZZ   %U+4CD	!.99$??@ ,i(I9 < %iiF, O `>a ws   M&M&M+
98M0
protchain_idc                 T   / nU R                   nUb  UR                  SU 35        U R                  nU R                  nUb)  Ub&  [	        XT5       VVs/ s H  u  pgXa:X  d  M  UPM     nnnUb  [        U5      S:X  a  S/nUR                  SSR                  U5       35        U$ s  snnf )NREMARK r   N/APARENT  )r   r<   r   r   r5   r3   join)rY   rZ   pdb_headersr   r   r   rQ   ps           r$   get_pdb_headersrc   z   s    K[[FWVH-.llG222>!$%8!BT!Bam1!BT#g,!+''!2 345 Us   B$!B$pdb_strc                    / nUR                  S5      nU R                  nUb  UR                  SU 35        U R                  Gb  [	        U R                  5      S:  a  / nU R
                  b  0 n[        U R                  U R
                  5       H=  u  pxUR                  [        U5      / 5        U[        U5         R                  U5        M?     [        S U 5       5      n	[        U	S-   5       H0  nUR                  [        U5      S/5      n
UR                  U
5        M2     O)UR                  [        U R                  5      5        OS//nS[        [           S[        4S	 jnUR                  U" US   5      5        Sn[        U5       Hj  u  pS
U;  a  SU;  a  UR                  U5        SU;   d  M*  SX8S-      ;  d  M7  US-  nU[	        U5      :  d  X\   n
OS/n
UR                  U" U
5      5        Ml     SR                  U5      $ )zOAdd pdb headers to an existing PDB string. Useful during multi-chain
recycling
r)   r\   r   c              3   8   #    U  H  n[        U5      v   M     g 7f)N)r!   ).0	chain_idxs     r$   	<genexpr>"add_pdb_headers.<locals>.<genexpr>   s     F+Y#i..+s   r   r]   rb   r&   c                 *    SSR                  U 5       3$ )Nr^   r_   )r`   )rb   s    r$   make_parent_line)add_pdb_headers.<locals>.make_parent_line   s    !&&r#   PARENTREMARKTEREND)r2   r   r<   r   r3   r   r5   
setdefaultr    maxr6   r:   r=   r   rD   r`   )rY   rd   out_pdb_lineslinesr   parents_per_chainparent_dictrb   rQ   max_idxchain_parentsrl   chain_counterrL   s                 r$   add_pdb_headersr{      s     "MMM$E[[Fwvh/0 ||C$5$9##/02KDLL$*B*BC&&s1vr2CF#**1- D F+FFG7Q;' +A @!((7 ( $$T$,,%78#WI'HSM 'c ' )*;A*>?@M% 1!2  #A:%uU|3QM C(9$:: 1 @!&  !1-!@A ! 99]##r#   c                 t  ^! [         R                  S/-   m!S[        S[        4U!4S jjn[         R                  n/ nU R
                  nU R                  nU R                  nU R                  R                  [        R                  5      nU R                  nU R                  n	[        R                  " U[         R                  :  5      (       a  [!        S5      e[#        U 5      n
[%        U
5      S:  a  UR'                  U
5        UR(                  S   nSnSn[*        R,                  nSn[/        U5       GHO  nU" UU   5      n[1        X&U   UU   UU   5       H  u  nnnnUS	:  a  M  S
n[%        U5      S:X  a  UOSU 3nSnSnSnUS   nSnSnU	b  XU      nUS US SUS US US SUS UU   S US SUS   S US   S US   S US US SUS US 3nUR3                  U5        US-  nM     UUS-
  :H  nU	b  UUS-
  :w  a  U	US-      U:w  a
  SnU	US-      nU(       d  M  SnUS US SU" UU   5      S SUS UU   S 3n UR3                  U 5        US-  nUUS-
  :w  d  GM5  UR'                  [#        X5      5        GMR     UR3                  S5        UR3                  S5        S R5                  U5      $ )!zuConverts a `Protein` instance to a PDB string.

Args:
  prot: The protein to convert to PDB.

Returns:
  PDB string.
r-   rr&   c                 J   > [         R                  R                  TU    S5      $ )NUNK)r	   restype_1to3r:   )r}   r7   s    r$   res_1to3to_pdb.<locals>.res_1to3   s      --11(1+uEEr#   zInvalid aatypes.r   r   Ng      ?ATOM   r_    g      ?Az<6z>5z<4z>1z>3z>4z   z>8.3fr(   z>6.2fz
          z>2Trp   z      rq   r)   )r	   r7   r!   r    
atom_typesr   r   r   r   rB   r   int32r   r   anyr;   
ValueErrorrc   r3   extendshapestringascii_uppercaser6   r5   r<   r`   )"rY   r   r   	pdb_linesr   r   r   r   r   r   headersn
atom_indexprev_chain_index
chain_tags	chain_tagrQ   
res_name_3	atom_nameposrW   b_factorrecord_typenamealt_locinsertion_code	occupancyelementcharge	atom_lineshould_terminate	chain_endchain_termination_liner7   s"                                    @r$   to_pdbr      sA    !))SE1HFC FC F #--JII[[F((N&&--bhh7MI""K	vvf(44455+,,d#G
7|a!QAJ''JI1XfQi(
.1*Q>OQZ[\Q]_hij_k.l*IsD(cz K #I! 391YKDGNIlGFI&&1~6	 r":b/4)GB<b/9R. #B'r':#q6%.Qs1venU#HU#3:2,vbk+  Y'!OJ5 /m8 A:"AEzk!a%04DD#' #.q1u#5 IR.Bvhvay6I"5MQyY[n]jkl]mnp\qr # 34!OJAEz   !HIa d UR99Yr#   c                 <    [         R                  U R                     $ )aS  Computes an ideal atom mask.

`Protein.atom_mask` typically is defined according to the atoms that are reported in the PDB. This function
computes a mask according to heavy atoms that should be present in the given sequence of amino acids.

Args:
  prot: `Protein` whose fields are `numpy.ndarray` objects.

Returns:
  An ideal atom mask.
)r	   STANDARD_ATOM_MASKr   )rY   s    r$   ideal_atom_maskr     s     //<<r#   featuresresultr   r   r   r   r   c                 z    [        U S   US   US   U S   S-   Ub  UO[        R                  " US   5      UUUUS9	$ )a  Assembles a protein from a prediction.

Args:
  features: Dictionary holding model inputs.
  result: Dictionary holding model outputs.
  b_factors: (Optional) B-factors to use for the protein.
  chain_index: (Optional) Chain indices for multi-chain predictions
  remark: (Optional) Remark about the prediction
  parents: (Optional) List of template names
Returns:
  A protein instance.
r   final_atom_positionsfinal_atom_maskr   r   )	r   r   r   r   r   r   r   r   r   )r   r   
zeros_like)r   r   r   r   r   r   r   s          r$   from_predictionr   ,  s]    * !45*+/!3(4)"--O`Ha:b/
 
r#   )r   )NNNNN)r   dataclassesr1   r   collections.abcr   r   r   typingr   r   numpyr   r   r	   r    r   FeatureDictModelOutputrG   	dataclassr   rX   r!   r=   rc   r{   r   r   r   r   r#   r$   <module>r      st      	  7 7     c2::o&c3h  d#"8 "8 $"8J/3 /7 /d' S c (/$' /$C /$C /$dZ  Z S Z z=' =bjj =$ '+(, '+37 

# "**%	
 SM hsm$ "(3-0 r#   