
    cCizF                        S r SSKrSSKrSSKJr  SSKJrJr  SSKrSSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJr  SSKJr  SSKJrJrJr  SSKJrJrJ r J!r!  SSK"J#r#  SSK$J%r%  \ RL                  " \'5      r(Sr)Sr*Sr+Sr,S r- " S S\	R\                  5      r/ " S S\	R\                  5      r0 " S S\	R\                  5      r1S\00r2 " S S\	R\                  5      r3 " S  S!\	R\                  5      r4 " S" S#\	R\                  5      r5 " S$ S%\5      r6 " S& S'\	R\                  5      r7 " S( S)\	R\                  5      r8\ " S* S+\5      5       r9\ " S, S-\5      5       r:\ " S. S/\5      5       r;\ " S0 S1\5      5       r< " S2 S3\	R\                  5      r= " S4 S5\	R\                  5      r> " S6 S7\	R\                  5      r? " S8 S9\	R\                  5      r@ " S: S;\	R\                  5      rAS<rBS=rC " S> S?\5      rD " S@ SA\D5      rE\" SB\B5       " SC SD\D5      5       rF\" SE\B5       " SF SG\D5      5       rG\" SH\B5       " SI SJ\D5      5       rH\" SK\B5       " SL SM\D5      5       rISNrJ\" SO\B5       " SP SQ\D5      5       rK/ SRQrLg)SzPyTorch REALM model.    N)	dataclass)OptionalUnion)nn)CrossEntropyLoss   )ACT2FN)Cache)GradientCheckpointingLayer))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputModelOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)add_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings)deprecate_kwarg   )RealmConfigz(google/realm-cc-news-pretrained-embedderz'google/realm-cc-news-pretrained-encoderz&google/realm-cc-news-pretrained-scorerr   c           	      p    SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     [        X5       GHD  u  p[        U [         5      (       a8  SU
;  a2  [        R                  SU
 S	U R"                  R$                   S
35        MS  U
R'                  S5      (       d  U
R'                  S5      (       a9  [        U [(        5      (       a$  U
R+                  SS5      n
U
R+                  SS5      n
U
R'                  S5      (       d  U
R'                  S5      (       a'  [        U [,        5      (       a  U
R+                  SS5      n
U
R'                  S5      (       a  [        U [         5      (       a  SOSnU
R+                  SU S35      n
U
R+                  SU S35      n
U
R+                  SU S35      n
U
R+                  SU S35      n
U
R+                  SU S35      n
U
R'                  S5      (       a  [        U [.        5      (       a  SOSnU
R+                  SU S35      n
U
R+                  SU S 35      n
U
R+                  S!U S"35      n
U
R+                  S#U S$35      n
U
R+                  S%U S35      n
U
R+                  S&U S$35      n
OYU
R'                  S'5      (       aC  [        U [.        5      (       a  SOSnU
R+                  S(U S 35      n
U
R+                  S)U S"35      n
U
R1                  S*5      n
[3        S+ U
 5       5      (       a*  [        R                  SS*R5                  U
5       35        GM  U nU
 H  nUR7                  S,U5      (       a  UR1                  S-U5      nOU/nUS   S.:X  d	  US   S/:X  a  [9        US05      nO/US   S1:X  d	  US   S2:X  a  [9        US35      nO [9        UUS   5      n[=        U5      S4:  d  M  [?        US5   5      nUU   nM     WS6S S7:X  a  [9        US05      nOUS.:X  a  URA                  U5      n URB                  URB                  :X  d"   S8URB                   S9URB                   S:35       e [        R                  S;U
 35        [H        RJ                  " U5      Ul&        GMG     U $ ! [         a    [        R                  S5        e f = f! [:         a,    [        R                  SS*R5                  U
5       35         GM  f = f! [D         a1  nU=RF                  URB                  URB                  4-  sl#        e SnAff = f)<z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape readerz	Skipping z as it is not z's parameterbertclszbert/zreader/realm/zcls/zreader/cls/zrealm/ zreader/zreader/module/bert/zreader/module/cls/zreader/dense/zqa_outputs/dense_intermediate/zreader/dense_1/zqa_outputs/dense_output/zreader/layer_normalizationzqa_outputs/layer_normalizationzmodule/module/module/z	embedder/z!module/module/module/module/bert/zmodule/module/module/LayerNorm/zcls/LayerNorm/zmodule/module/module/dense/z
cls/dense/z,module/module/module/module/cls/predictions/zcls/predictions/zmodule/module/module/bert/z%module/module/module/cls/predictions/zmodule/module/zmodule/module/LayerNorm/zmodule/module/dense//c              3   ,   #    U  H
  nUS ;   v   M     g7f))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     m/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/deprecated/realm/modeling_realm.py	<genexpr>+load_tf_weights_in_realm.<locals>.<genexpr>s   s      
 nns   z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabias   r   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )'renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzip
isinstanceRealmReader	__class____name__
startswithRealmForOpenQAreplaceRealmKnowledgeAugEncoderRealmEmbeddersplitanyjoin	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargstorch
from_numpydata)modelconfigtf_checkpoint_pathr5   nptftf_path	init_varsnamesarraysnamerV   arrayreader_prefixembedder_prefixpointerm_namescope_namesnumes                       r*   load_tf_weights_in_realmrn   1   sb   
 ggoo01G
KK8	BC''0IEF (l5'BC&&w5Te	 ! 5)e[))hd.BKK)D68P8P7QQ]^_ OOF##tu'='=:eUcCdCd<<9D<<6D OOF##tu'='=:eUmCnCn<<2D ??8$$",UK"@"@BiM<< 5-7OPD<< 4t6LMD<<M?B`1abD<< 1m_D\3]^D<< <Om>noD ??233$.um$D$Db+O<< CGXX^E_`D<< AoEVVdCefD<< =/ARR\?]^D<< NSbRccsPtuD<< <@QQW>XYD<< GOK\\lImnD__-..$.um$D$Db+O<< :>O~<^_D<< 6?:K:8VWDzz#  

 
 
 KK)CHHTN#345F||,f55 hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62%g{1~>G ;1$+a.)!#,# $ #$<=(gx0GxLL'E	==EKK/  /@[Y/ 	078''.[ *\ LC  Q	
 	\ & KK)CHHTN+; <=  	FFw}}ekk22F	s5   T U&<U:!T>1U76U7:
V5,V00V5c                      ^  \ rS rSrSrU 4S jr     SS\\R                     S\\R                     S\\R                     S\\R                     S\
S	\R                  4S
 jjrSrU =r$ )RealmEmbeddings   zGConstruct the embeddings from word, position and token_type embeddings.c                 .  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        [#        USS5      U l        U R'                  S[(        R*                  " UR                  5      R-                  S5      SS9  U R'                  S	[(        R.                  " U R0                  R3                  5       [(        R4                  S
9SS9  g )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutrQ   rv   register_bufferrY   arangeexpandzerosrx   sizelongselfr]   rF   s     r*   r   RealmEmbeddings.__init__   s/   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	
    	input_idsr{   rx   inputs_embedspast_key_values_lengthreturnc                 d   Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2XWU-   24   nUcv  [        U S5      (       a-  U R                  S S 2S U24   nUR	                  US   U5      n	U	nO8[
        R                  " U[
        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      n
XJ-   nU R                  S:X  a  U R                  U5      nX-  nU R                  U5      nU R                  U5      nU$ )Nry   r   r{   r   r}   devicerw   )r   rx   hasattrr{   r   rY   r   r   r   r   r   rv   r   r   r   )r   r   r{   rx   r   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr   
embeddingsr   s                r*   forwardRealmEmbeddings.forward   sC     #..*K',,.s3K ^
,,Q0FVlIl0l-lmL
 !t-..*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J":
'':5"&":":<"H-J^^J/
\\*-
r   )r   r   rv   r   r   r   )NNNNr   )rG   
__module____qualname____firstlineno____doc__r   r   rY   
LongTensorFloatTensorrT   Tensorr   __static_attributes____classcell__rF   s   @r*   rp   rp      s    Q
* 15593759&''E,,-' !!1!12' u//0	'
   1 12' !$' 
' 'r   rp   c                   X  ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jr\" SSSS	9      SS
\R                  S\	\R                     S\	\R                     S\	\R                     S\	\R                     S\	\   S\	\   S\\R                     4S jj5       rSrU =r$ )RealmSelfAttention   c                   > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  5      U l        U=(       d    [#        USS5      U l        U R$                  S:X  d  U R$                  S	:X  aG  UR&                  U l        [        R(                  " S
UR&                  -  S-
  U R                  5      U l        UR,                  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()rv   rw   relative_keyrelative_key_queryr3   r   )r~   r   r   num_attention_headsr   
ValueErrorrT   attention_head_sizeall_head_sizer   Linearquerykeyvaluer   attention_probs_dropout_probr   rQ   rv   r   r   distance_embedding
is_decoderr   r]   rv   rF   s      r*   r   RealmSelfAttention.__init__   s    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++r   xr   c                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nry   r   r3   r      )r   r   r   viewpermute)r   r   new_x_shapes      r*   transpose_for_scores'RealmSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$r   past_key_valuepast_key_values4.58new_nameversionhidden_statesattention_mask	head_maskencoder_hidden_statesencoder_attention_maskoutput_attentionsc                 V   U R                  U5      nUS Ln	U	(       a  Ub  US   n
US   nUnGOU	(       aC  U R                  U R                  U5      5      n
U R                  U R                  U5      5      nUnOUbu  U R                  U R                  U5      5      n
U R                  U R                  U5      5      n[        R
                  " US   U
/SS9n
[        R
                  " US   U/SS9nO@U R                  U R                  U5      5      n
U R                  U R                  U5      5      nU R                  U5      nUS LnU R                  (       a  X4n[        R                  " XR                  SS5      5      nU R                  S:X  d  U R                  S:X  Ga  UR                  S   U
R                  S   nnU(       aB  [        R                  " US-
  [        R                  UR                  S	9R                  SS5      nO>[        R                  " U[        R                  UR                  S	9R                  SS5      n[        R                  " U[        R                  UR                  S	9R                  SS5      nUU-
  nU R!                  UU R"                  -   S-
  5      nUR%                  UR&                  S
9nU R                  S:X  a  [        R(                  " SUU5      nUU-   nOHU R                  S:X  a8  [        R(                  " SUU5      n[        R(                  " SU
U5      nUU-   U-   nU[*        R,                  " U R.                  5      -  nUb  X-   n[0        R2                  R5                  USS9nU R7                  U5      nUb  UU-  n[        R                  " UU5      nUR9                  SSSS5      R;                  5       nUR=                  5       S S U R>                  4-   nUR                  U5      nU(       a  UU4OU4nU R                  (       a  UU4-   nU$ )Nr   r   r3   dimry   r   r   r   r|   zbhld,lrd->bhlrzbhrd,lrd->bhlrr   ) r   r   r   r   rY   catr   matmulrU   rv   rV   tensorr   r   r   r   r   r   tor}   einsummathsqrtr   r   
functionalsoftmaxr   r   
contiguousr   r   )r   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layer	use_cacheattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                               r*   r   RealmSelfAttention.forward   s    !JJ}5
 3$>/"='*I)!,K3N11$((;P2QRI33DJJ?T4UVK3N(11$((=2IJI33DJJ}4MNK		?1#5y"AqII))_Q%7$E1MK11$((=2IJI33DJJ}4MNK//0AB#4/	??  )6O !<<5H5HR5PQ''>9T=Y=Y]q=q'2'8'8';Y__Q=O*L!&j1nEJJWdWkWk!l!q!q" "'l%**UbUiUi!j!o!oprtu!v"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s +dii8P8P.QQ%/@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2mM]?? 22Gr   )r   r   r   r   r   r   r   r   rv   r   r   NNNNNNF)rG   r   r   r   r   rY   r   r   r   r   r   r
   booltupler   r   r   r   s   @r*   r   r      s    ,4%ell %u|| %
 %0A6R 7;15=A>B+/,1c||c !!2!23c E--.	c
  ((9(9:c !)):): ;c "%c $D>c 
u||	c Scr   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )RealmSelfOutputic  c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nrt   )r~   r   r   r   r   denser   r   r   r   r   r   s     r*   r   RealmSelfOutput.__init__d  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r   r   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r  r   r   r   r   r  s      r*   r   RealmSelfOutput.forwardj  5    

=1]3}'CDr   r   r  r   
rG   r   r   r   r   rY   r   r   r   r   r   s   @r*   r  r  c  6    >U\\  RWR^R^  r   r  eagerc                   $  ^  \ rS rSrSU 4S jjrS r\" SSSS9      SS\R                  S	\	\R                     S
\	\R                     S\	\R                     S\	\R                     S\	\   S\	\   S\\R                     4S jj5       rSrU =r$ )RealmAttentioniv  c                    > [         TU ]  5         [        UR                     " XS9U l        [        U5      U l        [        5       U l        g )Nrv   )	r~   r   REALM_SELF_ATTENTION_CLASSES_attn_implementationr   r  outputsetpruned_headsr   s      r*   r   RealmAttention.__init__w  s@    01L1LM
	 &f-Er   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )rS   r   r   r   r   r  r   r   r   r   r  r  r   union)r   headsindexs      r*   prune_headsRealmAttention.prune_heads  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r   r   r   r   r   r   r   r   r   r   r   r   c           	      p    U R                  UUUUUUU5      nU R                  US   U5      n	U	4USS  -   n
U
$ )Nr   r   )r   r  )r   r   r   r   r   r   r   r   self_outputsattention_outputr   s              r*   r   RealmAttention.forward  sW     yy!"
  ;;|AF#%QR(88r   )r  r  r   r   r   )rG   r   r   r   r   r  r   rY   r   r   r   r
   r   r   r   r   r   r   s   @r*   r  r  v  s    ";$ %0A6R 7;15=A>B+/,1|| !!2!23 E--.	
  ((9(9: !)):): ; "% $D> 
u||	 Sr   r  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )RealmIntermediatei  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r~   r   r   r   r   intermediate_sizer  rD   
hidden_actstrr	   intermediate_act_fnr   s     r*   r   RealmIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r   r   r   c                 J    U R                  U5      nU R                  U5      nU$ r   r  r+  r   r   s     r*   r   RealmIntermediate.forward  s&    

=100?r   r.  r  r   s   @r*   r&  r&    s(    9U\\ ell  r   r&  c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )RealmOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r  )r~   r   r   r   r(  r   r  r   r   r   r   r   r   s     r*   r   RealmOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r   r   r  r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r	  r
  s      r*   r   RealmOutput.forward  r  r   r  r  r   s   @r*   r2  r2    r  r   r2  c                      ^  \ rS rSrU 4S jr\" SSSS9      SS\R                  S\\R                     S	\\R                     S
\\R                     S\\R                     S\\
   S\\   S\\R                     4S jj5       rS rSrU =r$ )
RealmLayeri  c                 t  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        UR                  U l        UR                  U l        U R                  (       a.  U R                  (       d  [        U  S35      e[	        USS9U l	        [        U5      U l        [        U5      U l        g )Nr   z> should be used as a decoder model if cross attention is addedrw   r  )r~   r   chunk_size_feed_forwardseq_len_dimr  	attentionr   add_cross_attentionr   crossattentionr&  intermediater2  r  r   s     r*   r   RealmLayer.__init__  s    '-'E'E$'/ ++#)#=#= ##?? D6)g!hii"0Q["\D-f5!&)r   r   r   r   r   r   r   r   r   r   r   r   c           	         Ub  US S OS nU R                  UUUUUS9n	U	S   n
U R                  (       a  U	SS nU	S   nOU	SS  nS nU R                  (       aZ  UbW  [        U S5      (       d  [        SU  S35      eUb  US	S  OS nU R	                  U
UUUUUU5      nUS   n
XSS -   nUS   nWU-   n[        U R                  U R                  U R                  U
5      nU4U-   nU R                  (       a  UW4-   nU$ )
Nr3   )r   r   r   r   ry   r>  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r<  r   r   r   r>  r   feed_forward_chunkr:  r;  )r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr#  r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                    r*   r   RealmLayer.forward  s}    ;J:U?2A#6[_ !%/4 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!122 =dV DD D  AP@[(<ae%&*&9&9 %&)!'#  7q9" ==G ,C2+F( 14P P0##T%A%A4CSCSUe
  /G+ ??!2 44Gr   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r?  r  )r   r#  intermediate_outputrI  s       r*   rB  RealmLayer.feed_forward_chunk  s)    "//0@A{{#6Ir   )r=  r<  r:  r>  r?  r   r  r;  r   )rG   r   r   r   r   r   rY   r   r   r   r
   r   r   r   rB  r   r   r   s   @r*   r8  r8    s    * %0A6R 7;15=A>B+/,1?||? !!2!23? E--.	?
  ((9(9:? !)):): ;? "%? $D>? 
u||	? S?B r   r8  c                   2  ^  \ rS rSrU 4S jr         SS\R                  S\\R                     S\\R                     S\\R                     S\\R                     S\\	   S	\\
   S
\\
   S\\
   S\\
   S\\\R                     \4   4S jjrSrU =r$ )RealmEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r~   r   r]   r   
ModuleListrangenum_hidden_layersr8  layergradient_checkpointing)r   r]   _rF   s      r*   r   RealmEncoder.__init__  sR    ]]fF^F^@_#`@_1Jv$6@_#`a
&+# $as   A&r   r   r   r   r   r   r   r   output_hidden_statesreturn_dictr   c           
         U	(       a  SOS nU(       a  SOS nU(       a  U R                   R                  (       a  SOS nU R                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnU(       a  SOS n[        U R                  5       H}  u  nnU	(       a  X4-   nUb  X?   OS nU" UUUUUUb  Xo   OS U5      nUS   nU(       a	  UUS   4-  nU(       d  MN  UUS   4-   nU R                   R                  (       d  Mt  UUS   4-   nM     U	(       a  X4-   nU
(       d  [        S UUUUU4 5       5      $ [        UUUUUS	9$ )
Nr'   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   ry   r   r3   c              3   0   #    U  H  nUc  M  Uv   M     g 7fr   r'   )r(   vs     r*   r+   'RealmEncoder.forward.<locals>.<genexpr>Z  s"      
A  s   	)last_hidden_stater   r   
attentionscross_attentions)
r]   r=  rU  trainingr9   warning_once	enumeraterT  r   r   )r   r   r   r   r   r   r   r   r   rX  rY  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacheilayer_modulelayer_head_masklayer_outputss                      r*   r   RealmEncoder.forward%  s~    #7BD$5b4%64;;;Z;Zr`d&&4==##p "	#,R$(4OA|#$58H$H!.7.CilO(%&&5&A"t!M *!,M"}R'8&::"  &9]1=M<O&O#;;222+?=QRCSBU+U(-  50   14D D 
 "&%'(
 
 
 9+.+*1
 	
r   )r]   rU  rT  )	NNNNNNFFT)rG   r   r   r   r   rY   r   r   r   r
   r   r   r   r   r   r   r   r   s   @r*   rO  rO    s    , 7;15=A>B+/$(,1/4&*F
||F
 !!2!23F
 E--.	F

  ((9(9:F
 !)):): ;F
 "%F
 D>F
 $D>F
 'tnF
 d^F
 
uU\\"$MM	NF
 F
r   rO  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )RealmPoolerin  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )r~   r   r   r   r   r  Tanh
activationr   s     r*   r   RealmPooler.__init__o  s9    YYv1163E3EF
'')r   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r  rq  )r   r   first_token_tensorpooled_outputs       r*   r   RealmPooler.forwardt  s6     +1a40

#566r   )rq  r  r  r   s   @r*   rn  rn  n  s(    $
U\\ ell  r   rn  c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\\R                        \	S'   Sr\\\R                        \	S'   Srg)RealmEmbedderOutputi}  a  
Outputs of [`RealmEmbedder`] models.

Args:
    projected_score (`torch.FloatTensor` of shape `(batch_size, config.retriever_proj_size)`):

        Projected score.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
        shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Nprojected_scorer   r_  r'   )rG   r   r   r   r   ry  r   rY   r   __annotations__r   r   r_  r   r'   r   r*   rx  rx  }  sR    ( 48OXe//078<M8E%"3"345<59Ju00129r   rx  c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Srg)RealmScorerOutputi  a  
Outputs of [`RealmScorer`] models.

Args:
    relevance_score (`torch.FloatTensor` of shape `(batch_size, config.num_candidates)`):
        The relevance score of document candidates (before softmax).
    query_score (`torch.FloatTensor` of shape `(batch_size, config.retriever_proj_size)`):
        Query score derived from the query embedder.
    candidate_score (`torch.FloatTensor` of shape `(batch_size, config.num_candidates, config.retriever_proj_size)`):
        Candidate score derived from the embedder.
Nrelevance_scorequery_scorecandidate_scorer'   )rG   r   r   r   r   r}  r   rY   r   rz  r~  r  r   r'   r   r*   r|  r|    sH    
 48OXe//07/3K%++,337OXe//07r   r|  c                      \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                      \	S	'   Sr\\R                      \	S
'   Sr\\R&                     \	S'   Sr\\R&                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)RealmReaderOutputi  a  
Outputs of [`RealmReader`] models.

Args:
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
        Total loss.
    retriever_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
        Retriever loss.
    reader_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
        Reader loss.
    retriever_correct (`torch.BoolTensor` of shape `(config.searcher_beam_size,)`, *optional*):
        Whether or not an evidence block contains answer.
    reader_correct (`torch.BoolTensor` of shape `(config.reader_beam_size, num_candidates)`, *optional*):
        Whether or not a span candidate contains answer.
    block_idx (`torch.LongTensor` of shape `()`):
        The index of the retrieved evidence block in which the predicted answer is most likely.
    candidate (`torch.LongTensor` of shape `()`):
        The index of the retrieved span candidates in which the predicted answer is most likely.
    start_pos (`torch.IntTensor` of shape `()`):
        Predicted answer starting position in *RealmReader*'s inputs.
    end_pos (`torch.IntTensor` of shape `()`):
        Predicted answer ending position in *RealmReader*'s inputs.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
        shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Nlossretriever_lossreader_lossretriever_correctreader_correct	block_idx	candidate	start_posend_posr   r_  r'   )rG   r   r   r   r   r  r   rY   r   rz  r  r  r  
BoolTensorr  r  r   r  r  	IntTensorr  r   r   r_  r   r'   r   r*   r  r    s    !F )-D(5$$
%,26NHU../6/3K%++,348x 0 01815NHU--.5,0Ix(()0,0Ix(()0+/Ix(/)-GXeoo&-8<M8E%"3"345<59Ju00129r   r  c                   V    \ rS rSr% SrSr\\   \S'   Sr	\\
R                     \S'   Srg)RealmForOpenQAOutputi  z

Outputs of [`RealmForOpenQA`] models.

Args:
    reader_output (`dict`):
        Reader output.
    predicted_answer_ids (`torch.LongTensor` of shape `(answer_sequence_length)`):
        Predicted answer ids.
Nreader_outputpredicted_answer_idsr'   )rG   r   r   r   r   r  r   dictrz  r  rY   r   r   r'   r   r*   r  r    s-    	 %)M8D>(7;(5#3#34;r   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )RealmPredictionHeadTransformi  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r  )r~   r   r   r   r   r  rD   r)  r*  r	   transform_act_fnr   r   r   s     r*   r   %RealmPredictionHeadTransform.__init__  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r  r  r   r/  s     r*   r   $RealmPredictionHeadTransform.forward  s4    

=1--m<}5r   )r   r  r  rG   r   r   r   r   r   r   r   r   s   @r*   r  r    s    U r   r  c                   4   ^  \ rS rSrU 4S jrS rS rSrU =r$ )RealmLMPredictionHeadi  c                 H  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)r2   )r~   r   r  	transformr   r   r   r   decoder	ParameterrY   r   r2   r   s     r*   r   RealmLMPredictionHead.__init__   sm    5f= yy!3!3V5F5FUSLLV->->!?@	 !IIr   c                 :    U R                   U R                  l         g r   )r2   r  r   s    r*   _tie_weights"RealmLMPredictionHead._tie_weights  s     IIr   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r  r/  s     r*   r   RealmLMPredictionHead.forward  s$    }5]3r   )r2   r  r  )	rG   r   r   r   r   r  r   r   r   r   s   @r*   r  r    s    && r   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )RealmOnlyMLMHeadi  c                 B   > [         TU ]  5         [        U5      U l        g r   )r~   r   r  predictionsr   s     r*   r   RealmOnlyMLMHead.__init__  s    08r   c                 (    U R                  U5      nU$ r   r  )r   sequence_outputprediction_scoress      r*   r   RealmOnlyMLMHead.forward  s     ,,_=  r   r  r  r   s   @r*   r  r    s    9! !r   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )RealmScorerProjectioni   c                    > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l	        g r  )r~   r   r  r  r   r   r   retriever_proj_sizer  r   r   r   s     r*   r   RealmScorerProjection.__init__!  sW    08YYv1163M3MN
f&@&@fF[F[\r   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r   r/  s     r*   r   RealmScorerProjection.forward'  s$    

=1}5r   )r   r  r  r  r   s   @r*   r  r     s    ] r   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )RealmReaderProjectioni-  c                 p  > [         TU ]  5         Xl        [        R                  " UR
                  UR                  S-  5      U l        [        R                  " UR                  S5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " 5       U l        g )Nr3   r   rt   )r~   r   r]   r   r   r   span_hidden_sizedense_intermediatedense_outputr   reader_layer_norm_epslayer_normalizationReLUrelur   s     r*   r   RealmReaderProjection.__init__.  s}    "$))F,>,>@W@WZ[@["\IIf&=&=qA#%<<0G0GVMiMi#j GGI	r   c                   ^  U 4S jn[         R                  4S jnT R                  U5      nUR                  SSS9u  pVU" U5      u  pxn	[         R                  " USUS9n
[         R                  " USUS9nX-   nT R                  U5      nT R                  U5      nT R                  U5      R                  S5      nX" XR                  S9-  nXU4$ )	Nc                 \  >^ ^^ T R                   u  nmU U4S jm[        U4S j[        T	R                  R                  5       5       6 u  p#[
        R                  " US5      n[
        R                  " US5      n[
        R                  " T SUS9n[
        R                  " T SUS9nXE-  nX#U4$ )z
Generate span candidates.

Args:
    masks: <bool> [num_retrievals, max_sequence_len]

Returns:
    starts: <int32> [num_spans] ends: <int32> [num_spans] span_masks: <int32> [num_retrievals, num_spans]
    whether spans locate in evidence block.
c                    > [         R                  " TU -
  S-   TR                  S9n[         R                  " U S-
  TTR                  S9nX4$ )Nr   r   )rY   r   r   )widthcurrent_startscurrent_endsmasksmax_sequence_lens      r*   _spans_given_widthRRealmReaderProjection.forward.<locals>.span_candidates.<locals>._spans_given_widthD  sL    !&.>.F.JSXS_S_!`$||EAI7GPUP\P\]%33r   c              3   :   >#    U  H  nT" US -   5      v   M     g7f)r   Nr'   )r(   wr  s     r*   r+   IRealmReaderProjection.forward.<locals>.span_candidates.<locals>.<genexpr>I  s       fDeq!3AE!:!:Des   r   ry   r   r  )rV   rC   rR  r]   max_span_widthrY   r   index_select)
r  rV  startsendsstart_masks	end_masks
span_masksr  r  r   s
   `      @@r*   span_candidates6RealmReaderProjection.forward.<locals>.span_candidates7  s     #(++A4
  fE$++JdJdDe fgLF YYvq)F99T1%D  ,,U&IK**5bEI$0J++r   c                 l    SU R                  U5      -
  [        R                  " U5      R                  -  $ N      ?typerY   finfominmaskr}   s     r*   mask_to_score4RealmReaderProjection.forward.<locals>.mask_to_scoreV  s*    $))E**ekk%.@.D.DDDr   r3   ry   r   r   r  r|   )
rY   float32r  chunkr  r  r  r  squeezer}   )r   r   
block_maskr  r  start_projectionend_projectioncandidate_startscandidate_endscandidate_maskcandidate_start_projectionscandidate_end_projectionscandidate_hiddenreader_logitss   `             r*   r   RealmReaderProjection.forward6  s    	,> ',mm 	E //>+8+>+>qb+>+I(;J:;V8.&+&8&89IqXh&i#$)$6$6~1Tb$c!6R  99%56334DE))*:;CCBG~=P=PQQ>>r   )r]   r  r  r  r  r  r   s   @r*   r  r  -  s    7? 7?r   r  aH  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`RealmConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a5
  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   8    \ rS rSr% Sr\\S'   \rSr	S r
S rSrg)	RealmPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
r]   realmc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        gg)zInitialize the weightsg        )meanstdNr  )rD   r   r   r/   r[   normal_r]   initializer_ranger2   zero_r   rs   r   fill_)r   modules     r*   _init_weights"RealmPreTrainedModel._init_weights  s   fbii(( MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S) .r   c                     / nU H[  nUc  UR                  S5        M  UR                  n[        U5      S:  a  UR                  SUS   45      nUR                  U5        M]     U$ )z.Flatten inputs' shape to (-1, input_shape[-1])Nr3   ry   )rB   rV   rS   r   )r   inputsflattened_inputsr   r   s        r*   _flatten_inputs$RealmPreTrainedModel._flatten_inputs  sh    F~ ''-$ll{#a'#[["k"o)>?F ''/   r   r'   N)rG   r   r   r   r   r   rz  rn   load_tf_weightsbase_model_prefixr  r  r   r'   r   r*   r  r    s$    
 .O*  r   r  c                   f   ^  \ rS rSrSrS	U 4S jjrS rS rS r             S
S jr	Sr
U =r$ )RealmBertModeli  z7
Same as the original BertModel but remove docstrings.
c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U(       a  [        U5      OS U l        U R                  5         g r   )
r~   r   r]   rp   r   rO  encoderrn  pooler	post_init)r   r]   add_pooling_layerrF   s      r*   r   RealmBertModel.__init__  sI     )&1#F+->k&)D 	r   c                 .    U R                   R                  $ r   r   r   r  s    r*   get_input_embeddings#RealmBertModel.get_input_embeddings  s    ...r   c                 $    XR                   l        g r   r  r   r   s     r*   set_input_embeddings#RealmBertModel.set_input_embeddings  s    */'r   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr  rT  r<  r  )r   heads_to_prunerT  r  s       r*   _prune_headsRealmBertModel._prune_heads  s<    
 +002LELLu%//;;EB 3r   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R                   R                  (       a  U
b  U
OU R                   R
                  n
OSn
Ub  Ub  [        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       S S nO[        S5      eUu  nnUb  UR                  OUR                  nU	b  U	R                  5       OSnUc  [        R                  " UUU-   4US9nUcs  [        U R                  S5      (       a4  U R                  R                  S S 2S U24   nUR!                  UU5      nUnO$[        R"                  " U[        R$                  US9nU R'                  X.5      nU R                   R                  (       aE  UbB  UR                  5       u  nnnUU4nUc  [        R                  " UUS9nU R)                  U5      nOS nU R+                  XPR                   R,                  5      nU R                  UUUUUS	9nU R/                  UUUUUU	U
UUUS
9
nUS   nU R0                  b  U R1                  U5      OS nU(       d
  UU4USS  -   $ [3        UUUR4                  UR6                  UR8                  UR:                  S9$ )NFzDYou cannot specify both input_ids and inputs_embeds at the same timery   z5You have to specify either input_ids or inputs_embedsr   r  r{   r   )r   rx   r{   r   r   )	r   r   r   r   r   r   r   rX  rY  r   )r^  pooler_outputr   r   r_  r`  )r]   r   rX  use_return_dictr   r   r   %warn_if_padding_and_no_attention_maskr   r   get_seq_lengthrY   onesr   r   r{   r   r   r   get_extended_attention_maskinvert_attention_maskget_head_maskrS  r  r  r   r   r   r_  r`  )r   r   r   r{   rx   r   r   r   r   r   r   r   rX  rY  r   
batch_sizer   r   r   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthrV  encoder_hidden_shapeencoder_extended_attention_maskembedding_outputencoder_outputsr  ru  s                                  r*   r   RealmBertModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B];;!!%.%:	@U@UII ]%>cdd"66yQ#..*K&',,.s3KTUU!,
J%.%:!!@T@T FUE`!?!?!Afg!"ZZ*jCY6Y)ZdjkN!t(899*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_0m ;;!!&;&G=R=W=W=Y: 7$68O#P %-).4HQW)X&.2.H.HI_.`+.2+ &&y++2O2OP	??%)'#9 + 
 ,,2"7#B+/!5# ' 
 *!,8<8OO4UY#]3oab6III;-'+;;)77&11,==
 	
r   )r]   r   r  r  )TNNNNNNNNNNNNN)rG   r   r   r   r   r   r  r  r  r   r   r   r   s   @r*   r	  r	    sR    /0C "#!l
 l
r   r	  z`The embedder of REALM outputting projected score that will be used to calculate relevance score.c                     ^  \ rS rSrS/rU 4S jrS rS r\" \	R                  S5      5      \" \\S9         SS\\R                      S	\\R"                     S
\\R                      S\\R                      S\\R"                     S\\R"                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )rL   id  zcls.predictions.decoder.biasc                    > [         TU ]  U5        [        U R                  5      U l        [        U R                  5      U l        U R                  5         g r   )r~   r   r	  r]   r  r  r   r  r   s     r*   r   RealmEmbedder.__init__k  s:     #DKK0
(5r   c                 B    U R                   R                  R                  $ r   r  r   r   r  s    r*   r  "RealmEmbedder.get_input_embeddingsr      zz$$444r   c                 8    XR                   R                  l        g r   r4  r  s     r*   r  "RealmEmbedder.set_input_embeddingsu      05

-r   batch_size, sequence_lengthoutput_typeconfig_classr   r   r{   rx   r   r   r   rX  rY  r   c
                     U	b  U	OU R                   R                  n	U R                  UUUUUUUUU	S9	n
U
S   nU R                  U5      nU	(       d	  U4U
SS -   $ [	        UU
R
                  U
R                  S9$ )a  
Returns:

Example:

```python
>>> from transformers import AutoTokenizer, RealmEmbedder
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-embedder")
>>> model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder")

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

>>> projected_score = outputs.projected_score
```
r   r{   rx   r   r   r   rX  rY  r   r3   r   )ry  r   r_  )r]   r  r  r   rx  r   r_  )r   r   r   r{   rx   r   r   r   rX  rY  realm_outputsr  ry  s                r*   r   RealmEmbedder.forwardx  s    B &1%<k$++B]B]

))%'/!5# # 

 &a(((=1#%a(:::& /+99(33 r   r   r  )	NNNNNNNNN)rG   r   r   r   _tied_weights_keysr   r  r  r   REALM_INPUTS_DOCSTRINGformatr   rx  _CONFIG_FOR_DOCr   rY   r   r   r   r   r   r   r   r   r   s   @r*   rL   rL   d  s-   
 9956 ++A+H+HIf+gh+>_] 156:59371559,0/3&*9E,,-9 !!2!239 !!1!12	9
 u//09 E--.9   1 129 $D>9 'tn9 d^9 
u))	*9 ^ i9r   rL   zoThe scorer of REALM outputting relevance scores representing the score of document candidates (before softmax).c            !         ^  \ rS rSrSrSU 4S jjr\" \R                  S5      5      \	" \
\S9             SS\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\
4   4S jj5       5       rSrU =r$ )RealmScoreri  z
Args:
    query_embedder ([`RealmEmbedder`]):
        Embedder for input sequences. If not specified, it will use the same embedder as candidate sequences.
c                    > [         TU ]  U5        [        U R                  5      U l        Ub  UOU R                  U l        U R                  5         g r   )r~   r   rL   r]   embedderquery_embedderr  )r   r]   rK  rF   s      r*   r   RealmScorer.__init__  s@     %dkk20>0JnPTP]P]r   r:  r;  r   r   r{   rx   candidate_input_idscandidate_attention_maskcandidate_token_type_idscandidate_inputs_embedsr   r   r   rX  rY  r   c                    Ub  UOU R                   R                  nUc  U
c  [        S5      eUc  Uc  [        S5      eU R                  UUUUU	U
UUUS9	nU R	                  XVU5      u  nnnU R                  UUUUU	UUUUS9	nUS   nUS   nUR                  SU R                   R                  U R                   R                  5      n[        R                  " SUU5      nU(       d  UUU4$ [        UUUS9$ )a	  
candidate_input_ids (`torch.LongTensor` of shape `(batch_size, num_candidates, sequence_length)`):
    Indices of candidate input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
candidate_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_candidates, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)
candidate_token_type_ids (`torch.LongTensor` of shape `(batch_size, num_candidates, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
candidate_inputs_embeds (`torch.FloatTensor` of shape `(batch_size * num_candidates, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `candidate_input_ids` you can choose to directly pass an embedded
    representation. This is useful if you want more control over how to convert *candidate_input_ids* indices
    into associated vectors than the model's internal embedding lookup matrix.

Returns:

Example:

```python
>>> import torch
>>> from transformers import AutoTokenizer, RealmScorer

>>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-scorer")
>>> model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer", num_candidates=2)

>>> # batch_size = 2, num_candidates = 2
>>> input_texts = ["How are you?", "What is the item in the picture?"]
>>> candidates_texts = [["Hello world!", "Nice to meet you!"], ["A cute cat.", "An adorable dog."]]

>>> inputs = tokenizer(input_texts, return_tensors="pt")
>>> candidates_inputs = tokenizer.batch_encode_candidates(candidates_texts, max_length=10, return_tensors="pt")

>>> outputs = model(
...     **inputs,
...     candidate_input_ids=candidates_inputs.input_ids,
...     candidate_attention_mask=candidates_inputs.attention_mask,
...     candidate_token_type_ids=candidates_inputs.token_type_ids,
... )
>>> relevance_score = outputs.relevance_score
```z5You have to specify either input_ids or input_embeds.zJYou have to specify either candidate_input_ids or candidate_inputs_embeds.r?  r   ry   z
bd,bnd->bn)r}  r~  r  )r]   r  r   rK  r  rJ  r   num_candidatesr  rY   r   r|  )r   r   r   r{   rx   rM  rN  rO  rP  r   r   r   rX  rY  query_outputsflattened_input_idsflattened_attention_maskflattened_token_type_idscandidate_outputsr~  r  r}  s                         r*   r   RealmScorer.forward  sE   R &1%<k$++B]B]!6TUU&+B+Jijj++))%'/!5# , 

 UYThTh;SU
Q	68P !MM33%1/!5# * 

 $A&+A.)..r4;;3M3Mt{{OnOno,,|[/R"K@@ +Ve
 	
r   )rJ  rK  r   r/  )rG   r   r   r   r   r   r   rD  rE  r   r|  rF  r   rY   r   r   r   r   r   r   r   r   r   s   @r*   rH  rH    s   
 ++A+H+HIf+gh+<?[ 156:5937:>@D?C?C1559,0/3&*z
E,,-z
 !!2!23z
 !!1!12	z

 u//0z
 &e&6&67z
 #+5+<+<"=z
 #+5+;+;"<z
 "*%*;*;!<z
 E--.z
   1 12z
 $D>z
 'tnz
 d^z
 
u''	(z
 \ iz
r   rH  zrThe knowledge-augmented encoder of REALM outputting masked language model logits and marginal log-likelihood loss.c                     ^  \ rS rSrS/rU 4S jrS rS rS rS r	\
" \R                  S5      5      \" \\S	9            SS
\\R$                     S\\R&                     S\\R$                     S\\R$                     S\\R&                     S\\R&                     S\\R&                     S\\R$                     S\\R$                     S\\   S\\   S\\   S\\\4   4S jj5       5       rSrU =r$ )rK   iI  zcls.predictions.decoderc                    > [         TU ]  U5        [        U R                  5      U l        [        U R                  5      U l        U R                  5         g r   )r~   r   r	  r]   r  r  r   r  r   s     r*   r   !RealmKnowledgeAugEncoder.__init__Q  s:     #DKK0
#DKK0r   c                 B    U R                   R                  R                  $ r   r4  r  s    r*   r  -RealmKnowledgeAugEncoder.get_input_embeddingsW  r6  r   c                 8    XR                   R                  l        g r   r4  r  s     r*   r  -RealmKnowledgeAugEncoder.set_input_embeddingsZ  r9  r   c                 B    U R                   R                  R                  $ r   )r   r  r  r  s    r*   get_output_embeddings.RealmKnowledgeAugEncoder.get_output_embeddings]  s    xx##+++r   c                     XR                   R                  l        UR                  U R                   R                  l        g r   )r   r  r  r2   )r   new_embeddingss     r*   set_output_embeddings.RealmKnowledgeAugEncoder.set_output_embeddings`  s*    '5$$2$7$7!r   z+batch_size, num_candidates, sequence_lengthr;  r   r   r{   rx   r   r   r}  labelsmlm_maskr   rX  rY  r   c                 2   Ub  UOU R                   R                  nUb  Uc  [        S5      eU R                  XU5      u  pnU R	                  UUUUUUU
UUS9	nUS   nU R                  U5      nUnSnUGbg  UR                  5       u  nnU	c$  [        R                  " U[        R                  S9n	OU	R                  [        R                  5      n	[        SS9nUR                  SU R                   R                  5      nUR                  S	U R                   R                  5      R                  S5      nU" UU5      R                  UU R                   R                  U5      * nUR!                  S5      R#                  S5      nUU-   nUR%                  S	5      n[        R&                  " [        R(                  " UU	-  5      [        R(                  " U	5      -  5      * nU(       d  U4US
S -   nUb  U4U-   $ U$ [+        UUUR,                  UR.                  S9$ )a  
relevance_score (`torch.FloatTensor` of shape `(batch_size, num_candidates)`, *optional*):
    Relevance score derived from RealmScorer, must be specified if you want to compute the masked language
    modeling loss.

labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

mlm_mask (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid calculating joint loss on certain positions. If not specified, the loss will not be masked.
    Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

Returns:

Example:

```python
>>> import torch
>>> from transformers import AutoTokenizer, RealmKnowledgeAugEncoder

>>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-encoder")
>>> model = RealmKnowledgeAugEncoder.from_pretrained(
...     "google/realm-cc-news-pretrained-encoder", num_candidates=2
... )

>>> # batch_size = 2, num_candidates = 2
>>> text = [["Hello world!", "Nice to meet you!"], ["The cute cat.", "The adorable dog."]]

>>> inputs = tokenizer.batch_encode_candidates(text, max_length=10, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
```NzZYou have to specify `relevance_score` when `labels` is specified in order to compute loss.r?  r   r|   none)	reductionry   r   r3   r   )r  logitsr   r_  )r]   r  r   r  r  r   r   rY   	ones_liker  r  r   r   r   tilerR  log_softmax	unsqueeze	logsumexpnansumsumr   r   r_  )r   r   r   r{   rx   r   r   r}  rg  rh  r   rX  rY  rT  rU  rV  joint_outputsjoint_outputr  r  masked_lm_lossr&  r   loss_fct
mlm_logitsmlm_targetsmasked_lm_log_probcandidate_log_probjoint_gold_log_probmarginal_gold_log_probsr  s                                  r*   r    RealmKnowledgeAugEncoder.forwardd  s-   p &1%<k$++B]B]/"9l  UYThTh~U
Q	8P 

33%'/!5# # 

 %Q' HH\2)%+[[]"J
 ??6G#==7 (&9H +//DKK4J4JKJ ++a)C)CDII"MK"*:{"C"H"HDKK66
# " "1!<!<R!@!J!J2!N"47I"I&9&C&CA&F##ll5995Lx5W+X[`[d[dem[n+nooN')M!A,>>F3A3M^%.YSYY$'55$//	
 	
r   rB  )NNNNNNNNNNNN)rG   r   r   r   rC  r   r  r  ra  re  r   rD  rE  r   r   rF  r   rY   r   r   r   r   r   r   r   r   r   s   @r*   rK   rK   I  s    4456,8 +%%&ST >X 156:593715597;-1/3,0/3&*x
E,,-x
 !!2!23x
 !!1!12	x

 u//0x
 E--.x
   1 12x
 "%"3"34x
 ))*x
 5++,x
 $D>x
 'tnx
 d^x
 
un$	%x
 Yx
r   rK   zThe reader of REALM.c            #         ^  \ rS rSrU 4S jr\" \R                  S5      5      \" \	\
S9              SS\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S\\\	4   4S jj5       5       rSrU =r$ )rE   i  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [        U5      U l        [        U5      U l        U R                  5         g r   )
r~   r   
num_labelsr	  r  r  r   r  
qa_outputsr  r   s     r*   r   RealmReader.__init__  sK      ++#F+
#F+/7r   z!reader_beam_size, sequence_lengthr;  r   r   r{   rx   r   r   r}  r  start_positionsend_positionshas_answersr   rX  rY  r   c                     Ub  UOU R                   R                  nUc  [        S5      eUc  [        S5      eUR                  S5      U R                   R                  :  a  [        S5      eU R                  UUUUUUUUUS9	nUS   nU R                  UUSU R                   R                   5      u  nnn[        R                  " USU R                   R                   S5      nUU-  n[        R                  " [        R                  " USS	9R                  5      n[        R                  " [        R                  " USS	9R                  5      n[        R                  " USUS
9n[        R                  " USUS
9nSnSnSnSnSnU	Gb/  U
Gb+  UGb'  S nS nUR                  S5      n U	R                  SU 5      n	U
R                  SU 5      n
Un[        R                  " U5      n!U" UUU	SU R                   R                   U
SU R                   R                   S9n[        R                  " U5      n"U" UU5      nU" UR!                  S5      UR!                  S5      5      nUU!R#                  [        R$                  5      -  nUU"R#                  [        R$                  5      -  nUU-   R'                  5       nU(       d  UUUU4USS -   n#Ub
  UUUUU4U#-   $ U#$ [)        UUUUUUUUUUR*                  UR,                  S9$ )a  
relevance_score (`torch.FloatTensor` of shape `(searcher_beam_size,)`, *optional*):
    Relevance score, which must be specified if you want to compute the logits and marginal log loss.
block_mask (`torch.BoolTensor` of shape `(searcher_beam_size, sequence_length)`, *optional*):
    The mask of the evidence block, which must be specified if you want to compute the logits and marginal log
    loss.
start_positions (`torch.LongTensor` of shape `(searcher_beam_size,)`, *optional*):
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
end_positions (`torch.LongTensor` of shape `(searcher_beam_size,)`, *optional*):
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
has_answers (`torch.BoolTensor` of shape `(searcher_beam_size,)`, *optional*):
    Whether or not the evidence block has answer(s).

Returns:
NzCYou have to specify `relevance_score` to calculate logits and loss.zOYou have to specify `block_mask` to separate question block and evidence block.r   zQThe input sequence length must be greater than or equal to config.max_span_width.r?  r   ry   r   r  c                    [         R                  " [         R                  " [         R                  " U S5      S5      [         R                  " US5      5      n[         R                  " [         R                  " [         R                  " US5      S5      [         R                  " US5      5      n[         R                  " [         R                  " XE5      S5      $ )zCompute correct span.r   ry   r   )rY   eqrp  rN   logical_and)r  r  gold_starts	gold_endsis_gold_startis_gold_ends         r*   compute_correct_candidates7RealmReader.forward.<locals>.compute_correct_candidatesD  s     !&OOEOO4Da$H!Leoo^ikmNn! $hhOOEOONA$FJEOO\egiLj
 yy!2!2=!NPQRRr   c                     [         R                  4S jn[         R                  " X" XR                  S9-   SS9n[         R                  " U SS9nXC-
  $ )z3Loss based on the negative marginal log-likelihood.c                 l    SU R                  U5      -
  [        R                  " U5      R                  -  $ r  r  r  s     r*   r  ERealmReader.forward.<locals>.marginal_log_loss.<locals>.mask_to_scoreT  s*    $))E"22ekk%6H6L6LLLr   r|   ry   r   )rY   r  rq  r}   )rl  
is_correctr  log_numeratorlog_denominators        r*   marginal_log_loss.RealmReader.forward.<locals>.marginal_log_lossQ  sM     /4mm M !&zYeYe9f0fln o"'//&b"A&66r   )r  r  r  r  r3   )r  r  r  r  r  r  r  r  r  r   r_  )r]   r  r   r   r  r  r  reader_beam_sizerY   rp  argmaxmaxvaluesr  clamprN   r   r  r  r  r  r   r_  )$r   r   r   r{   rx   r   r   r}  r  r  r  r  r   rX  rY  r   r  r  r  r  retriever_logitspredicted_block_indexpredicted_candidatepredicted_startpredicted_end
total_lossr  r  r  r  r  r  ignored_indexany_retriever_correctany_reader_correctr  s$                                       r*   r   RealmReader.forward  s7   L &1%<k$++B]B]"bccnooq!DKK$>$>>pqq**))%'/!5#  

 "!* ;?//ZDKK,H,HI;
7' !???1t{{?[?[+\^`a)) %UYY}!-L-S-S T#ll599]+J+Q+QR,,-=1L_`**>qH[\
 &=+DI`S	7 ,003M-33BFO)//MBM +$)II.?$@!7!1-+A0L0LM'DKK,H,HI	N "'>!:.@QRN+M,>,>r,BNDWDWXZD[\K388GGN-225==AAK(;6<<>J+-@/S`adklmlndooF ) nk;Ln]`ff  !)#/)+)%!!//))
 	
r   )r   r  r  r  )NNNNNNNNNNNNNN)rG   r   r   r   r   r   rD  rE  r   r  rF  r   rY   r   r   r  r   r   r   r   r   r   r   s   @r*   rE   rE     s    ++A+H+HIl+mn+<?[ 156:593715597;156:4826,0/3&*W
E,,-W
 !!2!23W
 !!1!12	W

 u//0W
 E--.W
   1 12W
 "%"3"34W
 U--.W
 "%"2"23W
   0 01W
 e../W
 $D>W
 'tnW
 d^W
  
u''	(!W
 \ oW
r   rE   ay  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token (should not be used in this model by design).

            [What are token type IDs?](../glossary#token-type-ids)
        answer_ids (`list` of shape `(num_answers, answer_length)`, *optional*):
            Answer ids for computing the marginal log-likelihood loss. Indices should be in `[-1, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-1` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z?`RealmForOpenQA` for end-to-end open domain question answering.c                   0  ^  \ rS rSrSU 4S jjr\S 5       rS r\" \	R                  S5      5      \" \\S9    SS\\R                      S\\R"                     S	\\R                      S
\\R                      S\\   S\\\4   4S jj5       5       rSrU =r$ )rI   i  c           
      j  > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  S[        R                  " S5      R                  UR                  UR                  4[        R                  [        R                  " S5      S95        X l        U R                  5         g )N	block_embr'   cpu)r   r}   r   )r~   r   rL   rJ  rE   r   r   rY   r   	new_emptynum_block_recordsr  r  r   	retrieverr  )r   r]   r  rF   s      r*   r   RealmForOpenQA.__init__  s     %f-!&)KKO%%..0J0JKmm||E* & 	
 #r   c                 |    U R                   (       a  U R                  R                  $ U R                  R                  $ r   )ra  r]   searcher_beam_sizer  r  s    r*   r  !RealmForOpenQA.searcher_beam_size  s)    ==;;111{{+++r   c                 D    U R                   R                  U5      U l         g)zSend `self.block_emb` to a specific device.

Args:
    device (`str` or `torch.device`):
        The device to which `self.block_emb` will be sent.
N)r  r   )r   r   s     r*   block_embedding_to!RealmForOpenQA.block_embedding_to  s     **62r   z1, sequence_lengthr;  r   r   r{   
answer_idsrY  r   c                    Ub  UOU R                   R                  nUb  UR                  S   S:w  a  [        S5      eU R	                  XUSS9nUS   n[
        R                  " SU R                  UR                  U R                  R                  5      5      n[
        R                  " XR                  SS9u  pU
R                  5       n
[
        R                  " U R                  SU
S	9nU R                  U
R                  5       XU R                   R                   S
9u  ppUR                  U R"                  R                  5      nUR$                  R'                  [
        R(                  5      R                  U R"                  R                  S9nUR+                  5       R-                  UR.                  R'                  [
        R(                  5      5        Ub  [
        R0                  " U[
        R(                  U R"                  R                  S9n[
        R0                  " U[
        R2                  U R"                  R                  S9n[
        R0                  " U[
        R2                  U R"                  R                  S9n[
        R                  " SUR                  5       UR                  U R"                  R                  5      5      nU R#                  UR4                  SU R                   R6                   UR8                  SU R                   R6                   UR.                  SU R                   R6                   UUUUUSS9	nUR4                  UR:                     nUUR<                  UR>                  S-    nU(       d  UU4$ [A        UUS9$ )ay  
Returns:

Example:

```python
>>> import torch
>>> from transformers import RealmForOpenQA, RealmRetriever, AutoTokenizer

>>> retriever = RealmRetriever.from_pretrained("google/realm-orqa-nq-openqa")
>>> tokenizer = AutoTokenizer.from_pretrained("google/realm-orqa-nq-openqa")
>>> model = RealmForOpenQA.from_pretrained("google/realm-orqa-nq-openqa", retriever=retriever)

>>> question = "Who is the pioneer in modern computer science?"
>>> question_ids = tokenizer([question], return_tensors="pt")
>>> answer_ids = tokenizer(
...     ["alan mathison turing"],
...     add_special_tokens=False,
...     return_token_type_ids=False,
...     return_attention_mask=False,
... ).input_ids

>>> reader_output, predicted_answer_ids = model(**question_ids, answer_ids=answer_ids, return_dict=False)
>>> predicted_answer = tokenizer.decode(predicted_answer_ids)
>>> loss = reader_output.loss
```r   r   z'The batch_size of the inputs must be 1.T)r   r{   r   rY  z	BD,QD->QBry   )kr   r  )
max_lengthr  r   zD,BD->B)	r   r   r{   r}  r  r  r  r  rY  )r  r  )!r]   r  rV   r   rJ  rY   r   r  r   r   topkr  r  r  r  r  reader_seq_lenr   special_tokens_maskr  r   logical_not_logical_and_r{   r   r   r   r  r   r  r  r  r  )r   r   r   r{   r  rY  question_outputsquestion_projectionbatch_scoresrV  retrieved_block_idsretrieved_block_embr  r  r  concat_inputsr  retrieved_logitsr  predicted_blockr  s                        r*   r   RealmForOpenQA.forward  s   J &1%<k$++B]B] Y__Q%71%<FGG==~ko ) 
 /q1 ||KATAWAWX\XfXfXmXmAno!&L<S<SY[!\199;#00QNab :>##%yIcIc :H :
6 &((););<"66;;EJJGJJRVR]R]RdRdJe
!..}/K/K/P/PQVQ[Q[/\]",,{%**T[[M_M_`KYejjI[I[\Ill7%**T[[EWEWXG !<<*2246I6L6LT[[M_M_6`
 #--a$++2N2NO(77DKK<X<XY(77DKK<X<XY,!#%! $ 

 (11-2I2IJ.}/F/FI^I^abIbc "666#'!5
 	
r   )r  rJ  r   r  r   )NNNN)rG   r   r   r   r   propertyr  r  r   REALM_FOR_OPEN_QA_DOCSTRINGrE  r   r  rF  r   rY   r   r   r   r   r   r   r   r   r   s   @r*   rI   rI     s    
  , ,
3 ++F+M+MNb+cd+?o^ 7;5915&*a
E,,-a
 !!2!23a
 !!1!12	a

 U--.a
 d^a
 
u**	+a
 _ ea
r   rI   )rL   rI   rK   r  rE   rH  rn   )Mr   r   r;   dataclassesr   typingr   r   rY   r   torch.nnr   activationsr	   cache_utilsr
   modeling_layersr   modeling_outputsr   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   utils.deprecationr   configuration_realmr   
get_loggerrG   r9   _EMBEDDER_CHECKPOINT_FOR_DOC_ENCODER_CHECKPOINT_FOR_DOC_SCORER_CHECKPOINT_FOR_DOCrF  rn   Modulerp   r   r  r  r  r&  r2  r8  rO  rn  rx  r|  r  r  r  r  r  r  r  REALM_START_DOCSTRINGrD  r  r	  rL   rH  rK   rE   r  rI   __all__r'   r   r*   <module>r     s     	 ! "   % " ! :  / m m u u 1 , 
		H	%I G E hV=bii =@D DNbii    
1RYY 1h		 ")) T+ TnM
299 M
`"))  :+ : :4 8 8 8$ .: .: .:b <; < < 299 "BII .!ryy !
BII 
@?BII @?F	 / d% ? % PL
) L
^ fK( K	K\ uL
& L
	L
^ 
R
3 R

R
j ,.CDd
& d
 Ed
N B ED
) D
	D
Nr   