
    hI                     H   S SK JrJrJrJrJrJrJr  S SKJ	r	  S SK
Jr  S SKJr  S SKrS SKrS SKJrJrJr  S SKJr  S SKrS SKJrJr  S S	KJr  S S
KJrJr  S SKJr  S SK J!r!  S SK"J#r#  S SK$J%r%  S SK&J'r'J(r(J)r)  S SK*J+r+J,r,  S SK-J.r.  S SK/J0r0  S SK1J2r2  Sr3S r4 " S S\!5      r5g)    )OptionalIterableCallableDictUnionListAny)Floats2d)Path)isliceN)CosineDistanceModel	Optimizer)set_dropout_rate)KnowledgeBase	Candidate)empty_kb)DocSpan)deserialize_config)TrainablePipe)Language)Vocab)Examplevalidate_examplesvalidate_get_examples)ErrorsWarnings)SimpleFrozenList)util)ScorerTc                 R    [         R                  " U 4S[        R                  /0UD6$ )Nnegative_labels)r!   score_linksEntityLinker_v1NIL)exampleskwargss     _/home/james-whalen/.local/lib/python3.13/site-packages/spacy_legacy/components/entity_linker.pyentity_linker_scorer*      s'    hX9L9L8MXQWXX    c                   P   \ rS rSrSrSr S2\\S.S\S\	S\
S\\
   S	\S
\S\S\S\\\/\\   4   S\S\\   SS4S jjjrS\\/\4   4S jrS3S jrSSS.S\/ \\   4   S\\   S\\\/\4      4S jjrSSSS.S\\   S\S\\   S\\\
\4      S\\
\4   4
S  jjrS\\   S!\4S" jrS#\\   S\ \
   4S$ jr!S#\\   S%\ \
   SS4S& jr"\#" 5       S'.S( jr$\#" 5       S'.S) jr%\&" 5       S'.S*\'\
\(4   S+\\
   SS4S, jjr)\&" 5       S'.S*\'\
\(4   S+\\
   SS 4S- jjr*SSS..S/ jr+S0 r,S1r-g)4r%   #   zVPipeline component for named entity linking.

DOCS: https://spacy.io/api/entitylinker
r&   )	overwritescorervocabmodelnamelabels_discardn_sents
incl_priorincl_contextentity_vector_lengthget_candidatesr.   r/   returnNc                    Xl         X l        X0l        [        U5      U l        XPl        X`l        Xpl        Xl        SU
0U l	        [        SS9U l        [        U5      " U R                   5      U l        Xl        g)a  Initialize an entity linker.

vocab (Vocab): The shared vocabulary.
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
    losses during training.
labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
n_sents (int): The number of neighbouring sentences to take into account.
incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
incl_context (bool): Whether or not to include the local context in the model.
entity_vector_length (int): Size of encoding vectors in the KB.
get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that
    produces a list of candidates, given a certain knowledge base and a textual mention.
scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
DOCS: https://spacy.io/api/entitylinker#init
r.   F)	normalizeN)r0   r1   r2   listr3   r4   r5   r6   r8   cfgr   distancer   kbr/   )selfr0   r1   r2   r3   r4   r5   r6   r7   r8   r.   r/   s               r)   __init__EntityLinker_v1.__init__+   sk    > 

	">2$(,$/#;&7 /0<r+   	kb_loaderc                     [        U5      (       d/  [        [        R                  R	                  [        U5      S95      eU" U R                  5      U l        g)zaDefine the KB of this pipe by providing a function that will
create it using this object's vocab.)arg_typeN)callable
ValueErrorr   E885formattyper0   r?   )r@   rC   s     r)   set_kbEntityLinker_v1.set_kbY   s@     	""V[[//i/IJJDJJ'r+   c                    U R                   c0  [        [        R                  R	                  U R
                  S95      e[        U R                   5      S:X  a0  [        [        R                  R	                  U R
                  S95      eg )Nr2   r   )r?   rG   r   E1018rI   r2   lenE139r@   s    r)   validate_kbEntityLinker_v1.validate_kba   sb    77?V\\00dii0@AAtww<1V[[//TYY/?@@ r+   )nlprC   get_examplesrU   c                   [        US5        Ub  U R                  U5        U R                  5         U R                  R                  n/ n/ n[        U" 5       S5       HR  nUR                  UR                  5        UR                  U R                  R                  R                  U5      5        MT     [        U5      S:  d,   [        R                  R                  U R                  S95       e[        U5      S:  d,   [        R                  R                  U R                  S95       eU R                  R!                  XPR                  R                  R#                  USS9S9  g)	aO  Initialize the pipe for training, using a representative set
of data examples.

get_examples (Callable[[], Iterable[Example]]): Function that
    returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
kb_loader (Callable[[Vocab], KnowledgeBase]): A function that creates an InMemoryLookupKB from a Vocab instance.
    Note that providing this argument, will overwrite all data accumulated in the current KB.
    Use this only when loading a KB as-such from file.

DOCS: https://spacy.io/api/entitylinker#initialize
zEntityLinker_v1.initializeN
   r   rN   float32)dtype)XY)r   rK   rS   r?   r7   r   appendxr1   opsalloc1frP   r   E923rI   r2   
initializeasarray)r@   rV   rU   rC   nO
doc_samplevector_sampleexamples           r)   rb   EntityLinker_v1.initializeh   s   & 	l,HI KK	"WW))
lnb1Ggii(  !7!7!;< 2 :"FFKK$6$6DII$6$FF"=!A%Iv{{'9'9tyy'9'II%

JJNN22=	2R 	 	
r+           )dropsgdlossesr'   rj   rk   rl   c                   U R                  5         Uc  0 nUR                  U R                  S5        U(       d  U$ [        US5        / nU GH  nUR                  R
                   Vs/ s H  owPM     nnUR                  SSS9n	UR                  R                   H  n
XR                     nU(       d  M   UR                  U
R                  5      n[        SXR                   -
  5      n[#        [%        U5      S-
  XR                   -   5      nX   R                  nX   R&                  nUR(                  UU R+                  5       nUR-                  U5        M     GM     [/        U R0                  U5        U(       d3  [2        R4                  " [6        R8                  R;                  S	S
95        U$ U R0                  R=                  U5      u  nnU R?                  UUS9u  nnU" U5        Ub  U RA                  U5        X@R                  ==   U-  ss'   U$ s  snf ! [         a    [        [        R                  5      Sef = f)a  Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict and get_loss.

examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
sgd (thinc.api.Optimizer): The optimizer.
losses (Dict[str, float]): Optional record of the loss during training.
    Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.

DOCS: https://spacy.io/api/entitylinker#update
Nri   zEntityLinker_v1.update	ENT_KB_IDT	as_stringr      zEntity LinkerrN   )sentence_encodingsr'   )!rS   
setdefaultr2   r   	referencesentsget_alignedentsstartindexsentAttributeErrorRuntimeErrorr   E030maxr4   minrP   end	predictedas_docr]   r   r1   warningswarnr   W093rI   begin_updateget_lossfinish_update)r@   r'   rj   rk   rl   sentence_docsegs	sentenceskb_idsentkb_id
sent_indexstart_sentenceend_sentencestart_token	end_tokensent_docrr   
bp_contextlossd_scoress                         r)   updateEntityLinker_v1.update   s   ( 	>F$))S)M($<=B$&LL$6$67$6q$6I7^^K4^@F||((yy)5B%.__SXX%>

 &)J,E%FN#&s9~'9:;T#UL"+";"A"AK ) 7 ; ;I!||K	BIIKH!((2' ) . 	T*MM(--..O.DEM)-)@)@)O&J1H ' 
h 	8?s#yyT!E 8 * B*6;;7TABs   'H5H%H>rr   c                    [        US5        / nU Hs  nUR                  SSS9nUR                  R                   HF  nXVR                     nU(       d  M  U R
                  R                  U5      nUR                  U5        MH     Mu     U R                  R                  R                  U5      nUR                  UR                  :w  a)  [        R                  R                  SSS9n	[        U	5      eU R                   R#                  X#5      n
U R                   R%                  X#5      nU['        U5      -  n[)        U5      U
4$ )NzEntityLinker_v1.get_lossrn   Tro   r   zgold entities do not match upmethodmsg)r   rv   rt   rw   rx   r?   
get_vectorr]   r1   r_   	asarray2fshaper   E147rI   r|   r>   get_gradr   rP   float)r@   r'   rr   entity_encodingsr   r   r   r   entity_encodingerr	gradientsr   s               r)   r   EntityLinker_v1.get_loss   s   ($>?B^^K4^@F||((yy)5&*gg&8&8&?O$++O<	 )   ::>>334DE##'7'='==++$$!'F % C s##MM**+=P	}}%%&8Kc*++T{I%%r+   docsc           	      r   U R                  5         Sn/ nU(       d  U$ [        U[        5      (       a  U/n[        U5       GH  u  pEUR                   Vs/ s H  ofPM     nn[        U5      S:  d  M2  UR                   GHb  nUR                  n	UR                  U	5      n
U
S:  d   e[        SXR                  -
  5      n[        [        U5      S-
  XR                  -   5      nX{   R                  nX|   R                  nX]U R                  5       nU R                  R                   R"                  nU R$                  (       aF  U R                  R'                  U/5      S   nUR(                  nUR*                  R-                  U5      nUS-  nUR.                  U R0                  ;   a  UR3                  U R4                  5        GMG  [7        U R9                  U R:                  U5      5      nU(       d  UR3                  U R4                  5        GM  [        U5      S:X  a!  UR3                  US   R<                  5        GM  [>        R@                  " U5        URC                  U Vs/ s H  nURD                  PM     sn5      nU RF                  (       d"  URC                  U Vs/ s H  nSPM     sn5      nUnU R$                  (       a  URC                  U Vs/ s H  nURH                  PM     sn5      nUR*                  R-                  USS9n[        U5      [        U5      :w  a'  [K        [L        RN                  RQ                  SSS95      eURS                  UW5      WU-  -  nURT                  URT                  :w  a  [W        [L        RX                  5      eUU-   UU-  -
  nUR[                  5       R]                  5       nUU   nUR3                  UR<                  5        GMe     GM     [        U5      U:X  d)  [L        RN                  RQ                  SSS9n[K        U5      eU$ s  snf s  snf s  snf s  snf )	a@  Apply the pipeline's model to a batch of docs, without modifying them.
Returns the KB IDs for each entity in each doc, including NIL if there is
no prediction.

docs (Iterable[Doc]): The documents to predict.
RETURNS (List[str]): The models prediction for each document.

DOCS: https://spacy.io/api/entitylinker#predict
r   rq   ri   )axispredictzvectors not of equal lengthr   z$result variables not of equal length)/rS   
isinstancer   	enumerateru   rP   rw   rz   ry   r~   r4   r   rx   r   r   r1   r_   xpr6   r   Tlinalgnormlabel_r3   r]   r&   r<   r8   r?   entity_randomshufflerc   
prior_probr5   entity_vectorr|   r   r   rI   dotr   rG   E161argmaxitem)r@   r   entity_countfinal_kb_idsidocr   r   r   rz   r   r   r   r   r   r   r   sentence_encodingsentence_encoding_tsentence_norm
candidatescprior_probs_scoresr   entity_normsims
best_indexbest_candidater   s                                  r)   r   EntityLinker_v1.predict   s    	"$dC  6DoFA$'II.IqII.3x!|88C88D!*!6J%?*?%(J,E%FN#&s9~'9:;T#UL"+";"A"AK ) 7 ; ;I"y9@@BH**B((,0JJ,>,>z,J1,M).?.A.A+(*		7J(K A%LzzT%8%88$++DHH5%)$*=*=dggs*K%L
)(//9 _1(//
10E0EF"NN:6*,**J5WJqallJ5W*XK#'??.0jjz9Rz!#z9R.S%0F#0035::>H$IjQ__j$I4" 0 /1iinn=MTUn.V#&'7#8C<L#L*6(.(:(:3<0M ); )*+& %& (*vv.>@S'T$1K$?(" $(::1B1B#B*4V[[*A$A)4t);{T?Q)R)/)=)=)?J-7
-CN(//0F0FGs $	 &| L!\1++$$ &L % C s##E /D 6X9R
 %Js   P%%P*"P/P4r   c                    [        U VVs/ s H  o3R                    H  oDPM     M     snn5      nU[        U5      :w  a0  [        [        R                  R                  U[        U5      S95      eSnU R                  S   nU HG  nUR                   H4  nX&   nUS-  nU H"  n	U	R                  S:X  d	  U(       d  M  Xl        M$     M6     MI     gs  snnf )zModify a batch of documents, using pre-computed scores.

docs (Iterable[Doc]): The documents to modify.
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.

DOCS: https://spacy.io/api/entitylinker#set_annotations
)rw   idsr   r.   rq   N)	rP   rw   rG   r   E148rI   r=   	ent_kb_id
ent_kb_id_)
r@   r   r   r   r   
count_entsr   r.   r   tokens
             r)   set_annotationsEntityLinker_v1.set_annotations7  s     B####BC
V$V[[//ZS[/QRRHH[)	Cxx	Q E!+yy+0( !    Cs   C
excludec                  ^ ^ T R                  5         0 n[        T S5      (       a  T R                  b	  U 4S jUS'   UU 4S jUS'   T R                  R                  US'   T R
                  R                  US'   [        R                  " UT5      $ )zSerialize the pipe to a bytestring.

exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.

DOCS: https://spacy.io/api/entitylinker#to_bytes
r=   c                  D   > [         R                  " T R                  5      $ N)srsly
json_dumpsr=   rR   s   r)   <lambda>*EntityLinker_v1.to_bytes.<locals>.<lambda>W  s    u'7'7'Ar+   c                  6   > TR                   R                  T S9$ Nr   )r0   to_bytes)r   r@   s   r)   r   r   X  s    TZZ%8%8%8%Ir+   r0   r?   r1   )_validate_serialization_attrshasattrr=   r?   r   r1   r    )r@   r   	serializes   `` r)   r   EntityLinker_v1.to_bytesL  sx     	**,	4DHH$8AIeI	'''**	$!ZZ00	'}}Y00r+   c                   ^ ^ T R                  5         U 4S jn0 n[        T S5      (       a  T R                  b	  U 4S jUS'   UU 4S jUS'   U 4S jUS'   X4S'   [        R                  " XT5        T $ )	zLoad the pipe from a bytestring.

exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (TrainablePipe): The loaded object.

DOCS: https://spacy.io/api/entitylinker#from_bytes
c                    >  TR                   R                  U 5        g ! [         a    [        [        R
                  5      S ef = fr   )r1   
from_bytesr{   rG   r   E149br@   s    r)   
load_model.EntityLinker_v1.from_bytes.<locals>.load_modelg  s:    8

%%a(! 8 -478s	    %Ar=   c                 b   > TR                   R                  [        R                  " U 5      5      $ r   )r=   r   r   
json_loadsr   s    r)   r   ,EntityLinker_v1.from_bytes.<locals>.<lambda>o  s    488??5;K;KA;N+Or+   c                 8   > TR                   R                  U TS9$ r   )r0   r   )r   r   r@   s    r)   r   r   p  s    )>)>q')>)Rr+   r0   c                 :   > TR                   R                  U 5      $ r   )r?   r   r   s    r)   r   r   q  s    dgg&8&8&;r+   r?   r1   )r   r   r=   r    r   )r@   
bytes_datar   r   deserializes   ` `  r)   r   EntityLinker_v1.from_bytes]  sl     	**,	8 4DHH$8!OKRG;D)G
9r+   pathr   c                   ^ ^ 0 nUU 4S jUS'   U 4S jUS'   U 4S jUS'   U 4S jUS'   [         R                  " XT5        g	)
zSerialize the pipe to disk.

path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.

DOCS: https://spacy.io/api/entitylinker#to_disk
c                 8   > TR                   R                  U TS9$ r   )r0   to_diskpr   r@   s    r)   r   )EntityLinker_v1.to_disk.<locals>.<lambda>  s    tzz'9'9!W'9'Mr+   r0   c                 F   > [         R                  " U TR                  5      $ r   )r   
write_jsonr=   r   r@   s    r)   r   r     s    U%5%5a%Br+   r=   c                 :   > TR                   R                  U 5      $ r   )r?   r   r  s    r)   r   r     s    DGGOOA$6r+   r?   c                 :   > TR                   R                  U 5      $ r   )r1   r   r  s    r)   r   r     s    tzz'9'9!'<r+   r1   N)r    r   )r@   r   r   r   s   ` ` r)   r   EntityLinker_v1.to_diskv  s?     	M	'B	%6	$<	'Tg.r+   c                   ^ ^ U 4S jn0 nU 4S jUS'   UU 4S jUS'   U 4S jUS'   X4S'   [         R                  " XT5        T $ )	a&  Load the pipe from disk. Modifies the object in place and returns it.

path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (EntityLinker): The modified EntityLinker object.

DOCS: https://spacy.io/api/entitylinker#from_disk
c                    >  U R                  S5       nTR                  R                  UR                  5       5        S S S 5        g ! , (       d  f       g = f! [         a    [        [        R                  5      S ef = f)Nrb)openr1   r   readr{   rG   r   r   )r   infiler@   s     r)   r   -EntityLinker_v1.from_disk.<locals>.load_model  sV    8VVD\VJJ))&++-8 "\\! 8 -478s'   A *AA 
AA A %A=c                 L   > TR                   R                  [        U 5      5      $ r   )r=   r   r   r  s    r)   r   +EntityLinker_v1.from_disk.<locals>.<lambda>  s    txx7I!7L'Mr+   r=   c                 8   > TR                   R                  U TS9$ r   )r0   	from_diskr   s    r)   r   r    s    )=)=a)=)Qr+   r0   c                 :   > TR                   R                  U 5      $ r   )r?   r  r  s    r)   r   r    s    dgg&7&7&:r+   r?   r1   )r    r  )r@   r   r   r   r   s   ` `  r)   r  EntityLinker_v1.from_disk  sH    	8 8:MEQG:D)Gt'2r+   )rk   rl   c                    [         er   NotImplementedError)r@   r'   rk   rl   configs        r)   rehearseEntityLinker_v1.rehearse      !!r+   c                     [         er   r  )r@   labels     r)   	add_labelEntityLinker_v1.add_label  r  r+   )r=   r>   r8   r6   r5   r?   r3   r1   r4   r2   r/   r0   )entity_linker)r9   N).__name__
__module____qualname____firstlineno____doc__r&   BACKWARD_OVERWRITEr*   r   r   strr   intboolr   r   r   r   r   rA   rK   rS   r   r   rb   r   r   r   r   r
   r   r   r   r   r   tupler   r   r   r   r   r   r  r  r  __static_attributes__ r+   r)   r%   r%   #   sr   
 C $	, -%8,, , 	, !, , , , ", !-!68K!KL, , ", 
,\(%-)? @ (A #'@D!
r8G#445!
 h	!

 HeWm%;<=!
N #'-1?7#? 	?
 i ? c5j)*? 
c5j	?B&'!2 & &*THSM Td3i Tl1HSM 149 1 1* #(' 1" 16 4 CSBT/#t)$/2:3-/	/$ CSBT#t)$2:3-	6 )-T ""r+   r%   )6typingr   r   r   r   r   r   r	   thinc.typesr
   pathlibr   	itertoolsr   r   r   	thinc.apir   r   r   r   r   spacy.kbr   r   spacy.mlr   spacy.tokensr   r   spacy.pipeline.piper   spacy.pipeline.trainable_piper   spacy.languager   spacy.vocabr   spacy.trainingr   r   r   spacy.errorsr   r   
spacy.utilr   spacyr    spacy.scorerr!   r#  r*   r%   r)  r+   r)   <module>r;     so    H G G       6 6 &  -  " 2 7 #  L L ) '    YC"m C"r+   