
    hS)                     V   S SK r S SKrS SKJrJrJrJrJrJrJ	r	  S SK
JrJrJrJr  S SKJr  SSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSK J!r!  Sr"\" 5       RG                  \"5      S   r$S r%S\\   S\\&\4   4S jr'S\S\	\(\(4   4S jr) " S S\!5      r*S r+g)    N)AnyCallableDictIterableListOptionalTuple)ConfigModel	Optimizerset_dropout_rate)Floats2d   )Errors)Language)Scorer)DocSpan)Example)registry   )DEFAULT_SPANS_KEY)TrainablePipea  
[model]
@architectures = "spacy.SpanFinder.v1"

[model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = 2

[model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"

[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 96
rows = [5000, 1000, 2500, 1000]
attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
include_static_vectors = false

[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
depth = 4
modelc                      [         $ N)span_finder_score     T/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/span_finder.pymake_span_finder_scorerr!   .   s    r   examplesreturnc                 X  ^^ [        U5      nSmUS   mUR                  ST T 35        UR                  SU4S j5        UR                  SU4S j5        UR                  SS	5        UR                  S
S5        [        R                  " U 40 UD6nUR	                  US    S3S 5        U$ )Nspans_	spans_keyattrgetterc                 T   > U R                   R                  U[        T5      S  / 5      $ r   )spansgetlen)dockeyattr_prefixs     r    <lambda>#span_finder_score.<locals>.<lambda>8   s!    399==S5E5G1H"#Mr   has_annotationc                 "   > TU R                   ;   $ r   )r*   )r-   r.   s    r    r0   r1   :   s    C3994Dr   allow_overlapTlabeledF	_per_type)dict
setdefaultr   score_spanspop)r"   kwargsscoresr/   r.   s      @@r    r   r   2   s    &\FK

C
fcU34
M &(DE
ot,
i'3F3F
JJ&.!+T2Mr   spanc                 b    U S   R                   nU S   R                   [        U S   5      -   nX4$ )Nr   )idxr,   )r=   startends      r    _char_indicesrC   B   s1    GKKE
r(,,T"X
&C:r   c                   b   \ rS rSrSr S!\SSS\S.S\S\\	\
   \4   S\S	\S
\S\\   S\\   S\\   SS4S jjjrS\	\
   4S jrS\	\
   S\SS4S jrSSSS.S\	\   S\S\\   S\\\\4      S\\\4   4
S jjrS\\\4   4S jrS\\\4   4S jrSS.S\/ \	\   4   S\\   SS4S jjrS rg)"
SpanFinderH   zMPipeline that learns span boundaries.

DOCS: https://spacy.io/api/spanfinder
g      ?N)r&   	threshold
max_length
min_lengthscorernlpr   namer&   rG   rH   rI   rJ   r#   c                    UR                   U l         Ub  US:  d	  Ub,  US:  a&  [        [        R                  R	                  XvS95      eX l        X0l        Xl        UUUUS.U l        g)a   Initialize the span finder.
model (thinc.api.Model): The Thinc Model powering the pipeline
    component.
name (str): The component instance name, used to add entries to the
    losses during training.
threshold (float): Minimum probability to consider a prediction
    positive.
scorer (Optional[Callable]): The scoring method.
spans_key (str): Key of the doc.spans dict to save the spans under.
    During initialization and training, the component will look for
    spans on the reference document under the same key.
max_length (Optional[int]): Maximum length of the produced spans,
    defaults to None meaning unlimited length.
min_length (Optional[int]): Minimum length of the produced spans,
    defaults to None meaning shortest span length is 1.

DOCS: https://spacy.io/api/spanfinder#init
Nr   )rI   rH   )rI   rH   rG   r&   )	vocab
ValueErrorr   E1053formatr   rL   rJ   cfg)	selfrK   r   rL   r&   rG   rH   rI   rJ   s	            r    __init__SpanFinder.__init__N   so    < YY
"zA~"zA~##z#Q  
	$$""	$
r   docsc                 <    U R                   R                  U5      nU$ )zApply the pipeline's model to a batch of docs, without modifying
them.

docs (Iterable[Doc]): The documents to predict.
RETURNS: The models prediction for each document.

DOCS: https://spacy.io/api/spanfinder#predict
)r   predict)rS   rV   r<   s      r    rX   SpanFinder.predict}   s     ##D)r   r<   c           	         Sn[        U5       GHW  u  pE/ UR                  U R                  S   '   / n/ nX#U[        U5      -    n[	        XX5       Hi  u  pU
S   U R                  S   :  a  UR                  U	R                  5        U
S   U R                  S   :  d  MN  UR                  U	R                  5        Mk     U H  nU H  nUS-   U-
  nUS:  a  M  U R                  S   b  U R                  S   U::  d  M8  U R                  S   b  XR                  S   ::  d  M\  UR                  U R                  S      R                  X[US-    5        M     M     U[        U5      -  nGMZ     g)zModify a batch of Doc objects, using pre-computed scores.
docs (Iterable[Doc]): The documents to modify.
scores: The scores to set, produced by SpanFinder predict method.

DOCS: https://spacy.io/api/spanfinder#set_annotations
r   r&   rG   r   rI   NrH   )	enumerater*   rR   r,   zipappendi)rS   rV   r<   offsetr^   r-   startsends
doc_scorestokentoken_scorerA   rB   span_lengths                 r    set_annotationsSpanFinder.set_annotations   sN    oFA/1CIIdhh{+,FD#c():;J&)#&:"q>TXXk%::MM%''*q>TXXk%::KK(	 ';  C"%'E/K"Q .688L1[@.6&((<*@@		$((;"78??CRSG@TU     c#hF3 &r           )dropsgdlossesr"   ri   rj   rk   c                x   Uc  0 nUR                  U R                  S5        U Vs/ s H  oUR                  PM     nn[        U R                  U5        U R                  R                  U5      u  pxU R                  X5      u  pU" U
5        Ub  U R                  U5        X@R                  ==   U	-  ss'   U$ s  snf )a  Learn from a batch of documents and gold-standard information,
updating the pipe's model. Delegates to predict and get_loss.
examples (Iterable[Example]): A batch of Example objects.
drop (float): The dropout rate.
sgd (Optional[thinc.api.Optimizer]): The optimizer.
losses (Optional[Dict[str, float]]): Optional record of the loss during
    training. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.

DOCS: https://spacy.io/api/spanfinder#update
rh   )r8   rL   	predictedr   r   begin_updateget_lossfinish_update)rS   r"   ri   rj   rk   egrm   r<   backprop_scoreslossd_scoress              r    updateSpanFinder.update   s    & >F$))S),45Hb\\H	5T*"&**"9"9)"Dx8!?s#yyT! 6s   B7c                     U R                  XR                  R                  5      u  p4X R                  R                  R                  U5      -
  nXT-  n[	        US-  R                  5       5      nXe4$ )a2  Find the loss and gradient of loss for the batch of documents and
their predicted scores.
examples (Iterable[Examples]): The batch of examples.
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, Floats2d]): The loss and the gradient.

DOCS: https://spacy.io/api/spanfinder#get_loss
r   )_get_aligned_truth_scoresr   ops	asarray2ffloatsum)rS   r"   r<   truthsmasksrt   rs   s          r    ro   SpanFinder.get_loss   s`     66xPJJNN44V<<hk&&()~r   c                    / n/ nU GH  nUR                   R                  UR                  R                  :w  a&  [        [        R
                  R                  SS95      e[        UR                  5      nUR                  R                  US4SS9nUR                  R                  US4SS9nU R                  S   UR                  R                  ;   a  UR                  R                  U R                  S       H  n	[        U	5      u  pUR                  R!                  XSS9n[        U5      u  pX:H  nX:H  nU(       a  S	X|S
   R"                  S
4'   OS
XS
   R"                  S
4'   U(       a  S	X|S   R"                  S	4'   M  S
XS   R"                  S	4'   M     UR%                  U5        UR%                  U5        GM     UR                  R'                  US
S9nUR                  R'                  US
S9nX44$ )zLAlign scores of the predictions to the references for calculating
the loss.
span_finder)	componentr   float32)dtyper&   expand)alignment_moder   r   r?   )axis)xtextyrO   r   E1054rQ   r,   rm   xpzerosonesrR   	referencer*   rC   	char_spanr^   r]   concatenate)rS   r"   ry   r}   r~   rq   n_tokenstruthmaskr=   ref_start_charref_end_char	pred_spanpred_start_charpred_end_charstart_match	end_matchs                    r    rx   $SpanFinder._get_aligned_truth_scores   s    BttyyBDDII% !4!4}!4!MNN2<<(HFFLL(AiL@E66;;!}I;>Dxx$(:(::LL..txx/DED3@3F0N " 6 6&X !7 !I 6C95M2O"1"CK - =I"34lnna/023q\^^Q./ 45mooq0134r]__a/0 F  MM% LL1 2 ##F#3""5q"1}r   )rK   get_examplesc                l   / nU" 5        H%  n[        U5      S:  d  M  UR                  U5        M'     U(       a\  U Vs/ s H  oDR                  PM     nnU R                  X0R                  R
                  5      u  pgU R                  R                  XVS9  gU R                  R                  5         gs  snf )aT  Initialize the pipe for training, using a representative set
of data examples.
get_examples (Callable[[], Iterable[Example]]): Function that
    returns a representative sample of gold-standard Example objects.
nlp (Optional[Language]): The current nlp object the component is part
    of.

DOCS: https://spacy.io/api/spanfinder#initialize

   )XYN)r,   r]   r   rx   r   ry   
initialize)rS   r   rK   subbatchrq   rV   r   _s           r    r   SpanFinder.initialize   s     #%.B8}r!# ! +348RLL8D411(JJNNKDAJJ!!D!.JJ!!#	 5s   B1)rR   r   rL   rJ   rN   )r   )__name__
__module____qualname____firstlineno____doc__r   r   r   r   r   r   r   strr{   r   intr   rT   rX   rf   r   r   r   ru   r	   ro   rx   r   __static_attributes__r   r   r    rE   rE   H   s    "	-
 +$($(%6-
-
 Xc]H,--
 	-
 -
 -
 SM-
 SM-
 "-
 
-
^
HSM 
!HSM !8 ! !N #'-17# 	
 i  c5j)* 
c5j	@E%/,B !%(@R:S !N #'	$r8G#445$ h	$
 
$ $r   rE   c                 |    U S:X  a"  [         R                  " S5      nUR                  $ [        S[         SU  35      e)Nmake_span_finderzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )rL   modules     r    __getattr__r     sA    !!(()CD&&&
78*,>tfE
FFr   ),r   systypingr   r   r   r   r   r   r	   	thinc.apir
   r   r   r   thinc.typesr   errorsr   languager   rJ   r   tokensr   r   trainingr   utilr   spancatr   trainable_piper   span_finder_default_configfrom_strDEFAULT_SPAN_FINDER_MODELr!   r   r   r   rC   rE   r   r   r   r    <module>r      s     
 G G G @ @         & ) 4 #H--.HI'R  1 S#X   sCx P$ P$hGr   