
    hȲ                     H   S SK Jr  S SKJrJrJrJrJrJrJ	r	J
r
Jr  S SKrSSKJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJrJr  \(       a  SS
KJr  Sr\" / SQ5      r " S S5      r  " S S5      r! " S S5      r"S\\   S\\#\4   4S jr$SS jr%S r&S r'S r(SS jr)S r*g)    )defaultdict)	TYPE_CHECKINGAnyCallableDictIterableListOptionalSetTupleN   )Errors)
Morphology)DocSpanToken)Example)SimpleFrozenListget_lang_class)Language)sentertaggermorphologizerparsernertextcat)Nr    c            	           \ rS rSrSrSSSS.S\S\S\SS	4S
 jjrS\4S jrS rS r	S\
S\
SS	4S jr\S\4S j5       r\S\4S j5       r\S\4S j5       rS\\\4   4S jrSrg	)PRFScore   zA precision / recall / F score.r   tpfpfnr"   r#   r$   returnNc                (    Xl         X l        X0l        g Nr!   )selfr"   r#   r$   s       F/home/james-whalen/.local/lib/python3.13/site-packages/spacy/scorer.py__init__PRFScore.__init__"   s         c                 N    U R                   U R                  -   U R                  -   $ r'   r!   r(   s    r)   __len__PRFScore.__len__-   s    ww 477**r,   c                     U =R                   UR                   -  sl         U =R                  UR                  -  sl        U =R                  UR                  -  sl        U $ r'   r!   r(   others     r)   __iadd__PRFScore.__iadd__0   s=    588588588r,   c                     [        U R                  UR                  -   U R                  UR                  -   U R                  UR                  -   S9$ )Nr!   )r   r"   r#   r$   r2   s     r)   __add__PRFScore.__add__6   s=    ww!dgg&8TWWuxx=O
 	
r,   candgoldc                     U =R                   [        UR                  U5      5      -  sl         U =R                  [        X-
  5      -  sl        U =R                  [        X!-
  5      -  sl        g r'   )r"   lenintersectionr#   r$   r(   r9   r:   s      r)   	score_setPRFScore.score_set;   sJ    3t((.//3t{##3t{##r,   c                 T    U R                   U R                   U R                  -   S-   -  $ N0.++)r"   r#   r.   s    r)   	precisionPRFScore.precision@   #    ww$''DGG+f455r,   c                 T    U R                   U R                   U R                  -   S-   -  $ rB   )r"   r$   r.   s    r)   recallPRFScore.recallD   rF   r,   c                 P    U R                   nU R                  nSX-  X-   S-   -  -  $ )N   rC   )rD   rH   )r(   prs      r)   fscorePRFScore.fscoreH   s,    NNKKQUquv~.//r,   c                 J    U R                   U R                  U R                  S.$ )N)rL   rM   f)rD   rH   rN   r.   s    r)   to_dictPRFScore.to_dictN   s    ^^$++DKKHHr,   )r$   r#   r"   )__name__
__module____qualname____firstlineno____doc__intr*   r/   r4   r7   setr?   propertyfloatrD   rH   rN   r   strrR   __static_attributes__ r,   r)   r   r      s    )
 	 	 		
 	 
	+ +

$c $ $ $
 65 6 6 6 6 6 0 0 0
Ic5j) Ir,   r   c                   B    \ rS rSrSrS	S jrS	S jrS r\S 5       r	Sr
g)
ROCAUCScoreR   zAn AUC ROC score. This is only defined for binary classification.
Use the method is_binary before calculating the score, otherwise it
may throw an error.Nc                 <    / U l         / U l        SU l        SU l        g )N        r   )goldscandssaved_scoresaved_score_at_lenr.   s    r)   r*   ROCAUCScore.__init__W   s      "
 "
"#r,   c                 p    U R                   R                  U5        U R                  R                  U5        g r'   )rf   appendre   r>   s      r)   r?   ROCAUCScore.score_set]   s&    

$

$r,   c                 Z    [        [        R                  " U R                  5      5      S:H  $ NrK   )r<   npuniquere   r.   s    r)   	is_binaryROCAUCScore.is_binarya   s    299TZZ()Q..r,   c                    U R                  5       (       d9  [        [        R                  R	                  [        U R                  5      S95      e[        U R                  5      U R                  :X  a  U R                  $ [        U R                  U R                  5      U l	        [        U R                  5      U l        U R                  $ )Nlabel)rq   
ValueErrorr   E165formatrZ   re   r<   rh   rg   _roc_auc_scorerf   r.   s    r)   scoreROCAUCScore.scored   s    ~~V[[//c$**o/FGGtzz?d555###)$**djjA"%djj/r,   )rf   re   rg   rh   )r%   N)rT   rU   rV   rW   rX   r*   r?   rq   r[   rz   r^   r_   r,   r)   ra   ra   R   s*    $ /    r,   ra   c                      \ rS rSrSrSS\4S\S   S\S\\   S	S4S
 jjr	SS.S\\
   S\S	\\\4   4S jjr\S\\
   S	\\\4   4S j5       r\\\S.S\\
   S\S\\\/\4   S\\   S	\\\4   4
S jj5       r\\\S.S\\
   S\S\\\/\4   S\\   S	\\\4   4
S jj5       r\\SSSS.S\\
   S\S\\\/\\   4   S\\\/\4      S\S\S	\\\4   4S jj5       r\\\" 5       SSSS.S\\
   S\S\\\/\4   S\\   S\S \\   S!\\   S	\\\4   4S" jj5       r\S\\
   S#\\   S	\\\4   4S$ j5       r\\S%\\" 5       \S&.S\\
   S\S\\\/\4   S'\S(\\\/\4   S)\\   S\\   S	\\\4   4S* jj5       rS+r g),Scorero   zCompute evaluation scores.Nxxnlpr   default_langdefault_pipeliner%   c                     X@l         U(       a  Xl        g[        U5      " 5       nU H  nUR                  U5        M     Xl        g)z?Initialize the Scorer.

DOCS: https://spacy.io/api/scorer#init
N)cfgr   r   add_pipe)r(   r   r   r   r   pipes         r)   r*   Scorer.__init__r   s9     H .0C(T" )Hr,   F)per_componentexamplesr   c                X   0 n[        U R                  R                  S5      (       a|  U(       a5  U R                  R                  R                  " U40 U R                  D6US'   O@UR                  U R                  R                  R                  " U40 U R                  D65        U R                  R                   Hl  u  pE[        US5      (       d  M  U(       a!  UR                  " U40 U R                  D6X4'   M@  UR                  UR                  " U40 U R                  D65        Mn     U$ )a!  Evaluate a list of Examples.

examples (Iterable[Example]): The predicted annotations + correct annotations.
per_component (bool): Whether to return the scores keyed by component
    name. Defaults to False.
RETURNS (Dict): A dictionary of scores.

DOCS: https://spacy.io/api/scorer#score
rz   	tokenizer)hasattrr   r   rz   r   updatepipeline)r(   r   r   scoresname	components         r)   rz   Scorer.score   s     488%%w//&*hh&8&8&>&>x&T488&T{#dhh0066xL488LM#xx00ODy'** #,??8#Htxx#HFLMM)//("Gdhh"GH  1 r,   c           	         [        5       n[        5       nU  GHm  nUR                  nUR                  nUR                  (       a  M/  UR                  n[        5       n[        5       n	U HW  n
U
R                  R                  5       (       a  M$  UR                  U
R                  U
R                  [        U
5      -   45        MY     U H  n
U
R                  R                  5       (       a  M$  U	R                  U
R                  U
R                  [        U
5      -   45        UR                  R                  U
R                     S:w  a  U=R                  S-  sl        M  U=R                  S-  sl        M     UR!                  X5        GMp     [        U5      S:  a/  UR"                  UR"                  UR$                  UR&                  S.$ SSSSS.$ )aF  Returns accuracy and PRF scores for tokenization.
* token_acc: # correct tokens / # gold tokens
* token_p/r/f: PRF for token character spans

examples (Iterable[Example]): Examples to score
RETURNS (Dict[str, Any]): A dictionary containing the scores
    token_acc/p/r/f.

DOCS: https://spacy.io/api/scorer#score_tokenization
r   r   )	token_acctoken_ptoken_rtoken_fN)r   	reference	predictedhas_unknown_spaces	alignmentrZ   orth_isspaceaddidxr<   x2ylengthsir#   r"   r?   rD   rH   rN   )r   r   	acc_score	prf_scoreexamplegold_docpred_docalign
gold_spans
pred_spanstokens              r)   score_tokenizationScorer.score_tokenization   sx    J	J	G((H((H**%%EJJ!;;&&((		599s5z+ABC " ";;&&((		599s5z+ABC99$$UWW-2LLA%LLLA%L " 
7)  * y>A&00$..$++$++	  "	 r,   )gettermissing_valuesattrr   r   c          	         [        5       nU  GH1  nUR                  nUR                  nUR                  n	[	        5       n
[	        5       n[        U5       H=  u  pU" X5      nX;  a  U
R                  X" X5      45        M,  UR                  U5        M?     [	        5       nU H  nUR                  R                  5       (       a  M$  U	R                  R                  UR                     S:X  d  MM  U	R                  UR                     S   nX;  d  Mp  UR                  X" X5      45        M     UR                  X5        GM4     U S3n[        U5      S:X  a  US0$ UUR                  0$ )a3  Returns an accuracy score for a token-level attribute.

examples (Iterable[Example]): Examples to score
attr (str): The attribute to score.
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
    getter(token, attr) should return the value of the attribute for an
    individual token.
missing_values (Set[Any]): Attribute values to treat as missing annotation
    in the reference annotation.
RETURNS (Dict[str, Any]): A dictionary containing the accuracy score
    under the key attr_acc.

DOCS: https://spacy.io/api/scorer#score_token_attr
r   r   _accN)r   r   r   r   rZ   	enumerater   r   r   r   r   r   r?   r<   rN   )r   r   r   r   r   	tag_scorer   r   r   r   	gold_tagsmissing_indicesgold_ir   value	pred_tags	score_keys                    r)   score_token_attrScorer.score_token_attr   sE   . J	G((H((H%%EI!eO!*8!4u+.MM66%+>"?@#''/ "5 I!;;&&((99$$UWW-2"YYuww/2F4!vve/B&CD " 	5)  * fDM	y>Qt$$y//00r,   c          
      J   [        5       n0 nU  GH  nUR                  nUR                  n	UR                  n
0 n[	        5       n[        U	5       H  u  pU" X5      nU	R                  R                  U   nX;  a  U[        R                  :w  a  UR                  [        R                  5       Ha  nUR                  [        R                  5      u  nnUU;  a  [        5       UU'   UU;  a  [	        5       UU'   UU   R                  UU45        Mc     M  UR                  U5        M     0 nU GH3  nUR                  R                  5       (       a  M%  U
R                   R"                  UR$                     S:X  d  MN  U
R                   UR$                     S   nX;  d  Mq  U" X5      nU	R                  R                  U   nX;  d  M  U[        R                  :w  d  M  UR                  [        R                  5       Ha  nUR                  [        R                  5      u  nnUU;  a  [        5       UU'   UU;  a  [	        5       UU'   UU   R                  UU45        Mc     GM6     U H  nUR'                  UR)                  U[	        5       5      UR)                  U[	        5       5      5        UU   R'                  UR)                  U[	        5       5      UR)                  U[	        5       5      5        M     GM     0 n[+        U5      S:  as  UR,                  UU S3'   UR.                  UU S3'   UR0                  UU S3'   UR3                  5        VVs0 s H  u  nnUUR5                  5       _M     snnUU S3'   U$ SUU S3'   SUU S3'   SUU S3'   SUU S3'   U$ s  snnf )aM  Return micro PRF and PRF scores per feat for a token attribute in
UFEATS format.

examples (Iterable[Example]): Examples to score
attr (str): The attribute to score.
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
    getter(token, attr) should return the value of the attribute for an
    individual token.
missing_values (Set[Any]): Attribute values to treat as missing
    annotation in the reference annotation.
RETURNS (dict): A dictionary containing the micro PRF scores under the
    key attr_micro_p/r/f and the per-feat PRF scores under
    attr_per_feat.
r   r   _micro_p_micro_r_micro_f	_per_featN)r   r   r   r   rZ   r   vocabstringsr   EMPTY_MORPHsplitFEATURE_SEP	FIELD_SEPr   r   r   r   r   r   r?   getr<   rD   rH   rN   itemsrR   )r   r   r   r   r   micro_scoreper_featr   r   r   r   gold_per_featr   r   r   r   morphfeatfieldvaluespred_per_featresultkvs                           r)   score_token_attr_per_feat Scorer.score_token_attr_per_feat  sh   . jG((H((H%%E,.M!eO!*8!4u+ ..u5.5J<R<R3R %J,B,B C(,

:3G3G(Hv 0.6jHUO 5365M%0%e,00&$@ !D $''/ "5 -/M!;;&&((99$$UWW-2"YYuww/2F4 &u 3 ( 6 6u =!7 %)?)? ?(-J4J4J(K04

:;O;O0Pv#(#86>jHUO#(#=;>5M%$8 -e 4 8 8&$ H )L "& "%%!%%eSU3]5F5Fuce5T ))!%%eSU3]5F5Fuce5T	 "O  \ "${a(3(=(=FdV8$%(3(:(:FdV8$%(3(:(:FdV8$%EM^^EU)VEUTQ!QYY[.EU)VFdV9%& 	 )-FdV8$%(,FdV8$%(,FdV8$%)-FdV9%& *Ws   NT)r   has_annotationlabeledallow_overlapr   r   r   c                   [        5       n[        5       nU  GH  n	U	R                  n
U	R                  nUb  U" U5      (       d  M.  [	        U" X5       Vs/ s H  oR
                  PM     sn5      nUb  Ub;  U" U
5      (       a.  U[	        U" X5       Vs/ s H  oR
                  PM     sn5      -  nU Vs0 s H  o[	        5       _M     nnU H  nX;  d  M
  [        5       X'   M     [	        5       n[	        5       nU" X5       H{  nU(       a'  UR
                  UR                  UR                  S-
  4nOUR                  UR                  S-
  4nUR                  U5        UUR
                     R                  U5        M}     U Vs0 s H  o[	        5       _M     nnUb  Ub  U" U
5      (       a  U	R                  U" X5      U5       H{  nU(       a'  UR
                  UR                  UR                  S-
  4nOUR                  UR                  S-
  4nUR                  U5        UUR
                     R                  U5        M}     U(       a9  UR                  5        H%  u  nnUU;   d  M  UR                  UU   X   5        M'     UR                  UU5        GM     U S3SU S3SU S3S0nU(       a  SUU S3'   [        U5      S:  ax  UR                  UU S3'   UR                  UU S3'   UR                  UU S3'   U(       a;  UR                  5        VVs0 s H  u  nnUUR!                  5       _M     snnUU S3'   U$ s  snf s  snf s  snf s  snf s  snnf )a  Returns PRF scores for labeled spans.

examples (Iterable[Example]): Examples to score
attr (str): The attribute to score.
getter (Callable[[Doc, str], Iterable[Span]]): Defaults to getattr. If
    provided, getter(doc, attr) should return the spans for the
    individual doc.
has_annotation (Optional[Callable[[Doc], bool]]) should return whether a `Doc`
    has annotation for this `attr`. Docs without annotation are skipped for
    scoring purposes.
labeled (bool): Whether or not to include label information in
    the evaluation. If set to 'False', two spans will be considered
    equal if their start and end match, irrespective of their label.
allow_overlap (bool): Whether or not to allow overlapping spans.
    If set to 'False', the alignment will automatically resolve conflicts.
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
    the keys attr_p/r/f and the per-type PRF scores under attr_per_type.

DOCS: https://spacy.io/api/scorer#score_spans
Nr   _p_r_f	_per_typer   )r   dictr   r   rZ   label_startendr   get_aligned_spans_x2yr   r?   r<   rD   rH   rN   rR   )r   r   r   r   r   r   r   rz   score_per_typer   r   r   r   labelsru   gold_per_typer   r   span	gold_spanpred_per_type	pred_spanr   final_scoress                           r)   score_spansScorer.score_spansY  s/   > 
G((H((H).2J2JF8,BC,Bq((,BCDF%*~h/G/G#1GH1GAxx1GHIIGM,NveCE\vM,N.,4JN)   JJx.!%djj$((Q, GI!%TXX\ :Iy)dkk*..y9 / HN,NveCE\vM,N%*~h/G/G#998*MD %)[[$**dhhl$K	%)ZZA$>	NN9-!$++.229= *002DAqM)M!$4m6FG 3 OOJ
3c  h fBKfBKfBK(

 /3LD6+,u:>(-LD6%(-LD6%(-LD6%/=/C/C/E4/Etq!Aqyy{N/E4vY/0 w D
 I,N -OD4s   M
M5MMM)r   r   multi_labelpositive_label	thresholdr   r   r   r   c                
   Uc  U(       a  SOSnU(       d  SnU Vs0 s H  o[        5       _M     n	nU Vs0 s H  o[        5       _M     n
n[        U5      nU  GH  nU" UR                  U5      nUR	                  5        VVs0 s H  u  pX;   d  M  X_M     nnnU" UR
                  U5      nUR	                  5        VVs0 s H  u  pX;   d  M  X_M     nnnU HO  nUR                  US5      nUR                  U5      nU(       d	  U(       d  SnUc  M;  X   R                  UU5        MQ     U(       a  U H  nUR                  US5      nUR                  U5      nUc  M+  UU:  a  US:  a  X   =R                  S-  sl        MP  UU:  a  US:X  a  X   =R                  S-  sl	        Mu  UU:  d  M}  US:  d  M  X   =R                  S-  sl
        M     GM  U(       a  U(       a  [        UR	                  5       S S9u  nn[        UR	                  5       S S9u  nnUU:X  a  U	U   =R                  S-  sl        GM  U	U   =R                  S-  sl
        U	U   =R                  S-  sl	        GM  U(       a2  [        US	 S9u  nnUS:  a  U	U   =R                  S-  sl
        GMQ  GMT  U(       d  GM^  [        UR	                  5       S
 S9u  nnU	U   =R                  S-  sl	        GM     [        5       nU	R                  5        H`  nU=R                  UR                  -  sl        U=R                  UR                  -  sl
        U=R                  UR                  -  sl	        Mb     [        U	5      S-   n[        S U	R                  5        5       5      U-  n[        S U	R                  5        5       5      U-  n[        S U	R                  5        5       5      U-  n[        S U
R                  5        5       5      U-  nU S3SU S3SU S3UR                  U S3UR                   U S3UR"                  U S3UU S3UU S3UU S3UU S3U	R	                  5        VVs0 s H  u  pXR%                  5       _M     snnU S3U
R	                  5        VVs0 s H(  u  pXR'                  5       (       a  UR(                  OS_M*     snn0n[        U5      S:X  a2  U(       d+  U(       a$  UU S3   U   S   nUUU S3'   SU S3UU S3'   U$ U(       d  UU S3   UU S3'   SUU S3'   U$ UU S3   UU S3'   S UU S3'   U$ s  snf s  snf s  snnf s  snnf s  snnf s  snnf )!a  Returns PRF and ROC AUC scores for a doc-level attribute with a
dict with scores for each label like Doc.cats. The reported overall
score depends on the scorer settings.

examples (Iterable[Example]): Examples to score
attr (str): The attribute to score.
getter (Callable[[Doc, str], Any]): Defaults to getattr. If provided,
    getter(doc, attr) should return the values for the individual doc.
labels (Iterable[str]): The set of possible labels. Defaults to [].
multi_label (bool): Whether the attribute allows multiple labels.
    Defaults to True. When set to False (exclusive labels), missing
    gold labels are interpreted as 0.0 and the threshold is set to 0.0.
positive_label (str): The positive label for a binary task with
    exclusive classes. Defaults to None.
threshold (float): Cutoff to consider a prediction "positive". Defaults
    to 0.5 for multi-label, and 0.0 (i.e. whatever's highest scoring)
    otherwise.
RETURNS (Dict[str, Any]): A dictionary containing the scores, with
    inapplicable scores as None:
    for all:
        attr_score (one of attr_micro_f / attr_macro_f / attr_macro_auc),
        attr_score_desc (text description of the overall score),
        attr_micro_p,
        attr_micro_r,
        attr_micro_f,
        attr_macro_p,
        attr_macro_r,
        attr_macro_f,
        attr_macro_auc,
        attr_f_per_type,
        attr_auc_per_type

DOCS: https://spacy.io/api/scorer#score_cats
Ng      ?rd   r   r   c                     U S   $ Nr   r_   its    r)   <lambda>#Scorer.score_cats.<locals>.<lambda>  	    rRSur,   )keyc                     U S   $ r   r_   r   s    r)   r   r     r   r,   c                     U S   $ r   r_   r   s    r)   r   r     s    r!ur,   c                     U S   $ r   r_   r   s    r)   r   r     r   r,   rC   c              3   8   #    U  H  oR                   v   M     g 7fr'   rD   .0prfs     r)   	<genexpr>$Scorer.score_cats.<locals>.<genexpr>"       C/Bmm/B   c              3   8   #    U  H  oR                   v   M     g 7fr'   rH   r   s     r)   r  r  #       @,?Sjj,?r  c              3   8   #    U  H  oR                   v   M     g 7fr'   rN   r   s     r)   r  r  $  r  r  c              3   f   #    U  H'  oR                  5       (       a  UR                  OS v   M)     g7f)rd   N)rq   rz   )r   aucs     r)   r  r  (  s#     WAV#]]__		#5AVs   /1_score_score_descr   r   r   _macro_p_macro_r_macro_f
_macro_auc_f_per_type_auc_per_typerK   rQ   zF ()zmacro Fz	macro AUC)r   ra   rZ   r   r   r   r   r?   r"   r#   r$   maxr   r<   sumrD   rH   rN   rR   rq   rz   )r   r   r   r   r   r   r   r   ru   
f_per_typeauc_per_typer   	pred_catsr   r   	gold_cats
pred_score
gold_score
pred_label
gold_label	micro_prf	label_prfn_catsmacro_pmacro_rmacro_f	macro_aucresultspositive_label_fs                                r)   
score_catsScorer.score_cats  sv   \ *II5;<VEXZ'V
<:@A&{},&AVGw00$7I*3//*;K*;$!q{*;IKw00$7I*3//*;K*;$!q{*;IK&]]5#6
&]]51
!+!$J) '11*jI   #E!*uc!:J!*u!5J!-%2zA~&-00A50'94q&-00A50')3
Q&-00A50 $ y),Y__->DT)U&
J),Y__->DT)U&
J+z*--2--z*--2-z*--2--),Y<L)M&
J>z*--2-- "),Y__->DT)U&
J:&))Q.))Q  R J	#**,ILLILL(LLLILL(LLLILL(L - Z6)Cz/@/@/BCCfL@J,=,=,?@@6I@J,=,=,?@@6I WATATAVWW 	
 fFOTfK $fHy22fHy//fHy//fHwfHwfHwfJfK j>N>N>P"Q>Pda1iik>>P"QfM"BNBTBTBV%BV$!kkmm1775BV%#
 v;!KN&${';<^LSQ'7GtfFO$.1.1A,CGtfK()  '.$x/@'AGtfFO$,5GtfK()  (/$z/B'CGtfFO$,7GtfK()i =A
 LKz #R%s.   UUUUUU*U", /U(0negative_labelsc                   0 nU  GHV  n0 nUR                   R                   H  nXeUR                  UR                  4'   M     UR                  R                   H  nUR                  UR                  UR                  4S5      nUc  M0  UR                  n	X;  a  [        5       X9'   UR                  n
U
c  M^  UR                  nX;   a  X;   a  Mv  X:X  a  X9   =R                  S-  sl	        M  X;   a  X9   =R                  S-  sl
        M  X;   a  X9   =R                  S-  sl        M  X9   =R                  S-  sl
        X9   =R                  S-  sl        GM     GMY     [        5       nUR                  5        H`  nU=R                  UR                  -  sl	        U=R                  UR                  -  sl        U=R                  UR                  -  sl
        Mb     [        U5      S-   n[        S UR                  5        5       5      U-  n[        S UR                  5        5       5      U-  n[        S UR                  5        5       5      U-  nSUR                  SS	S
UR                   SUR"                  SUR                  SUSUSUSUR%                  5        VVs0 s H  u  nnUUR'                  5       _M     snn0	nU$ s  snnf )a  Returns PRF for predicted links on the entity level.
To disentangle the performance of the NEL from the NER,
this method only evaluates NEL links for entities that overlap
between the gold reference and the predictions.

examples (Iterable[Example]): Examples to score
negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
RETURNS (Dict[str, Any]): A dictionary containing the scores.

DOCS: https://spacy.io/api/scorer#score_links
Nr   rC   c              3   8   #    U  H  oR                   v   M     g 7fr'   r   r   s     r)   r  %Scorer.score_links.<locals>.<genexpr>{  r  r  c              3   8   #    U  H  oR                   v   M     g 7fr'   r  r   s     r)   r  r.  |  r  r  c              3   8   #    U  H  oR                   v   M     g 7fr'   r
  r   s     r)   r  r.  }  r  r  	nel_scorenel_score_desczmicro Fnel_micro_pnel_micro_rnel_micro_fnel_macro_pnel_macro_rnel_macro_fnel_f_per_type)r   ents
start_charend_charr   r   r   r   kb_id_r"   r#   r$   r   r<   r  rN   rD   rH   r   rR   )r   r+  r   r  r   gold_ent_by_offsetgold_entpred_entr   ru   r:   predr   r!  n_labelsr#  r$  r%  r   r   r'  s                        r)   score_linksScorer.score_linksF  s    
G!##--22OWH$7$79J9J#KL 3 $--22.22(((*;*;<d	 (%,,E.,4J
)$++D ''2t7N !\&-00A50!4&-00A50!4&-00A50 '-00A50&-00A5003 3  > J	#**,ILLILL(LLLILL(LLLILL(L - z?V+Cz/@/@/BCChN@J,=,=,?@@8K@J,=,=,?@@8K)**yI//I,,I,,GGG:;K;K;MN;M41a199;;MN

   Os   !K(head)r   	head_attrhead_getterignore_labelsr   rF  rG  rH  c                X   [        5       n[        5       n	[        5       n
[        5       nU  GH  nUR                  nUR                  nUR
                  n[        5       n0 n[        U5       H  u  nnU" UU5      nU" UU5      nUU;  ao  UU;  ag  UR                  UUR                  U45        UU
;  a  [        5       U
U'   UU;  a  [        5       UU'   UU   R                  UUR                  U45        M  M  UR                  U5        M     [        5       n0 nU GHx  nUR                  R                  5       (       a  M%  UR                  R                  UR                     S:w  a  SnOUR                  UR                     S   nUU;  d  Ms  U" UU5      nU" UU5      nUU;  d  M  UR                  R                  5       (       d  M  UR                  R                  UR                     S:X  a  UR                  UR                     S   nOSnUb  Uc-  U=R                  S-  sl        U	=R                  S-  sl        GM'  UR                  UUU45        UU
;  a  [        5       U
U'   UU;  a  [        5       UU'   UU   R                  UUU45        GM{     U	R                  UU5        U
 HH  nU
U   R                  UR!                  U[        5       5      UR!                  U[        5       5      5        MJ     UR                  [        S U 5       5      [        S U 5       5      5        GM     [#        U5      S:  aX  U S3UR$                  U S3U	R$                  U S3U
R'                  5        VVs0 s H  u  nnUUR)                  5       _M     snn0$ U S3SU S3SU S3S0$ s  snnf )	a  Returns the UAS, LAS, and LAS per type scores for dependency
parses.

examples (Iterable[Example]): Examples to score
attr (str): The attribute containing the dependency label.
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
    getter(token, attr) should return the value of the attribute for an
    individual token.
head_attr (str): The attribute containing the head token. Defaults to
    'head'.
head_getter (Callable[[Token, str], Token]): Defaults to getattr. If provided,
    head_getter(token, attr) should return the value of the head for an
    individual token.
ignore_labels (Tuple): Labels to ignore while scoring (e.g., punct).
missing_values (Set[Any]): Attribute values to treat as missing annotation
    in the reference annotation.
RETURNS (Dict[str, Any]): A dictionary containing the scores:
    attr_uas, attr_las, and attr_las_per_type.

DOCS: https://spacy.io/api/scorer#score_deps
r   Nr   c              3   *   #    U  H	  oS S v   M     g 7frn   r_   r   items     r)   r  $Scorer.score_deps.<locals>.<genexpr>  s     3!H   c              3   *   #    U  H	  oS S v   M     g 7frn   r_   rK  s     r)   r  rM    s     8XidbqirN  _uas_las_las_per_type)r   r   rZ   r   r   r   r   r   r   r   r   r   r   stripr#   r?   r   r<   rN   r   rR   )r   r   r   rF  rG  rH  r   r   
unlabelledlabelledlabelled_per_depr   r   r   r   r   	gold_depsgold_deps_per_depr   r   deprE  	pred_depspred_deps_per_dep	gold_headr   r   s                              r)   
score_depsScorer.score_deps  s   B Z
:6%G((H((H%%EI02!*8!4UD)"5)4n,-/!vtvvs&;<&664<J,S1&7758U-c2)#.22FDFFC3HI 0 $''/ "5 I02!;;&&((99$$UWW-2!F"YYuww/2F0 -C&ui8D-/EKK4E4E4G4G 99,,TVV49(-		$&&(9!(<I(,I ">Y->&MMQ.M$KK1,KK%MM69c*BC"*::8@
 0 5"*;;9< 1# 6-c266	37OP5 "6 y)4' %//%))#su57H7L7LSRURW7X (   333S8Xi8X5Xk  p z?Q&z00&x&&/?/E/E/G)/Gtq!Aqyy{N/G)  &t&t&& )s   2N&)r   r   )!rT   rU   rV   rW   rX   DEFAULT_PIPELINEr
   r]   r   r*   r   boolr   r   rz   staticmethodr   getattrMISSING_VALUESr   r   r   r   r   r   r   r   r   r\   r)  rC  r]  r^   r_   r,   r)   r}   r}   o   s   $ %) *:	j!  #3-	 
* EJ )=A	c3h4 /Xg%6 /$sCx. / /b 
 /6#1017#0101 %s*+	01
 C01 
c3h01 01d 
 /6#1Q7#QQ %s*+	Q
 CQ 
c3hQ Qf 
 8?:>#b7#bb #sXd^34	b
 !3%+!67b b b 
c3hb bH 
 -4 0 2 (,%)E7#EE #sS)	E
 E E !E E?E 
c3hE EN B7#B9A#B	c3hB BH 
 /65<'7'9#1i7#ii %s*+	i
 i uclE12i  }i Ci 
c3hi ir,   r}   r   r%   c           
         [        [        5      nU  GH  nUR                  R                  S5      (       d  M&  UR                  R                   Vs1 s H%  oDR
                  UR                  UR                  4iM'     nnUR                  R                  nUR                  R                   H  nUR
                  U;  a  [        5       X'R
                  '   XgR                  UR                   n[        U5      (       d  MS  UR                  US   US   S-    n	[        S U	 5       5      (       d  M  UR
                  US   US   S-   4n
X;   a4  X'R
                     =R                  S-  sl        UR                  U
5        M  X'R
                     =R                  S-  sl        M     U H  u  pnX+   =R                   S-  sl        M     GM     [        5       nUR#                  5        H  nX-  nM	     [        U5      S:  aX  UR$                  UR&                  UR(                  UR+                  5        VVs0 s H  u  nnUUR-                  5       _M     snnS.$ SSSSS.$ s  snf s  snnf )zGCompute micro-PRF and per-entity PRF scores for a sequence of examples.ENT_IOBr   r   c              3   >   #    U  H  oR                   S :g  v   M     g7f)r   N)ent_iob)r   r   s     r)   r  get_ner_prf.<locals>.<genexpr>	  s     >ve}})vs   )ents_pents_rents_fents_per_typeN)r   r   yr   r:  r   r   r   r   r   xr<   allr"   remover#   r$   r   rD   rH   rN   r   rR   )r   kwargsr   egere   	align_x2yr@  indicesg_spanr   ru   r   r   totalsr  r   r   s                     r)   get_ner_prfry    s     *Ntt""9--57TTYY?Y((AGGQUU+Y?LL$$			Hn42:*/>G7||gaj72;?; >v>>>#??GAJaHC|&7::a?:S)&7::a?: "  "'E#!$$)$ "'+ . ZF$$& '
6{Q&&mmmm9G9M9M9OP9OAan9OP	
 	
 !	
 	
? @8 Qs   ,I4I$c           	      .   [         R                  " U 5      n Uc  UnO[         R                  " U5      nUR                  S:X  aI  [         R                  " U5      nS/U R                  -  nUR                  S   XS'   UR                  U5      nO[         R                  " XS9nU R                  n[        S5      /U-  n[        S5      /U-  n[        SS5      Xs'   [        SS5      X'    X@[        U5         U [        U5         -   -  S-  R                  U5      n	U	$ ! [         ap    [         R                  " U5      n[         R                  " U 5      n [         R                  R                  X@[        U5         U [        U5         -   -  S-  U5      n	 U	$ f = f)a[  
Integrate along the given axis using the composite trapezoidal rule.

If `x` is provided, the integration happens in sequence along its
elements - they are not sorted.

Integrate `y` (`x`) along each 1d slice on the given axis, compute
:math:`\int y(x) dx`.
When `x` is specified, this integrates along the parametric curve,
computing :math:`\int_t y(t) dt =
\int_t y(t) \left.\frac{dx}{dt}\right|_{x=x(t)} dt`.

Parameters
----------
y : array_like
    Input array to integrate.
x : array_like, optional
    The sample points corresponding to the `y` values. If `x` is None,
    the sample points are assumed to be evenly spaced `dx` apart. The
    default is None.
dx : scalar, optional
    The spacing between sample points when `x` is None. The default is 1.
axis : int, optional
    The axis along which to integrate.

Returns
-------
trapezoid : float or ndarray
    Definite integral of `y` = n-dimensional array as approximated along
    a single axis by the trapezoidal rule. If `y` is a 1-dimensional array,
    then the result is a float. If `n` is greater than 1, then the result
    is an `n`-1 dimensional array.

See Also
--------
cumulative_trapezoid, simpson, romb

Notes
-----
Image [2]_ illustrates trapezoidal rule -- y-axis locations of points
will be taken from `y` array, by default x-axis distances between
points will be 1.0, alternatively they can be provided with `x` array
or with `dx` scalar.  Return value will be equal to combined area under
the red lines.

References
----------
.. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule

.. [2] Illustration image:
       https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png

Examples
--------
Use the trapezoidal rule on evenly spaced points:

>>> import numpy as np
>>> from scipy import integrate
>>> integrate.trapezoid([1, 2, 3])
4.0

The spacing between sample points can be selected by either the
``x`` or ``dx`` arguments:

>>> integrate.trapezoid([1, 2, 3], x=[4, 6, 8])
8.0
>>> integrate.trapezoid([1, 2, 3], dx=2)
8.0

Using a decreasing ``x`` corresponds to integrating in reverse:

>>> integrate.trapezoid([1, 2, 3], x=[8, 6, 4])
-8.0

More generally ``x`` is used to integrate along a parametric curve. We can
estimate the integral :math:`\int_0^1 x^2 = 1/3` using:

>>> x = np.linspace(0, 1, num=50)
>>> y = x**2
>>> integrate.trapezoid(y, x)
0.33340274885464394

Or estimate the area of a circle, noting we repeat the sample which closes
the curve:

>>> theta = np.linspace(0, 2 * np.pi, num=1000, endpoint=True)
>>> integrate.trapezoid(np.cos(theta), x=np.sin(theta))
3.141571941375841

``trapezoid`` can be applied along a specified axis to do multiple
computations in one call:

>>> a = np.arange(6).reshape(2, 3)
>>> a
array([[0, 1, 2],
       [3, 4, 5]])
>>> integrate.trapezoid(a, axis=0)
array([1.5, 2.5, 3.5])
>>> integrate.trapezoid(a, axis=1)
array([2.,  8.])
Nr   r   axisrf  g       @)ro   
asanyarrayndimdiffshapereshapeslicetupler  rv   asarrayr   reduce)
rn  ro  dxr|  dr  ndslice1slice2rets
             r)   	trapezoidr  )  sa   L 	aAyMM!66Q;
AC!&&LE''!*EK		% A%A	
BDk]RFDk]RFD>FLr?FLSeFm$qv'7783>CCDI J  SJJqMJJqMffmmA5=!1AeFm4D!DEKTRJSs   '1D A6FFc                     [        [        R                  " U 5      5      S:w  a:  [        [        R
                  R                  [        R                  " U 5      S95      e[        X5      u  p#n[        X#5      $ )a#  Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
from prediction scores.

Note: this implementation is restricted to the binary classification task

Parameters
----------
y_true : array, shape = [n_samples] or [n_samples, n_classes]
    True binary labels or binary label indicators.
    The multiclass case expects shape = [n_samples] and labels
    with values in ``range(n_classes)``.

y_score : array, shape = [n_samples] or [n_samples, n_classes]
    Target scores, can either be probability estimates of the positive
    class, confidence values, or non-thresholded measure of decisions
    (as returned by "decision_function" on some classifiers). For binary
    y_true, y_score is supposed to be the score of the class with greater
    label. The multiclass case expects shape = [n_samples, n_classes]
    where the scores correspond to probability estimates.

Returns
-------
auc : float

References
----------
.. [1] `Wikipedia entry for the Receiver operating characteristic
        <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
       Letters, 2006, 27(8):861-874.

.. [3] `Analyzing a portion of the ROC curve. McClish, 1989
        <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_
rK   rt   )	r<   ro   rp   rv   r   rw   rx   
_roc_curve_auc)y_truey_scorefprtpr_s        r)   ry   ry     sY    H 299V"++"))F2C+DEEV-KCa>r,   c                    [        X5      u  p#n[        R                  SU4   n[        R                  SU4   n[        R                  US   S-   U4   nUS   S::  a0  [        R                  " [        R                  UR
                  5      nOX"S   -  nUS   S::  a0  [        R                  " [        R                  UR
                  5      nOX3S   -  nXVU4$ )aE  Compute Receiver operating characteristic (ROC)

Note: this implementation is restricted to the binary classification task.

Parameters
----------

y_true : array, shape = [n_samples]
    True binary labels. If labels are not either {-1, 1} or {0, 1}, then
    pos_label should be explicitly given.

y_score : array, shape = [n_samples]
    Target scores, can either be probability estimates of the positive
    class, confidence values, or non-thresholded measure of decisions
    (as returned by "decision_function" on some classifiers).

Returns
-------
fpr : array, shape = [>2]
    Increasing false positive rates such that element i is the false
    positive rate of predictions with score >= thresholds[i].

tpr : array, shape = [>2]
    Increasing true positive rates such that element i is the true
    positive rate of predictions with score >= thresholds[i].

thresholds : array, shape = [n_thresholds]
    Decreasing thresholds on the decision function used to compute
    fpr and tpr. `thresholds[0]` represents no instances being predicted
    and is arbitrarily set to `max(y_score) + 1`.

Notes
-----
Since the thresholds are sorted from low to high values, they
are reversed upon returning them to ensure they correspond to both ``fpr``
and ``tpr``, which are sorted in reversed order during their calculation.

References
----------
.. [1] `Wikipedia entry for the Receiver operating characteristic
        <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
       Letters, 2006, 27(8):861-874.
r   r   rf  )_binary_clf_curvero   r_repeatnanr  )r  r  fpstps
thresholdsr  r  s          r)   r  r    s    \ -V=Cj %%3-C
%%3-Cz!}q(*45J
2w!|ii		*Gm
2w!|ii		*GmZr,   c                    Sn[         R                  " U 5      n [         R                  " U5      nX:H  n [         R                  " USS9SSS2   nX   nX   n Sn[         R                  " [         R                  " U5      5      S   n[         R
                  XPR                  S-
  4   n[        X-  5      U   nSU-   U-
  nXX   4$ )a  Calculate true and false positives per binary classification threshold.

Parameters
----------
y_true : array, shape = [n_samples]
    True targets of binary classification

y_score : array, shape = [n_samples]
    Estimated probabilities or decision function

Returns
-------
fps : array, shape = [n_thresholds]
    A count of false positives, at index i being the number of negative
    samples assigned a score >= thresholds[i]. The total number of
    negative samples is equal to fps[-1] (thus true negatives are given by
    fps[-1] - fps).

tps : array, shape = [n_thresholds <= len(np.unique(y_score))]
    An increasing count of true positives, at index i being the number
    of positive samples assigned a score >= thresholds[i]. The total
    number of positive samples is equal to tps[-1] (thus false negatives
    are given by tps[-1] - tps).

thresholds : array, shape = [n_thresholds]
    Decreasing score values.
      ?	mergesort)kindNrf  r   r   )ro   ravelargsortwherer  r  size_stable_cumsum)	r  r  	pos_labeldesc_score_indicesweightdistinct_value_indicesthreshold_idxsr  r  s	            r)   r  r    s    8 IXXfFhhwG  F G+>ttD)G'FF
  XXbggg&67:UU1;;?BCN 
).
9C
n
s
"CW,,,r,   c           
      @   [         R                  " X[         R                  S9n[         R                  " X[         R                  S9n[         R                  " [         R
                  " UR                  SUS9XRUSS95      (       d  [        [        R                  5      eU$ )a  Use high precision for cumsum and check that final value matches sum

Parameters
----------
arr : array-like
    To be cumulatively summed as flat
axis : int, optional
    Axis along which the cumulative sum is computed.
    The default (None) is to compute the cumsum over the flattened array.
rtol : float
    Relative tolerance, see ``np.allclose``
atol : float
    Absolute tolerance, see ``np.allclose``
)r|  dtyperf  r{  T)rtolatol	equal_nan)
ro   cumsumfloat64r  rp  isclosetakerv   r   E163)arrr|  r  r  outexpecteds         r)   r  r  R  su     ))C"**
5CvvcBJJ7H66


HHRdH#Xtt	
 
 %%Jr,   c                    [         R                  " U 5      n [         R                  " U5      nSn[         R                  " U 5      n[         R                  " US:  5      (       aG  [         R                  " US:*  5      (       a  SnO&[        [        R                  R                  U S95      eU[        X5      -  n[        U[         R                  5      (       a  UR                  R                  U5      nU$ )a  Compute Area Under the Curve (AUC) using the trapezoidal rule

This is a general function, given points on a curve.  For computing the
area under the ROC-curve, see :func:`roc_auc_score`.

Parameters
----------
x : array, shape = [n]
    x coordinates. These must be either monotonic increasing or monotonic
    decreasing.
y : array, shape = [n]
    y coordinates.

Returns
-------
auc : float
r   r   rf  )ro  )ro   r  r  anyrp  rv   r   E164rx   r  
isinstancememmapr  type)ro  rn  	directionr  areas        r)   r  r  l  s    $ 	A
AI	B	vvb1f~~66"'??IV[[//!/455y&D$		"" zzt$Kr,   )Nr  rf  )Ngh㈵>g:0yE>)+collectionsr   typingr   r   r   r   r   r	   r
   r   r   numpyro   errorsr   
morphologyr   tokensr   r   r   trainingr   utilr   r   languager   r_  	frozensetrc  r   ra   r}   r]   ry  r  ry   r  r  r  r  r_   r,   r)   <module>r     s    #
 
 
   " $ $  2" U =)0I 0If   :F
 F
R*
(7+ *
$sCx. *
bL'T@ F3-l4#r,   