
    hFD                     @   S SK r S SKJrJr  S SK Jr  S SKJr  S SKJrJ	r	J
r
JrJr  S SKJrJr  S SKJr  S SKJr  S	/ S
Q/ SQS.//rSS/ SQ/ SQ/0//rSS/ SQ0//r\ R.                  S 5       r\ R.                  S 5       rS rS rS rS rS rS rS r S r!S r"S r#g)    N)assert_almost_equalassert_array_almost_equal)approx)English)PRFScoreROCAUCScoreScorer_roc_auc_score
_roc_curve)DocSpan)Example)offsets_to_biluo_tagsz7Apple is looking at buying U.K. startup for $ 1 billion)   r   r   r            r   
   r      )nsubjauxROOTpreppcompcompounddobjr   quantmodr   pobjheadsdepsz	100 - 200entities)r   r   CARDINAL)r   	   r#   z6Apple is looking at buying U.K. startup for $1 billion))r      ORG)      GPE),   6   MONEYc                      Sn / SQn/ SQn/ SQn[        5       nU" U 5      n[        [        U5      5       H?  nX   XV   l        X&   XV   l        XV   R                  X6   5        US:  d  M6  SXV   l        MA     U$ )Nz1Sarah's sister flew to Silicon Valley via London.)
NNPPOSNNVBDINr.   r.   r2   r.   .)
PROPNPARTNOUNVERBADPr4   r4   r8   r4   PUNCT)
NounType=prop|Number=singzPoss=yesNumber=singzTense=past|VerbForm=fin r:   r:   r<   r:   zPunctType=perir   F)r   rangelentag_pos_	set_morphis_sent_start)texttagsposmorphsnlpdocis          Q/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/test_scorer.py
tagged_docrK   -   s    >DLDCF )C
d)C3t9gf#q5#(CF   J    c                      Sn [        5       nU" U 5      n[        [        U5      5       H   nUS-  S:X  a  SX#   l        M  SX#   l        M"     U$ )Nz-One sentence. Two sentences. Three sentences.r   r   TF)r   r=   r>   rB   )rC   rG   rH   rI   s       rJ   
sented_docrN   T   sO    :D
)C
d)C3s8_q5A:#'CF #(CF 	 
 JrL   c                    [        5       nSU  Vs/ s H  o"R                  PM     sn0n[        R                  " X5      nUR	                  U/5      nUS   S:X  d   e[        5       n[        UR                  / SQ/ SQS9Ul        SUR                  S   l	        UR	                  U/5      nUS   S	:X  d   eUS
   S	:X  d   eUS   [        S5      :X  d   eUS   S:X  d   e[        5       nUR	                  U/SS9nUS   S   S	:X  d   eUS   S
   S	:X  d   eUS   S   [        S5      :X  d   eUS   S   S:X  d   eg s  snf )Nsent_starts	token_acc      ?)Onez	sentence.Two
sentences.ThreerU   )TTTTTF)wordsspacesF         ?token_ptoken_rQUU?token_fg?Tper_component	tokenizer)r	   
sent_startr   	from_dictscorer   r   vocab	predictedrB   r   )rN   scorertgoldexamplescoresrG   s          rJ   test_tokenizationrl   a   su   XF*=*QLL*=>D
1G\\7)$F+#%%%
)C		N4G
 */Ga&\\7)$F+#%%%)###)z 2222)### XF\\7)4\8F+{+s222+y)S000+y)VJ-????+y)S0001 >s   D<c                 \   [        5       nSU  Vs/ s H  o"R                  PM     sn0n[        R                  " X5      nUR	                  U/5      nUS   S:X  d   eSUS   S'   SUS   S'   [        R                  " X5      nUR	                  U/5      nUS   [        S5      :X  d   eg s  snf )	NrP   sents_frR   r   r   rY   r   g1UU?)r	   rb   r   rc   rd   r   )rN   rg   rh   ri   rj   rk   s         rJ   
test_sentsro   ~   s    XF*=*QLL*=>D
1G\\7)$F)### DD
1G\\7)$F)y 1111 >s   B)c                    [        5       n/ n[         HW  u  p4[        XR                  S5      US   US   S9nUS   US   S.n[        R
                  " XV5      nUR                  U5        MY     UR                  U5      nUS   S:X  d   eUS   S:X  d   eUS	   S
   S   S:X  d   eUS	   S
   S   S:X  d   eUS	   S
   S   S:X  d   eUS	   S   S   S:X  d   eUS	   S   S   S:X  d   eUS	   S   S   S:X  d   e[        5       n/ n[         Ha  u  p4[        XR                  S5      US   US   S9nUS   US   S.nSUS   l        [        R
                  " XV5      nUR                  U5        Mc     UR                  U5      nUS   S:X  d   e[        US   S5        US	   S
   S   S:X  d   eUS	   S
   S   S:X  d   eUS	   S
   S   S:X  d   e[        US	   S   S   S5        US	   S   S   S:X  d   eUS	   S   S   S:X  d   eg )N r    r!   )rW   r    r!   r   dep_uasrR   dep_lasdep_las_per_typer   prfr   r   g*E?gTUU?皙?)
r	   test_las_appler   splitr   rc   appendrd   dep_r   )	en_vocabrg   examplesinput_annotrH   ri   rj   resultss	            rJ   test_las_per_typer      s   XFH'LL-U7^%PV-
 wv?##C.  ( ll8$G9$$$9$$$%&w/4;;;%&w/4;;;%&w/4;;;%&z2373>>>%&z2373>>>%&z2373>>> XFH'LL-U7^%PV-
 wv? A##C.  ( ll8$G9$$$	*I6%&w/4999%&w/4999%&w/4999 23J?DkR%&z2373>>>%&z2373>>>rL   c                 H   [        5       n/ n[         H  u  p4[        XR                  S5      / SQS9n[	        XTS   5      n[
        R                  " USU05      nSUR                  S   l        SUR                  S   l        UR                  U5        M     UR                  U5      nUS   S:X  d   eUS	   S:X  d   eUS
   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   e[        5       n/ n[         H  u  p4[        U UR                  S5      / SQS9n[	        XTS   5      n[
        R                  " USU05      nSUR                  S   l        SUR                  S   l        UR                  U5        M     UR                  U5      nUS   [        S5      :X  d   eUS	   [        S5      :X  d   eUS
   [        S5      :X  d   eSUS   ;   d   eSUS   ;   d   eSUS   ;   d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   S:X  d   eUS   S   S   [        S5      :X  d   eg )Nrq   )
B-CARDINALOr   )rW   entsr"   FrY   ents_prR   ents_rents_fents_per_typer#   ru   rv   rw   )
B-ORGr   r   r   r   zB-GPEr   r   r   r   g1UU?r)   r,   r&   r   rZ   )r	   test_ner_cardinalr   rz   r   r   rc   rf   rB   	referencer{   rd   test_ner_appler   )	r}   rg   r~   r   r   rH   r"   rj   r   s	            rJ   test_ner_per_typer      sX   XFH*LL-4U
 )J.?@##C*h)?@-2!*-2!*  + ll8$G8###8###8###?#J/4;;;?#J/4;;;?#J/4;;; XFH',,s#O

 )J.?@##C*h)?@-2!*-2!*  ( ll8$G8y 11118y 11118y 1111GO,,,,go....GO,,,,?#E*3/3666?#E*3/3666?#E*3/3666?#G,S1Q666?#G,S1Q666?#G,S1Q666?#E*3/3666?#E*3/3666?#E*3/6)3DDDDrL   c                    [        5       nU  Vs/ s H  o"R                  PM     snU  Vs/ s H  o"R                  PM     snU  Vs/ s H  n[        UR                  5      PM     snU  Vs/ s H  o"R
                  (       a  SOSPM     snS.n[        R                  " X5      nUR                  U/5      nUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS	   S
   S   S:X  d   e[        5       nU  Vs/ s H  o"R                  PM     nnSUS'   U  Vs/ s H  o"R                  PM     nnSUS'   U  Vs/ s H  n[        UR                  5      PM     nnSUS'   SUS'   UUUUS   S.n[        R                  " X5      nUR                  U/5      nUS   S:X  d   eUS   S:X  d   eUS   [        S5      :X  d   eUS   [        S5      :X  d   eUS	   S
   S   S:X  d   eUS	   S   S   S:X  d   eUS	   S   S   [        S5      :X  d   e[        5       nUR                  U/SS9nUS   S   S:X  d   eUS   S   S:X  d   eUS   S   [        S5      :X  d   eg s  snf s  snf s  snf s  snf s  snf s  snf s  snf )NrY   )rD   rE   rF   rP   tag_accrR   pos_acc	morph_accmorph_micro_fmorph_per_featNounTyperw   r0   r   Xr;   zNumber=plurr   rP   ?rx   g"[L"?Poss        NumbergupE?Tr_   taggermorphologizer)
r	   r?   r@   strmorphrB   r   rc   rd   r   )	rK   rg   rh   ri   rj   r   rD   rE   rF   s	            rJ   test_tag_scorer      s   XF!+,A, *+
1
+)34A3qww<4>HIj__"4jI	D 
1GllG9%G9$$$9$$$;3&&&?#s***#$Z05<<< XF&'JqFFJD'DG%
&:a66:C
&CF$./Jqc!''lJF/F1IF1IM*	D 
1GllG9%G9$$$9$$$;6#;...?#vi'8888#$Z05<<<#$V,S1S888#$X.s3vj7IIII XFllG9Dl9G8Y'3...?#I.#555?#K0F3K???[ -+4I (
&/s(   I&I+I0*I5
 I: I? Jc                    U " S5      nSUS   l         SUS   l        US   R                  S5        SUS   l        U " S5      nSUl        [        X5      n[        5       nUR                  U/5      nU H"  nUR                  S5      (       a  M  XV   c  M"   e   U " S5      nSUl        SUS	   l         SUS	   l        US	   R                  S5        SUS	   l        [        X5      n[        5       nUR                  U/5      nUS
   b   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   b   eU " S5      nSUl        SUS   l         SUS   l        US	   R                  S5        SUS	   l        [        X5      n[        5       nUR                  U/5      nUS
   b   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eUS   b   eg )Nz	a b c d eAr   r   zFeat=ValdepTcatsrY   rQ   r   r   r   r   rr   rR   rs   rn   )	r?   r@   rA   r|   has_unknown_spacesr   r	   rd   
startswith)en_tokenizerpred_docref_docrj   rg   rk   keys          rJ   test_partial_annotationr   (  ss   K(HHQKHQKQK*%HQK ;'G!%Gh(GXF\\7)$F>>&!!{"""	  ;'G!%GGAJOGAJOAJ$GAJOh(GXF\\7)$F+&&&)###)###+#%%%)###)###)$$$ ;'G!%GGAJOGAJOAJ$GAJOh(GXF\\7)$F+&&&)###)###+#%%%)###)###)$$$rL   c                  <   SS/n SS/n[        X5      u  p#n[        X5      n[        U/ SQ5        [        U/ SQ5        [        US5        SS/n SS/n[        X5      u  p#n[        X5      n[        U/ SQ5        [        U/ SQ5        [        US5        SS/n SS/n[        X5      u  p#n[        X5      n[        USS/5        [        USS/5        [        US5        SS/n SS/n[        X5      u  p#n[        X5      n[        U/ SQ5        [        U/ SQ5        [        US5        SS/n SS/n[        X5      u  p#n[        X5      n[        USS/5        [        USS/5        [        US5        [	        5       nUR                  SS5        UR                  SS5        [        UR                  S5        SS/n SS	/n[        R                  " [        5         [        X5        S S S 5        [	        5       nUR                  SS5        UR                  S	S5        [        R                  " [        5         UR                  nS S S 5        SS/n SS	/n[        R                  " [        5         [        X5        S S S 5        [	        5       nUR                  SS5        UR                  S	S5        [        R                  " [        5         UR                  nS S S 5        g ! , (       d  f       GN = f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)
Nr   rY   )r   r   rY   )r   rY   rY   rR   r   rZ         ?      ?)
r   r
   r   r   r   	score_setrd   pytestraises
ValueError)y_truey_scoretprfpr_roc_aucrd   s          rJ   test_roc_auc_scorer   `  s   VF!fGV-KCaV-Gc9-c9-%VF!fGV-KCaV-Gc9-c9-%VF!fGV-KCaV-GcAq6*cAq6*%VF!fGV-KCaV-Gc9-c9-%VFCjGV-KCaV-GcAq6*cAq6*% ME	OOC	OOCS) VFTlG	z	"v' 
# ME	OOD!	OOD!	z	"KK 
# VFTlG	z	"v' 
# ME	OOD!	OOD!	z	"KK 
#	"# 
#	" 
#	"
 
#	" 
#	"s0   K0K+'K<L
K(+
K9<
L

Lc            
      h   [        5       n SnSnU R                  U5      nU R                  U5      n/ nUR                  UR                  SSSS95        UR                  UR                  SSSS95        UR                  UR                  S	S
SS95        XSR                  U'   S nUR                  U   R                  US9UR                  U'   [        XC5      n[        R                  " U/X&S9nX S3   S:X  d   eX S3   S:  d   eUR                  U   R                  US9UR                  U'   [        XC5      n[        R                  " U/X&SS9nX S3   S:X  d   eX S3   S:X  d   eU V	s/ s H"  n	[        XIR                  U	R                  SS9PM$     n
n	XR                  U'   [        XC5      n[        R                  " U/X&SS9nX S3   S:X  d   eX S3   S:X  d   eU S3U;   d   e[        R                  " U/X&SSS9nX S3   S:X  d   eX S3   S:X  d   eU S3U;  d   eg s  sn	f )NzThis is just a random sentence.my_spansr   r   PERSON)labelr   r&         c                      U R                   U   $ )N)spans)rH   span_keys     rJ   span_getter%test_score_spans.<locals>.span_getter  s    yy""rL   )rH   )attrgetter_prR   _rT)r   r   allow_overlapWRONGr   	_per_typeF)r   r   r   labeled)r   make_docr{   	char_spanr   copyr   r	   score_spansr   startend)rG   rC   r   ri   predr   r   egrk   span	new_spanss              rJ   test_score_spansr     sz   
)C,D
C<<D<<DE	LL1H56	LL1E23	LL2U34JJsO# jjo**t*4DJJsO		B3CFE*$$$E*### jjo**t*4DJJsO		B3RVWFE*$$$E*$$$ NSSUTdJJ@UISJJsO		B3RVWFE*$$$E*$$$U)&&& 	3$F E*$$$E*$$$U)F*** Ts   1)H/c                  n   SS1n SS1n[        5       n[        5       nUR                  XS9  UR                  UR                  UR
                  4[        S5      :X  d   e[        5       nUR                  XS9  UR                  UR                  UR
                  4[        S5      :X  d   eX4-   nUR                  UR                  UR
                  4[        S5      :X  d   eX4-  nUR                  UR                  UR
                  4[        UR                  UR                  UR
                  45      :X  d   eg )Nhihoyo)candri   )rZ   rZ   rZ   )r   r   r   )r   rZ   r]   )setr   r   	precisionrecallfscorer   )r   gold1gold2abcs         rJ   test_prf_scorer     s   $<D4LEEE
AKKTK&KK188,0GGGG
AKKTK&KK188,0GGGG	AKK188,7N0OOOOFAKK188,	
ahh)1   rL   c           	         SnU " U5      nSSS.Ul         U " U5      nSSS.Ul         [        X25      n[        R                  " U/S[	        UR                   R                  5       5      SS	S
S9n[        R                  " U/S[	        UR                   R                  5       5      SS	SS9nUS   S:X  d   eUS   S:X  d   eXV:X  d   e[        R                  " U/S[	        UR                   R                  5       5      SSS9nUS   S:X  d   e[        R                  " U/S[	        UR                   R                  5       5      SS
S9nUS   S:X  d   eg )Nz	some textrR   r   )POSITIVENEGATIVEr   r   r   Fr   g?)labelsmulti_labelpositive_label	thresholdr   
cats_scoreT)r   r   r   cats_macro_frZ   )r   r   r	   
score_catslistkeys)r   rC   gold_docr   rj   scores1scores2rk   s           rJ   test_score_catsr     so   DD!H!$#6HMD!H!%48HMh)G		HMM&&()!G 		HMM&&()!G < C'''< C'''		HMM&&()F .!S(((		HMM&&()F .!S(((rL   )$r   numpy.testingr   r   r   spacy.lang.enr   spacy.scorerr   r   r	   r
   r   spacy.tokensr   r   spacy.trainingr   spacy.training.iob_utilsr   ry   r   r   fixturerK   rN   rl   ro   r   r   r   r   r   r   r   r    rL   rJ   <module>r      s     H  ! R R " " : 	B8	
. : 24FGHI  	A	HI # #L 	 	1:2*?Z7Et1@h5%pEP,+^,,)rL   