
    hq                     X   S SK r S SKrS SKrS SKJr  S SKJrJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJrJr  S SKJrJrJr  S SKJ r   SSKJ!r!  SSS/04SSSS/04/r"\RF                  S 5       r$\RF                  S 5       r%\RF                  S 5       r&\RF                  S 5       r'\RF                  S 5       r(\RF                  S 5       r)\RT                  RW                  SS/5      \RT                  RY                  S5      S  5       5       r-\RT                  RY                  S!5      S" 5       r.\RT                  RY                  S#5      S$ 5       r/\RT                  RY                  S%5      S& 5       r0\RT                  RY                  S'5      S( 5       r1S) r2\RT                  RY                  S*5      S+ 5       r3\RT                  RY                  S,5      S- 5       r4S. r5S/ r6S0 r7S1 r8S2 r9\RT                  Ru                  S3S49S5 5       r;\RT                  Ru                  S3S49S6 5       r<S7 r=S8 r>S9 r?S: r@S; rAS< rBS= rCS> rDS? rE\RT                  RW                  S@SASB/5      SC 5       rFSD rGSE rHSF rISG rJSH rKSI rL\R                  " SJ5       " SK SL5      5       rNg)M    N)assert_equal)registryutil)ENT_IOB)English)Italian)Language)Lookups)EntityRecognizer)BiluoPushDown)DEFAULT_NER_MODEL)DocSpan)Exampleiob_to_biluosplit_bilu_labelVocab   )make_tempdirWho is Shaka Khan?entities      PERSONzI like London and Berlin.)r      LOC)      r   c                      g)Nnon_entities r#       U/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/parser/test_ner.pyneg_keyr&      s    r$   c                      [        5       $ Nr   r#   r$   r%   vocabr)   !   s	    7Nr$   c                     [        U / SQS9$ )N)CaseywenttoNewYork.words)r   )r)   s    r%   docr3   &   s    uGHHr$   c                 z    U SS nU SS nUR                   UR                  S4UR                   UR                  S4/$ )Nr            r   GPE)
start_charend_char)r3   caseynys      r%   entity_annotsr=   +   sH    !HE	QqB			5>>84	U+ r$   c                 f    [        [        U  VVVs/ s H  u  po3PM	     snnn5      5      $ s  snnnf r(   )sortedset)r=   selabels       r%   entity_typesrD   5   s(    #-@-!u-@ABB@s   ,c                 V    [         R                  " US9n[        U R                  U5      $ )NrD   )r   get_actionsstrings)r)   rD   actionss      r%   tsysrJ   :   s#    ''\BG00r$   rC   z
U-JOB-NAMEi  c           
          [        5       n0 nUR                  SUS9n[        R                  " [	        UR
                  S/S9S/S/S/S/S/U /S.5      nS	UR                  R                  U/S
9S   ;   d   eg )Nnerconfigwordr1   r   tagdep)idsr2   tagsheadsdepsr   zJOB-NAME)examplesr5   )r	   create_piper   	from_dictr   r)   movesrG   )rC   nlprN   rL   examples        r%   test_issue1967r\   @   s     *CF
//%/
/CCIIfX&3XGSG	

G ..	.B1EEEEr$   i  c                  n   [        5       n U R                  S5      nUR                  S5        U R                  5         [        5       nUR                  S5        [	        UR                  S5      R                  5      S:X  d   eUR                  S5      R                  nUR                  S   " X0R                  S5      R                  R                  5        UR                  U R                  5       5        SUR                  S5      R                  ;  d   eUR                  S5      R                  S:X  d   eg)zGTest that spurious 'extra_labels' aren't created when initializing NER.rL   CITIZENSHIPr   resize_outputextra_labels)r^   N)r   add_pipe	add_label
initializelenget_pipelabelsmodelattrsrY   n_moves
from_bytesto_bytescfg)rZ   rL   nlp2rg   s       r%   test_issue2179rn   T   s     )C
,,u
CMM- NN9DMM%t}}U#**+q000MM% &&E	KK U(;(A(A(I(IJOOCLLN#u!5!9!9999==&&*::::r$   iQ	  c                      Sn [        U 5      / SQ:X  d   eSn[        U5      / SQ:X  d   eSn[        U5      / SQ:X  d   eSn[        U5      / SQ:X  d   eg	)
z9Test that IOB tags are correctly converted to BILUO tags.)	B-BRAWLER	I-BRAWLERrq   )rp   rq   z	L-BRAWLER)I-ORGrr   B-ORG)rs   L-ORGzU-ORG)B-PERSONzI-PERSONru   )ru   L-PERSONU-PERSON)B-MULTI-PERSONzI-MULTI-PERSONrx   )rx   zL-MULTI-PERSONzU-MULTI-PERSONN)r   )tags1tags2tags3tags4s       r%   test_issue2385r}   e   sh     4E"IIII'E"====0E"FFFFBE"XXXXr$   i
  c            	         [        5       n / nUR                  [        R                  " U R	                  S5      S/ 05      /5        [        S5       Vs/ s H  n[        U5      PM     nnU R                  S5      n[        U5       H  nUR                  U5        M     U R                  5       n[        S5       H6  n0 n[        R                  " U5        U H  nU R                  U/XgSS9  M     M8     gs  snf )	z\Test issue that arises when too many labels are added to NER model.
Used to cause segfault.
zOne sentencer   i  rL      g      ?)sgdlossesdropN)r   extendr   rX   make_docrangestrra   listrb   rc   randomshuffleupdate)	rZ   
train_datairD   rL   entity_type	optimizerr   r[   s	            r%   test_issue2800r   v   s    
 )CJ			3<<7*b9I	JK %*$K0KqCFKL0
,,u
CL)k" * I2Yz"!GJJyiSJI "  1s   C3i  c                     [        5       n U R                  S5      nUR                  S5        U R                  5         / SQnUR                  U:X  d   e[        5       nUR                  S5      nUR
                  nUR                  S   " XQR                  R                  5        UR                  U R                  5       5        UR                  U:X  d   eg)zTest issue that occurred in spaCy nightly where NER labels were being
mapped to classes incorrectly after loading the model, when the labels
were added using ner.add_label().
rL   ANIMAL)OzB-ANIMALzI-ANIMALzL-ANIMALzU-ANIMALr_   N)r   ra   rb   rc   
move_namesrg   rh   rY   ri   rj   rk   )rZ   rL   r   rm   ner2rg   s         r%   test_issue3209r      s     )C
,,u
CMM(NNFJ>>Z'''9D==DJJE	KK 		(9(9:OOCLLN#??j(((r$   c                      [        5       n U R                  S5      nUR                  S5        U R                  5         / SQnS1nUR                  U:X  d   e[        UR                  5      U:X  d   eg)zBTest that labels are inferred correctly when there's a - in label.rL   zLARGE-ANIMAL)r   zB-LARGE-ANIMALzI-LARGE-ANIMALzL-LARGE-ANIMALzU-LARGE-ANIMALN)r   ra   rb   rc   r   r@   rf   )rZ   rL   r   rf   s       r%   test_labels_from_BILUOr      sf    
)C
,,u
CMM.!NNJ F>>Z'''szz?f$$$r$   i  c                  $   [        5       n U R                  S5      nUR                  S5        U R                  5         SU R                  ;   d   eU " S5      nUR                  S5      (       d   eU H  nUR                  S:X  a  M   e   SSS./nU R                  S	5      nUR                  U5        S	U R                  ;   d   eSU R                  ;   d   eU " S5      nUR                  S5      (       d   eU H  nUR                  S:X  a  M   e   g
)zDTest that running an entity_ruler after ner gives consistent resultsrL   PEOPLEhir   r   SOFTWAREspacyrC   patternentity_rulerN)r   ra   rb   rc   
pipe_nameshas_annotationent_iobadd_patterns)rZ   rL   doc1tokenpatternsrulerdoc2s          r%   test_issue4267r      s    )C
,,u
CMM(NNCNN"""t9Dy))))}}!!!  %9:HLL(E	x S^^+++CNN"""t9Dy))))}}!!! r$   i  c                     Sn Sn[        5       nU US.nUR                  SUS9nUR                  S5        UR                  5         U" S5      n[	        UR
                  5      S:X  d   eSUR
                  ;   d   e[        US	S
SS9n[        UR                  5      U/-   Ul        U/nUR                  USXS9  [	        UR
                  5      S:X  d   eSUR
                  ;   d   eg)z:This should not crash or exit with some strange error code   -C6?
beam_widthbeam_densitybeam_nerrM   
SOME_LABELzWhat do you think about Apple ?r5   r7      MY_ORGrC           )r   r   r   r   N)
r   ra   rb   rc   rd   rf   r   r   ents
beam_parse)r   r   rZ   rN   rL   r3   	apple_entdocss           r%   test_issue4313r      s     JL
)C $F ,,z&,
1CMM,NN
/
0Cszz?a3::%%%S!Qh/ICHH~+CH 5DNN4cjNTszz?aszz!!!r$   c                     [         R                  " USU05      nU R                  USS9nU Vs/ s H  oPR                  U5      PM     nnU/ SQ:X  d   eg s  snf )Nr   F)_debug)rw   r   r   B-GPEL-GPEr   )r   rX   get_oracle_sequenceget_class_name)rJ   r3   r=   r[   act_classesactnamess          r%   test_get_oracle_movesr      s_    j-%@AG**75*AK1<=#  %E=AAAA >s   Ac                    X R                   S'   [        USS/S9nSS/n[        R                  " USU05      n[	        UR
                  SSS	S
9[	        UR
                  SSSS
9/UR
                  R                  U'   U R                  U5      nU Vs/ s H  opR                  U5      PM     nnU(       d   eUS   S	:w  d   eUS   S:w  d   eUS   S:w  d   egs  snf )zTest that we don't get stuck in a two word input when we have a negative
span. This could happen if we don't have the right check on the B action.
r&   ABr1   Nr   r   r5   r   r   r   r   ru   rv   	rl   r   r   rX   r   yspansr   r   	rJ   r)   r&   r3   r=   r[   r   r   r   s	            r%   $test_negative_samples_two_word_inputr      s     "HHY
eC:
&C4LMj-%@AG 	WYY1C(WYY1H- GIIOOG **73K1<=#  %E=L58s??8z!!!8z!!!	 >s   Cc                    X R                   S'   [        U/ SQS9n/ SQn[        R                  " USU05      n[	        UR
                  SSSS	9[	        UR
                  SS
SS	9/UR
                  R                  U'   U R                  U5      nU Vs/ s H  opR                  U5      PM     nnU(       d   eUS   S:w  d   eUS   S:w  d   egs  snf )HTest that we exclude a 2-word entity correctly using a negative example.r&   )r   r   Cr1   )NNNr   r   r5   r   r   r   r   ru   Nr   r   s	            r%   &test_negative_samples_three_word_inputr     s    !HHY
e?
+C&Mj-%@AG 	WYY1C(WYY1H- GIIOOG **73K1<=#  %E=L58s??8z!!! >s   Cc                    X R                   S'   [        US/S9nS/n[        R                  " USU05      n[	        UR
                  SSSS	9[	        UR
                  SSS
S	9/UR
                  R                  U'   U R                  U5      nU Vs/ s H  opR                  U5      PM     nnU(       d   eUS   S:w  d   eUS   S:w  d   egs  snf )r   r&   r   r1   Nr   r   r5   r   r   r   rw   r   r   s	            r%   test_negative_samples_U_entityr     s    !HHY
eC5
!CFMj-%@AG 	WYY1C(WYY1H- GIIOOG **73K1<=#  %E=L58s??8z!!! >s   C
c                     [         R                  " US9n[        U R                  USS9nUR                  S   S:X  d   eg )NrF   r"   )incorrect_spans_keyr&   )r   rG   rH   rl   )r)   rD   rI   rJ   s       r%   %test_negative_sample_key_is_in_configr   *  s;    ''\BG^TD88I.000r$   zNo longer supported)reasonc                 8   / SQn/ SQn[        XS9n[        R                  " X1US.5      n[        U R                  5      nSnU H  nUc  M  US:X  a#  UR                  UR                  S5      S5        M1  [        U5      u  pUR                  UR                  S5      U	5        UR                  UR                  S	5      U	5        UR                  UR                  S
5      U	5        UR                  UR                  S5      U	5        M     UR                  U5        g )N)r   52Bomber)NNz	L-PRODUCTr1   )r2   r   Mr   ILUr   r    r   r   r   r   	r   r   rX   r   rH   
add_actionindexr   r   
en_vocabr2   
biluo_tagsr3   r[   rY   
move_typesrP   actionrC   s
             r%   test_oracle_moves_missing_Br   2  s    !E*J
h
$C*%MNG(**+E/J;CZZ--c2B7,S1MFZ--c2E:Z--c2E:Z--c2E:Z--c2E:  
g&r$   c                 r   / SQn/ SQn[        XS9n[        R                  " USU05      n[        U R                  5      nSnU H_  nUc  M  US:X  a#  UR                  UR                  S5      S5        M1  [        U5      u  pUR                  UR                  U5      U	5        Ma     UR                  U5        g )N)	
production
ofNorthropr   zCorp.r   z'sradar)	r   r   r   rs   Nrr   rt   r   r   r1   r   r   r   r   r   r   s
             r%   test_oracle_moves_whitespacer   L  s    VEKJ
h
$Cj*%=>G(**+E/J;CZZ--c2B7,S1MFZ--f5u=  
g&r$   c                  T   [        5       n U " S5      n0 nU R                  SUS9nU Vs/ s H  oDR                  PM     sn/ SQ:X  d   eU Vs/ s H  oDR                  PM     sn/ SQ:X  d   eUR                  R                  SS5        UR                  S5        UR                  R                  U/5      S   nUR                  R                  US	5        UR                  R                  US	5        UR                  R                  US	5        UR                  R                  US
5      (       d   e[        5       nU" S5      n0 nUR                  SUS9nUR                  / USS /SS9  U Vs/ s H  oDR                  PM     sn/ SQ:X  d   eU Vs/ s H  oDR                  PM     sn/ SQ:X  d   eUR                  R                  SS5        UR                  R                  SS5        UR                  S5        UR                  R                  U/5      S   n	UR                  R                  U	S	5        UR                  R                  U	S	5        UR                  R                  U	S	5        UR                  R                  U	S
5      (       a   eUR                  R                  U	S5      (       d   eUR                  R                  U	S5        UR                  R                  U	S
5      (       a   eUR                  R                  U	S5      (       d   egs  snf s  snf s  snf s  snf )z5Test succesful blocking of tokens to be in an entity.I live in New YorkrL   rM   r   r   r   r   r   r7   r   r8   r   r   r   r6   
unmodifiedblockeddefault)r   r   r   r   r      zU-N)r   rW   ent_iob_	ent_type_rY   r   rb   
init_batchapply_transitionis_validset_ents)
nlp1r   rN   ner1r   state1rm   r   r   state2s
             r%   test_accept_blocked_tokenr  a  s    9D$%DFE&1D(,-uNN-1EEEE)-.OO.2FFFF 	JJ!R NN5ZZ""D6*1-FJJ,JJ,JJ,::vw//// 9D$%DFE&1D 	MM"tAayk<M@(,-uNN-1GGGG)-.OO.2FFFF 	JJ!R JJ!R NN5ZZ""D6*1-FJJ,JJ,JJ,zz""673333::vt,,,,JJ-zz""673333::vt,,,,O ..* ..s   LL-L L%c            	         SSS/04SS/ 04/n [        5       n/ nU  H>  nUR                  [        R                  " UR	                  US   5      US   5      5        M@     UR                  SSS	9nUR                  S
5        UR                  5         [        S5       H2  n0 n[        R                  " USS9nU H  nUR                  XS9  M     M4     g)z7Test that training an empty text does not throw errors.r   r   r   r   r   r5   rL   Tlastr   r      sizer   N)r   appendr   rX   r   ra   rb   rc   r   r   	minibatchr   	r   rZ   train_examplestrL   itnr   batchesbatchs	            r%   test_train_emptyr    s     

->,?@A	j"J
 )CNg//QqT0BAaDIJ 
,,u4,
(CMM(NNQx..a8EJJuJ,  r$   c            	         SSS/04/n [        5       n/ nU  H>  nUR                  [        R                  " UR	                  US   5      US   5      5        M@     UR                  SSS9nUR                  S	5        UR                  5         [        S
5       HU  n0 n[        R                  " USS9nU H5  n[        R                  " [        5         UR                  XS9  SSS5        M7     MW     g! , (       d  f       MM  = f)zFTest that the deprecated negative entity format raises a custom error.r   r   )r   r   z!PERSONr   r5   rL   Tr  r   r   r  r  r	  N)r   r
  r   rX   r   ra   rb   rc   r   r   r  pytestraises
ValueErrorr   r  s	            r%   test_train_negative_deprecatedr    s     

-?,@ABJ )CNg//QqT0BAaDIJ 
,,u4,
(CMM(NNQx..a8Ez*

5
0 +*   +*s   C//
C>c                     [        5       n U R                  S5        U R                  5         U " S5      nU Vs/ s H  o"R                  PM     sn/ SQ:X  d   eU Vs/ s H  o"R                  PM     sn/ SQ:X  d   e0 nU R                  SUS9nUR                  R                  SS5        UR                  S5        UR                  R                  U/5      S	   nUR                  R                  US
5      (       d   eUR                  R                  US5      (       d   eUR                  R                  US
5        UR                  R                  US5      (       d   eUR                  R                  US5      (       d   eg s  snf s  snf )NrL   r   )r   r   r   r   r   r   rM   r7   r   r8   r   r   zU-GPEzI-GPEr   )r   ra   rc   r   r   rW   rY   r   rb   r   r   r   )rZ   r3   r   rN   r   states         r%   test_overwrite_tokenr    sL   
)CLLNN
"
#C(+,uNN,0IIII),-OO-1EEEEF??5?0DJJ!R NN5JJ!!3%(+E::ug....::ug....JJw/::ug....::ug.... --s   FFc                      [        5       n U R                  S5      nUR                  S5        U R                  5         U " S5      n/ SQnU Vs/ s H  oDR                  PM     snU:X  d   eg s  snf )NrL   MY_LABELz3John is watching the news about Croatia's elections)	r   r   r   r   r   r   r   r   r   )r   ra   rb   rc   r   )rZ   rL   r3   resultr   s        r%   test_empty_nerr    s_    
)C
,,u
CMM*NN
C
DC:F(+,uNN,666,s   A,c                     [        5       n SSS./nU R                  S5      nU R                  S5      nUR                  S5        U R                  5         UR	                  U5        U " S5      n/ SQn/ S	QnU Vs/ s H  owR
                  PM     snU:X  d   eU Vs/ s H  owR                  PM     snU:X  d   eg
s  snf s  snf )zLTest that an NER works after an entity_ruler: the second can add annotationsTHINGThisr   r   rL   r  *This is Antti Korhonen speaking in Finlandr   r   r   r   r   r   r   r   r   r   r   r   r   r   Nr   ra   rb   rc   r   r   r   )rZ   r   r   untrained_nerr3   expected_iobsexpected_typesr   s           r%   test_ruler_before_nerr)    s    
)C "f56HLL(E LL'MJ'NN	x 
:
;C7M6N(+,uNN,===),-OO-??? --s   9B:B?c                 z    SS0nS[         0n[        R                  " USS9S   n[        X40 UD6  [        X5        g )Nupdate_with_oracle_cut_sized   rg   T)validate)r   r   resolver   )r   rN   rl   rg   s       r%   test_ner_constructorr/    sG    %sF %
&CS409EX//X%r$   c                     [        5       n U R                  SSS9nUR                  S5        U R                  5         SSS./nU R                  S5      nUR	                  U5        U " S	5      n/ S
Qn/ SQnU Vs/ s H  owR
                  PM     snU:X  d   eU Vs/ s H  owR                  PM     snU:X  d   egs  snf s  snf )zTTest that an entity_ruler works after an NER: the second can overwrite O annotationsrL   uner)namer  r   r!  r   r   r"  r#  r$  Nr%  )rZ   r&  r   r   r3   r'  r(  r   s           r%   test_ner_before_rulerr3    s    
)C LLVL4MJ'NN "f56HLL(E	x 
:
;C7M6N(+,uNN,===),-OO-??? --s   8B9B>c                  ^   [        5       n U R                  SSSS.S9  U R                  S5      nUR                  S5        U R                  5         U " S5      n/ S	Qn/ S
QnU Vs/ s H  oUR                  PM     snU:X  d   eU Vs/ s H  oUR
                  PM     snU:X  d   egs  snf s  snf )zITest functionality for blocking tokens so they can't be in a named entityblockerr   r7   )startendrM   rL   r  z,This is Antti L Korhonen speaking in Finland)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   N)r   ra   rb   rc   r   r   )rZ   r&  r3   r'  r(  r   s         r%   test_block_nerr8    s     )CLLQq#9L:LL'MJ'NN
<
=C<M5N(+,uNN,===),-OO-??? --s   $B%B*	use_upperTFc                    [        5       nUR                  SSSU 00S9n/ n[         Hf  u  pEUR                  [        R
                  " UR                  U5      U5      5        UR                  S5       H  nUR                  US   5        M     Mh     UR                  5       n[        S5       H  n0 n	UR                  X7U	S9  M     W	S   S	:  d   eS
n
U" U
5      nUR                  n[        U5      S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   e[!        5        nUR#                  U5        [$        R&                  " U5      nU" U
5      nUR                  n[        U5      S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUR)                  S5      nUR*                  R,                  S   U :X  d   eUR                  S5        U" U
5      nUR                  n[        U5      S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   e S S S 5        / SQnUR/                  U5       Vs/ s H  oR1                  [2        /5      PM     nnUR/                  U5       Vs/ s H  oR1                  [2        /5      PM     nnU Vs/ s H
  oA" U5      PM     sn Vs/ s H  oR1                  [2        /5      PM     nn[5        UU5        [5        UU5        Sn
UR                  U
5      n[7        USSSSS9/Ul        UR                  n[        U5      S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R8                  S:X  d   eUR)                  S5      " U5      nUR                  n[        U5      S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R8                  S:X  d   eUS   R                  S:X  d   eUS   R                  S:X  d   eUS   R8                  S:X  d   eg ! , (       d  f       GN= fs  snf s  snf s  snf s  snf )NrL   rg   r9  rM   r   r   2   r   r   h㈵>I like London.r5   r   Londonr   	has_upperRANDOM_NEW_LABEL)zJust a sentence.z$Then one more sentence about London.zHere is another one.r>  zI like London and London.r6   i  )rC   kb_id)r   ra   
TRAIN_DATAr
  r   rX   r   getrb   rc   r   r   r   rd   textlabel_r   to_diskr   load_model_from_pathre   rg   rh   pipeto_arrayr   r   r   rB  )r9  rZ   rL   r  rE  annotationsentr   r   r   	test_textr3   r   tmp_dirrm   r   ents2r   doc3ents3textsbatch_deps_1batch_deps_2no_batch_depss                           r%   test_overfitting_IOrV    s    )C
,,ugY/G%H,
ICN'g//T0BKPQ??:.CMM#a&! / (  I2Y

>
@  %=7""" !I
i.C88Dt9>>7<<8###7>>U""" 
7G((1I		5zQQx}}(((Qx%'''}}U#zz,	999)*I		5zQQx}}(((Qx%'''! 
&E 8;xxGLL'+LG7:xxGLL'+LGOT8UutTu8UV8U\\7),8UMV|,}- ,I
,,y
!CS!Qe489CH88Dt9>>7<<8###7>>U"""7==D   
,,u
c
"C88Dt9>>7<<8###7>>U"""7==D   7<<8###7>>U"""7==Ac 
2 HG8UVs%   +DP/QQ+QQ/
P>c                     Sn Sn[        5       nU US.nUR                  SUS9n/ n[         Hf  u  pgUR                  [        R
                  " UR                  U5      U5      5        UR                  S5       H  nUR                  US   5        M     Mh     UR                  5       n	0 n
UR                  XYU
S9  S	nUR                  U5      nU/nUR                  U5      nUR                  U5      S
   n[        [        U5      5       H;  nUR                   H(  nUUUS-   U4   nSnS
U-
  Us=::  a  SU-   ::  a  M%   e   e   M=     g )Nr   r   r   r   rM   r   r   r<  r>  r   r5   r=  )r   ra   rC  r
  r   rX   r   rD  rb   rc   r   predictscored_entsr   rd   rf   )r   r   rZ   rN   rL   r  rE  rK  rL  r   r   rM  r3   r   beamsentity_scoresjrC   scoreepss                       r%   test_beam_ner_scoresr_  l  sJ   JL
)C $F ,,z&,
1CN'g//T0BKPQ??:.CMM#a&! / (  I FJJ~VJ< !I
,,y
!C5DKKEOOE*1-M3s8_ZZE!1a!eU"34ECs7e.q3w.....   r$   c                    [        5       nSnSnUUU S.nUR                  SUS9n/ n[         Hf  u  pxUR                  [        R
                  " UR                  U5      U5      5        UR                  S5       H  n	UR                  U	S   5        M     Mh     UR                  5       n
[        S5       H  n0 nUR                  XjUS	9  M     WS   S:  d   eS
nUR                  U5      /nUR                  U5      nUR                  U5      S   nUS   S:X  d   eUS   S:X  d   e[        U" U5      R                  5      S:X  d   e[!        5        nUR#                  U5        [$        R&                  " U5      nUR                  U5      /nUR)                  S5      nUR                  U5      nUR                  U5      S   nUS   S:X  d   eUS   S:X  d   e S S S 5        UR                  U5      n[	        UU5      n[+        USSS5      /UR,                  R.                  U '   U/n[        S5       H  n0 nUR                  UXS	9  M     [        U" U5      R                  5      S:X  d   eg ! , (       d  f       N= f)Nr   r   r   r   r   r   rM   r   r   r;  r<  zI like Londonr   )r   r6   r   g      ?)r   r6   r   r   r5   r6   r   r   )r   ra   rC  r
  r   rX   r   rD  rb   rc   r   r   rX  rY  rd   r   r   rG  r   rH  re   r   	referencer   )r&   rZ   r   r   rN   rL   r  rE  rK  rL  r   r   r   rM  r   rZ  r[  rN  rm   docs2r   beams2entity_scores2neg_docneg_exneg_train_exampless                             r%   test_beam_overfitting_IOri    s{   
)CJL $&F
 ,,z&,
1CN'g//T0BKPQ??:.CMM#a&! / (  I 2Y

>
@  *&&&  ILL#$DKKEOOE*1-M'3...)*c111s9~""#q((( 
7G((1y)*}}Z(e$))&1!4m,333./3666 
 ll9%GWg&F'+GQ5'A&BF7# 2Y

%9
D 
 s9~""#q(((+ 
s   BI++
I9c                    [        5       nSnSnUUU S.nUR                  SUS9nSnUR                  U5      nUR                  S5        UR                  S5        [        R
                  " US	S
/05      n[        UR                  SSS5      [        UR                  SSS5      [        UR                  SSS5      /UR                  R                  U '   UR                  5       n	[        S5       H  n
0 nUR                  U/XS9  M     g)zCheck that the NER update works with a negative annotation that is a different label of the correct one,
or partly overlapping, etcr   r   ra  r   rM   r   r   ORGr   r   r   r   r6   r5   r<  N)r   ra   r   rb   r   rX   r   rb  r   rc   r   r   r&   rZ   r   r   rN   rL   
train_textrf  r[   r   r   r   s               r%   test_neg_annotationrn    s    )CJL $&F
 ,,z&,
1C%Jll:&GMM(MM%*7H6I)JKGW1e,W1h/W1h/(GG$  I1X

G9)
; r$   c                    [        5       nSnSnUUU S.nUR                  SUS9nSnUR                  U5      nUR                  S5        UR                  S5        [        R
                  " US	S
/05      n[        UR                  SSS5      /UR                  R                  U '   [        UR                  R                  5      S:X  d   eUR                  R                  S   R                  S:X  d   eUR                  R                  S   R                  S:X  d   e[        UR                  R                  U    5      S:X  d   eUR                  R                  U    S   R                  S:X  d   eUR                  R                  U    S   R                  S:X  d   eUR                  5       n	[        S5       H9  n
0 n[        R                   " ["        5         UR%                  U/XS9  S S S 5        M;     g ! , (       d  f       MM  = f)Nr   r   ra  r   rM   r   r   r   r   r   r   r   r5   r   z
Shaka Khanr<  )r   ra   r   rb   r   rX   r   rb  r   rd   r   rE  rF  rc   r   r  r  r  r   rl  s               r%   test_neg_annotation_conflictrp    s   
)CJL $&F
 ,,z&,
1C%Jll:&GMM(MM%*7H6I)JKG(,W->->1h(O'PGG$w  %%&!+++!!!$))\999!!!$++x777w  &&w/0A555""7+A.33|CCC""7+A.55AAA I1X]]:&JJyiJ? '& &&s   G44
H	c                    [        5       nSnSnUUU S.nUR                  SUS9  / SQn/ SQn[        UR                  US9n[        R
                  " US	U05      n[        UR                  S
SS5      n	U	/UR                  R                  U '   UR                  5       n
[        S5       H  n0 nUR                  U/XS9  M     SW;   d   eg)z/Regression test for previously flakey behaviourr   r   ra  r   rM   )5FEDERALNATIONALMORTGAGEASSOCIATION(FannieMaez):Postedyieldson30yearmortgagecommitmentsfordeliverywithinr|  daysrv  pricedatpar)z9.75%,standardconventionalfixed-rate	mortgages;z8.70r  r  z6/2r  cappedoner  r}  
adjustabler  r  r0   Source:TelerateSystemszInc.)5rs   rr   rr   rt   r   rs   rt   r   r   r   r   B-DATEL-DATEr   r   r   r   r   r  r  r   r   r   r   r   	B-PERCENT	L-PERCENTr   r   r   r   r   r   r   r   r  r  r   z
U-CARDINALr   r   r  zI-DATEr  r   r   r   r   r   r   r   r   r   r1   rL   r;  5   rk  r7   r<  N)r   ra   r   r)   r   rX   r   rb  r   rc   r   r   )r&   rZ   r   r   rN   tokensiobr3   r[   neg_spanr   r   r   s                r%   test_beam_valid_parser    s    
)CJL $&F
 LLFL+ VF xC ciiv
&CeS\2GG%%r2u5H(0zGG$ I1X

G9)
;  r$   c                 "   [        5       nUR                  [        R                  ;   d   e[	        5       UR
                  l        [        UR
                  R                  5      (       a   eUR                  S5        U R                  [        R                  5         UR                  5         SU R                  ;   d   e S S S 5        U R                  5         UR
                  R                  R                  S5        SUR
                  R                  R!                  S5      S'   U R                  [        R                  5         UR                  5         SU R                  ;  d   e S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)NrL   W033lexeme_normr   a)r   langr   LEXEME_NORM_LANGSr
   r)   lookupsrd   ra   at_levelloggingDEBUGrc   rE  clear	add_table	get_table)caplogrZ   s     r%   test_ner_warns_no_lookupsr  "  s   
)C88t-----	CII399$$%%%%LL		'$$$ 
( LLNII.69CII.s3		'V[[((( 
(	' 
(	' 
(	's   #E/#F /
E= 
Fr5  c                   $    \ rS rSrSS jrS rSrg)BlockerComponent1i3  c                 (    X l         X0l        X@l        g r(   )r6  r7  r2  )selfrZ   r6  r7  r2  s        r%   __init__BlockerComponent1.__init__5  s    
	r$   c                 V    UR                  / XR                  U R                   /SS9  U$ )Nr   r   )r   r6  r7  )r  r3   s     r%   __call__BlockerComponent1.__call__:  s)    R#jj488"<!=|T
r$   )r7  r2  r6  N)
my_blocker)__name__
__module____qualname____firstlineno__r  r  __static_attributes__r#   r$   r%   r  r  3  s    
r$   r  )Or  r   r  numpy.testingr   r   r   r   spacy.attrsr   spacy.lang.enr   spacy.lang.itr   spacy.languager	   spacy.lookupsr
   spacy.pipeliner   $spacy.pipeline._parser_internals.nerr   spacy.pipeline.nerr   spacy.tokensr   r   spacy.trainingr   r   r   spacy.vocabr   r   rC  fixturer&   r)   r3   r=   rD   rJ   markparametrizeissuer\   rn   r}   r   r   r   r   r   r   r   r   r   r   skipr   r   r  r  r  r  r  r)  r/  r3  r8  rV  r_  ri  rn  rp  r  r  factoryr  r#   r$   r%   <module>r     s       &    ! ! # ! + > 0 " B B   J):(;<= :/P"QR
     I I   C C 1 1
 <.14F  2F$ 4; ;  4Y Y  4J J* 4) )&%$ 4" "2 4" "4B","&"&1 ./' 0'2 ./' 0'(.-b-*1*/*7@(&@*@ tUm4J 5JZ /F7)t<:@> :)" 
)  r$   