
    h\?                        S SK r S SKJrJr  S SKrS SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  \ R                  \R                  " S5      S	 5       5       r\ R                  S
 5       r\ R                  S 5       r\ R                  S 5       rS rS rS rS rS rS rS rS rS rS rS rS rS r \ RB                  RE                  SSS/5      S 5       r#S r$S r%S  r&S! r'S" r(S# r)S$ r*S% r+S& r,S' r-S( r.g))    N)NumpyOpsget_current_ops)registry)MatchPatternError)make_tempdir)Span)Examplespan_ruler_patternsc                  T    SSSS.SSS0SS0/S.SS	S0/S
S.SSSS./S.SSS.SSS./$ )NHELLOhello worldhello1)labelpatternidBYELOWERbyer   r   ORTHhello2COMPLEXfoo*)r   OPTECH_ORGApple	Microsoft r       ^/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/pipeline/test_span_ruler.pypatternsr"      sa     m8Dgu%57G$HI'8&9J%s)C(DE15 r    c                      SSS.SSS./$ )NFOOBARzfoo barr   BARBAZzbar bazr   r   r    r!   overlapping_patternsr&      s     y1y1 r    c                      SSS.SSS.SSS./$ )NPERSONDinar   ORGACMEACMr   r   r    r!   person_org_patternsr-   !   s&     v.F+E* r    c                     U SSS./-   $ )NDATE	June 14thr   r   )r-   s    r!   person_org_date_patternsr1   *   s    F{"K!LLLr    c                    [         R                  " S5      nUR                  SSS0S9nUR                  U 5        [	        S UR
                  R                  R                  5        5       5      nUS:  d   eUR                  / 5        [	        S UR
                  R                  R                  5        5       5      nXC:X  d   eg	)
z/Test that patterns don't get added excessively.xx
span_rulervalidateTconfigc              3   8   #    U  H  n[        U5      v   M     g 7fNlen.0mms     r!   	<genexpr>,test_span_ruler_add_empty.<locals>.<genexpr>4   s     K*JBB*J   r   c              3   8   #    U  H  n[        U5      v   M     g 7fr9   r:   r<   s     r!   r?   r@   7   s     I(H"c"gg(HrA   N)spacyblankadd_pipeadd_patternssummatcher	_patternsvalues)r"   nlprulerpattern_countafter_counts        r!   test_span_ruler_add_emptyrO   /   s    
++d
CLLz4.@LAE	x K%--*A*A*H*H*JKKM1	rI(?(?(F(F(HIIK'''r    c                 Z   [         R                  " S5      nUR                  S5      nUR                  U 5        [	        U5      [	        U 5      :X  d   e[	        UR
                  5      S:X  d   eSU;   d   eSU;   d   eU" S5      n[	        UR                  S   5      S:X  d   eUR                  S   S	   R                  S:X  d   eUR                  S   S	   R                  S
:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S:X  d   eg )Nr3   r4      r   r   hello world bye byerL      r   r       )	rC   rD   rE   rF   r;   labelsspanslabel_id_r"   rK   rL   docs       r!   test_span_ruler_initr\   ;   s"   
++d
CLL&E	x u:X&&&u||!!!eE>>
#
$Csyy!"a'''99Wa ''722299Wa $$00099Wa ''500099Wa $$***r    c                     [         R                  " S5      n U R                  S5      n[        U5      S:X  d   e[        UR                  5      S:X  d   eU R
                  S/:X  d   e[        R                  " [        5         U " S5      nS S S 5        [        WR                  S   5      S:X  d   eg ! , (       d  f       N-= f)Nr3   r4   r   rR   rL   )
rC   rD   rE   r;   rV   
pipe_namespytestwarnsUserWarningrW   )rK   rL   r[   s      r!   !test_span_ruler_no_patterns_warnsrb   K   s    
++d
CLL&Eu:??u||!!!>>l^+++	k	"'( 
#syy!"a''' 
#	"s   	B11
B?c                    [         R                  " S5      nUR                  S5      n[        UR                  5      S:X  d   eUR                  S U S9  [        UR                  5      S:X  d   eU" S5      nUR                  S   S   R                  S	:X  d   eUR                  S   S
   R                  S:X  d   eUR                  S5        SSS00UR                  S   S   S'   UR                  S5      n[        UR                  5      S:X  d   eUR                  5         [        UR                  5      S:X  d   eU" S5      nUR                  S   S   R                  S	:X  d   eUR                  S   S
   R                  S:X  d   eg )Nr3   r4   r   c                      / $ r9   r   r   r    r!   <lambda>/test_span_ruler_init_patterns.<locals>.<lambda>[       Rr    )r"   rQ   rR   rL   r   rT   r   r"   @miscr
   
initialize
components)
rC   rD   rE   r;   rV   ri   rW   rX   remove_piper7   rZ   s       r!   test_span_ruler_init_patternsrl   V   sx   
++d
CLL&Eu||!!!	Z(3u||!!!
#
$C99Wa ''722299Wa ''5000OOL! 	W34<CJJ|\*<8 LL&Eu||!!!NNu||!!!
#
$C99Wa ''722299Wa ''5000r    c                    [         R                  " S5      nUR                  S5      nUR                  U 5        [	        UR
                  5      S:X  d   eUR                  S 5        [	        UR
                  5      S:X  d   eg)z)Test that initialization clears patterns.r3   r4   rQ   c                      / $ r9   r   r   r    r!   re   ,test_span_ruler_init_clear.<locals>.<lambda>t   rg   r    r   N)rC   rD   rE   rF   r;   rV   ri   )r"   rK   rL   s      r!   test_span_ruler_init_clearrp   n   sh    
++d
CLL&E	x u||!!!	Z u||!!!r    c                     [         R                  " S5      nUR                  S5      nUR                  U 5        [	        UR
                  5      S:X  d   eU" S5      n[	        UR                  S   5      S:X  d   eUR                  5         [	        UR
                  5      S:X  d   e[        R                  " [        5         U" S5      nS S S 5        [	        UR                  S   5      S:X  d   eg ! , (       d  f       N-= f)Nr3   r4   rQ   r   rL   rT   r   )rC   rD   rE   rF   r;   rV   rW   clearr_   r`   ra   rZ   s       r!   test_span_ruler_clearrs   x   s    
++d
CLL&E	x u||!!!
m
Csyy!"a'''	KKMu||!!!	k	"-  
#syy!"a''' 
#	"s   ?	C//
C=c                 P   [         R                  " S5      nUR                  SSS0S9nUR                  U 5        UR	                  S5      nUSS /UR
                  S	'   U" U5      n[        UR
                  S	   5      S
:X  d   eUR
                  S	   S   USS :X  d   eUR
                  S	   S   R                  S:X  d   eUR
                  S	   S   R                  S:X  d   eUR
                  S	   S   R                  S:X  d   eUR
                  S	   S   R                  S:X  d   eg )Nr3   r4   	overwriteFr6   OH HELLO WORLD bye byer   rS   rL      rT   r   r   r   rU   )	rC   rD   rE   rF   make_docrW   r;   rX   rY   rZ   s       r!   test_span_ruler_existingry      s)   
++d
CLL{E.BLCE	x 
,,/
0Ca(CIIg
c(Csyy!"a'''99Wa C!H,,,99Wa ''722299Wa $$00099Wa ''500099Wa $$***r    c                    [         R                  " S5      nUR                  SSS0S9nUR                  U 5        UR	                  S5      nUSS /UR
                  S	'   U" U5      n[        UR
                  S	   5      S:X  d   eUR
                  S	   S   R                  S
:X  d   eUR
                  S	   S   R                  S
:X  d   eUR
                  S	   S   R                  S:X  d   eg )Nr3   r4   ru   Tr6   rv   r   rS   rL   r   rT   r   )	rC   rD   rE   rF   rx   rW   r;   rX   textrZ   s       r!   "test_span_ruler_existing_overwriter|      s    
++d
CLL{D.ALBE	x 
,,/
0Ca(CIIg
c(Csyy!"a'''99Wa ''722299Wa %%00099Wa ''5000r    c                 0   [         R                  " S5      nUR                  S5      nUR                  U 5        [	        U5      [	        U 5      :X  d   e[	        UR
                  5      S:X  d   eUR                  5       n[         R                  " S5      nUR                  S5      n[	        U5      S:X  d   e[	        UR
                  5      S:X  d   eUR                  U5      n[	        U5      [	        U 5      :X  d   e[	        UR
                  5      S:X  d   e[	        UR                  5      [	        UR                  5      :X  d   eUR                   H  nXeR                  ;   a  M   e   [        UR
                  5      [        UR
                  5      :X  d   eg )Nr3   r4   rQ   r   )
rC   rD   rE   rF   r;   rV   to_bytes
from_bytesr"   sorted)r"   rK   rL   ruler_bytesnew_nlp	new_rulerr   s          r!   test_span_ruler_serialize_bytesr      sQ   
++d
CLL&E	x u:X&&&u||!!!.."Kkk$G  .Iy>Qy A%%%$$[1Iy>S]***y A%%%y!!"c%..&9999>>,,,,, ")""#vell';;;;r    c                     [         R                  " S5      n U R                  S5      nU R                  SSSS0S9nSSS0/S	.nSS
S0/S	.n[        R                  " [
        5         UR                  U/5        S S S 5        UR                  U/5        [        R                  " [        5         UR                  U/5        S S S 5        g ! , (       d  f       NV= f! , (       d  f       g = f)Nr3   r4   validated_span_rulerr5   T)namer7   r   r   r   ASDF)rC   rD   rE   r_   raises
ValueErrorrF   r   )rK   rL   validated_rulervalid_patterninvalid_patterns        r!   test_span_ruler_validater      s    
++d
CLL&Ell1:t:L # O &GW3E2FGM 'fg5F4GHO 
z	"O,- 
#   -1 
(	)$$o%67 
*	) 
#	" 
*	)s   %C,C
C
C'c           
          [         R                  " S5      nUR                  SSS0S9nUR                  U 5        [	        UR
                  5      [	        [        U  Vs/ s H  o3S   PM	     sn5      5      :X  d   eg s  snf )Nr3   r4   ru   Tr6   r   )rC   rD   rE   rF   r   rV   set)r"   rK   rL   ps       r!   test_span_ruler_propertiesr      sj    
++d
CLL{D.ALBE	x %,,6#8.L8az8.L*M#NNNN.Ls   A;c                 f   [         R                  " S5      nUR                  S5      nUR                  U 5        U" UR	                  S5      5      n[        UR                  S   5      S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S	:X  d   eg )
Nr3   r4   foo bar bazrL   rS   r   r$   rT   r%   rC   rD   rE   rF   rx   r;   rW   rX   r&   rK   rL   r[   s       r!   !test_span_ruler_overlapping_spansr      s    
++d
CLL&E	+,
]+
,Csyy!"a'''99Wa ''833399Wa ''8333r    c                 "   [         R                  " S5      nUR                  S5      nUR                  U 5        SnU" UR	                  U5      5      n[        UR                  S   5      S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S	:X  d   eUR	                  U5      n[        USSSS
9/UR                  S'   UR                  [        XE5      /5      nUS   S:X  d   eUS   S:X  d   eg )Nr3   r4   r   rL   rS   r   r$   rT   r%   r   spans_ruler_pg      ?spans_ruler_rg      ?)rC   rD   rE   rF   rx   r;   rW   rX   r   evaluater	   )r&   rK   rL   r{   pred_docref_docscoress          r!   test_span_ruler_scorerr      s   
++d
CLL&E	+,DS\\$'(Hx~~g&'1,,,>>'"1%,,888>>'"1%,,888ll4 G"7AqABGMM'\\78567F/"c)))/"c)))r    	n_processrT   rS   c                 D   [        [        [        5      (       d  U S:  a  S/nSSS./n[        R                  " S5      nUR                  S5      nUR                  U5        UR                  USS9 H+  nUR                  S	    H  nUR                  S:X  a  M   e   M-     g g )
NrS   zI enjoy eating Pizza Hut pizza.FASTFOODz	Pizza Hutr   r3   r4   )r   rL   )

isinstancer   r   rC   rD   rE   rF   piperW   rX   )r   textsr"   rK   rL   r[   ents          r!   test_span_ruler_multiprocessingr      s    /8,,	A23([ABkk$\*8$88EQ8/Cyy)zzZ/// * 0 1>r    c                    [         R                  " S5      nUR                  S5      nUR                  U 5        [	        5        nUR                  US-  5        UR                  US-  5        [        R                  " [        5         UR                  US-  5        S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nr3   r4   
test_rulernon_existing_dir)
rC   rD   rE   rF   r   to_disk	from_diskr_   r   r   )r"   rK   rL   ds       r!   test_span_ruler_serialize_dirr      s    
++d
CLL&E	x 	1a,&'L()]]:&OOA 223 ' 
 '& 
s%   AB=B,B=,
B:	6B==
Cc                 ^   [         R                  " S5      nUR                  S5      nUR                  U 5        U" UR	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S	:X  d   eUR                  S5        U" UR	                  S5      5      n[        UR                  S   5      S:X  d   e[        UR                  5      S
:X  d   eg )Nr3   r4   Dina went to schoolrw   rL   rT   r   r(   r)   rS   rC   rD   rE   rF   rx   r;   r"   rW   rX   r{   remover-   rK   rL   r[   s       r!   test_span_ruler_remove_basicr   	  s   
++d
CLL&E	*+
23
4Cu~~!###syy!"a'''99Wa ''833399Wa %%///	LL
23
4Csyy!"a'''u~~!###r    c                    [         R                  " S5      nUR                  S5      nUR                  U 5        [	        UR
                  5      S:X  d   e[        R                  " [        5         UR                  S5        S S S 5        [        R                  " [        5         UR                  S5        S S S 5        g ! , (       d  f       NC= f! , (       d  f       g = f)Nr3   r4   rw   NE)rC   rD   rE   rF   r;   r"   r_   r   r   r   remove_by_id)r-   rK   rL   s      r!   *test_span_ruler_remove_nonexisting_patternr     s    
++d
CLL&E	*+u~~!###	z	"T 
#	z	"4  
#	" 
#	"	"	"s   .B="C=
C
Cc                    [         R                  " S5      nUR                  S5      nUR                  U 5        U" UR	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S	:X  d   eUR                  S   S
   R                  S:X  d   eUR                  S   S
   R                  S:X  d   eUR                  S5        U" UR	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S
:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S5        [        R                  " [        5         U" UR	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   e S S S 5        g ! , (       d  f       g = f)Nr3   r4   zDina founded the company ACME.rw   rL   rS   r   r(   r)   rT   r*   r+   zDina founded the company ACME)rC   rD   rE   rF   rx   r;   r"   rW   rX   r{   r   r_   r`   ra   r   s       r!   'test_span_ruler_remove_several_patternsr   #  s   
++d
CLL&E	*+
=>
?Cu~~!###syy!"a'''99Wa ''833399Wa %%///99Wa ''500099Wa %%///	LL
<=
>Cu~~!###syy!"a'''99Wa ''500099Wa %%///	LL	k	"CLL!@AB5>>"a'''399W%&!+++ 
#	"	"s   !AH<<
I
c                 $   [         R                  " S5      nUR                  S5      nUR                  U 5        U" UR	                  S5      5      n[        UR                  S   5      S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S	   R                  S
:X  d   eUR                  S   S	   R                  S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S   S   R                  S:X  d   eUR                  S
5        UR                  S5        U" UR	                  S5      5      n[        UR                  S   5      S	:X  d   eg )Nr3   r4   *Dina founded the company ACME on June 14thrL   rw   r   r(   r)   rT   r*   r+   rS   r/   r0   r   )
rC   rD   rE   rF   rx   r;   rW   rX   r{   r   r1   rK   rL   r[   s       r!   (test_span_ruler_remove_patterns_in_a_rowr   ;  sq   
++d
CLL&E	/0
IJ
KCsyy!"a'''99Wa ''833399Wa %%///99Wa ''500099Wa %%///99Wa ''611199Wa %%444	LL	LL
23
4Csyy!"a'''r    c                    [         R                  " S5      nUR                  S5      nUR                  U 5        [	        UR
                  5      S:X  d   eUR                  S5        [	        UR
                  5      S:X  d   eUR                  S5        [	        UR
                  5      S:X  d   eUR                  S5        [	        UR
                  5      S	:X  d   e[        R                  " [        5         U" UR                  S
5      5      n[	        UR                  S   5      S	:X  d   e S S S 5        g ! , (       d  f       g = f)Nr3   r4   rQ   r(   rw   r*   rT   r/   r   r   rL   )rC   rD   rE   rF   r;   r"   r   r_   r`   ra   rx   rW   r   s       r!   #test_span_ruler_remove_all_patternsr   M  s    
++d
CLL&E	/0u~~!###	LLu~~!###	LLu~~!###	LLu~~!###	k	"CLL!MNO399W%&!+++ 
#	"	"s   26D22
E c                  "   [         R                  " S5      n U R                  S5      nSSS./nUR                  U5        U" U R	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eUR                  S   S	   R                  S:X  d   eUR                  S   S	   R                  S:X  d   eS
SS./nUR                  U5        U" U R	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eUR                  S   S	   R                  S:X  d   eUR                  S   S	   R                  S:X  d   eUR                  S   S   R                  S
:X  d   eUR                  S   S   R                  S:X  d   eUR                  S5        U" U R	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eUR                  S   S	   R                  S
:X  d   eUR                  S   S	   R                  S:X  d   eUR                  U5        U" U R	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eSSS./nUR                  U5        U" U R	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eUR                  S5        U" U R	                  S5      5      n[        UR                  5      S:X  d   e[        UR                  S   5      S:X  d   eg )Nr3   r4   DATE1z	last timer   z=I saw him last time we met, this time he brought some flowersrT   rL   r   DATE2z	this timerS   DATE3zanother timez[I saw him last time we met, this time he brought some flowers, another time some chocolate.rw   r   )rK   rL   	patterns1r[   	patterns2	patterns3s         r!   test_span_ruler_remove_and_addr   ]  sq   
++d
CLL&E"{;<I	y!
TUC u~~!###syy!"a'''99Wa ''722299Wa %%444"{;<I	y!
TUC u~~!###syy!"a'''99Wa ''722299Wa %%44499Wa ''722299Wa %%444	LL
TUC u~~!###syy!"a'''99Wa ''722299Wa %%444	y!
TUC u~~!###syy!"a'''"~>?I	y!
i	
C
 u~~!###syy!"a'''	LL
i	
C
 u~~!###syy!"a'''r    c                 (   [         R                  " S5      nUR                  SSSS00S9nUR                  U 5        U" UR	                  S5      5      n[        UR                  S   5      S	:X  d   eUR                  S   S
   R                  S:X  d   eg )Nr3   r4   spans_filterrh   z#spacy.first_longest_spans_filter.v1r6   r   rL   rT   r   r$   r   r   s       r!   test_span_ruler_spans_filterr     s    
++d
CLL*O PQ  E 
+,
]+
,Csyy!"a'''99Wa ''8333r    c                    [         R                  " S5      nUR                  SSS0S9nUR                  U 5        U" UR	                  S5      5      n[        UR                  5      S:X  d   eUR                  S   R                  S	:X  d   eg )
Nr3   r4   annotate_entsTr6   r   rT   r   r$   )rC   rD   rE   rF   rx   r;   entsrX   r   s       r!   #test_span_ruler_ents_default_filterr     s{    
++d
CLL.ELFE	+,
]+
,Csxx=A88A;)))r    c                    [         R                  " S5      nUR                  SSSSS0S.S9nUR                  U 5        UR	                  S	5      n[        US
SSS9[        USSSS9/Ul        U" U5      n[        UR                  5      S:X  d   eUR                  S   R                  S:X  d   eUR                  S
   R                  S:X  d   eg )Nr3   r4   TFrh   z#spacy.prioritize_new_ents_filter.v1)r   ru   ents_filterr6   zfoo bar baz a b crT   rw   r%   r      ABCrS   r   r$   )	rC   rD   rE   rF   rx   r   r   r;   rX   r   s       r!   %test_span_ruler_ents_overwrite_filterr     s    
++d
CLL!#%JK
  E 
+,
,,*
+CS!Qh/c1au1MNCH
*Csxx=A88A;)))88A;&&&r    c                 J   [         R                  " S5      S 5       n[        R                  " S5      nUR	                  SSSS0S.S9nUR                  U 5        [        R                  " [        5         U" UR                  S	5      5        S S S 5        g ! , (       d  f       g = f)
Ntest_pass_through_filterc                      S n U $ )Nc                 
    X-   $ r9   r   )spans1spans2s     r!   pass_through_filter^test_span_ruler_ents_bad_filter.<locals>.make_pass_through_filter.<locals>.pass_through_filter  s
    ?"r    r   )r   s    r!   make_pass_through_filterAtest_span_ruler_ents_bad_filter.<locals>.make_pass_through_filter  s    	# #"r    r3   r4   Trh   )r   r   r6   r   )
r   miscrC   rD   rE   rF   r_   r   r   rx   )r&   r   rK   rL   s       r!   test_span_ruler_ents_bad_filterr     s    ]]-.# /# ++d
CLL!#%?@
  E 
+,	z	"cll=)* 
#	"	"s   3B
B")/r_   	thinc.apir   r   rC   r   spacy.errorsr   spacy.tests.utilr   spacy.tokensr   spacy.trainingr	   fixturer   r"   r&   r-   r1   rO   r\   rb   rl   rp   rs   ry   r|   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   <module>r      s4    /   * )  " 	$% &      M M	(+ (10"(+
1<(8,O4*" q!f-0 .04$!,0($, 5(p	4*'(+r    