
    h/j              	       l   S SK r S SKrS SKJrJrJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJrJrJr  S SKJr  S	S
0S
SS./rS
SS.S	S
0/rS	S
0S	S
0/rS	S0S
SS.S	S0/rSSS.S
SS.S	S0/rSrSrSrSrSrSrSrSr Sr!Sr"\RF                  S 5       r$\RF                  S 5       r%\RL                  RO                  S5      \RL                  RQ                  SSS0/SS0SS0//SS0SS0/SS0///5      S 5       5       r)\RL                  RO                  S5      \RL                  RQ                  SSS0/SS0SS0//SS0SS0/SS0///5      S 5       5       r*\RL                  RO                  S 5      S! 5       r+\RL                  RO                  S"5      S# 5       r,\RL                  RO                  S$5      S% 5       r-\RL                  RO                  S&5      S' 5       r.\RL                  RO                  S(5      S) 5       r/\RL                  RO                  S*5      S+ 5       r0\RL                  RO                  S*5      S, 5       r1\RL                  RO                  S-5      S. 5       r2\RL                  RQ                  S// S0Q5      \RL                  RO                  S15      S2 5       5       r3\RL                  RO                  S35      S4 5       r4\RL                  RO                  S55      S6 5       r5\RL                  RO                  S55      S7 5       r6\RL                  RO                  S55      S8 5       r7\RL                  RO                  S55      S9 5       r8\RL                  RO                  S:5      S; 5       r9\RL                  RO                  S<5      S= 5       r:\RL                  RO                  S>5      S? 5       r;\RL                  RO                  S@5      SA 5       r<\RL                  RO                  SB5      SC 5       r=\RL                  RO                  SD5      SE 5       r>\RL                  R                  SF5      \RL                  RO                  SG5      SH 5       5       r@\RL                  RO                  SI5      SJ 5       rA\RL                  RO                  SK5      SL 5       rB\RL                  RO                  SM5      SN 5       rC\RL                  RO                  SO5      SP 5       rD\RL                  RQ                  SQ\\4\\4\\4\\4\\4/5      SR 5       rE\RL                  RQ                  SS\\4\\4\\ 4\\!4\\"4/5      ST 5       rFSU rGSV rH\RL                  RQ                  SQ\\4\\4\\4\\4\\4/5      SW 5       rISX rJ\RL                  RO                  S15      SY 5       rKSZ rL\RL                  R                  S[5      S\ 5       rNS] rOS^ rPg)_    N)IS_PUNCTLOWERORTH)MatchPatternError)English)	LEX_ATTRS)Matcher)DocSpanToken)Vocabr   A*r   OPBzAA*zA*AAAzBA*BzB*A*Bz	A A A A AzA AzB A A A A A BzB B A A A A A Bc                      g)Nz(BBAAAAAB). r       `/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/matcher/test_matcher_logic.pytextr       s    r   c                 4    U " SR                  U5      5      nU$ )N )join)en_tokenizerr   docs      r   r   r   %   s    
sxx~
&CJr   v   patternsr   celticsbostonc                 *   SnU " U5      nUR                   R                  S   n[        UR                   5      nUR                  SU5        [	        [        UR                  5      5      S:X  d   eU" U5       VVVs/ s H	  u  pgoXx4PM     n	nnnXSS4USS4/:X  d   eU	SS	 Ul        [        UR                  5      n
[	        U
5      S	:X  d   eU
S   R                  U:X  d   eU
S   R                  S:X  d   eU
S   R                  S:X  d   egs  snnnf )
5Test a bug that arose from having overlapping matchesLhow many points did lebron james score against the boston celtics last nightORGBostonCelticsr   	      
   N   )
vocabstringsr	   addlenlistentslabelstartendr   r   r   r   r%   matcher_r2   r3   matchesr0   s              r   test_issue118r8   +   s    	W 	 t
C
))

E
"Ccii GKK*tCHH~!###6=clCl]QsU lGCQ|c2r]3333r{CH>Dt9>>7==C7==A7;;" Ds   ;Dc                 F   SnU " U5      nUR                   R                  S   n[        UR                   5      nUR                  SU5        [	        [        UR                  5      5      S:X  d   eU" U5       VVVs/ s H	  u  pgoXx4PM     n	nnnU=R                  [        U	5      SS -  sl        XSS4USS	4/:X  d   eUR                  n
[	        U
5      S:X  d   eU
S   R                  U:X  d   eU
S   R                  S:X  d   eU
S   R                  S	:X  d   egs  snnnf )
r#   r$   r%   r&   r   r*   Nr'   r)   r(   )r+   r,   r	   r-   r.   r/   r0   tupler1   r2   r3   r4   s              r   test_issue118_prefix_reorderr;   G   s    	W 	 t
C
))

E
"Ccii GKK*tCHH~!###6=clCl]QsU lGCHHgqr""HQ|c1b\222288Dt9>>7==C7==A7;;" Ds   ;D   c                    SnSS0SS0/SS0SS0//nU " U5      n[        UR                  5      nUR                  SU5        U" U5       VVVs/ s H	  u  pVouXg4PM     nnnnUu  pU	S   S:X  d   eU	S	   S
:X  d   eU
S   S:X  d   eU
S	   S:X  d   e[        R                  " [
        5         U=R                  [        U5      -  sl        SSS5        gs  snnnf ! , (       d  f       g= f)z$Test overlapping multi-word phrases.zAThere are different food safety standards in different countries.r   foodsafety	standardsFOODr*                  N)r	   r+   r-   pytestraises
ValueErrorr0   r:   )r   r   r   r   r5   ent_typer2   r3   r7   match1match2s              r   test_issue242rM   c   s     OD
6	Wh/0
8	w45H t
Ccii GKK!BI#,O,*>(3%%,GONF!9>>!9>>!9>>!9>>	z	" 	E'N" 
#	" P 
#	"s   C,C
C)iK  c                    U " S5      n[        UR                  5      nUR                  S[        S0[        S0//5        U" U5      n[	        U5      S:X  d   eUR                  S[        S0[        S0[
        S0[        S0//5        U" U5      n[	        U5      S	:X  d   eUR                  S
[        S0[        S0[
        S0[        S0//5        U" U5      n[	        U5      S	:X  d   eg)z6Test that Matcher doesn't segfault on particular inputza b; cTEST1abr*   TEST2TcrC   TEST3dN)r	   r+   r-   r   r.   r   )r   r   r5   r7   s       r   test_issue587rV   z   s     x
 Ccii GKKD#;s456clGw<1KKD#;sh5Ec{STUclGw<1KKD#;sh5Ec{STUclGw<1r   iL  c                     [        U 5      n[        R                  " [        5         UR	                  S/ /5        SSS5        g! , (       d  f       g= f)z=Test if empty specs still cause an error when adding patternsTESTN)r	   rG   rH   rI   r-   )en_vocabr5   s     r   test_issue588rZ      s6     hG	z	"FRD! 
#	"	"s   A
AiN  c                     [        U / SQS9n[        U 5      nUR                  SSS0SS0SS0SS	0//5        UR                  SSS0SS
0SS0//5        U" U5      n[        U5      S:X  d   eg)zTest overlapping matches)n=1;rP   :5%wordsabIS_ALPHATr   r`   LIKE_NUMrb   r]   rC   N)r
   r	   r-   r.   rY   r   r5   r7   s       r   test_issue590ri      s     hF
GChGKKT"VSMJ3EPS}UV KKT*VSMJ;MNOPclGw<1r   ig  c                     S nSnSS0SS0/nSnU " U5      n[        UR                  5      nUR                  XC/US9  U" U5        [        UR                  5      nU/ :w  d   eUS   R
                  S:w  d   eg )	Nc                    U[        U5      S-
  :w  a  gU VVVs/ s H  u  pEn[        XXdS9PM     nnnnUR                  5        nU H^  n	U	R                  (       a  SOU	R                  R
                  n
XR                  S.nUR                  XS9  UR                  U	4-   Ul        M`     SSS5        gs  snnnf ! , (       d  f       g= f)zMerge a phrase. We have to be careful here because we'll change the
token indices. To avoid problems, merge all the phrases once we're called
on the last match.r*   Nr1   NNP)taglemma)attrs)	r.   r   
retokenizelabel_roottag_r   merger0   )r5   r   ir7   r1   r2   r3   spansretokenizerspanrn   rp   s               r   merge_phrases$test_issue615.<locals>.merge_phrases   s     Gq  MTUW8Icc#3WU^^#{{e		 #ii8!!$!488tg-	   Vs   B3A%B::
CzThe golf club is brokenr   golfclubSport_Equipment)on_matchr   )r	   r+   r-   r/   r0   r1   )r   rz   r   patternr1   r   r5   entitiess           r   test_issue615r      s    . %D&&!12GE
t
Ccii GKKy=K9CLCHH~Hr>>A;!!!r   iR  c                     [        [        S 0S9n [        U 5      nSS0SS0SS0/nUR                  SU/5        [	        UR
                  / S	QS
9nU" U5      n[        U5      S:X  d   eUS   u  pVnUS:X  d   eUS:X  d   eg)zbThe variable-length pattern matches the succeeding token. Check we
handle the ambiguity correctly.c                 "    U R                  5       $ Nlowerstrings    r   <lambda>test_issue850.<locals>.<lambda>   
    &,,.r   lex_attr_gettersr   bobr   r   frankFarAwayr   andr   r   rc   r*   r   rE   Nr   r   r	   r-   r
   r+   r.   r+   r5   r   r   matchent_idr2   r3   s           r   test_issue850r      s     E+H#IJEenG$w.@AGKK	G9%
gmm#A
BCCLEu:??qF3A::!8O8r   c                     [        [        S 0S9n [        U 5      nSS0SSS.SS0/nUR                  S	U/5        [	        UR
                  / S
QS9nU" U5      n[        U5      S:X  d   eUS   u  pVnUS:X  d   eUS:X  d   eg)z7Test Matcher matches with '*' operator and Boolean flagc                 "    U R                  5       $ r   r   r   s    r   r   %test_issue850_basic.<locals>.<lambda>   r   r   r   r   r   r   r   )r   r   r   r   r   rc   r*   r   rE   Nr   r   s           r   test_issue850_basicr      s     E+H#IJEenGe!<w>PQGKK	G9%
gmm#A
BCCLEu:??qF3A::!8O8r   i  c                      SS0SSS./n [        [        S9n[        USS/S9n[        US/S9n[        U5      nUR	                  S	U /5        U" U5      nU(       d   eU" U5      nU(       d   eg
)z=Test matches occur when optional element at end of short doc.r   HelloT?)rf   r   r   Worldrc   	MyMatcherN)r   r   r
   r	   r-   )r   r+   hello_worldhellor5   r7   s         r   test_issue1434r      s{      t3"?@G9-EeGW#56KgY'EenGKKgY'k"GN7enGN7r   zstring,start,end))rP   r   r*   )a br   rC   )a cr   r*   )a b cr   rC   )a b b cr   rB   )a b br   rB   i  c                    SS0SSS./n[        [        5       5      nUR                  SU/5        [        [        5       U R	                  5       S9nU" U5      nUb  Uc  U/ :X  d   eUS	   S
   U:X  d   eUS	   S   U:X  d   eg)z5Test matcher works when patterns end with * operator.r   rP   rQ   r   r   TSTENDrc   Nr*   rC   )r	   r   r-   r
   split)r   r2   r3   r   r5   r   r7   s          r   test_issue1450r      s     }s#67GegGKK7)$
egV\\^
,CclG}"}}2;q>U"""2;q>S   r   i  c                      [        [        5       5      n U R                  SSS0SS0//5        [        U R                  / SQS9nU " U5      n[        U5      S:X  d   eUS   SS	 S
:X  d   eUS   SS	 S:X  d   eg	)z0Test regression in Matcher introduced in v2.0.6.MWEorthrP   )rP   rP   rP   rc   rC   r   r*   N)r   rC   )r*   rB   )r	   r   r-   r
   r+   r.   )r5   r   r7   s      r   test_issue1945r     s     egGKK&#678
gmm?
3CclGw<11:ab>V###1:ab>V###r   i  c           
      2   [        U 5      nSS0SSS.SS0SS.SSS./n[        R                  " SS	S
9  UR                  SU/5        [	        U / SQS9nU" U5      n[        U VVVs/ s H  u  pVouU R                  ;   PM     snnn5      (       d   eg s  snnnf )Nr   Doe!r   r   optionalT)r6   r   FdefaultrX   )r   Johnr   r   rc   )r	   r   set_extensionr-   r
   allr,   )rY   r5   r   r   r7   match_idr2   r3   s           r   test_issue1971r     s     hG	C 4 ,C 	G 

E2KK	"
h;
<C clGQ1E#H,,,QRRRRQs   &Bc                     [        U 5      nSSS/0S.SS0/nSS0SS0/n[        U / SQS	9nUR                  S
X#/5        U" U5      n[        U5      S:X  d   eg )NEURINeur)r   r   rg   Tr   )r   10isr   r   rc   rO   rC   r	   r
   r-   r.   )rY   r5   pattern1pattern2r   r7   s         r   test_issue_1971_2r   "  sr    hG$9J;MNHT"VUO4H
h>
?CKK(-.clGw<1r   c                 j  ^  [         R                  " SSSS9  [         R                  " SSSS9  [        T SS/S	9n[        T 5      nUR	                  S
SSS00//5        UR	                  SSSS00//5        [        U 4S jU" U5       5       5      n[        U5      S:X  d   eU[        / SQ5      :X  d   eg)zFTest that pattern matches correctly for multiple extension attributes.rP   r*   Tr   forcerQ   rC   r   worldrc   r   r6   r   c              3   L   >#    U  H  u  pnTR                   U   X#4v   M     g 7fr   )r,   ).0m_idserY   s       r   	<genexpr>$test_issue_1971_3.<locals>.<genexpr>6  s%     Sl
h&&t,a3ls   !$rE   ))r   r   r*   )r   r*   rC   )r   r   r*   )r   r*   rC   N)r   r   r
   r	   r-   sortedr.   rh   s   `   r   test_issue_1971_3r   -  s     
Qd3	Qd3
hw0
1ChGKKc1X'()KKc1X'()SgclSSGw<1fQRRRRr   c                 .   [         R                  " SSSS9  [         R                  " SSSS9  [        U 5      n[        U / SQS9nS	SSS
.0/S-  nUR	                  SU/5        U" U5      n[        U5      S:X  d   eUS   U R                  S   SS4:X  d   eg)z`Test that pattern matches correctly with multiple extension attribute
values on a single token.
ext_astr_aTr   ext_bstr_b)thisr   r   rc   r6   )r   r   rB   rX   r*   r   N)r   r   r	   r
   r-   r.   r,   )rY   r5   r   r   r7   s        r   test_issue_1971_4r   ;  s    
 
=	=hG
h4
5Cw9:;a?GKK	"clGw<11:(**62Aq9999r   i	  c                     [        U 5      n[        U SS/S9nUR                  SSS0SS0//5        U" U5      n[        U5      S:X  d   eg)	zITest problem with successive ?. This is the same bug, so putting it here.rP   rQ   rc   4r   r   rB   Nr   )rY   r5   r   r7   s       r   test_issue2464r   L  sW     hG
hsCj
)CKKc{T3K012clGw<1r   i	
  c                    U " S5      n[        USSUR                  R                  S   S9/Ul        [	        UR                  5      nUR                  SSSS.//5        U" U5       VVVs/ s H	  u  p4oQXE PM     nnnn[        U[        S	S
9n[        U5      S:X  d   e[        US   5      S:X  d   eUS   R                  S:X  d   egs  snnnf )zTest that operator + is greedy.zIt is May 15, 1993.rC   rF   DATErl   RULE+)ENT_TYPEr   T)keyreverser)   r   rE   zMay 15, 1993N)	r   r+   r,   r0   r	   r-   r   r.   r   )r   r   r5   r6   r2   r3   matcheds          r   test_issue2569r   V  s     ,
-CS!Qcii&7&7&?@ACHcii GKKvS9:;<3:3<@<-!C5~<G@W#t4Gw<2wqz?a1:??n,,,	 As   *Cio
  c                  n   [        5       n [        U R                  5      nSnSS0SSS.SS0/nUR                  X#/5        U " S5      nU " S	5      nU" U5      nU H%  u  pxn	U R                  R                  U   U:X  a  M%   e   U" U5      n
U
 H%  u  pxn	U R                  R                  U   U:X  a  M%   e   g
)zVEnsure the correct entity ID is returned for matches with quantifiers.
See also #2675
test_patternr   highTr   )r   r   
adrenalinez$This is a high-adrenaline situation.z$This is a high adrenaline situation.N)r   r	   r+   r-   r,   )nlpr5   
pattern_idr   doc1doc2matches1r   r2   r3   matches2s              r   test_issue2671r   d  s    
 )Ccii GJ	&%	,G
 KK
I&56D56Dt}H (yy  *j888 !)t}H (yy  *j888 !)r   i  c                 .   SS0SS0SS0SS0/SS0SS	S
S.SS0SS0SS0/SS0SS	SS.SS0SS0SS0//n/ SQn/ SQn/ SQn[        XX4S9n[        U 5      n[        U5       H2  u  pxUR                  [	        U5      U/5        U" U5      n	U	(       a  M2   e   g)z%Test problem with matcher quantifiersr   hasr   todoTAGr   TFr   )IS_ASCIIr   r   r   )alsor   r   r   with)RBVBZTOVBr   )ADVVERBADPr   r  )rd   tagsposN)r
   r	   	enumerater-   str)
rY   r   rd   r  r  r   r5   rv   r   r7   s
             r   test_issue3009r  |  s     %7D/GT?UDMJUO5<dOdODM	
 UO5<dOdODM	
H" 0E*D
/C
h$
8ChG)
CFWI&#,ww *r   i   c                    [        U / SQS9n[        U 5      nSSSS/00/SSSS/00//nUR                  S	U5        U" U5      n[        U5      S
:X  d   eU VVVs/ s H  u  pVoqXg R                  PM     nnnnU/ SQ:X  d   eg s  snnnf )N)r   ,howareyoudoingr   rc   r   r   r   r	  r  r  rX   rE   )r   r	  r  r  )r
   r	   r-   r.   r   )	rY   r   r5   r   r7   r6   r2   r3   matched_textss	            r   test_issue3328r    s    
hO
PChG
D7E*+	,-
D5'*+	,-H KK!clGw<1>EFg]Qs^((gMF<<<< Gs   Bi  c                     [        U SS9nSS0SS0/nUR                  SU/5        [        R                  " [        5         UR                  SSS	0//5        S
S
S
5        g
! , (       d  f       g
= f)zATest that match pattern validation doesn't raise on empty errors.T)validater   r   r   GOODBADXYN)r	   r-   rG   rH   r   )rY   r5   r   s      r   test_issue3549r    sc     h.G!GW#56GKK	"	(	)Ec3ZL>* 
*	)	)s    A  
A.z5Matching currently only works on strings and integersi  c                     [         R                  " SSS9  [        U 5      nSS0SSS00/nUR                  SU/5        [	        U SS	/S
9nU" U5        g)zBTest that custom extensions with default None don't break matcher.	issue3555Nr   r   haver6   TrX   applerc   )r   r   r	   r-   r
   )rY   r5   r   r   s       r   test_issue3555r    s_     
T2hG#T':!;<GKK	"
hvw/
0CCLr   i  c                 R   [        U / SQS9n[        U 5      nSnSS0SS0SS0/nSS0SS0SS0SS0/nUR                  X4/5        U" U5      nUS	   S	   U R                  U   :X  d   e[        U 5      nUR                  X5/5        U" U5      nUS	   S	   U R                  U   :X  d   eg
)zJTest that match IDs returned by the matcher are correct, are in the string)terrificgroupofpeoplerc   PATTERNr   r  r   r   r  r   N)r
   r	   r-   r,   )rY   r   r5   r   r   r   r7   s          r   test_issue3839r!    s     hC
DChGH*%c{Wg4FGH*%c{T3K'7ASTHKK*%clG1:a=H,,X6666hGKK*%clG1:a=H,,X6666r   i'  c                     [        U / SQS9n[        U5      S:X  d   eSSS.SS0SS	0/n[        U 5      nUR                  S
U/5        [        U" U5      5      S:X  d   eg )N)Thisr   rP   test.rc   rD   r#  r   r   r   r   r$  rX   rC   )r
   r.   r	   r-   )rY   r   r   r5   s       r   test_issue3879r&    sj    
h>
?Cs8q==c*T3K&&9IJGhGKK	"ws|!!!r   io  c                     [        U 5      nSS0SSS.SS0SS0/nUR                  SU/5        [        U / S	QS
9nU" U5      n[        U5      S:X  d   eg)z?Test that combinations of optional rules are matched correctly.r   r   r   r   )r   r   r   r   rX   )r   mynewr   rc   r   Nr	   r-   r
   r.   )rY   r5   r   r   r7   s        r   test_issue3951r+    sn     hG	'$	s	'	G KK	"
h=
>CclGw<1r   i  c                     [        U 5      nUR                  SSS0SS0//5        [        U S/S9n[        U" U5      5      S:X  d   e[        U / SQS9n[        U" U5      5      S	:X  d   e[        U 5      nUR                  SSS0SS0SS
0//5        [        U / SQS9n[        U" U5      5      S	:X  d   e[        U 5      nUR                  SSS0SS0S
SS.//5        [        U / SQS9n[        U" U5      5      S:X  d   eg)z=Test that matches without a final {OP: ?} token are returned.rX   r   rP   r   r   rc   r*   )rP   rQ   rS   rC   rQ   )rP   rQ   rQ   rS   r   rB   Nr*  )rY   r5   r   r   doc3doc4s         r   test_issue4120r/    s    hGKK63-$567xu%Dwt}"""x/Dwt}"""hGKK63-$vsmDEFx34Dwt}"""hGKK63-$s#6NOPQx34Dwt}"""r   zpattern,re_patternc                 D   [        U R                  5      nUR                  X2/SS9  U" U 5      n[        R                  " X15       Vs/ s H  ofR                  5       PM     nn[        XW5       H+  u  u  pn
u  pX	U
 R                  XU R                  :X  a  M+   e   gs  snf )z[Test that the greedy matching behavior "FIRST" is consistent with
other re implementations.FIRSTgreedyN)r	   r+   r-   refinditerry   zipr   )r   r   r   
re_patternr5   r7   m
re_matchesr   m_sm_ere_sre_es                r   test_greedy_matching_firstr>    s     cii GKK
IgK6clG$&KK
$AB$Aq&&($AJB),W)A%3$s|  CTN$7$7777 *B Cs   Bzpattern,longestc                     [        U R                  5      nUR                  SU/SS9  U" U 5      nU H  u  pgnXU R                  U:X  a  M   e   g)z+Test the "LONGEST" greedy matching behaviorr   LONGESTr2  N)r	   r+   r-   r   )	r   r   r   longestr5   r7   r   r   r   s	            r   test_greedy_matching_longestrB    sS     cii GKK	)K4clGQx}}''' r   c                     U " SR                  S5      5      n[        UR                  5      nSS0SS0/nUR                  SU/SS9  U" U5      n[	        U5      S:X  d   eUS	   S   S	:X  d   eUS	   S
   S
:X  d   eg)zJTest that "LONGEST" matching prefers the first of two equally long matchesr   CCCr   Cr   r@  r2  r*   r   rC   N)r   r	   r+   r-   r.   )r   r   r5   r   r7   s        r   "test_greedy_matching_longest_firstrF  '  s    
sxx
'Ccii G}vsm,GKK	)K4clGw<11:a=A1:a=Ar   c                     [        U R                  5      n[        R                  " [        5         UR                  S[        /SS9  S S S 5        g ! , (       d  f       g = f)Nr   GREEDYr2  )r	   r+   rG   rH   rI   r-   r   )r   r   r5   s      r   test_invalid_greedinessrI  4  s<    cii G	z	"FXJx8 
#	"	"s   A
Ac                    [        U R                  5      nUR                  X2/SS9  U" U 5      n[        R                  " X15       Vs/ s H  ofR                  5       PM     nn[        U5      [        U5      :X  d   egs  snf )zLTest that matcher.__call__ consumes tokens on a match similar to
re.findall.r1  r2  N)r	   r+   r-   r4  r5  ry   r.   )r   r   r   r7  r5   r7   r8  r9  s           r   test_match_consumingrK  :  sl     cii GKK
IgK6clG$&KK
$AB$Aq&&($AJBw<3z?*** Cs   A=c                    / SQnU H  u  p#n[        U 5      n[        UR                  [        U5      S9n/ nUR	                  5        HE  nUR                  S5      (       a  UR                  US   SS.5        M2  UR                  SU05        MG     UR                  SU/5        U" U5      n	U(       a  U	(       d   X#45       eM  U	(       d  M   X#45       e   g )N))aaaba a a bT)rM  a+ bT)rM  a+ a+ bT)rM  	a+ a+ a bT)rM  
a+ a+ a+ bT)rM  a+ a a bTrM  a+ a aTrM  za+T)aaarO  F)rW  rP  F)rW  rR  F)rW  a+ a bF)rW  rS  FrT  rV  )rM  rX  Trc   r   r   r   r   r   )r	   r
   r+   r/   r   endswithappendr-   )
rY   casesr   pattern_strresultr5   r   r   partr7   s
             r   test_operator_combosr_  N  s    E$ (-#V(#'--tF|4%%'D}}S!!Qs;<~.	 (
 	Iy)#,1V117w5 55; (-r   c                   ^ [        U 5      mSS0SSS./nTR                  SU/5        U4S jn[        T" U" S5      5      5      S:X  d   e[        T" U" S	5      5      5      S
:X  d   e[        T" U" S5      5      5      S:X  d   e[        T" U" S5      5      5      S
:X  d   e[        T" U" S5      5      5      S:X  d   e[        T" U" S5      5      5      S:X  d   eg)zBTest matcher works when patterns end with * operator. (issue 1450)r   rP   rQ   r   r   r   c                 H   > [        TR                  U R                  5       S9$ )Nrc   )r
   r+   r   )r   r5   s    r   r   ,test_matcher_end_zero_plus.<locals>.<lambda>x  s    W]]&,,.Ar   r*   r   rC   r   r   r   rB   r   N)r	   r-   r.   )rY   r   r   r5   s      @r   test_matcher_end_zero_plusrc  r  s     hG}s#67GKK7)$
ACws3x !Q&&&ws5z"#q(((ws5z"#q(((ws7|$%***ws9~&'1,,,ws7|$%***r   c                 $   [        U 5      nSSS/00/SSS/00/SSS/00//nUR                  SU5        [        U SR                  5       S9nU" U5      nU VVVs/ s H  u  pVn[	        X6XuS	9R
                  PM     nnnnU/ S
Q:X  d   eg s  snnnf )Nr   r   zeroonetworX   zzero one two threerc   rl   )re  rf  rg  )r	   r-   r
   r   r   r   )	rY   r5   r   r   r7   Lr   r   textss	            r   'test_matcher_sets_return_correct_tokensrj    s    hG
D6(#	$%
D5'?	#$
D5'?	#$H
 KK!
h288:
;CclG:AB'wqQT#!%**'EB**** Cs   !Bzignore:\[W036c                     [        5       n [        U R                  5      nSnSS0SS0/n[        U5      S:X  d   eUR	                  SU/5        SU;   d   eU" U " U5      5      n[        U5      S:X  d   eUR                  S5        U" U " U5      5      n[        U5      S:X  d   e[        R                  " [        5         UR                  S5        S S S 5        g ! , (       d  f       g = f)	NzThis is a test case.r   r$  r   r   r   RulerC   )	r   r	   r+   r.   r-   removerG   rH   rI   )r   r5   r   r   results1results2s         r   test_matcher_removerp    s    
)Ccii G!D$-Gw<1KK	"W s4y!Hx=A NN6 s4y!Hx=A 
z	"v 
#	"	"s   ?C
C(c                    SS/ SQ4SS/ SQ4SS/ SQ4SS	/ SQ4S
S/ SQ4SS/ SQ4S
S/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS/ SQ4SS / SQ4SS!/ SQ4SS"/ S#Q4SS$/ SQ4/nU GHU  u  p#n[        U 5      n[        UR                  [        U5      S%9n/ nUR	                  5        H  nUR                  S&5      (       a  UR                  US'   S&S(.5        M2  UR                  S)5      (       a  UR                  US'   S)S(.5        Ma  UR                  S*5      (       a  UR                  US'   S*S(.5        M  UR                  S+5      (       a  UR                  US'   US,S  S(.5        M  UR                  S-U05        M     UR                  S.U/S/S09  U" US1S29n	[        U	5      n
U	S'   u  ppX:X  a  GMN   X#XU
45       e   g )3NrM  a* br   r   r   r*   baabb a* br   r*   r*   rC   rN  r   r*   rC   rB   rO  aabaa+ b a+r   r   r*   rC   aabaar   r   r*   rC   rC   a+ b a*aaaaa*r   r   r   r   	b a* b b*aabba* b* a*r   r   r*   r*   rQ  rR  rS  rU  r   r*   rC   a+ a a?
a a a a a?rX  rP  a{2,} bza{,3} ba{2} br   r   r*   a{2,3} brc   r   r   r   r   r   }r*   r   r   r@  r2  Twith_alignments	r	   r
   r+   r/   r   rY  rZ  r-   r.   )rY   r[  r   r\  r]  r5   r   r   r^  r7   	n_matchesr6   r   r   expecteds                  r   +test_matcher_with_alignments_greedy_longestr    s#   	&	<(	L)	&	L)	)_-	L)	|$	l+	\*	l+	|,	\*	9%	I&	|,	<(	L)	L)	L)	9%	\*-E0 (-#V(#'--tF|4%%'D}}S!!Qs;<s##Qs;<s##Qs;<s##QtABx@A~. ( 	Iy;#t4L	#AJa!IFy#II!+ (-r   c                    SSSSS// SQ/ SQS//4SSS/ S	Q/4S
SS/ SQ/4SSSSS// SQ/ SQ/4SSS/ SQ/ SQ/4SSS/ SQ/ SQ/ SQ/ SQ/4SSSSS// SQ/ SQ/ SQ/4SSSS/SS// SQ/ SQ/4SSS / S	Q/4S!S"S#S/S
/S
S
/SS// SQ/ S$Q/ S%QSS//4S&SS'/ SQ/4S(SS)/ SQ/4S*SS+/ SQ/4S,SS-/ SQ/4S.SS/SS// SQ/4S0SS1/ SQ/4S2SS3/ SQ/ SQ/4S4SS5/ SQ/ SQ/4S6SS7/ SQ/ SQ/4S8SS9/ SQ/4S:SS;/ SQ/4S<SS=/ SQ/ SQ/4/nU GHm  u  p#pE[        U 5      n[        UR                  [        U5      S>9n/ nUR	                  5        H  n	U	R                  S?5      (       a  UR                  U	S   S?S@.5        M2  U	R                  SA5      (       a  UR                  U	S   SAS@.5        Ma  U	R                  SB5      (       a  UR                  U	S   SBS@.5        M  U	R                  SC5      (       a  UR                  U	S   U	SS  S@.5        M  UR                  SDU	05        M     UR                  SEU/5        U" USFSG9n
[        U
5      nU
 H(  u  ppX;   d
   X#XMX45       e[        U5      X-
  :X  a  M(   e   GMp     g )HNr   rM  rr  r*   r  rs  rt  ru  rv  rC   rN  rw  rB   rO  rE   rx  ry  r  rz  rD   r{  r|  )r   r*   rC   rC   rF   r}     r~  r  )r   r   r   r     r  r'   r  r  r  )r   r*   r*   r)   rQ  r(   rR     rS     rU     r     r     rX     rP     r     za{3} b   r     r  rc   r   r   r   r   r  r   r   Tr  r  )rY   r[  case_idr   r\  resultsr5   r   r   r^  r7   r  r6   r   r   r  s                   r   'test_matcher_with_alignments_non_greedyr    s   	
FFaVYqcBC	
FH|n-	
FI~.	
FFaVY=>	
FI	<89o|D		
 
FIA	<KL	
FDA3A	<@A	
FK,0S1#1v1vy,	Aq6R		
 
V[<.1	V\L>2	VZ,0	VX	{+	VY!Q 34	V\L>2	VX	<89	VYL 9:	VYL 9:	VX~.	VX	{+	VZ)\!:;A!ED 27-(#'--tF|4%%'D}}S!!Qs;<s##Qs;<s##Qs;<s##QtABx@A~. ( 	Iy)#t4L	!(A!&W+!(WW&x=AE))) ")) 27r   )Qr4  rG   spacy.attrsr   r   r   spacy.errorsr   spacy.lang.enr   spacy.lang.lex_attrsr   spacy.matcherr	   spacy.tokensr
   r   r   spacy.vocabr   r   r   pattern3pattern4pattern5re_pattern1re_pattern2re_pattern3re_pattern4re_pattern5longest1longest2longest3longest4longest5fixturer   r   markissueparametrizer8   r;   rM   rV   rZ   ri   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  skipr  r!  r&  r+  r/  r>  rB  rF  rI  rK  r_  rc  rj  filterwarningsrp  r  r  r   r   r   <module>r     s   	  - - * ! * ! ) ) SMCs34$vsm4SMFC=)SMCs3fc]C$s#&>N    
 3I
	7H"57K!LMH
3	47K6LM ( 3H
	'8!4w	6J KLH
3	47J6KL ( 3# #, 3  3" " 3	 	 3" "6 3  3  4  
 4
! 

! 4$ $ 4S S& 4  4
S 
S 4: :  4  4
- 
- 49 9. 4 < 4= = 4+ + IJ4  K 47 7  4" " 4  4# #$ 	;	;	;	;	;		8		8 	8	8	8	8	8	(	(
9 	;	;	;	;	;	+	+!6H 4+ ++ ,- .4.Jb9*r   