
    h/F                     B   S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	  S SK
JrJr  S SKJr  SSKJr  \R"                  R%                  S	5      S
 5       r\R"                  R%                  S5      S 5       r\R"                  R%                  S5      S 5       r\R"                  R%                  S5      S 5       r\R"                  R%                  S5      S 5       r\R"                  R%                  S5      S 5       r\R"                  R%                  S5      S 5       r\R"                  R%                  S5      S 5       rS rS rS rS rS rS r S r!S  r"S! r#S" r$S# r%S$ r&S% r'S& r(S' r)S( r*S) r+S* r,S+ r-\R"                  R]                  S,S-S./5      S/ 5       r/S0 r0S1 r1g)2    N)Mock)English)MatcherPhraseMatcher)DocSpan)Vocab   )make_tempdiri  c                      [        5       n [        U R                  5      nUR                  SU " S5      U " S5      U " S5      /5        UR                  SU " S5      /5        [	        U5      S:X  d   eg)	z`Test that the PhraseMatcher correctly reports its number of rules, not
total number of patterns.TEST1abcTEST2dr
   N)r   r   vocabaddlen)nlpmatchers     a/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/matcher/test_phrase_matcher.pytest_issue3248_1r      s`     )CCII&GKK#c(CHc#h78KK#c($w<1    i  c                 R   [        U 5      nUR                  S[        U SS/S9/5        UR                  S[        U SS/S9/5        [        U / SQS9nU" U5      n[        U5      S:X  d   eU R                  US   S      U R                  US	   S      /n[        U5      SS/:X  d   eg
)z[Test that duplicate patterns for different rules result in multiple
matches, one per rule.
ABarackObamawordsB)r   r   liftsAmericar
   r      N)r   r   r   r   stringssorted)en_vocabr   docmatches	match_idss        r   test_issue3331r+      s    
 H%GKKc(8W*=>?@KKc(8W*=>?@
hE
FCclGw<1!!'!*Q-0(2B2B71:a=2QRI)c
***r   i  c                 L   [        U 5      nUR                  S[        U SS/S9/5        UR                  S[        U SS/S9/5        [        U / SQS9nU" U5      n[        U5      S:X  d   eU VVs/ s H  u  n  oPR                  U   PM     nnnSU;   d   eSU;   d   egs  snnf )	zGTest that the PhraseMatcher returns duplicates for duplicate match IDs.r   NewYorkr   r!   )Iliveinr-   r.   r
   N)r   r   r   r   r%   )r'   r   r(   r)   ent_id_	found_idss          r   test_issue3972r5   )   s     H%GKKc(5&/:;<KKc(5&/:;<
h@
ACclGw<1 AHHnvq!!!&)IH)) Is   0B i  c                 B   [        U SS9n[        U SS/S9nU Vs/ s H  o3R                  PM     snSS/:X  d   eUR                  SU/5        [        U / SQS9nU Vs/ s H  o3R                  PM     sn/ SQ:X  d   eU" U5      n[	        U5      S:X  d   e[        U SS9n[        U S	S
/S9nSUS   l        SUS   l        U Vs/ s H  o3R                  PM     snSS/:X  d   eUR                  SU/5        U" U5      n[	        U5      S:X  d   egs  snf s  snf s  snf )zETest that the PhraseMatcher can match on overwritten NORM attributes.NORMattrr   r   r   TEST)r   r   r   r   r$   12r   N)r   r   norm_r   r   )r'   r   pattern1tr(   r)   pattern2s          r   test_issue4002rA   :   s,    H62G8C:.H%&XGGX&3*444KK
#
h2
3C !SGGS!%9999clGw<1H62G8C:.HHQKHQK%&XGGX&3*444KK
#clGw<1 ' " 's   DDDi  c                      [        [        5       5      n [        U R                  [        5      (       d   e[	        [        5       5      n [        U R                  [        5      (       d   eg)zCTest that PhraseMatcher.vocab can be accessed (like Matcher.vocab).N)r   r	   
isinstancer   r   )r   s    r   test_issue4373rD   O   sJ     egGgmmU++++EG$GgmmU++++r   i+  c                  j   Sn [        5       nSSSS./nUR                  SSS0S	9nUR                  U5        U" U 5      nUR                   Vs/ s H%  oUR                  UR
                  UR                  4PM'     nn[        5       n[        5        nUS
-  n	UR                  U	5        UR                  S5      R                  U	5        SSS5        U" U 5      n
U
R                   Vs/ s H%  oUR                  UR
                  UR                  4PM'     nnXk:X  d   egs  snf ! , (       d  f       N_= fs  snf )zTest that the EntityRuler PhraseMatcher is deserialized correctly using
the method from_disk when the EntityRuler argument phrase_matcher_attr is
specified.
z!Spacy is a python library for nlp
PYTHON_LIBspacyspaCy)labelpatternidentity_rulerphrase_matcher_attrLOWER)configentityrulerN)
r   add_pipeadd_patternsentstextlabel_ent_id_r   to_disk	from_disk)rT   r   patternsrulerr(   entresnlp_reloadedr   	file_pathdoc_reloadedres_reloadeds               r   'test_issue4651_with_phrase_matcher_attrra   X   s    /D
)C&7'JKHLL1F0PLQE	x 
d)C:=((
C(3HHcjj#++.(C
C9L	1%	i n-77	B 
  %LCOCTCTUCTCXXszz3;;7CTLU D	
 Vs   ,D7D$,D0
D-i  c                     / SQn[        XS9nUSS n[        U / SQS9n[        U 5      nUR                  SU/5        U" U5      nU(       d   eg)z/Ensure that PhraseMatcher accepts Span as inputr/   likeSpansandDocsr1   myinput,rf   nothingelse.r   N   re   rf   rg   SPACY)r   r   r   )r'   r    r(   spanrJ   r   r)   s          r   test_issue6839rr   o   sV     kE
h
$Cr7D(":;GH%GKK'#dmGN7r   i)  c                    / SQn[        XS9n[        U S/S9[        U SS/S9S.n[        U 5      nUR                  5        H  u  pVUR                  XV/5        M     U" U5      nXpR                  S   SS4U R                  S	   SS
4/:X  d   eUR                  S5        [        U5      S:X  d   eU" U5      nXR                  S	   SS
4/:X  d   eUR                  S	5        [        U5      S:X  d   eU" U5      n	U	(       a   eg)z:Ensure overlapping terms can be removed from PhraseMatcher)Onlysaveoutthebinarydataforrw   
individual
componentsrm   r   rx   ry   )0r;   r}         r;      r$   r   N)r   r   itemsr   r%   remover   )
r'   r    r(   termsr   match_idtermr)   new_matches
no_matchess
             r   test_issue10643r   ~   s   
 lE
h
$C(,(F!34E H%G++-Hf% ( clG((-q!4x7G7G7LaQR6STTTTNN3w<1#,K,,S11a89999NN3w<1J>zr   c                    [        U / SQS9n[        U SS/S9n[        U 5      nUR                  SU/5        [        U" U5      5      S:X  d   e[        U S/S9n[        U 5      nUR                  SU/5        [        U" U5      5      S:X  d   e[        U SS/S9n[        U 5      nUR                  S	U/5        [        U" U5      5      S:X  d   e[        U S
/S9n[        U 5      nUR                  SU/5        [        U" U5      5      S:X  d   e[        U SS
/S9n[        U 5      nUR                  SU/5        [        U" U5      5      S:X  d   eg )Nr/   rd   GoogleNowbestr   r   r   COMPANYr$   r/   rd   ILIKEr   BESTNOWBESTr   r   r   r   )r'   r(   rJ   r   s       r   test_matcher_phrase_matcherr      sQ   
hD
EC(8U"34GH%GKK	G9%ws|!!!(3%(GH%GKKgYws|!!!(3-0GH%GKK'#ws|!!!(6(+GH%GKK	"ws|!!!(5&/2GH%GKK	G9%ws|!!!r   c                     [        U 5      n[        U5      S:X  d   eUR                  S[        U S/S9/5        [        U5      S:X  d   eUR                  S[        U S/S9/5        [        U5      S:X  d   eg )	Nr   r:   testr   r$   r   test2r
   )r   r   r   r   r'   r   s     r   test_phrase_matcher_lengthr      sv    H%Gw<1KKXfX678w<1KK#hwi89:w<1r   c                 r    [        U 5      nUR                  S[        U S/S9/5        SU;   d   eSU;  d   eg )Nr:   r   r   r   )r   r   r   r   s     r   test_phrase_matcher_containsr      sB    H%GKKXfX678W'!!!r   c                 ^   [        U SS/S9n[        U S/S9[        U SS/S9/n[        U 5      nUR                  " SS /UQ76   [        U" U5      5      S:X  d   e[        U 5      n[	        5       nUR                  " SU/UQ76   [        U" U5      5      S:X  d   eUR
                  S:X  d   e[        U 5      nUR                  SU5        [        U" U5      5      S:X  d   e[        U 5      n[	        5       nUR                  SX$S	9  [        U" U5      5      S:X  d   eUR
                  S:X  d   eg )
Nr   r   r   OLD_APIr
   OLD_API_CALLBACKNEW_APINEW_API_CALLBACKon_match)r   r   r   r   r   
call_count)r'   r(   rY   r   r   s        r   test_phrase_matcher_add_new_apir      s0   
hsCj
)CHSE*Cc
,KLHH%GKK	4+(+ws|!!!H%GvHKK"H8x8ws|!!!!###H%GKK	8$ws|!!!H%GvHKK"HK@ws|!!!!###r   c                 `   [        U 5      nUR                  S[        U S/S9/5        UR                  S[        U S/S9/5        UR                  S[        U S/S9/5        UR                  S[        U S/S9/5        [        U / SQS9nSU;   d   eSU;  d   e[        U" U5      5      S:X  d   eg )Nr:   rd   r   r   r   r$   )r   r   r   r   r'   r   r(   s      r    test_phrase_matcher_repeated_addr      s    H%GKKXfX678KKXfX678KKXfX678KKXfX678
hD
ECW'!!!ws|!!!r   c                    [        U 5      nUR                  S[        U S/S9/5        UR                  S[        U S/S9/5        [        U / SQS9nSU;   d   eSU;   d   eSU;  d   e[        U" U5      5      S:X  d   eUR	                  S5        SU;  d   eSU;   d   eSU;  d   e[        U" U5      5      S	:X  d   eUR	                  S5        SU;  d   eSU;  d   eSU;  d   e[        U" U5      5      S
:X  d   e[
        R                  " [        5         UR	                  S5        S S S 5        SU;  d   eSU;  d   eSU;  d   e[        U" U5      5      S
:X  d   eg ! , (       d  f       N>= f)Nr   rd   r   r   r   r   TEST3r
   r$   r   )r   r   r   r   r   pytestraisesKeyErrorr   s      r   test_phrase_matcher_remover      s   H%GKK#hvh789KK#hvh789
hD
ECgg'!!!ws|!!!NN7'!!!g'!!!ws|!!!NN7'!!!'!!!'!!!ws|!!!	x	 w 
!'!!!'!!!'!!!ws|!!! 
!	 s   E##
E1c                 H   [        U 5      nUR                  S[        U S/S9/5        UR                  S[        U S/S9/5        [        U / SQS9nSU;   d   e[        U5      S:X  d   e[        U" U5      5      S:X  d   eUR	                  S5        SU;  d   e[        U5      S:X  d   e[        U" U5      5      S:X  d   eU" U5      S   S   U R
                  S   :X  d   eUR	                  S5        SU;  d   e[        U5      S:X  d   e[        U" U5      5      S:X  d   eg )	Nr:   rd   r   r   r   r
   r$   r   )r   r   r   r   r   r%   r   s      r   +test_phrase_matcher_overlapping_with_remover     s0   H%GKKXfX678KK#hvh789
hD
ECWw<1ws|!!!NN6   w<1ws|!!!3<?1!1!1'!::::NN7'!!!w<1ws|!!!r   c                    / SQn/ SQn/ SQn/ SQn[        XUS9n[        U SS9nUR                  SU/5        [        XUS9nU" U5      n[        U5      S	:X  d   eUS
   u  pnXR                  S   :X  d   eU
S:X  d   eUS:X  d   eg )Nr/   rd   catsPRONVERBNOUN)Yesrj   youhatedogsverymuch)INTJPUNCTr   r   r   ADVr   r    posPOSr8   r:   r$   r   r
   r   r   r   r   r   r%   )r'   words1pos1words2pos2rJ   r   r(   r)   r   startends               r    test_phrase_matcher_string_attrsr     s    "F#D@FBD(d3GH51GKK	"
h$
/CclGw<1"1:HS''////A::!8O8r   c                     / SQn/ SQn/ SQn/ SQn[        XUS9n[        U SS9nUR                  SU/5        [        XUS9nU" U5      n[        U5      S	:X  d   eg
)zATest that token with the control codes as ORTH are *not* matched.r   r   )zmatcher:POS-PRONzmatcher:POS-VERBzmatcher:POS-NOUN)Xr   r   r   r   r8   r:   r   Nr   )	r'   r   r   r   r   rJ   r   r(   r)   s	            r   )test_phrase_matcher_string_attrs_negativer   .  se    "F#DIFD(d3GH51GKK	"
h$
/CclGw<1r   c                 T   / SQn/ SQn[        XS9n[        U SS9nUR                  SU/5        [        XS9nU" U5      n[        U5      S:X  d   eUS   u  pxn	US	   u  pnXpR                  S   :X  d   eXR                  S   :X  d   eUS:X  d   eU	S
:X  d   eUS
:X  d   eUS:X  d   eg )N)Helloworld!)Noproblemrj   hesaidrm   r   IS_PUNCTr8   r:   r
   r   r$      r   r   )r'   r   r   rJ   r   r(   r)   	match_id1start1end1	match_id2start2end2s                r   test_phrase_matcher_bool_attrsr   <  s    $F6F()GH:6GKK	"
h
%CclGw<1%ajIt%ajIt((0000((0000Q;;199Q;;199r   c                 P   [        U S/S9nSUS   l        [        U S/S9nSUS   l        SUS   l        US   R	                  S5        [        U S/S9n[        U SS	9n[        R                  " [        5         UR                  S
U/5        S S S 5        [        R                  " [        5         UR                  SU/5        S S S 5        [        R                  " 5          [        R                  " S5        UR                  SU/5        S S S 5        [        U SSS9n[        R                  " 5          [        R                  " S5        UR                  SU/5        S S S 5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTestr   ROOTr   TAGr   Feat=ValT)validater   r   errorr   r   )r9   r   TEST4)r   dep_tag_pos_	set_morphr   r   warnsUserWarningr   warningscatch_warningssimplefilter)r'   doc1doc2doc3r   s        r   test_phrase_matcher_validationr   O  s>   xx(DDGLxx(DDGLDGLGj!xx(DHt4G	k	"GdV$ 
#	k	"GdV$ 
#		 	 	"g&GdV$ 
# H54@G		 	 	"g&GdV$ 
#	" 
#	"	"	"	"	" 
#	"s0   8E$.E5*F1*F$
E25
F
F
F%c                     [         R                  " [        5         [        U SS9  S S S 5        g ! , (       d  f       g = f)NUNSUPPORTEDr8   )r   r   
ValueErrorr   )r'   s    r   test_attr_validationr   e  s$    	z	"h]3 
#	"	"s   /
=c                    [        U S/S9nSUS   l        [        U S/S9nSUS   l        SUS   l        US   R	                  S5        SUS   l        [        U S/S9n[        U S	S
9nUR                  SU/5        [        R                  " [        5         UR                  SU/5        S S S 5        [        R                  " [        5         UR                  SU/5        S S S 5        S H  n[        XS
9nUR                  SU/5        [        R                  " [        5         UR                  SU/5        S S S 5        [        R                  " [        5         UR                  SU/5        S S S 5        M     [        U SS
9nUR                  SU/5        [        U SS
9nUR                  SU/5        g ! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       GM  = f)Nr   r   r   r   r   r   r   LEMMADEPr8   r   r   r   )r   r   r   ORTHTEXT)r   r   r   r   r   lemma_r   r   r   r   r   )r'   r   r   r   r   r9   s         r   test_attr_pipeline_checksr   j  s   xx(DDGLxx(DDGLDGLGj!DGNxx(DH51GKK$ 	z	"GdV$ 
#	z	"GdV$ 
# (4GdV$]]:&KK$( ']]:&KK$( '& ( H62GKK$ H62GKK$ ! 
#	"	"	" '&&&s0   F3G"GG'3
G
G
G$	'
G7	c                     [        5       n[        U / SQS9n[        U SS/S9n[        U 5      nUR                  SU/US9  U" U5      nUR	                  XBSU5        g )Nr   r   r   r   r   r   r   )r   r   r   r   assert_called_once_with)r'   mockr(   rJ   r   r)   s         r   test_phrase_matcher_callbackr    s`    6D
hD
EC(8U"34GH%GKK	G9tK4clG  q':r   c                     [        U 5      n[        U S/S9n[        U SS/S9n[        U / SQS9n[        U / SQS9nUR                  SX#XE/5        UR                  S5        g )Nthisr   is)r  r  r   )r  r  r   wordTHIS)r   r   r   r   )r'   r   r>   r@   pattern3pattern4s         r   /test_phrase_matcher_remove_overlapping_patternsr	    sc    H%G8F8,H8FD>2H8#67H8#>?HKKX@ANN6r   c                     [        U 5      n[        U SS/S9n[        R                  " [        5         UR                  SU5        S S S 5        g ! , (       d  f       g = f)Nhellor   r   r:   )r   r   r   r   r   r   )r'   r   rJ   s      r   test_phrase_matcher_basic_checkr    sC    H%G(7G"45G	z	"FG$ 
#	"	"s   A
Ac                    [        U 5      n[        5       nUR                  S[        U S/S9/5        UR                  S[        U S/S9/US9  [        U / SQS9n[	        U5      S:X  d   e[
        R                  " U5      n[
        R                  " U5      nU" U5      nU" U5      n[	        U5      [	        U5      :X  d   eXg:X  d   eUR                  5       S	   u  pp[        U
R                  S5      [        5      (       d   eg )
Nr:   r   r   r   r   r   )thesearetests:r   r   r
   r$   )r   r   r   r   r   srslypickle_dumpspickle_loads
__reduce__rC   get)r'   r   r   r(   r   matcher_unpickledr)   matches_unpickledr   docs	callbacksr9   s               r   test_phrase_matcher_pickler    s    H%G6DKKXfX678KK#hwi89DKI
hM
NCw<17#A**1- clG)#.w<301111''' &7%A%A%CA%F"U)immG,d3333r   c                    [        U 5      nUR                  S[        U SS/S9/5        UR                  S[        U S/S9/5        [        U / SQS9nU" USS	9n[        U5      S
:X  d   e[	        US   [
        5      (       d   eUS   R                  S:X  d   eUS   R                  S:X  d   e[	        US   [
        5      (       d   eUS   R                  S:X  d   eUS   R                  S:X  d   eg)zTest the new as_spans=True API.r   r  r   r   r!   r   )z...r  r   r  r  r   r   T)as_spansr
   r   zhello worldr$   N)r   r   r   r   rC   r   rT   rU   )r'   r   r(   r)   s       r   test_phrase_matcher_as_spansr    s    H%GKKc(7G*<=>?KKc(6(345
hR
SCcD)Gw<1gaj$''''1:??m+++1:###gaj$''''1:??f$$$1:###r   c                 |   [        U 5      nUR                  S[        U S/S9/5        [        U SS/S9n[        R                  " [
        5       nUR                  U/5       H  nM     UR                  (       d   eS[        UR                  S   R                  5      ;   d   e S S S 5        g ! , (       d  f       g = f)Nr:   helllor   r  r   z
spaCy v3.0r   )
r   r   r   r   r   DeprecationWarningpipeliststrmessage)r'   r   r(   recordr3   s        r   test_phrase_matcher_deprecatedr'    s    H%GKKXhZ89:
hw0
1C	(	)Vse$A %{{{s6;;q>#9#9::::	 
*	)	)s   AB--
B;r9   
SENT_STARTIS_SENT_STARTc                     [        XS9ng )Nr8   )r   )r'   r9   r3   s      r   test_phrase_matcher_sent_startr+    s    h*Ar   c                     / SQn[        XS9nUSS n[        U / SQS9n[        U 5      nUR                  SU/5        U" U5      nU" U5      n[        U5      S:X  d   e[        U5      S:X  d   eg)z7Ensure that PhraseMatcher accepts Span and Doc as inputrc   r   Nrn   ro   rp   r$   r   r'   r    r(   rq   rJ   r   matches_docmatches_spans           r   test_span_in_phrasematcherr0    s~     kE
h
$Cr7D(":;GH%GKK'##,K4=L{q   |!!!r   c                     / SQn[        XS9nUSS n[        U / SQS9n[        U 5      nUR                  SU/5        U" U5      nU" U5      n[        U5      S:X  d   e[        U5      S:X  d   eg	)
zREnsure that PhraseMatcher only returns matches in input Span and not in entire Doc)r/   rd   re   rf   rg   r1   rh   ri   rj   re   rf   rg   r1   rh   matchersz,andre   rf   rg   
everywhererm   r   	      ro   rp   r   r$   Nr   r-  s           r    test_span_v_doc_in_phrasematcherr6    s    E h
$Cq9D(":;GH%GKK'##,K4=L{q   |!!!r   )2r   r   r  r   r   spacy.lang.enr   spacy.matcherr   r   spacy.tokensr   r   spacy.vocabr	   utilr   markissuer   r+   r5   rA   rD   ra   rr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r	  r  r  r  r'  parametrizer+  r0  r6   r   r   <module>r@     s       ! 0 "   4  4+ + 4   4 ( 4, , 4 , 4  5 :":"$,
""6","&%,4
!>;%4.$ ; ,!@A+ B+" "r   